|
--- |
|
library_name: transformers |
|
license: apache-2.0 |
|
--- |
|
|
|
# BEE-spoke-data/tiny-random-MPNetForMaskedLM |
|
|
|
A tiny random-weight test model with a matching test tokenizer (which has a smaller vocabulary of 1125 tokens). Intended for unit tests and pipeline debugging, not for inference quality.
|
|
|
```text
|
MPNetForMaskedLM( |
|
(mpnet): MPNetModel( |
|
(embeddings): MPNetEmbeddings( |
|
(word_embeddings): Embedding(1125, 64, padding_idx=1) |
|
(position_embeddings): Embedding(512, 64, padding_idx=1) |
|
(LayerNorm): LayerNorm((64,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(encoder): MPNetEncoder( |
|
(layer): ModuleList( |
|
(0-4): 5 x MPNetLayer( |
|
(attention): MPNetAttention( |
|
(attn): MPNetSelfAttention( |
|
(q): Linear(in_features=64, out_features=64, bias=True) |
|
(k): Linear(in_features=64, out_features=64, bias=True) |
|
(v): Linear(in_features=64, out_features=64, bias=True) |
|
(o): Linear(in_features=64, out_features=64, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(LayerNorm): LayerNorm((64,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(intermediate): MPNetIntermediate( |
|
(dense): Linear(in_features=64, out_features=64, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): MPNetOutput( |
|
(dense): Linear(in_features=64, out_features=64, bias=True) |
|
(LayerNorm): LayerNorm((64,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
) |
|
(relative_attention_bias): Embedding(32, 4) |
|
) |
|
) |
|
(lm_head): MPNetLMHead( |
|
(dense): Linear(in_features=64, out_features=64, bias=True) |
|
(layer_norm): LayerNorm((64,), eps=1e-12, elementwise_affine=True) |
|
(decoder): Linear(in_features=64, out_features=1125, bias=True) |
|
) |
|
) |
|
``` |