---
library_name: transformers
license: apache-2.0
---

# BEE-spoke-data/tiny-random-MPNetForMaskedLM

A test model and matching test tokenizer (with a smaller vocabulary of 1,125 tokens)

```
MPNetForMaskedLM(
  (mpnet): MPNetModel(
    (embeddings): MPNetEmbeddings(
      (word_embeddings): Embedding(1125, 64, padding_idx=1)
      (position_embeddings): Embedding(512, 64, padding_idx=1)
      (LayerNorm): LayerNorm((64,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): MPNetEncoder(
      (layer): ModuleList(
        (0-4): 5 x MPNetLayer(
          (attention): MPNetAttention(
            (attn): MPNetSelfAttention(
              (q): Linear(in_features=64, out_features=64, bias=True)
              (k): Linear(in_features=64, out_features=64, bias=True)
              (v): Linear(in_features=64, out_features=64, bias=True)
              (o): Linear(in_features=64, out_features=64, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (LayerNorm): LayerNorm((64,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (intermediate): MPNetIntermediate(
            (dense): Linear(in_features=64, out_features=64, bias=True)
            (intermediate_act_fn): GELUActivation()
          )
          (output): MPNetOutput(
            (dense): Linear(in_features=64, out_features=64, bias=True)
            (LayerNorm): LayerNorm((64,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (relative_attention_bias): Embedding(32, 4)
    )
  )
  (lm_head): MPNetLMHead(
    (dense): Linear(in_features=64, out_features=64, bias=True)
    (layer_norm): LayerNorm((64,), eps=1e-12, elementwise_affine=True)
    (decoder): Linear(in_features=64, out_features=1125, bias=True)
  )
)
```
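
Since the weights are randomly initialized, the checkpoint is mainly useful for pipeline smoke tests. A minimal loading sketch using the standard `transformers` Auto classes (outputs are untrained and should not be interpreted):

```python
from transformers import AutoModelForMaskedLM, AutoTokenizer

model_id = "BEE-spoke-data/tiny-random-MPNetForMaskedLM"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForMaskedLM.from_pretrained(model_id)

# Forward pass on a masked input; the weights are random, so the
# predictions are meaningless -- this only verifies shapes and wiring.
text = f"Paris is the {tokenizer.mask_token} of France."
inputs = tokenizer(text, return_tensors="pt")
logits = model(**inputs).logits
print(logits.shape)  # torch.Size([1, seq_len, 1125]) -- vocab size matches the decoder above
```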