balhafni committed (verified)
Commit 2cdd961 · 1 Parent(s): fb5dad9

zaebuc pnx pretrained model
config.json ADDED
@@ -0,0 +1,131 @@
+ {
+ "_name_or_path": "/scratch/ba63/BERT_models/bert-base-arabertv02",
+ "architectures": [
+ "BertForTokenClassificationSingleLabel"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "A_[\"]K*",
+ "1": "A_[-]K*",
+ "2": "I_[ . ]K*",
+ "3": "I_[ : ]K*",
+ "4": "I_[ \u060c ]K*",
+ "5": "I_[ \u061b ]K*",
+ "6": "I_[ \u061f ]K*",
+ "7": "I_[.]K*",
+ "8": "K*",
+ "9": "K*A_[!]",
+ "10": "K*A_[\" .]",
+ "11": "K*A_[\" \u060c]",
+ "12": "K*A_[\"]",
+ "13": "K*A_[(]",
+ "14": "K*A_[) .]",
+ "15": "K*A_[)]",
+ "16": "K*A_[,]",
+ "17": "K*A_[- .]",
+ "18": "K*A_[- : \"]",
+ "19": "K*A_[-]",
+ "20": "K*A_[. \"]",
+ "21": "K*A_[. -]",
+ "22": "K*A_[. . .]",
+ "23": "K*A_[.]",
+ "24": "K*A_[: \"]",
+ "25": "K*A_[:]",
+ "26": "K*A_[\u060c]",
+ "27": "K*A_[\u061b]",
+ "28": "K*A_[\u061f .]",
+ "29": "K*A_[\u061f]",
+ "30": "K*I_[ . ]K",
+ "31": "K*I_[ \u060c ]K",
+ "32": "R_[!]",
+ "33": "R_[\"]",
+ "34": "R_[(]",
+ "35": "R_[)]",
+ "36": "R_[,]",
+ "37": "R_[-]",
+ "38": "R_[.]",
+ "39": "R_[:]",
+ "40": "R_[?]",
+ "41": "R_[{]",
+ "42": "R_[}]",
+ "43": "R_[\u060c]",
+ "44": "R_[\u061b]",
+ "45": "R_[\u061f]",
+ "46": "R_[\"]A_[.]",
+ "47": "R_[\"]A_[\u060c]",
+ "48": "R_[.]A_[\"]",
+ "49": "R_[.]A_[.]",
+ "50": "R_[:]A_[\"]"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "A_[\"]K*": 0,
+ "A_[-]K*": 1,
+ "I_[ . ]K*": 2,
+ "I_[ : ]K*": 3,
+ "I_[ \u060c ]K*": 4,
+ "I_[ \u061b ]K*": 5,
+ "I_[ \u061f ]K*": 6,
+ "I_[.]K*": 7,
+ "K*": 8,
+ "K*A_[!]": 9,
+ "K*A_[\" .]": 10,
+ "K*A_[\" \u060c]": 11,
+ "K*A_[\"]": 12,
+ "K*A_[(]": 13,
+ "K*A_[) .]": 14,
+ "K*A_[)]": 15,
+ "K*A_[,]": 16,
+ "K*A_[- .]": 17,
+ "K*A_[- : \"]": 18,
+ "K*A_[-]": 19,
+ "K*A_[. \"]": 20,
+ "K*A_[. -]": 21,
+ "K*A_[. . .]": 22,
+ "K*A_[.]": 23,
+ "K*A_[: \"]": 24,
+ "K*A_[:]": 25,
+ "K*A_[\u060c]": 26,
+ "K*A_[\u061b]": 27,
+ "K*A_[\u061f .]": 28,
+ "K*A_[\u061f]": 29,
+ "K*I_[ . ]K": 30,
+ "K*I_[ \u060c ]K": 31,
+ "R_[!]": 32,
+ "R_[\"]": 33,
+ "R_[\"]A_[.]": 46,
+ "R_[\"]A_[\u060c]": 47,
+ "R_[(]": 34,
+ "R_[)]": 35,
+ "R_[,]": 36,
+ "R_[-]": 37,
+ "R_[.]": 38,
+ "R_[.]A_[\"]": 48,
+ "R_[.]A_[.]": 49,
+ "R_[:]": 39,
+ "R_[:]A_[\"]": 50,
+ "R_[?]": 40,
+ "R_[{]": 41,
+ "R_[}]": 42,
+ "R_[\u060c]": 43,
+ "R_[\u061b]": 44,
+ "R_[\u061f]": 45
+ },
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "torch_dtype": "float32",
+ "transformers_version": "4.30.0",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 64000
+ }
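
This config describes a standard BERT-base encoder over the AraBERTv02 vocabulary (64,000 word pieces) with a 51-way token-classification head; the labels appear to encode per-token punctuation edit operations (e.g. "K*" for keeping a token as is, "K*A_[.]" for keeping it and appending a period). Below is a minimal sketch for inspecting the label inventory, assuming a local clone of this repo at a placeholder path; note that the declared architecture, BertForTokenClassificationSingleLabel, is a custom class that does not ship with transformers, so loading the weights with AutoModelForTokenClassification would only approximate the original head.

from transformers import AutoConfig

# "./zaebuc-pnx-model" is a placeholder for a local clone of this repository.
config = AutoConfig.from_pretrained("./zaebuc-pnx-model")

print(config.num_labels)      # 51 punctuation-edit labels
print(config.id2label[8])     # "K*"      (presumably: keep the token unchanged)
print(config.id2label[23])    # "K*A_[.]" (presumably: keep the token, append ".")
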
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:195696eaf09a5b92d8141f473e0e3a0d5a710649114b3180afb25cf76cdaa4f3
+ size 538638321
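
pytorch_model.bin is committed as a Git LFS pointer, so the roughly 539 MB weight file itself is addressed by the SHA-256 digest above. A small sketch, assuming the real weights have already been pulled from LFS, for checking a local copy against that digest:

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file in 1 MiB chunks to avoid loading ~539 MB into memory at once.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "195696eaf09a5b92d8141f473e0e3a0d5a710649114b3180afb25cf76cdaa4f3"
# Path assumes the LFS object was fetched and is no longer just the pointer file.
assert sha256_of("pytorch_model.bin") == expected
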
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "cls_token": "[CLS]",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
+ {
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "[CLS]",
+ "do_basic_tokenize": true,
+ "do_lower_case": false,
+ "mask_token": "[MASK]",
+ "max_len": 512,
+ "model_max_length": 512,
+ "never_split": [
+ "[بريد]",
+ "[مستخدم]",
+ "[رابط]"
+ ],
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "strip_accents": null,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "BertTokenizer",
+ "unk_token": "[UNK]"
+ }
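
The tokenizer config lists the AraBERT placeholder tokens [بريد] (email), [مستخدم] (user), and [رابط] (link) under never_split, so basic tokenization keeps them whole instead of breaking them on the brackets. A quick sketch of that behavior, again using a placeholder local path; the slow BertTokenizer is used deliberately, since never_split is a feature of the Python tokenizer rather than the fast one:

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("./zaebuc-pnx-model")  # placeholder path

# The never_split entries should come through as single tokens, not "[", "رابط", "]".
print(tokenizer.tokenize("انظر [رابط] للمزيد"))
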
trainer_state.json ADDED
@@ -0,0 +1,22 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.764525993883792,
+ "global_step": 500,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.76,
+ "learning_rate": 4.745158002038736e-05,
+ "loss": 0.1738,
+ "step": 500
+ }
+ ],
+ "max_steps": 9810,
+ "num_train_epochs": 15,
+ "total_flos": 1718607550347072.0,
+ "trial_name": null,
+ "trial_params": null
+ }
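
This intermediate trainer state is internally consistent: 9810 max steps over 15 epochs is 654 steps per epoch, so step 500 lands at epoch 500/654 ≈ 0.7645, and the logged learning rate matches a linear decay from a 5e-5 peak (the peak value is an inference from these numbers, not something stored in the file). The arithmetic:

# Consistency check of trainer_state.json (the 5e-5 peak LR is inferred, not recorded).
max_steps, num_train_epochs, step = 9810, 15, 500
steps_per_epoch = max_steps / num_train_epochs        # 654.0
print(step / steps_per_epoch)                         # 0.764525993883792 -> matches "epoch"
print(5e-5 * (max_steps - step) / max_steps)          # 4.745158002038736e-05 -> matches "learning_rate"
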
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1a07b231dba304eb522c36ff96a50c5f3d416a69fdd5c4abe5edcba7dabe657b
+ size 4143
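
training_args.bin is a small pickled transformers TrainingArguments object rather than a tensor file. A sketch for inspecting it, assuming a compatible transformers version is installed; recent torch releases need weights_only=False here because the file is a generic pickle:

import torch

# Unpickling needs transformers importable, since the stored object is a TrainingArguments instance.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
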
vocab.txt ADDED
The diff for this file is too large to render. See raw diff