sagawa commited on
Commit
d984041
·
1 Parent(s): edd4fd8

Delete tokenizer.json

Browse files
Files changed (1) hide show
  1. tokenizer.json +0 -1552
tokenizer.json DELETED
@@ -1,1552 +0,0 @@
1
- {
2
- "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
5
- "added_tokens": [
6
- {
7
- "id": 0,
8
- "content": "<pad>",
9
- "single_word": false,
10
- "lstrip": false,
11
- "rstrip": false,
12
- "normalized": false,
13
- "special": true
14
- },
15
- {
16
- "id": 1,
17
- "content": "</s>",
18
- "single_word": false,
19
- "lstrip": false,
20
- "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
- },
24
- {
25
- "id": 2,
26
- "content": "<unk>",
27
- "single_word": false,
28
- "lstrip": false,
29
- "rstrip": false,
30
- "normalized": false,
31
- "special": true
32
- },
33
- {
34
- "id": 121,
35
- "content": "<extra_id_0>",
36
- "single_word": false,
37
- "lstrip": false,
38
- "rstrip": false,
39
- "normalized": false,
40
- "special": true
41
- },
42
- {
43
- "id": 122,
44
- "content": "<extra_id_1>",
45
- "single_word": false,
46
- "lstrip": false,
47
- "rstrip": false,
48
- "normalized": false,
49
- "special": true
50
- },
51
- {
52
- "id": 123,
53
- "content": "<extra_id_2>",
54
- "single_word": false,
55
- "lstrip": false,
56
- "rstrip": false,
57
- "normalized": false,
58
- "special": true
59
- },
60
- {
61
- "id": 124,
62
- "content": "<extra_id_3>",
63
- "single_word": false,
64
- "lstrip": false,
65
- "rstrip": false,
66
- "normalized": false,
67
- "special": true
68
- },
69
- {
70
- "id": 125,
71
- "content": "<extra_id_4>",
72
- "single_word": false,
73
- "lstrip": false,
74
- "rstrip": false,
75
- "normalized": false,
76
- "special": true
77
- },
78
- {
79
- "id": 126,
80
- "content": "<extra_id_5>",
81
- "single_word": false,
82
- "lstrip": false,
83
- "rstrip": false,
84
- "normalized": false,
85
- "special": true
86
- },
87
- {
88
- "id": 127,
89
- "content": "<extra_id_6>",
90
- "single_word": false,
91
- "lstrip": false,
92
- "rstrip": false,
93
- "normalized": false,
94
- "special": true
95
- },
96
- {
97
- "id": 128,
98
- "content": "<extra_id_7>",
99
- "single_word": false,
100
- "lstrip": false,
101
- "rstrip": false,
102
- "normalized": false,
103
- "special": true
104
- },
105
- {
106
- "id": 129,
107
- "content": "<extra_id_8>",
108
- "single_word": false,
109
- "lstrip": false,
110
- "rstrip": false,
111
- "normalized": false,
112
- "special": true
113
- },
114
- {
115
- "id": 130,
116
- "content": "<extra_id_9>",
117
- "single_word": false,
118
- "lstrip": false,
119
- "rstrip": false,
120
- "normalized": false,
121
- "special": true
122
- },
123
- {
124
- "id": 131,
125
- "content": "<extra_id_10>",
126
- "single_word": false,
127
- "lstrip": false,
128
- "rstrip": false,
129
- "normalized": false,
130
- "special": true
131
- },
132
- {
133
- "id": 132,
134
- "content": "<extra_id_11>",
135
- "single_word": false,
136
- "lstrip": false,
137
- "rstrip": false,
138
- "normalized": false,
139
- "special": true
140
- },
141
- {
142
- "id": 133,
143
- "content": "<extra_id_12>",
144
- "single_word": false,
145
- "lstrip": false,
146
- "rstrip": false,
147
- "normalized": false,
148
- "special": true
149
- },
150
- {
151
- "id": 134,
152
- "content": "<extra_id_13>",
153
- "single_word": false,
154
- "lstrip": false,
155
- "rstrip": false,
156
- "normalized": false,
157
- "special": true
158
- },
159
- {
160
- "id": 135,
161
- "content": "<extra_id_14>",
162
- "single_word": false,
163
- "lstrip": false,
164
- "rstrip": false,
165
- "normalized": false,
166
- "special": true
167
- },
168
- {
169
- "id": 136,
170
- "content": "<extra_id_15>",
171
- "single_word": false,
172
- "lstrip": false,
173
- "rstrip": false,
174
- "normalized": false,
175
- "special": true
176
- },
177
- {
178
- "id": 137,
179
- "content": "<extra_id_16>",
180
- "single_word": false,
181
- "lstrip": false,
182
- "rstrip": false,
183
- "normalized": false,
184
- "special": true
185
- },
186
- {
187
- "id": 138,
188
- "content": "<extra_id_17>",
189
- "single_word": false,
190
- "lstrip": false,
191
- "rstrip": false,
192
- "normalized": false,
193
- "special": true
194
- },
195
- {
196
- "id": 139,
197
- "content": "<extra_id_18>",
198
- "single_word": false,
199
- "lstrip": false,
200
- "rstrip": false,
201
- "normalized": false,
202
- "special": true
203
- },
204
- {
205
- "id": 140,
206
- "content": "<extra_id_19>",
207
- "single_word": false,
208
- "lstrip": false,
209
- "rstrip": false,
210
- "normalized": false,
211
- "special": true
212
- },
213
- {
214
- "id": 141,
215
- "content": "<extra_id_20>",
216
- "single_word": false,
217
- "lstrip": false,
218
- "rstrip": false,
219
- "normalized": false,
220
- "special": true
221
- },
222
- {
223
- "id": 142,
224
- "content": "<extra_id_21>",
225
- "single_word": false,
226
- "lstrip": false,
227
- "rstrip": false,
228
- "normalized": false,
229
- "special": true
230
- },
231
- {
232
- "id": 143,
233
- "content": "<extra_id_22>",
234
- "single_word": false,
235
- "lstrip": false,
236
- "rstrip": false,
237
- "normalized": false,
238
- "special": true
239
- },
240
- {
241
- "id": 144,
242
- "content": "<extra_id_23>",
243
- "single_word": false,
244
- "lstrip": false,
245
- "rstrip": false,
246
- "normalized": false,
247
- "special": true
248
- },
249
- {
250
- "id": 145,
251
- "content": "<extra_id_24>",
252
- "single_word": false,
253
- "lstrip": false,
254
- "rstrip": false,
255
- "normalized": false,
256
- "special": true
257
- },
258
- {
259
- "id": 146,
260
- "content": "<extra_id_25>",
261
- "single_word": false,
262
- "lstrip": false,
263
- "rstrip": false,
264
- "normalized": false,
265
- "special": true
266
- },
267
- {
268
- "id": 147,
269
- "content": "<extra_id_26>",
270
- "single_word": false,
271
- "lstrip": false,
272
- "rstrip": false,
273
- "normalized": false,
274
- "special": true
275
- },
276
- {
277
- "id": 148,
278
- "content": "<extra_id_27>",
279
- "single_word": false,
280
- "lstrip": false,
281
- "rstrip": false,
282
- "normalized": false,
283
- "special": true
284
- },
285
- {
286
- "id": 149,
287
- "content": "<extra_id_28>",
288
- "single_word": false,
289
- "lstrip": false,
290
- "rstrip": false,
291
- "normalized": false,
292
- "special": true
293
- },
294
- {
295
- "id": 150,
296
- "content": "<extra_id_29>",
297
- "single_word": false,
298
- "lstrip": false,
299
- "rstrip": false,
300
- "normalized": false,
301
- "special": true
302
- },
303
- {
304
- "id": 151,
305
- "content": "<extra_id_30>",
306
- "single_word": false,
307
- "lstrip": false,
308
- "rstrip": false,
309
- "normalized": false,
310
- "special": true
311
- },
312
- {
313
- "id": 152,
314
- "content": "<extra_id_31>",
315
- "single_word": false,
316
- "lstrip": false,
317
- "rstrip": false,
318
- "normalized": false,
319
- "special": true
320
- },
321
- {
322
- "id": 153,
323
- "content": "<extra_id_32>",
324
- "single_word": false,
325
- "lstrip": false,
326
- "rstrip": false,
327
- "normalized": false,
328
- "special": true
329
- },
330
- {
331
- "id": 154,
332
- "content": "<extra_id_33>",
333
- "single_word": false,
334
- "lstrip": false,
335
- "rstrip": false,
336
- "normalized": false,
337
- "special": true
338
- },
339
- {
340
- "id": 155,
341
- "content": "<extra_id_34>",
342
- "single_word": false,
343
- "lstrip": false,
344
- "rstrip": false,
345
- "normalized": false,
346
- "special": true
347
- },
348
- {
349
- "id": 156,
350
- "content": "<extra_id_35>",
351
- "single_word": false,
352
- "lstrip": false,
353
- "rstrip": false,
354
- "normalized": false,
355
- "special": true
356
- },
357
- {
358
- "id": 157,
359
- "content": "<extra_id_36>",
360
- "single_word": false,
361
- "lstrip": false,
362
- "rstrip": false,
363
- "normalized": false,
364
- "special": true
365
- },
366
- {
367
- "id": 158,
368
- "content": "<extra_id_37>",
369
- "single_word": false,
370
- "lstrip": false,
371
- "rstrip": false,
372
- "normalized": false,
373
- "special": true
374
- },
375
- {
376
- "id": 159,
377
- "content": "<extra_id_38>",
378
- "single_word": false,
379
- "lstrip": false,
380
- "rstrip": false,
381
- "normalized": false,
382
- "special": true
383
- },
384
- {
385
- "id": 160,
386
- "content": "<extra_id_39>",
387
- "single_word": false,
388
- "lstrip": false,
389
- "rstrip": false,
390
- "normalized": false,
391
- "special": true
392
- },
393
- {
394
- "id": 161,
395
- "content": "<extra_id_40>",
396
- "single_word": false,
397
- "lstrip": false,
398
- "rstrip": false,
399
- "normalized": false,
400
- "special": true
401
- },
402
- {
403
- "id": 162,
404
- "content": "<extra_id_41>",
405
- "single_word": false,
406
- "lstrip": false,
407
- "rstrip": false,
408
- "normalized": false,
409
- "special": true
410
- },
411
- {
412
- "id": 163,
413
- "content": "<extra_id_42>",
414
- "single_word": false,
415
- "lstrip": false,
416
- "rstrip": false,
417
- "normalized": false,
418
- "special": true
419
- },
420
- {
421
- "id": 164,
422
- "content": "<extra_id_43>",
423
- "single_word": false,
424
- "lstrip": false,
425
- "rstrip": false,
426
- "normalized": false,
427
- "special": true
428
- },
429
- {
430
- "id": 165,
431
- "content": "<extra_id_44>",
432
- "single_word": false,
433
- "lstrip": false,
434
- "rstrip": false,
435
- "normalized": false,
436
- "special": true
437
- },
438
- {
439
- "id": 166,
440
- "content": "<extra_id_45>",
441
- "single_word": false,
442
- "lstrip": false,
443
- "rstrip": false,
444
- "normalized": false,
445
- "special": true
446
- },
447
- {
448
- "id": 167,
449
- "content": "<extra_id_46>",
450
- "single_word": false,
451
- "lstrip": false,
452
- "rstrip": false,
453
- "normalized": false,
454
- "special": true
455
- },
456
- {
457
- "id": 168,
458
- "content": "<extra_id_47>",
459
- "single_word": false,
460
- "lstrip": false,
461
- "rstrip": false,
462
- "normalized": false,
463
- "special": true
464
- },
465
- {
466
- "id": 169,
467
- "content": "<extra_id_48>",
468
- "single_word": false,
469
- "lstrip": false,
470
- "rstrip": false,
471
- "normalized": false,
472
- "special": true
473
- },
474
- {
475
- "id": 170,
476
- "content": "<extra_id_49>",
477
- "single_word": false,
478
- "lstrip": false,
479
- "rstrip": false,
480
- "normalized": false,
481
- "special": true
482
- },
483
- {
484
- "id": 171,
485
- "content": "<extra_id_50>",
486
- "single_word": false,
487
- "lstrip": false,
488
- "rstrip": false,
489
- "normalized": false,
490
- "special": true
491
- },
492
- {
493
- "id": 172,
494
- "content": "<extra_id_51>",
495
- "single_word": false,
496
- "lstrip": false,
497
- "rstrip": false,
498
- "normalized": false,
499
- "special": true
500
- },
501
- {
502
- "id": 173,
503
- "content": "<extra_id_52>",
504
- "single_word": false,
505
- "lstrip": false,
506
- "rstrip": false,
507
- "normalized": false,
508
- "special": true
509
- },
510
- {
511
- "id": 174,
512
- "content": "<extra_id_53>",
513
- "single_word": false,
514
- "lstrip": false,
515
- "rstrip": false,
516
- "normalized": false,
517
- "special": true
518
- },
519
- {
520
- "id": 175,
521
- "content": "<extra_id_54>",
522
- "single_word": false,
523
- "lstrip": false,
524
- "rstrip": false,
525
- "normalized": false,
526
- "special": true
527
- },
528
- {
529
- "id": 176,
530
- "content": "<extra_id_55>",
531
- "single_word": false,
532
- "lstrip": false,
533
- "rstrip": false,
534
- "normalized": false,
535
- "special": true
536
- },
537
- {
538
- "id": 177,
539
- "content": "<extra_id_56>",
540
- "single_word": false,
541
- "lstrip": false,
542
- "rstrip": false,
543
- "normalized": false,
544
- "special": true
545
- },
546
- {
547
- "id": 178,
548
- "content": "<extra_id_57>",
549
- "single_word": false,
550
- "lstrip": false,
551
- "rstrip": false,
552
- "normalized": false,
553
- "special": true
554
- },
555
- {
556
- "id": 179,
557
- "content": "<extra_id_58>",
558
- "single_word": false,
559
- "lstrip": false,
560
- "rstrip": false,
561
- "normalized": false,
562
- "special": true
563
- },
564
- {
565
- "id": 180,
566
- "content": "<extra_id_59>",
567
- "single_word": false,
568
- "lstrip": false,
569
- "rstrip": false,
570
- "normalized": false,
571
- "special": true
572
- },
573
- {
574
- "id": 181,
575
- "content": "<extra_id_60>",
576
- "single_word": false,
577
- "lstrip": false,
578
- "rstrip": false,
579
- "normalized": false,
580
- "special": true
581
- },
582
- {
583
- "id": 182,
584
- "content": "<extra_id_61>",
585
- "single_word": false,
586
- "lstrip": false,
587
- "rstrip": false,
588
- "normalized": false,
589
- "special": true
590
- },
591
- {
592
- "id": 183,
593
- "content": "<extra_id_62>",
594
- "single_word": false,
595
- "lstrip": false,
596
- "rstrip": false,
597
- "normalized": false,
598
- "special": true
599
- },
600
- {
601
- "id": 184,
602
- "content": "<extra_id_63>",
603
- "single_word": false,
604
- "lstrip": false,
605
- "rstrip": false,
606
- "normalized": false,
607
- "special": true
608
- },
609
- {
610
- "id": 185,
611
- "content": "<extra_id_64>",
612
- "single_word": false,
613
- "lstrip": false,
614
- "rstrip": false,
615
- "normalized": false,
616
- "special": true
617
- },
618
- {
619
- "id": 186,
620
- "content": "<extra_id_65>",
621
- "single_word": false,
622
- "lstrip": false,
623
- "rstrip": false,
624
- "normalized": false,
625
- "special": true
626
- },
627
- {
628
- "id": 187,
629
- "content": "<extra_id_66>",
630
- "single_word": false,
631
- "lstrip": false,
632
- "rstrip": false,
633
- "normalized": false,
634
- "special": true
635
- },
636
- {
637
- "id": 188,
638
- "content": "<extra_id_67>",
639
- "single_word": false,
640
- "lstrip": false,
641
- "rstrip": false,
642
- "normalized": false,
643
- "special": true
644
- },
645
- {
646
- "id": 189,
647
- "content": "<extra_id_68>",
648
- "single_word": false,
649
- "lstrip": false,
650
- "rstrip": false,
651
- "normalized": false,
652
- "special": true
653
- },
654
- {
655
- "id": 190,
656
- "content": "<extra_id_69>",
657
- "single_word": false,
658
- "lstrip": false,
659
- "rstrip": false,
660
- "normalized": false,
661
- "special": true
662
- },
663
- {
664
- "id": 191,
665
- "content": "<extra_id_70>",
666
- "single_word": false,
667
- "lstrip": false,
668
- "rstrip": false,
669
- "normalized": false,
670
- "special": true
671
- },
672
- {
673
- "id": 192,
674
- "content": "<extra_id_71>",
675
- "single_word": false,
676
- "lstrip": false,
677
- "rstrip": false,
678
- "normalized": false,
679
- "special": true
680
- },
681
- {
682
- "id": 193,
683
- "content": "<extra_id_72>",
684
- "single_word": false,
685
- "lstrip": false,
686
- "rstrip": false,
687
- "normalized": false,
688
- "special": true
689
- },
690
- {
691
- "id": 194,
692
- "content": "<extra_id_73>",
693
- "single_word": false,
694
- "lstrip": false,
695
- "rstrip": false,
696
- "normalized": false,
697
- "special": true
698
- },
699
- {
700
- "id": 195,
701
- "content": "<extra_id_74>",
702
- "single_word": false,
703
- "lstrip": false,
704
- "rstrip": false,
705
- "normalized": false,
706
- "special": true
707
- },
708
- {
709
- "id": 196,
710
- "content": "<extra_id_75>",
711
- "single_word": false,
712
- "lstrip": false,
713
- "rstrip": false,
714
- "normalized": false,
715
- "special": true
716
- },
717
- {
718
- "id": 197,
719
- "content": "<extra_id_76>",
720
- "single_word": false,
721
- "lstrip": false,
722
- "rstrip": false,
723
- "normalized": false,
724
- "special": true
725
- },
726
- {
727
- "id": 198,
728
- "content": "<extra_id_77>",
729
- "single_word": false,
730
- "lstrip": false,
731
- "rstrip": false,
732
- "normalized": false,
733
- "special": true
734
- },
735
- {
736
- "id": 199,
737
- "content": "<extra_id_78>",
738
- "single_word": false,
739
- "lstrip": false,
740
- "rstrip": false,
741
- "normalized": false,
742
- "special": true
743
- },
744
- {
745
- "id": 200,
746
- "content": "<extra_id_79>",
747
- "single_word": false,
748
- "lstrip": false,
749
- "rstrip": false,
750
- "normalized": false,
751
- "special": true
752
- },
753
- {
754
- "id": 201,
755
- "content": "<extra_id_80>",
756
- "single_word": false,
757
- "lstrip": false,
758
- "rstrip": false,
759
- "normalized": false,
760
- "special": true
761
- },
762
- {
763
- "id": 202,
764
- "content": "<extra_id_81>",
765
- "single_word": false,
766
- "lstrip": false,
767
- "rstrip": false,
768
- "normalized": false,
769
- "special": true
770
- },
771
- {
772
- "id": 203,
773
- "content": "<extra_id_82>",
774
- "single_word": false,
775
- "lstrip": false,
776
- "rstrip": false,
777
- "normalized": false,
778
- "special": true
779
- },
780
- {
781
- "id": 204,
782
- "content": "<extra_id_83>",
783
- "single_word": false,
784
- "lstrip": false,
785
- "rstrip": false,
786
- "normalized": false,
787
- "special": true
788
- },
789
- {
790
- "id": 205,
791
- "content": "<extra_id_84>",
792
- "single_word": false,
793
- "lstrip": false,
794
- "rstrip": false,
795
- "normalized": false,
796
- "special": true
797
- },
798
- {
799
- "id": 206,
800
- "content": "<extra_id_85>",
801
- "single_word": false,
802
- "lstrip": false,
803
- "rstrip": false,
804
- "normalized": false,
805
- "special": true
806
- },
807
- {
808
- "id": 207,
809
- "content": "<extra_id_86>",
810
- "single_word": false,
811
- "lstrip": false,
812
- "rstrip": false,
813
- "normalized": false,
814
- "special": true
815
- },
816
- {
817
- "id": 208,
818
- "content": "<extra_id_87>",
819
- "single_word": false,
820
- "lstrip": false,
821
- "rstrip": false,
822
- "normalized": false,
823
- "special": true
824
- },
825
- {
826
- "id": 209,
827
- "content": "<extra_id_88>",
828
- "single_word": false,
829
- "lstrip": false,
830
- "rstrip": false,
831
- "normalized": false,
832
- "special": true
833
- },
834
- {
835
- "id": 210,
836
- "content": "<extra_id_89>",
837
- "single_word": false,
838
- "lstrip": false,
839
- "rstrip": false,
840
- "normalized": false,
841
- "special": true
842
- },
843
- {
844
- "id": 211,
845
- "content": "<extra_id_90>",
846
- "single_word": false,
847
- "lstrip": false,
848
- "rstrip": false,
849
- "normalized": false,
850
- "special": true
851
- },
852
- {
853
- "id": 212,
854
- "content": "<extra_id_91>",
855
- "single_word": false,
856
- "lstrip": false,
857
- "rstrip": false,
858
- "normalized": false,
859
- "special": true
860
- },
861
- {
862
- "id": 213,
863
- "content": "<extra_id_92>",
864
- "single_word": false,
865
- "lstrip": false,
866
- "rstrip": false,
867
- "normalized": false,
868
- "special": true
869
- },
870
- {
871
- "id": 214,
872
- "content": "<extra_id_93>",
873
- "single_word": false,
874
- "lstrip": false,
875
- "rstrip": false,
876
- "normalized": false,
877
- "special": true
878
- },
879
- {
880
- "id": 215,
881
- "content": "<extra_id_94>",
882
- "single_word": false,
883
- "lstrip": false,
884
- "rstrip": false,
885
- "normalized": false,
886
- "special": true
887
- },
888
- {
889
- "id": 216,
890
- "content": "<extra_id_95>",
891
- "single_word": false,
892
- "lstrip": false,
893
- "rstrip": false,
894
- "normalized": false,
895
- "special": true
896
- },
897
- {
898
- "id": 217,
899
- "content": "<extra_id_96>",
900
- "single_word": false,
901
- "lstrip": false,
902
- "rstrip": false,
903
- "normalized": false,
904
- "special": true
905
- },
906
- {
907
- "id": 218,
908
- "content": "<extra_id_97>",
909
- "single_word": false,
910
- "lstrip": false,
911
- "rstrip": false,
912
- "normalized": false,
913
- "special": true
914
- },
915
- {
916
- "id": 219,
917
- "content": "<extra_id_98>",
918
- "single_word": false,
919
- "lstrip": false,
920
- "rstrip": false,
921
- "normalized": false,
922
- "special": true
923
- },
924
- {
925
- "id": 220,
926
- "content": "<extra_id_99>",
927
- "single_word": false,
928
- "lstrip": false,
929
- "rstrip": false,
930
- "normalized": false,
931
- "special": true
932
- },
933
- {
934
- "id": 221,
935
- "content": ".",
936
- "single_word": false,
937
- "lstrip": false,
938
- "rstrip": false,
939
- "normalized": true,
940
- "special": false
941
- },
942
- {
943
- "id": 222,
944
- "content": "REACTANT:",
945
- "single_word": false,
946
- "lstrip": false,
947
- "rstrip": false,
948
- "normalized": false,
949
- "special": true
950
- },
951
- {
952
- "id": 223,
953
- "content": "PRODUCT:",
954
- "single_word": false,
955
- "lstrip": false,
956
- "rstrip": false,
957
- "normalized": false,
958
- "special": true
959
- },
960
- {
961
- "id": 224,
962
- "content": "CATALYST:",
963
- "single_word": false,
964
- "lstrip": false,
965
- "rstrip": false,
966
- "normalized": false,
967
- "special": true
968
- },
969
- {
970
- "id": 225,
971
- "content": "REAGENT:",
972
- "single_word": false,
973
- "lstrip": false,
974
- "rstrip": false,
975
- "normalized": false,
976
- "special": true
977
- }
978
- ],
979
- "normalizer": {
980
- "type": "Sequence",
981
- "normalizers": [
982
- {
983
- "type": "Nmt"
984
- },
985
- {
986
- "type": "NFKC"
987
- },
988
- {
989
- "type": "Replace",
990
- "pattern": {
991
- "Regex": " {2,}"
992
- },
993
- "content": " "
994
- }
995
- ]
996
- },
997
- "pre_tokenizer": {
998
- "type": "Sequence",
999
- "pretokenizers": [
1000
- {
1001
- "type": "Metaspace",
1002
- "replacement": "▁",
1003
- "add_prefix_space": true
1004
- },
1005
- {
1006
- "type": "Digits",
1007
- "individual_digits": true
1008
- },
1009
- {
1010
- "type": "Punctuation",
1011
- "behavior": "Isolated"
1012
- }
1013
- ]
1014
- },
1015
- "post_processor": {
1016
- "type": "TemplateProcessing",
1017
- "single": [
1018
- {
1019
- "Sequence": {
1020
- "id": "A",
1021
- "type_id": 0
1022
- }
1023
- },
1024
- {
1025
- "SpecialToken": {
1026
- "id": "</s>",
1027
- "type_id": 0
1028
- }
1029
- }
1030
- ],
1031
- "pair": [
1032
- {
1033
- "Sequence": {
1034
- "id": "A",
1035
- "type_id": 0
1036
- }
1037
- },
1038
- {
1039
- "Sequence": {
1040
- "id": "B",
1041
- "type_id": 1
1042
- }
1043
- }
1044
- ],
1045
- "special_tokens": {
1046
- "</s>": {
1047
- "id": "</s>",
1048
- "ids": [
1049
- 1
1050
- ],
1051
- "tokens": [
1052
- "</s>"
1053
- ]
1054
- }
1055
- }
1056
- },
1057
- "decoder": {
1058
- "type": "Metaspace",
1059
- "replacement": "▁",
1060
- "add_prefix_space": true
1061
- },
1062
- "model": {
1063
- "type": "Unigram",
1064
- "unk_id": 2,
1065
- "vocab": [
1066
- [
1067
- "<pad>",
1068
- 0.0
1069
- ],
1070
- [
1071
- "</s>",
1072
- 0.0
1073
- ],
1074
- [
1075
- "<unk>",
1076
- 0.0
1077
- ],
1078
- [
1079
- "C",
1080
- -1.934062026530869
1081
- ],
1082
- [
1083
- "c",
1084
- -2.005135123981896
1085
- ],
1086
- [
1087
- ")",
1088
- -2.3305416341016603
1089
- ],
1090
- [
1091
- "(",
1092
- -2.3305416341016603
1093
- ],
1094
- [
1095
- "1",
1096
- -2.659489340254079
1097
- ],
1098
- [
1099
- "2",
1100
- -3.0147314517826524
1101
- ],
1102
- [
1103
- "O",
1104
- -3.1705660462226426
1105
- ],
1106
- [
1107
- "]",
1108
- -3.306692943444802
1109
- ],
1110
- [
1111
- "[",
1112
- -3.306692943444802
1113
- ],
1114
- [
1115
- "N",
1116
- -3.3379525475363145
1117
- ],
1118
- [
1119
- "@",
1120
- -3.375613737475505
1121
- ],
1122
- [
1123
- "H",
1124
- -3.3960003181228675
1125
- ],
1126
- [
1127
- "=",
1128
- -3.4342358584336026
1129
- ],
1130
- [
1131
- "n",
1132
- -3.9996436484078535
1133
- ],
1134
- [
1135
- "3",
1136
- -4.06624150723795
1137
- ],
1138
- [
1139
- "+",
1140
- -4.576587821405814
1141
- ],
1142
- [
1143
- "ccc",
1144
- -4.581329068949029
1145
- ],
1146
- [
1147
- "cccc",
1148
- -4.794168477161054
1149
- ],
1150
- [
1151
- "F",
1152
- -4.844444469349959
1153
- ],
1154
- [
1155
- "▁C",
1156
- -4.930731418706828
1157
- ],
1158
- [
1159
- "-",
1160
- -5.128151794820781
1161
- ],
1162
- [
1163
- "CC",
1164
- -5.188054847421637
1165
- ],
1166
- [
1167
- "▁",
1168
- -5.243375183264384
1169
- ],
1170
- [
1171
- "▁CC",
1172
- -5.43665410318811
1173
- ],
1174
- [
1175
- "cc",
1176
- -5.5072446210190025
1177
- ],
1178
- [
1179
- "S",
1180
- -5.607990514129369
1181
- ],
1182
- [
1183
- "CCC",
1184
- -5.610845771534942
1185
- ],
1186
- [
1187
- "4",
1188
- -5.6728058623804705
1189
- ],
1190
- [
1191
- "CCN",
1192
- -5.984229431727735
1193
- ],
1194
- [
1195
- "▁CO",
1196
- -6.136725098692515
1197
- ],
1198
- [
1199
- "s",
1200
- -6.163972117410355
1201
- ],
1202
- [
1203
- "l",
1204
- -6.244242813422005
1205
- ],
1206
- [
1207
- "nc",
1208
- -6.260369976115332
1209
- ],
1210
- [
1211
- "Cl",
1212
- -6.29041030199948
1213
- ],
1214
- [
1215
- "NC",
1216
- -6.301741325285667
1217
- ],
1218
- [
1219
- "#",
1220
- -6.475695381808137
1221
- ],
1222
- [
1223
- "CCCC",
1224
- -6.630889749196982
1225
- ],
1226
- [
1227
- "Br",
1228
- -6.708882955827546
1229
- ],
1230
- [
1231
- "o",
1232
- -6.8821690990922
1233
- ],
1234
- [
1235
- "no",
1236
- -7.085301372799254
1237
- ],
1238
- [
1239
- "nn",
1240
- -7.118314757276033
1241
- ],
1242
- [
1243
- "CN",
1244
- -7.148023077707199
1245
- ],
1246
- [
1247
- "▁O",
1248
- -7.184905330255651
1249
- ],
1250
- [
1251
- "CCO",
1252
- -7.223262672570243
1253
- ],
1254
- [
1255
- "▁CN",
1256
- -7.340957119808042
1257
- ],
1258
- [
1259
- "ccs",
1260
- -7.358439269448935
1261
- ],
1262
- [
1263
- "▁CCC",
1264
- -7.384181818093239
1265
- ],
1266
- [
1267
- "OC",
1268
- -7.401588967543117
1269
- ],
1270
- [
1271
- "/",
1272
- -7.412130091887384
1273
- ],
1274
- [
1275
- "▁COC",
1276
- -7.453342081307827
1277
- ],
1278
- [
1279
- "nnc",
1280
- -7.47808245293119
1281
- ],
1282
- [
1283
- "cccn",
1284
- -7.526117466256242
1285
- ],
1286
- [
1287
- "▁CCOC",
1288
- -7.581460400684973
1289
- ],
1290
- [
1291
- "sc",
1292
- -7.587476370908391
1293
- ],
1294
- [
1295
- "cco",
1296
- -7.601812309424911
1297
- ],
1298
- [
1299
- "CCOCC",
1300
- -7.70962259415791
1301
- ],
1302
- [
1303
- "CCCN",
1304
- -7.716050546596486
1305
- ],
1306
- [
1307
- "oc",
1308
- -7.76950331715161
1309
- ],
1310
- [
1311
- "ccnc",
1312
- -7.885377075776418
1313
- ],
1314
- [
1315
- "▁CCO",
1316
- -7.958354042052764
1317
- ],
1318
- [
1319
- "cs",
1320
- -7.963618836898841
1321
- ],
1322
- [
1323
- "▁CCN",
1324
- -7.969745690077621
1325
- ],
1326
- [
1327
- "NCC",
1328
- -7.982831223650715
1329
- ],
1330
- [
1331
- "CNC",
1332
- -8.015812540478317
1333
- ],
1334
- [
1335
- "CCCO",
1336
- -8.02283744281797
1337
- ],
1338
- [
1339
- "nccn",
1340
- -8.023781204384559
1341
- ],
1342
- [
1343
- "CCCCC",
1344
- -8.113410349713947
1345
- ],
1346
- [
1347
- "csc",
1348
- -8.123340128859317
1349
- ],
1350
- [
1351
- "nnn",
1352
- -8.16923209155111
1353
- ],
1354
- [
1355
- "CO",
1356
- -8.197076591417558
1357
- ],
1358
- [
1359
- "cnc",
1360
- -8.24948897316689
1361
- ],
1362
- [
1363
- "OCC",
1364
- -8.337244843539168
1365
- ],
1366
- [
1367
- "ncc",
1368
- -8.35350988501787
1369
- ],
1370
- [
1371
- "CCNC",
1372
- -8.374584543880331
1373
- ],
1374
- [
1375
- "cnn",
1376
- -8.379764985534148
1377
- ],
1378
- [
1379
- "▁CCCC",
1380
- -8.408946579060395
1381
- ],
1382
- [
1383
- "ccn",
1384
- -8.441316367356151
1385
- ],
1386
- [
1387
- "▁CCCN",
1388
- -8.455940453267049
1389
- ],
1390
- [
1391
- "ncn",
1392
- -8.509281787409483
1393
- ],
1394
- [
1395
- "CCOC",
1396
- -8.545791064619086
1397
- ],
1398
- [
1399
- "5",
1400
- -8.595231126796552
1401
- ],
1402
- [
1403
- "OCCO",
1404
- -8.620568633537287
1405
- ],
1406
- [
1407
- "SCC",
1408
- -8.675945846432537
1409
- ],
1410
- [
1411
- "\\",
1412
- -8.738088269693655
1413
- ],
1414
- [
1415
- "▁CS",
1416
- -8.745760398405787
1417
- ],
1418
- [
1419
- "cn",
1420
- -8.81287462848524
1421
- ],
1422
- [
1423
- "CS",
1424
- -8.852680543942768
1425
- ],
1426
- [
1427
- "COC",
1428
- -8.901532120821837
1429
- ],
1430
- [
1431
- "▁CCCO",
1432
- -8.971729419021525
1433
- ],
1434
- [
1435
- "▁COCC",
1436
- -8.986602855501445
1437
- ],
1438
- [
1439
- "▁F",
1440
- -9.08172188898926
1441
- ],
1442
- [
1443
- "▁CCNC",
1444
- -9.213239929783953
1445
- ],
1446
- [
1447
- "nccc",
1448
- -9.267617565078302
1449
- ],
1450
- [
1451
- "▁CCS",
1452
- -9.284123840101111
1453
- ],
1454
- [
1455
- "▁N",
1456
- -9.46465900903722
1457
- ],
1458
- [
1459
- "▁CCCCO",
1460
- -9.549659454320029
1461
- ],
1462
- [
1463
- "NCCC",
1464
- -9.592808545839064
1465
- ],
1466
- [
1467
- "NCCN",
1468
- -9.616093114468232
1469
- ],
1470
- [
1471
- "CCS",
1472
- -9.68568843530331
1473
- ],
1474
- [
1475
- "cncc",
1476
- -9.884757958758378
1477
- ],
1478
- [
1479
- "CCCS",
1480
- -10.07490132384844
1481
- ],
1482
- [
1483
- "▁NC",
1484
- -10.174180411834037
1485
- ],
1486
- [
1487
- "▁COCCN",
1488
- -10.182867019690027
1489
- ],
1490
- [
1491
- "▁COCCO",
1492
- -10.210382702506402
1493
- ],
1494
- [
1495
- "▁COCCC",
1496
- -10.390960567106395
1497
- ],
1498
- [
1499
- "OCO",
1500
- -10.445584736697652
1501
- ],
1502
- [
1503
- "COCC",
1504
- -10.469925833149428
1505
- ],
1506
- [
1507
- "▁CCOCCN",
1508
- -10.487938027145828
1509
- ],
1510
- [
1511
- "SC",
1512
- -10.868655865451124
1513
- ],
1514
- [
1515
- "co",
1516
- -11.175046369147724
1517
- ],
1518
- [
1519
- "I",
1520
- -11.188088269693656
1521
- ],
1522
- [
1523
- "on",
1524
- -11.199838029113598
1525
- ],
1526
- [
1527
- "CCCCN",
1528
- -11.273835881601958
1529
- ],
1530
- [
1531
- "▁CCCCN",
1532
- -11.694782342260607
1533
- ],
1534
- [
1535
- "CCOCCN",
1536
- -11.98760417613926
1537
- ],
1538
- [
1539
- "r",
1540
- -12.313357692926305
1541
- ],
1542
- [
1543
- "B",
1544
- -12.313457692926304
1545
- ],
1546
- [
1547
- "CCCCO",
1548
- -12.313457692926304
1549
- ]
1550
- ]
1551
- }
1552
- }