1NEYRON1 commited on
Commit
cdc1680
·
1 Parent(s): 22445af

Delete checkpoint/trainer_state.json

Browse files
Files changed (1) hide show
  1. checkpoint/trainer_state.json +0 -1249
checkpoint/trainer_state.json DELETED
@@ -1,1249 +0,0 @@
1
- {
2
- "best_global_step": 1500,
3
- "best_metric": 0.6884484673104748,
4
- "best_model_checkpoint": "./results_2/checkpoint-1500",
5
- "epoch": 0.7317073170731707,
6
- "eval_steps": 100,
7
- "global_step": 1500,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.004878048780487805,
14
- "grad_norm": 0.09305039048194885,
15
- "learning_rate": 1.0000000000000002e-06,
16
- "loss": 0.0173,
17
- "step": 10
18
- },
19
- {
20
- "epoch": 0.00975609756097561,
21
- "grad_norm": 0.09173406660556793,
22
- "learning_rate": 2.0000000000000003e-06,
23
- "loss": 0.0167,
24
- "step": 20
25
- },
26
- {
27
- "epoch": 0.014634146341463415,
28
- "grad_norm": 0.09779594093561172,
29
- "learning_rate": 3e-06,
30
- "loss": 0.0165,
31
- "step": 30
32
- },
33
- {
34
- "epoch": 0.01951219512195122,
35
- "grad_norm": 0.1044146940112114,
36
- "learning_rate": 4.000000000000001e-06,
37
- "loss": 0.0144,
38
- "step": 40
39
- },
40
- {
41
- "epoch": 0.024390243902439025,
42
- "grad_norm": 0.09117760509252548,
43
- "learning_rate": 5e-06,
44
- "loss": 0.0156,
45
- "step": 50
46
- },
47
- {
48
- "epoch": 0.02926829268292683,
49
- "grad_norm": 0.08018726110458374,
50
- "learning_rate": 6e-06,
51
- "loss": 0.0149,
52
- "step": 60
53
- },
54
- {
55
- "epoch": 0.03414634146341464,
56
- "grad_norm": 0.1090320348739624,
57
- "learning_rate": 7.000000000000001e-06,
58
- "loss": 0.0152,
59
- "step": 70
60
- },
61
- {
62
- "epoch": 0.03902439024390244,
63
- "grad_norm": 0.09608811885118484,
64
- "learning_rate": 8.000000000000001e-06,
65
- "loss": 0.017,
66
- "step": 80
67
- },
68
- {
69
- "epoch": 0.04390243902439024,
70
- "grad_norm": 0.08458913117647171,
71
- "learning_rate": 9e-06,
72
- "loss": 0.0167,
73
- "step": 90
74
- },
75
- {
76
- "epoch": 0.04878048780487805,
77
- "grad_norm": 0.07629529386758804,
78
- "learning_rate": 1e-05,
79
- "loss": 0.0153,
80
- "step": 100
81
- },
82
- {
83
- "epoch": 0.04878048780487805,
84
- "eval_f1": 0.6863881803425316,
85
- "eval_loss": 0.018864748999476433,
86
- "eval_precision": 0.6192098499790808,
87
- "eval_recall": 0.7699167657550535,
88
- "eval_runtime": 130.1056,
89
- "eval_samples_per_second": 63.026,
90
- "eval_steps_per_second": 0.992,
91
- "step": 100
92
- },
93
- {
94
- "epoch": 0.05365853658536585,
95
- "grad_norm": 0.09111520648002625,
96
- "learning_rate": 1.1000000000000001e-05,
97
- "loss": 0.0164,
98
- "step": 110
99
- },
100
- {
101
- "epoch": 0.05853658536585366,
102
- "grad_norm": 0.09741566330194473,
103
- "learning_rate": 1.2e-05,
104
- "loss": 0.0134,
105
- "step": 120
106
- },
107
- {
108
- "epoch": 0.06341463414634146,
109
- "grad_norm": 0.07199473679065704,
110
- "learning_rate": 1.3000000000000001e-05,
111
- "loss": 0.0154,
112
- "step": 130
113
- },
114
- {
115
- "epoch": 0.06829268292682927,
116
- "grad_norm": 0.08497001975774765,
117
- "learning_rate": 1.4000000000000001e-05,
118
- "loss": 0.0161,
119
- "step": 140
120
- },
121
- {
122
- "epoch": 0.07317073170731707,
123
- "grad_norm": 0.08193599432706833,
124
- "learning_rate": 1.5e-05,
125
- "loss": 0.0147,
126
- "step": 150
127
- },
128
- {
129
- "epoch": 0.07804878048780488,
130
- "grad_norm": 0.10380581766366959,
131
- "learning_rate": 1.6000000000000003e-05,
132
- "loss": 0.018,
133
- "step": 160
134
- },
135
- {
136
- "epoch": 0.08292682926829269,
137
- "grad_norm": 0.11271784454584122,
138
- "learning_rate": 1.7000000000000003e-05,
139
- "loss": 0.0158,
140
- "step": 170
141
- },
142
- {
143
- "epoch": 0.08780487804878048,
144
- "grad_norm": 0.0855347067117691,
145
- "learning_rate": 1.8e-05,
146
- "loss": 0.0166,
147
- "step": 180
148
- },
149
- {
150
- "epoch": 0.09268292682926829,
151
- "grad_norm": 0.09468505531549454,
152
- "learning_rate": 1.9e-05,
153
- "loss": 0.0138,
154
- "step": 190
155
- },
156
- {
157
- "epoch": 0.0975609756097561,
158
- "grad_norm": 0.06498368084430695,
159
- "learning_rate": 2e-05,
160
- "loss": 0.0138,
161
- "step": 200
162
- },
163
- {
164
- "epoch": 0.0975609756097561,
165
- "eval_f1": 0.6788566213617548,
166
- "eval_loss": 0.019105251878499985,
167
- "eval_precision": 0.6045863570391873,
168
- "eval_recall": 0.7739298454221165,
169
- "eval_runtime": 126.9518,
170
- "eval_samples_per_second": 64.591,
171
- "eval_steps_per_second": 1.016,
172
- "step": 200
173
- },
174
- {
175
- "epoch": 0.1024390243902439,
176
- "grad_norm": 0.08939243853092194,
177
- "learning_rate": 2.1e-05,
178
- "loss": 0.0177,
179
- "step": 210
180
- },
181
- {
182
- "epoch": 0.1073170731707317,
183
- "grad_norm": 0.0708846002817154,
184
- "learning_rate": 2.2000000000000003e-05,
185
- "loss": 0.0155,
186
- "step": 220
187
- },
188
- {
189
- "epoch": 0.11219512195121951,
190
- "grad_norm": 0.10868319869041443,
191
- "learning_rate": 2.3000000000000003e-05,
192
- "loss": 0.0172,
193
- "step": 230
194
- },
195
- {
196
- "epoch": 0.11707317073170732,
197
- "grad_norm": 0.06580492854118347,
198
- "learning_rate": 2.4e-05,
199
- "loss": 0.0142,
200
- "step": 240
201
- },
202
- {
203
- "epoch": 0.12195121951219512,
204
- "grad_norm": 0.08139664679765701,
205
- "learning_rate": 2.5e-05,
206
- "loss": 0.0181,
207
- "step": 250
208
- },
209
- {
210
- "epoch": 0.12682926829268293,
211
- "grad_norm": 0.07353170961141586,
212
- "learning_rate": 2.6000000000000002e-05,
213
- "loss": 0.0149,
214
- "step": 260
215
- },
216
- {
217
- "epoch": 0.13170731707317074,
218
- "grad_norm": 0.09713993221521378,
219
- "learning_rate": 2.7000000000000002e-05,
220
- "loss": 0.0133,
221
- "step": 270
222
- },
223
- {
224
- "epoch": 0.13658536585365855,
225
- "grad_norm": 0.0805194154381752,
226
- "learning_rate": 2.8000000000000003e-05,
227
- "loss": 0.0187,
228
- "step": 280
229
- },
230
- {
231
- "epoch": 0.14146341463414633,
232
- "grad_norm": 0.11184240132570267,
233
- "learning_rate": 2.9e-05,
234
- "loss": 0.0169,
235
- "step": 290
236
- },
237
- {
238
- "epoch": 0.14634146341463414,
239
- "grad_norm": 0.11055561900138855,
240
- "learning_rate": 3e-05,
241
- "loss": 0.0179,
242
- "step": 300
243
- },
244
- {
245
- "epoch": 0.14634146341463414,
246
- "eval_f1": 0.6787136697277903,
247
- "eval_loss": 0.019281357526779175,
248
- "eval_precision": 0.6091580502215658,
249
- "eval_recall": 0.7662009512485137,
250
- "eval_runtime": 126.6013,
251
- "eval_samples_per_second": 64.77,
252
- "eval_steps_per_second": 1.019,
253
- "step": 300
254
- },
255
- {
256
- "epoch": 0.15121951219512195,
257
- "grad_norm": 0.07896488904953003,
258
- "learning_rate": 3.1e-05,
259
- "loss": 0.0152,
260
- "step": 310
261
- },
262
- {
263
- "epoch": 0.15609756097560976,
264
- "grad_norm": 0.10503584146499634,
265
- "learning_rate": 3.2000000000000005e-05,
266
- "loss": 0.0181,
267
- "step": 320
268
- },
269
- {
270
- "epoch": 0.16097560975609757,
271
- "grad_norm": 0.10049130022525787,
272
- "learning_rate": 3.3e-05,
273
- "loss": 0.0169,
274
- "step": 330
275
- },
276
- {
277
- "epoch": 0.16585365853658537,
278
- "grad_norm": 0.10558082908391953,
279
- "learning_rate": 3.4000000000000007e-05,
280
- "loss": 0.0158,
281
- "step": 340
282
- },
283
- {
284
- "epoch": 0.17073170731707318,
285
- "grad_norm": 0.11450464278459549,
286
- "learning_rate": 3.5e-05,
287
- "loss": 0.0168,
288
- "step": 350
289
- },
290
- {
291
- "epoch": 0.17560975609756097,
292
- "grad_norm": 0.15014208853244781,
293
- "learning_rate": 3.6e-05,
294
- "loss": 0.0145,
295
- "step": 360
296
- },
297
- {
298
- "epoch": 0.18048780487804877,
299
- "grad_norm": 0.06404370069503784,
300
- "learning_rate": 3.7e-05,
301
- "loss": 0.0143,
302
- "step": 370
303
- },
304
- {
305
- "epoch": 0.18536585365853658,
306
- "grad_norm": 0.09269659221172333,
307
- "learning_rate": 3.8e-05,
308
- "loss": 0.018,
309
- "step": 380
310
- },
311
- {
312
- "epoch": 0.1902439024390244,
313
- "grad_norm": 0.09888678789138794,
314
- "learning_rate": 3.9000000000000006e-05,
315
- "loss": 0.0142,
316
- "step": 390
317
- },
318
- {
319
- "epoch": 0.1951219512195122,
320
- "grad_norm": 0.16841690242290497,
321
- "learning_rate": 4e-05,
322
- "loss": 0.0207,
323
- "step": 400
324
- },
325
- {
326
- "epoch": 0.1951219512195122,
327
- "eval_f1": 0.670402918255679,
328
- "eval_loss": 0.02026216685771942,
329
- "eval_precision": 0.6053057069285586,
330
- "eval_recall": 0.7511890606420928,
331
- "eval_runtime": 125.682,
332
- "eval_samples_per_second": 65.244,
333
- "eval_steps_per_second": 1.026,
334
- "step": 400
335
- },
336
- {
337
- "epoch": 0.2,
338
- "grad_norm": 0.10583117604255676,
339
- "learning_rate": 4.1e-05,
340
- "loss": 0.0161,
341
- "step": 410
342
- },
343
- {
344
- "epoch": 0.2048780487804878,
345
- "grad_norm": 0.08883402496576309,
346
- "learning_rate": 4.2e-05,
347
- "loss": 0.0167,
348
- "step": 420
349
- },
350
- {
351
- "epoch": 0.2097560975609756,
352
- "grad_norm": 0.08243865519762039,
353
- "learning_rate": 4.3e-05,
354
- "loss": 0.0161,
355
- "step": 430
356
- },
357
- {
358
- "epoch": 0.2146341463414634,
359
- "grad_norm": 0.06643176078796387,
360
- "learning_rate": 4.4000000000000006e-05,
361
- "loss": 0.0163,
362
- "step": 440
363
- },
364
- {
365
- "epoch": 0.21951219512195122,
366
- "grad_norm": 0.05310194939374924,
367
- "learning_rate": 4.5e-05,
368
- "loss": 0.0149,
369
- "step": 450
370
- },
371
- {
372
- "epoch": 0.22439024390243903,
373
- "grad_norm": 0.08369413018226624,
374
- "learning_rate": 4.600000000000001e-05,
375
- "loss": 0.0149,
376
- "step": 460
377
- },
378
- {
379
- "epoch": 0.22926829268292684,
380
- "grad_norm": 0.06853285431861877,
381
- "learning_rate": 4.7e-05,
382
- "loss": 0.0142,
383
- "step": 470
384
- },
385
- {
386
- "epoch": 0.23414634146341465,
387
- "grad_norm": 0.09447991102933884,
388
- "learning_rate": 4.8e-05,
389
- "loss": 0.0188,
390
- "step": 480
391
- },
392
- {
393
- "epoch": 0.23902439024390243,
394
- "grad_norm": 0.09374509006738663,
395
- "learning_rate": 4.9e-05,
396
- "loss": 0.0174,
397
- "step": 490
398
- },
399
- {
400
- "epoch": 0.24390243902439024,
401
- "grad_norm": 0.08009267598390579,
402
- "learning_rate": 5e-05,
403
- "loss": 0.0178,
404
- "step": 500
405
- },
406
- {
407
- "epoch": 0.24390243902439024,
408
- "eval_f1": 0.6727972910165327,
409
- "eval_loss": 0.019859502092003822,
410
- "eval_precision": 0.6080043201728069,
411
- "eval_recall": 0.7530469678953626,
412
- "eval_runtime": 125.7109,
413
- "eval_samples_per_second": 65.229,
414
- "eval_steps_per_second": 1.026,
415
- "step": 500
416
- },
417
- {
418
- "epoch": 0.24878048780487805,
419
- "grad_norm": 0.10110995173454285,
420
- "learning_rate": 4.9911504424778765e-05,
421
- "loss": 0.0176,
422
- "step": 510
423
- },
424
- {
425
- "epoch": 0.25365853658536586,
426
- "grad_norm": 0.13534288108348846,
427
- "learning_rate": 4.982300884955752e-05,
428
- "loss": 0.019,
429
- "step": 520
430
- },
431
- {
432
- "epoch": 0.25853658536585367,
433
- "grad_norm": 0.08551128208637238,
434
- "learning_rate": 4.9734513274336284e-05,
435
- "loss": 0.0156,
436
- "step": 530
437
- },
438
- {
439
- "epoch": 0.2634146341463415,
440
- "grad_norm": 0.10786890238523483,
441
- "learning_rate": 4.964601769911505e-05,
442
- "loss": 0.0141,
443
- "step": 540
444
- },
445
- {
446
- "epoch": 0.2682926829268293,
447
- "grad_norm": 0.10076329857110977,
448
- "learning_rate": 4.955752212389381e-05,
449
- "loss": 0.0175,
450
- "step": 550
451
- },
452
- {
453
- "epoch": 0.2731707317073171,
454
- "grad_norm": 0.10950438678264618,
455
- "learning_rate": 4.946902654867257e-05,
456
- "loss": 0.0204,
457
- "step": 560
458
- },
459
- {
460
- "epoch": 0.2780487804878049,
461
- "grad_norm": 0.14525355398654938,
462
- "learning_rate": 4.938053097345133e-05,
463
- "loss": 0.0188,
464
- "step": 570
465
- },
466
- {
467
- "epoch": 0.28292682926829266,
468
- "grad_norm": 0.10379478335380554,
469
- "learning_rate": 4.929203539823009e-05,
470
- "loss": 0.0172,
471
- "step": 580
472
- },
473
- {
474
- "epoch": 0.28780487804878047,
475
- "grad_norm": 0.11898482590913773,
476
- "learning_rate": 4.9203539823008854e-05,
477
- "loss": 0.0182,
478
- "step": 590
479
- },
480
- {
481
- "epoch": 0.2926829268292683,
482
- "grad_norm": 0.09698235988616943,
483
- "learning_rate": 4.911504424778761e-05,
484
- "loss": 0.0161,
485
- "step": 600
486
- },
487
- {
488
- "epoch": 0.2926829268292683,
489
- "eval_f1": 0.679977521404251,
490
- "eval_loss": 0.01935453712940216,
491
- "eval_precision": 0.612384638285204,
492
- "eval_recall": 0.7643430439952438,
493
- "eval_runtime": 126.1147,
494
- "eval_samples_per_second": 65.02,
495
- "eval_steps_per_second": 1.023,
496
- "step": 600
497
- },
498
- {
499
- "epoch": 0.2975609756097561,
500
- "grad_norm": 0.11621833592653275,
501
- "learning_rate": 4.902654867256637e-05,
502
- "loss": 0.0172,
503
- "step": 610
504
- },
505
- {
506
- "epoch": 0.3024390243902439,
507
- "grad_norm": 0.09230925887823105,
508
- "learning_rate": 4.893805309734513e-05,
509
- "loss": 0.0184,
510
- "step": 620
511
- },
512
- {
513
- "epoch": 0.3073170731707317,
514
- "grad_norm": 0.07670127600431442,
515
- "learning_rate": 4.88495575221239e-05,
516
- "loss": 0.0157,
517
- "step": 630
518
- },
519
- {
520
- "epoch": 0.3121951219512195,
521
- "grad_norm": 0.09875659644603729,
522
- "learning_rate": 4.876106194690266e-05,
523
- "loss": 0.0153,
524
- "step": 640
525
- },
526
- {
527
- "epoch": 0.3170731707317073,
528
- "grad_norm": 0.07239075750112534,
529
- "learning_rate": 4.867256637168142e-05,
530
- "loss": 0.0137,
531
- "step": 650
532
- },
533
- {
534
- "epoch": 0.32195121951219513,
535
- "grad_norm": 0.0903860479593277,
536
- "learning_rate": 4.858407079646018e-05,
537
- "loss": 0.0157,
538
- "step": 660
539
- },
540
- {
541
- "epoch": 0.32682926829268294,
542
- "grad_norm": 0.10013315826654434,
543
- "learning_rate": 4.849557522123894e-05,
544
- "loss": 0.0163,
545
- "step": 670
546
- },
547
- {
548
- "epoch": 0.33170731707317075,
549
- "grad_norm": 0.10445868223905563,
550
- "learning_rate": 4.84070796460177e-05,
551
- "loss": 0.0162,
552
- "step": 680
553
- },
554
- {
555
- "epoch": 0.33658536585365856,
556
- "grad_norm": 0.09091856330633163,
557
- "learning_rate": 4.831858407079646e-05,
558
- "loss": 0.0173,
559
- "step": 690
560
- },
561
- {
562
- "epoch": 0.34146341463414637,
563
- "grad_norm": 0.07674919068813324,
564
- "learning_rate": 4.823008849557522e-05,
565
- "loss": 0.0186,
566
- "step": 700
567
- },
568
- {
569
- "epoch": 0.34146341463414637,
570
- "eval_f1": 0.6647128916943094,
571
- "eval_loss": 0.019945966079831123,
572
- "eval_precision": 0.5865302642796248,
573
- "eval_recall": 0.7669441141498217,
574
- "eval_runtime": 125.0727,
575
- "eval_samples_per_second": 65.562,
576
- "eval_steps_per_second": 1.031,
577
- "step": 700
578
- },
579
- {
580
- "epoch": 0.3463414634146341,
581
- "grad_norm": 0.10548862814903259,
582
- "learning_rate": 4.814159292035398e-05,
583
- "loss": 0.0206,
584
- "step": 710
585
- },
586
- {
587
- "epoch": 0.35121951219512193,
588
- "grad_norm": 0.0873124822974205,
589
- "learning_rate": 4.805309734513275e-05,
590
- "loss": 0.0178,
591
- "step": 720
592
- },
593
- {
594
- "epoch": 0.35609756097560974,
595
- "grad_norm": 0.13294565677642822,
596
- "learning_rate": 4.7964601769911506e-05,
597
- "loss": 0.0171,
598
- "step": 730
599
- },
600
- {
601
- "epoch": 0.36097560975609755,
602
- "grad_norm": 0.09687939286231995,
603
- "learning_rate": 4.787610619469027e-05,
604
- "loss": 0.021,
605
- "step": 740
606
- },
607
- {
608
- "epoch": 0.36585365853658536,
609
- "grad_norm": 0.0876525342464447,
610
- "learning_rate": 4.778761061946903e-05,
611
- "loss": 0.0167,
612
- "step": 750
613
- },
614
- {
615
- "epoch": 0.37073170731707317,
616
- "grad_norm": 0.09069986641407013,
617
- "learning_rate": 4.769911504424779e-05,
618
- "loss": 0.0159,
619
- "step": 760
620
- },
621
- {
622
- "epoch": 0.375609756097561,
623
- "grad_norm": 0.09903474152088165,
624
- "learning_rate": 4.761061946902655e-05,
625
- "loss": 0.0179,
626
- "step": 770
627
- },
628
- {
629
- "epoch": 0.3804878048780488,
630
- "grad_norm": 0.09864596277475357,
631
- "learning_rate": 4.752212389380531e-05,
632
- "loss": 0.0186,
633
- "step": 780
634
- },
635
- {
636
- "epoch": 0.3853658536585366,
637
- "grad_norm": 0.14535708725452423,
638
- "learning_rate": 4.743362831858407e-05,
639
- "loss": 0.0163,
640
- "step": 790
641
- },
642
- {
643
- "epoch": 0.3902439024390244,
644
- "grad_norm": 0.09460794180631638,
645
- "learning_rate": 4.734513274336283e-05,
646
- "loss": 0.0188,
647
- "step": 800
648
- },
649
- {
650
- "epoch": 0.3902439024390244,
651
- "eval_f1": 0.6686207562971075,
652
- "eval_loss": 0.019426193088293076,
653
- "eval_precision": 0.5900511654349062,
654
- "eval_recall": 0.7713287752675386,
655
- "eval_runtime": 125.9109,
656
- "eval_samples_per_second": 65.125,
657
- "eval_steps_per_second": 1.025,
658
- "step": 800
659
- },
660
- {
661
- "epoch": 0.3951219512195122,
662
- "grad_norm": 0.10055914521217346,
663
- "learning_rate": 4.7256637168141595e-05,
664
- "loss": 0.0163,
665
- "step": 810
666
- },
667
- {
668
- "epoch": 0.4,
669
- "grad_norm": 0.09273848682641983,
670
- "learning_rate": 4.716814159292036e-05,
671
- "loss": 0.0169,
672
- "step": 820
673
- },
674
- {
675
- "epoch": 0.40487804878048783,
676
- "grad_norm": 0.11031738668680191,
677
- "learning_rate": 4.707964601769912e-05,
678
- "loss": 0.0175,
679
- "step": 830
680
- },
681
- {
682
- "epoch": 0.4097560975609756,
683
- "grad_norm": 0.09007064253091812,
684
- "learning_rate": 4.699115044247788e-05,
685
- "loss": 0.0161,
686
- "step": 840
687
- },
688
- {
689
- "epoch": 0.4146341463414634,
690
- "grad_norm": 0.10067761689424515,
691
- "learning_rate": 4.690265486725664e-05,
692
- "loss": 0.0177,
693
- "step": 850
694
- },
695
- {
696
- "epoch": 0.4195121951219512,
697
- "grad_norm": 0.15128421783447266,
698
- "learning_rate": 4.6814159292035396e-05,
699
- "loss": 0.0183,
700
- "step": 860
701
- },
702
- {
703
- "epoch": 0.424390243902439,
704
- "grad_norm": 0.10395248234272003,
705
- "learning_rate": 4.672566371681416e-05,
706
- "loss": 0.0151,
707
- "step": 870
708
- },
709
- {
710
- "epoch": 0.4292682926829268,
711
- "grad_norm": 0.08134312182664871,
712
- "learning_rate": 4.663716814159292e-05,
713
- "loss": 0.0184,
714
- "step": 880
715
- },
716
- {
717
- "epoch": 0.43414634146341463,
718
- "grad_norm": 0.09317316114902496,
719
- "learning_rate": 4.6548672566371684e-05,
720
- "loss": 0.0177,
721
- "step": 890
722
- },
723
- {
724
- "epoch": 0.43902439024390244,
725
- "grad_norm": 0.09974437952041626,
726
- "learning_rate": 4.646017699115045e-05,
727
- "loss": 0.0164,
728
- "step": 900
729
- },
730
- {
731
- "epoch": 0.43902439024390244,
732
- "eval_f1": 0.6717267552182163,
733
- "eval_loss": 0.01958908513188362,
734
- "eval_precision": 0.6,
735
- "eval_recall": 0.7629310344827587,
736
- "eval_runtime": 125.8067,
737
- "eval_samples_per_second": 65.179,
738
- "eval_steps_per_second": 1.025,
739
- "step": 900
740
- },
741
- {
742
- "epoch": 0.44390243902439025,
743
- "grad_norm": 0.10190931707620621,
744
- "learning_rate": 4.637168141592921e-05,
745
- "loss": 0.0219,
746
- "step": 910
747
- },
748
- {
749
- "epoch": 0.44878048780487806,
750
- "grad_norm": 0.08647562563419342,
751
- "learning_rate": 4.6283185840707966e-05,
752
- "loss": 0.0152,
753
- "step": 920
754
- },
755
- {
756
- "epoch": 0.45365853658536587,
757
- "grad_norm": 0.07675183564424515,
758
- "learning_rate": 4.619469026548673e-05,
759
- "loss": 0.0164,
760
- "step": 930
761
- },
762
- {
763
- "epoch": 0.4585365853658537,
764
- "grad_norm": 0.11960858851671219,
765
- "learning_rate": 4.6106194690265485e-05,
766
- "loss": 0.0172,
767
- "step": 940
768
- },
769
- {
770
- "epoch": 0.4634146341463415,
771
- "grad_norm": 0.09075548499822617,
772
- "learning_rate": 4.601769911504425e-05,
773
- "loss": 0.0159,
774
- "step": 950
775
- },
776
- {
777
- "epoch": 0.4682926829268293,
778
- "grad_norm": 0.05601793900132179,
779
- "learning_rate": 4.592920353982301e-05,
780
- "loss": 0.0171,
781
- "step": 960
782
- },
783
- {
784
- "epoch": 0.47317073170731705,
785
- "grad_norm": 0.0845816358923912,
786
- "learning_rate": 4.584070796460177e-05,
787
- "loss": 0.0185,
788
- "step": 970
789
- },
790
- {
791
- "epoch": 0.47804878048780486,
792
- "grad_norm": 0.08288878947496414,
793
- "learning_rate": 4.5752212389380536e-05,
794
- "loss": 0.017,
795
- "step": 980
796
- },
797
- {
798
- "epoch": 0.48292682926829267,
799
- "grad_norm": 0.1054663434624672,
800
- "learning_rate": 4.56637168141593e-05,
801
- "loss": 0.0148,
802
- "step": 990
803
- },
804
- {
805
- "epoch": 0.4878048780487805,
806
- "grad_norm": 0.08182746171951294,
807
- "learning_rate": 4.5575221238938055e-05,
808
- "loss": 0.0164,
809
- "step": 1000
810
- },
811
- {
812
- "epoch": 0.4878048780487805,
813
- "eval_f1": 0.6684826669260953,
814
- "eval_loss": 0.019462432712316513,
815
- "eval_precision": 0.5930038547839594,
816
- "eval_recall": 0.7659780023781213,
817
- "eval_runtime": 125.7047,
818
- "eval_samples_per_second": 65.232,
819
- "eval_steps_per_second": 1.026,
820
- "step": 1000
821
- },
822
- {
823
- "epoch": 0.4926829268292683,
824
- "grad_norm": 0.09717566519975662,
825
- "learning_rate": 4.548672566371682e-05,
826
- "loss": 0.0182,
827
- "step": 1010
828
- },
829
- {
830
- "epoch": 0.4975609756097561,
831
- "grad_norm": 0.09170341491699219,
832
- "learning_rate": 4.5398230088495574e-05,
833
- "loss": 0.0175,
834
- "step": 1020
835
- },
836
- {
837
- "epoch": 0.5024390243902439,
838
- "grad_norm": 0.08827481418848038,
839
- "learning_rate": 4.5309734513274336e-05,
840
- "loss": 0.0177,
841
- "step": 1030
842
- },
843
- {
844
- "epoch": 0.5073170731707317,
845
- "grad_norm": 0.08792293816804886,
846
- "learning_rate": 4.52212389380531e-05,
847
- "loss": 0.0184,
848
- "step": 1040
849
- },
850
- {
851
- "epoch": 0.5121951219512195,
852
- "grad_norm": 0.13856299221515656,
853
- "learning_rate": 4.5132743362831855e-05,
854
- "loss": 0.0182,
855
- "step": 1050
856
- },
857
- {
858
- "epoch": 0.5170731707317073,
859
- "grad_norm": 0.0926881730556488,
860
- "learning_rate": 4.5044247787610625e-05,
861
- "loss": 0.0177,
862
- "step": 1060
863
- },
864
- {
865
- "epoch": 0.5219512195121951,
866
- "grad_norm": 0.0846000462770462,
867
- "learning_rate": 4.495575221238939e-05,
868
- "loss": 0.0157,
869
- "step": 1070
870
- },
871
- {
872
- "epoch": 0.526829268292683,
873
- "grad_norm": 0.10093377530574799,
874
- "learning_rate": 4.4867256637168144e-05,
875
- "loss": 0.0188,
876
- "step": 1080
877
- },
878
- {
879
- "epoch": 0.5317073170731708,
880
- "grad_norm": 0.09538795799016953,
881
- "learning_rate": 4.4778761061946906e-05,
882
- "loss": 0.0184,
883
- "step": 1090
884
- },
885
- {
886
- "epoch": 0.5365853658536586,
887
- "grad_norm": 0.14831095933914185,
888
- "learning_rate": 4.469026548672566e-05,
889
- "loss": 0.0159,
890
- "step": 1100
891
- },
892
- {
893
- "epoch": 0.5365853658536586,
894
- "eval_f1": 0.6750937261537441,
895
- "eval_loss": 0.020070159807801247,
896
- "eval_precision": 0.6097692538207972,
897
- "eval_recall": 0.7560939357907254,
898
- "eval_runtime": 126.5385,
899
- "eval_samples_per_second": 64.802,
900
- "eval_steps_per_second": 1.019,
901
- "step": 1100
902
- },
903
- {
904
- "epoch": 0.5414634146341464,
905
- "grad_norm": 0.09911943227052689,
906
- "learning_rate": 4.4601769911504425e-05,
907
- "loss": 0.0174,
908
- "step": 1110
909
- },
910
- {
911
- "epoch": 0.5463414634146342,
912
- "grad_norm": 0.07620345056056976,
913
- "learning_rate": 4.451327433628319e-05,
914
- "loss": 0.0157,
915
- "step": 1120
916
- },
917
- {
918
- "epoch": 0.551219512195122,
919
- "grad_norm": 0.09838444739580154,
920
- "learning_rate": 4.4424778761061944e-05,
921
- "loss": 0.0146,
922
- "step": 1130
923
- },
924
- {
925
- "epoch": 0.5560975609756098,
926
- "grad_norm": 0.08282183855772018,
927
- "learning_rate": 4.433628318584071e-05,
928
- "loss": 0.0178,
929
- "step": 1140
930
- },
931
- {
932
- "epoch": 0.5609756097560976,
933
- "grad_norm": 0.09383049607276917,
934
- "learning_rate": 4.4247787610619477e-05,
935
- "loss": 0.0164,
936
- "step": 1150
937
- },
938
- {
939
- "epoch": 0.5658536585365853,
940
- "grad_norm": 0.1041053757071495,
941
- "learning_rate": 4.415929203539823e-05,
942
- "loss": 0.018,
943
- "step": 1160
944
- },
945
- {
946
- "epoch": 0.5707317073170731,
947
- "grad_norm": 0.0997629165649414,
948
- "learning_rate": 4.4070796460176995e-05,
949
- "loss": 0.016,
950
- "step": 1170
951
- },
952
- {
953
- "epoch": 0.5756097560975609,
954
- "grad_norm": 0.1428525149822235,
955
- "learning_rate": 4.398230088495575e-05,
956
- "loss": 0.018,
957
- "step": 1180
958
- },
959
- {
960
- "epoch": 0.5804878048780487,
961
- "grad_norm": 0.1270400732755661,
962
- "learning_rate": 4.3893805309734514e-05,
963
- "loss": 0.0172,
964
- "step": 1190
965
- },
966
- {
967
- "epoch": 0.5853658536585366,
968
- "grad_norm": 0.08549308031797409,
969
- "learning_rate": 4.380530973451328e-05,
970
- "loss": 0.0147,
971
- "step": 1200
972
- },
973
- {
974
- "epoch": 0.5853658536585366,
975
- "eval_f1": 0.6807967485136542,
976
- "eval_loss": 0.019661063328385353,
977
- "eval_precision": 0.621146184492798,
978
- "eval_recall": 0.7531212841854935,
979
- "eval_runtime": 125.9122,
980
- "eval_samples_per_second": 65.125,
981
- "eval_steps_per_second": 1.025,
982
- "step": 1200
983
- },
984
- {
985
- "epoch": 0.5902439024390244,
986
- "grad_norm": 0.05166243761777878,
987
- "learning_rate": 4.371681415929203e-05,
988
- "loss": 0.0167,
989
- "step": 1210
990
- },
991
- {
992
- "epoch": 0.5951219512195122,
993
- "grad_norm": 0.0989900454878807,
994
- "learning_rate": 4.3628318584070796e-05,
995
- "loss": 0.0185,
996
- "step": 1220
997
- },
998
- {
999
- "epoch": 0.6,
1000
- "grad_norm": 0.09084061533212662,
1001
- "learning_rate": 4.353982300884956e-05,
1002
- "loss": 0.0175,
1003
- "step": 1230
1004
- },
1005
- {
1006
- "epoch": 0.6048780487804878,
1007
- "grad_norm": 0.08588280528783798,
1008
- "learning_rate": 4.345132743362832e-05,
1009
- "loss": 0.0154,
1010
- "step": 1240
1011
- },
1012
- {
1013
- "epoch": 0.6097560975609756,
1014
- "grad_norm": 0.07876714318990707,
1015
- "learning_rate": 4.3362831858407084e-05,
1016
- "loss": 0.0155,
1017
- "step": 1250
1018
- },
1019
- {
1020
- "epoch": 0.6146341463414634,
1021
- "grad_norm": 0.10766426473855972,
1022
- "learning_rate": 4.327433628318584e-05,
1023
- "loss": 0.0162,
1024
- "step": 1260
1025
- },
1026
- {
1027
- "epoch": 0.6195121951219512,
1028
- "grad_norm": 0.08020301163196564,
1029
- "learning_rate": 4.31858407079646e-05,
1030
- "loss": 0.0174,
1031
- "step": 1270
1032
- },
1033
- {
1034
- "epoch": 0.624390243902439,
1035
- "grad_norm": 0.11564213037490845,
1036
- "learning_rate": 4.3097345132743366e-05,
1037
- "loss": 0.0167,
1038
- "step": 1280
1039
- },
1040
- {
1041
- "epoch": 0.6292682926829268,
1042
- "grad_norm": 0.11487080901861191,
1043
- "learning_rate": 4.300884955752212e-05,
1044
- "loss": 0.0155,
1045
- "step": 1290
1046
- },
1047
- {
1048
- "epoch": 0.6341463414634146,
1049
- "grad_norm": 0.09950511902570724,
1050
- "learning_rate": 4.2920353982300885e-05,
1051
- "loss": 0.0159,
1052
- "step": 1300
1053
- },
1054
- {
1055
- "epoch": 0.6341463414634146,
1056
- "eval_f1": 0.687796188619635,
1057
- "eval_loss": 0.01919134519994259,
1058
- "eval_precision": 0.6278456157575014,
1059
- "eval_recall": 0.7604042806183116,
1060
- "eval_runtime": 125.3379,
1061
- "eval_samples_per_second": 65.423,
1062
- "eval_steps_per_second": 1.029,
1063
- "step": 1300
1064
- },
1065
- {
1066
- "epoch": 0.6390243902439025,
1067
- "grad_norm": 0.08389411866664886,
1068
- "learning_rate": 4.283185840707965e-05,
1069
- "loss": 0.0156,
1070
- "step": 1310
1071
- },
1072
- {
1073
- "epoch": 0.6439024390243903,
1074
- "grad_norm": 0.08979038149118423,
1075
- "learning_rate": 4.274336283185841e-05,
1076
- "loss": 0.0173,
1077
- "step": 1320
1078
- },
1079
- {
1080
- "epoch": 0.6487804878048781,
1081
- "grad_norm": 0.069185771048069,
1082
- "learning_rate": 4.265486725663717e-05,
1083
- "loss": 0.0178,
1084
- "step": 1330
1085
- },
1086
- {
1087
- "epoch": 0.6536585365853659,
1088
- "grad_norm": 0.07186874747276306,
1089
- "learning_rate": 4.256637168141593e-05,
1090
- "loss": 0.0153,
1091
- "step": 1340
1092
- },
1093
- {
1094
- "epoch": 0.6585365853658537,
1095
- "grad_norm": 0.11203644424676895,
1096
- "learning_rate": 4.247787610619469e-05,
1097
- "loss": 0.0167,
1098
- "step": 1350
1099
- },
1100
- {
1101
- "epoch": 0.6634146341463415,
1102
- "grad_norm": 0.11567346006631851,
1103
- "learning_rate": 4.2389380530973455e-05,
1104
- "loss": 0.017,
1105
- "step": 1360
1106
- },
1107
- {
1108
- "epoch": 0.6682926829268293,
1109
- "grad_norm": 0.11276240646839142,
1110
- "learning_rate": 4.230088495575221e-05,
1111
- "loss": 0.0158,
1112
- "step": 1370
1113
- },
1114
- {
1115
- "epoch": 0.6731707317073171,
1116
- "grad_norm": 0.07067442685365677,
1117
- "learning_rate": 4.2212389380530974e-05,
1118
- "loss": 0.0156,
1119
- "step": 1380
1120
- },
1121
- {
1122
- "epoch": 0.6780487804878049,
1123
- "grad_norm": 0.08775879442691803,
1124
- "learning_rate": 4.2123893805309737e-05,
1125
- "loss": 0.0155,
1126
- "step": 1390
1127
- },
1128
- {
1129
- "epoch": 0.6829268292682927,
1130
- "grad_norm": 0.07028288394212723,
1131
- "learning_rate": 4.20353982300885e-05,
1132
- "loss": 0.0142,
1133
- "step": 1400
1134
- },
1135
- {
1136
- "epoch": 0.6829268292682927,
1137
- "eval_f1": 0.6818793753365644,
1138
- "eval_loss": 0.019489064812660217,
1139
- "eval_precision": 0.6231545275590551,
1140
- "eval_recall": 0.7528240190249703,
1141
- "eval_runtime": 125.915,
1142
- "eval_samples_per_second": 65.123,
1143
- "eval_steps_per_second": 1.025,
1144
- "step": 1400
1145
- },
1146
- {
1147
- "epoch": 0.6878048780487804,
1148
- "grad_norm": 0.08612460643053055,
1149
- "learning_rate": 4.194690265486726e-05,
1150
- "loss": 0.0163,
1151
- "step": 1410
1152
- },
1153
- {
1154
- "epoch": 0.6926829268292682,
1155
- "grad_norm": 0.1031985655426979,
1156
- "learning_rate": 4.185840707964602e-05,
1157
- "loss": 0.0146,
1158
- "step": 1420
1159
- },
1160
- {
1161
- "epoch": 0.697560975609756,
1162
- "grad_norm": 0.10174574702978134,
1163
- "learning_rate": 4.176991150442478e-05,
1164
- "loss": 0.017,
1165
- "step": 1430
1166
- },
1167
- {
1168
- "epoch": 0.7024390243902439,
1169
- "grad_norm": 0.06620911508798599,
1170
- "learning_rate": 4.1681415929203544e-05,
1171
- "loss": 0.0171,
1172
- "step": 1440
1173
- },
1174
- {
1175
- "epoch": 0.7073170731707317,
1176
- "grad_norm": 0.11920668184757233,
1177
- "learning_rate": 4.15929203539823e-05,
1178
- "loss": 0.0191,
1179
- "step": 1450
1180
- },
1181
- {
1182
- "epoch": 0.7121951219512195,
1183
- "grad_norm": 0.10696449875831604,
1184
- "learning_rate": 4.150442477876106e-05,
1185
- "loss": 0.0164,
1186
- "step": 1460
1187
- },
1188
- {
1189
- "epoch": 0.7170731707317073,
1190
- "grad_norm": 0.09047581255435944,
1191
- "learning_rate": 4.1415929203539825e-05,
1192
- "loss": 0.0185,
1193
- "step": 1470
1194
- },
1195
- {
1196
- "epoch": 0.7219512195121951,
1197
- "grad_norm": 0.08915913105010986,
1198
- "learning_rate": 4.132743362831858e-05,
1199
- "loss": 0.0172,
1200
- "step": 1480
1201
- },
1202
- {
1203
- "epoch": 0.7268292682926829,
1204
- "grad_norm": 0.10107399523258209,
1205
- "learning_rate": 4.123893805309735e-05,
1206
- "loss": 0.0188,
1207
- "step": 1490
1208
- },
1209
- {
1210
- "epoch": 0.7317073170731707,
1211
- "grad_norm": 0.1132921576499939,
1212
- "learning_rate": 4.115044247787611e-05,
1213
- "loss": 0.0166,
1214
- "step": 1500
1215
- },
1216
- {
1217
- "epoch": 0.7317073170731707,
1218
- "eval_f1": 0.6884484673104748,
1219
- "eval_loss": 0.0191908348351717,
1220
- "eval_precision": 0.6290352333517801,
1221
- "eval_recall": 0.7602556480380499,
1222
- "eval_runtime": 126.3472,
1223
- "eval_samples_per_second": 64.901,
1224
- "eval_steps_per_second": 1.021,
1225
- "step": 1500
1226
- }
1227
- ],
1228
- "logging_steps": 10,
1229
- "max_steps": 6150,
1230
- "num_input_tokens_seen": 0,
1231
- "num_train_epochs": 3,
1232
- "save_steps": 100,
1233
- "stateful_callbacks": {
1234
- "TrainerControl": {
1235
- "args": {
1236
- "should_epoch_stop": false,
1237
- "should_evaluate": false,
1238
- "should_log": false,
1239
- "should_save": true,
1240
- "should_training_stop": false
1241
- },
1242
- "attributes": {}
1243
- }
1244
- },
1245
- "total_flos": 3186928336896000.0,
1246
- "train_batch_size": 16,
1247
- "trial_name": null,
1248
- "trial_params": null
1249
- }