YWZBrandon's picture
Upload folder using huggingface_hub
3102e25 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 902,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011092623405435386,
"grad_norm": 2.009918689727783,
"learning_rate": 1.998002219755827e-05,
"loss": 0.5022,
"step": 10
},
{
"epoch": 0.022185246810870772,
"grad_norm": 0.49915584921836853,
"learning_rate": 1.995782463928968e-05,
"loss": 0.1943,
"step": 20
},
{
"epoch": 0.033277870216306155,
"grad_norm": 0.47061625123023987,
"learning_rate": 1.993562708102109e-05,
"loss": 0.1436,
"step": 30
},
{
"epoch": 0.044370493621741544,
"grad_norm": 0.2260124832391739,
"learning_rate": 1.99134295227525e-05,
"loss": 0.1106,
"step": 40
},
{
"epoch": 0.05546311702717693,
"grad_norm": 0.31766048073768616,
"learning_rate": 1.989123196448391e-05,
"loss": 0.0994,
"step": 50
},
{
"epoch": 0.06655574043261231,
"grad_norm": 0.2788391709327698,
"learning_rate": 1.9869034406215316e-05,
"loss": 0.111,
"step": 60
},
{
"epoch": 0.0776483638380477,
"grad_norm": 0.24376386404037476,
"learning_rate": 1.9846836847946726e-05,
"loss": 0.0849,
"step": 70
},
{
"epoch": 0.08874098724348309,
"grad_norm": 0.1789788454771042,
"learning_rate": 1.982463928967814e-05,
"loss": 0.0971,
"step": 80
},
{
"epoch": 0.09983361064891846,
"grad_norm": 0.29698437452316284,
"learning_rate": 1.9802441731409546e-05,
"loss": 0.0856,
"step": 90
},
{
"epoch": 0.11092623405435385,
"grad_norm": 0.2553412616252899,
"learning_rate": 1.9780244173140956e-05,
"loss": 0.0976,
"step": 100
},
{
"epoch": 0.12201885745978924,
"grad_norm": 0.20608791708946228,
"learning_rate": 1.9758046614872365e-05,
"loss": 0.0947,
"step": 110
},
{
"epoch": 0.13311148086522462,
"grad_norm": 0.23551669716835022,
"learning_rate": 1.9735849056603775e-05,
"loss": 0.0965,
"step": 120
},
{
"epoch": 0.14420410427066002,
"grad_norm": 0.3207148611545563,
"learning_rate": 1.9713651498335185e-05,
"loss": 0.0931,
"step": 130
},
{
"epoch": 0.1552967276760954,
"grad_norm": 0.34813177585601807,
"learning_rate": 1.9691453940066595e-05,
"loss": 0.1,
"step": 140
},
{
"epoch": 0.16638935108153077,
"grad_norm": 0.41501104831695557,
"learning_rate": 1.9669256381798e-05,
"loss": 0.0922,
"step": 150
},
{
"epoch": 0.17748197448696618,
"grad_norm": 0.24550506472587585,
"learning_rate": 1.964705882352941e-05,
"loss": 0.0857,
"step": 160
},
{
"epoch": 0.18857459789240155,
"grad_norm": 0.4205056130886078,
"learning_rate": 1.9624861265260825e-05,
"loss": 0.0831,
"step": 170
},
{
"epoch": 0.19966722129783693,
"grad_norm": 0.6002993583679199,
"learning_rate": 1.9602663706992235e-05,
"loss": 0.0792,
"step": 180
},
{
"epoch": 0.21075984470327233,
"grad_norm": 0.27535638213157654,
"learning_rate": 1.958046614872364e-05,
"loss": 0.0786,
"step": 190
},
{
"epoch": 0.2218524681087077,
"grad_norm": 0.41602805256843567,
"learning_rate": 1.955826859045505e-05,
"loss": 0.0789,
"step": 200
},
{
"epoch": 0.23294509151414308,
"grad_norm": 0.41827332973480225,
"learning_rate": 1.953607103218646e-05,
"loss": 0.0663,
"step": 210
},
{
"epoch": 0.24403771491957849,
"grad_norm": 0.47976112365722656,
"learning_rate": 1.951387347391787e-05,
"loss": 0.0727,
"step": 220
},
{
"epoch": 0.25513033832501386,
"grad_norm": 0.7771281599998474,
"learning_rate": 1.949167591564928e-05,
"loss": 0.0776,
"step": 230
},
{
"epoch": 0.26622296173044924,
"grad_norm": 1.0573914051055908,
"learning_rate": 1.946947835738069e-05,
"loss": 0.0828,
"step": 240
},
{
"epoch": 0.2773155851358846,
"grad_norm": 0.5186040997505188,
"learning_rate": 1.9447280799112097e-05,
"loss": 0.0767,
"step": 250
},
{
"epoch": 0.28840820854132004,
"grad_norm": 0.5594862699508667,
"learning_rate": 1.942508324084351e-05,
"loss": 0.0699,
"step": 260
},
{
"epoch": 0.2995008319467554,
"grad_norm": 0.5147818922996521,
"learning_rate": 1.940288568257492e-05,
"loss": 0.0591,
"step": 270
},
{
"epoch": 0.3105934553521908,
"grad_norm": 0.5510437488555908,
"learning_rate": 1.938068812430633e-05,
"loss": 0.0761,
"step": 280
},
{
"epoch": 0.32168607875762617,
"grad_norm": 0.4817625880241394,
"learning_rate": 1.9358490566037736e-05,
"loss": 0.0688,
"step": 290
},
{
"epoch": 0.33277870216306155,
"grad_norm": 0.504362940788269,
"learning_rate": 1.9336293007769146e-05,
"loss": 0.0691,
"step": 300
},
{
"epoch": 0.343871325568497,
"grad_norm": 0.5732384920120239,
"learning_rate": 1.9314095449500556e-05,
"loss": 0.057,
"step": 310
},
{
"epoch": 0.35496394897393235,
"grad_norm": 0.5892298221588135,
"learning_rate": 1.9291897891231966e-05,
"loss": 0.059,
"step": 320
},
{
"epoch": 0.36605657237936773,
"grad_norm": 0.3680512309074402,
"learning_rate": 1.9269700332963376e-05,
"loss": 0.0706,
"step": 330
},
{
"epoch": 0.3771491957848031,
"grad_norm": 0.41971662640571594,
"learning_rate": 1.9247502774694786e-05,
"loss": 0.053,
"step": 340
},
{
"epoch": 0.3882418191902385,
"grad_norm": 0.37261390686035156,
"learning_rate": 1.9225305216426195e-05,
"loss": 0.0632,
"step": 350
},
{
"epoch": 0.39933444259567386,
"grad_norm": 0.48256734013557434,
"learning_rate": 1.9203107658157605e-05,
"loss": 0.0651,
"step": 360
},
{
"epoch": 0.4104270660011093,
"grad_norm": 0.7914339303970337,
"learning_rate": 1.9180910099889015e-05,
"loss": 0.0659,
"step": 370
},
{
"epoch": 0.42151968940654466,
"grad_norm": 0.6772429347038269,
"learning_rate": 1.915871254162042e-05,
"loss": 0.0655,
"step": 380
},
{
"epoch": 0.43261231281198004,
"grad_norm": 0.5621687173843384,
"learning_rate": 1.913651498335183e-05,
"loss": 0.0707,
"step": 390
},
{
"epoch": 0.4437049362174154,
"grad_norm": 0.30515748262405396,
"learning_rate": 1.911431742508324e-05,
"loss": 0.0548,
"step": 400
},
{
"epoch": 0.4547975596228508,
"grad_norm": 0.5506859421730042,
"learning_rate": 1.909211986681465e-05,
"loss": 0.0554,
"step": 410
},
{
"epoch": 0.46589018302828616,
"grad_norm": 0.6387749314308167,
"learning_rate": 1.906992230854606e-05,
"loss": 0.0554,
"step": 420
},
{
"epoch": 0.4769828064337216,
"grad_norm": 0.18500734865665436,
"learning_rate": 1.904772475027747e-05,
"loss": 0.0558,
"step": 430
},
{
"epoch": 0.48807542983915697,
"grad_norm": 0.6815407276153564,
"learning_rate": 1.902552719200888e-05,
"loss": 0.0537,
"step": 440
},
{
"epoch": 0.49916805324459235,
"grad_norm": 0.4826994836330414,
"learning_rate": 1.900332963374029e-05,
"loss": 0.0567,
"step": 450
},
{
"epoch": 0.5102606766500277,
"grad_norm": 0.24618124961853027,
"learning_rate": 1.89811320754717e-05,
"loss": 0.0614,
"step": 460
},
{
"epoch": 0.5213533000554631,
"grad_norm": 1.0037415027618408,
"learning_rate": 1.895893451720311e-05,
"loss": 0.0604,
"step": 470
},
{
"epoch": 0.5324459234608985,
"grad_norm": 0.6000948548316956,
"learning_rate": 1.8936736958934517e-05,
"loss": 0.0683,
"step": 480
},
{
"epoch": 0.5435385468663338,
"grad_norm": 0.3302474319934845,
"learning_rate": 1.8914539400665927e-05,
"loss": 0.0543,
"step": 490
},
{
"epoch": 0.5546311702717692,
"grad_norm": 0.5560783743858337,
"learning_rate": 1.8892341842397337e-05,
"loss": 0.0569,
"step": 500
},
{
"epoch": 0.5657237936772047,
"grad_norm": 0.8041097521781921,
"learning_rate": 1.8870144284128747e-05,
"loss": 0.0544,
"step": 510
},
{
"epoch": 0.5768164170826401,
"grad_norm": 0.6846103668212891,
"learning_rate": 1.8847946725860156e-05,
"loss": 0.058,
"step": 520
},
{
"epoch": 0.5879090404880755,
"grad_norm": 0.50434809923172,
"learning_rate": 1.8825749167591566e-05,
"loss": 0.0654,
"step": 530
},
{
"epoch": 0.5990016638935108,
"grad_norm": 0.54362553358078,
"learning_rate": 1.8803551609322976e-05,
"loss": 0.0582,
"step": 540
},
{
"epoch": 0.6100942872989462,
"grad_norm": 0.6166839599609375,
"learning_rate": 1.8781354051054386e-05,
"loss": 0.0672,
"step": 550
},
{
"epoch": 0.6211869107043816,
"grad_norm": 0.4353054165840149,
"learning_rate": 1.8759156492785796e-05,
"loss": 0.0532,
"step": 560
},
{
"epoch": 0.632279534109817,
"grad_norm": 0.6025580167770386,
"learning_rate": 1.8736958934517206e-05,
"loss": 0.0746,
"step": 570
},
{
"epoch": 0.6433721575152523,
"grad_norm": 0.7259892225265503,
"learning_rate": 1.8714761376248612e-05,
"loss": 0.0642,
"step": 580
},
{
"epoch": 0.6544647809206877,
"grad_norm": 0.4940318465232849,
"learning_rate": 1.8692563817980022e-05,
"loss": 0.0547,
"step": 590
},
{
"epoch": 0.6655574043261231,
"grad_norm": 0.7005699872970581,
"learning_rate": 1.8670366259711435e-05,
"loss": 0.0522,
"step": 600
},
{
"epoch": 0.6766500277315585,
"grad_norm": 0.4530707895755768,
"learning_rate": 1.8648168701442845e-05,
"loss": 0.054,
"step": 610
},
{
"epoch": 0.687742651136994,
"grad_norm": 0.9097110629081726,
"learning_rate": 1.8625971143174252e-05,
"loss": 0.0622,
"step": 620
},
{
"epoch": 0.6988352745424293,
"grad_norm": 0.5374599695205688,
"learning_rate": 1.860377358490566e-05,
"loss": 0.0522,
"step": 630
},
{
"epoch": 0.7099278979478647,
"grad_norm": 0.3849945664405823,
"learning_rate": 1.858157602663707e-05,
"loss": 0.0571,
"step": 640
},
{
"epoch": 0.7210205213533001,
"grad_norm": 0.5918008685112,
"learning_rate": 1.855937846836848e-05,
"loss": 0.0576,
"step": 650
},
{
"epoch": 0.7321131447587355,
"grad_norm": 0.3229956030845642,
"learning_rate": 1.853718091009989e-05,
"loss": 0.0537,
"step": 660
},
{
"epoch": 0.7432057681641708,
"grad_norm": 0.5264039039611816,
"learning_rate": 1.85149833518313e-05,
"loss": 0.06,
"step": 670
},
{
"epoch": 0.7542983915696062,
"grad_norm": 0.36795660853385925,
"learning_rate": 1.8492785793562708e-05,
"loss": 0.0511,
"step": 680
},
{
"epoch": 0.7653910149750416,
"grad_norm": 0.5905130505561829,
"learning_rate": 1.847058823529412e-05,
"loss": 0.0636,
"step": 690
},
{
"epoch": 0.776483638380477,
"grad_norm": 0.36266571283340454,
"learning_rate": 1.844839067702553e-05,
"loss": 0.0598,
"step": 700
},
{
"epoch": 0.7875762617859123,
"grad_norm": 0.4978592097759247,
"learning_rate": 1.8426193118756937e-05,
"loss": 0.0607,
"step": 710
},
{
"epoch": 0.7986688851913477,
"grad_norm": 0.4635021686553955,
"learning_rate": 1.8403995560488347e-05,
"loss": 0.0543,
"step": 720
},
{
"epoch": 0.8097615085967831,
"grad_norm": 0.44571858644485474,
"learning_rate": 1.8381798002219757e-05,
"loss": 0.0678,
"step": 730
},
{
"epoch": 0.8208541320022186,
"grad_norm": 0.8265877366065979,
"learning_rate": 1.8359600443951167e-05,
"loss": 0.0552,
"step": 740
},
{
"epoch": 0.831946755407654,
"grad_norm": 0.5776472091674805,
"learning_rate": 1.8337402885682577e-05,
"loss": 0.0505,
"step": 750
},
{
"epoch": 0.8430393788130893,
"grad_norm": 0.299274742603302,
"learning_rate": 1.8315205327413986e-05,
"loss": 0.0619,
"step": 760
},
{
"epoch": 0.8541320022185247,
"grad_norm": 0.587645947933197,
"learning_rate": 1.8293007769145393e-05,
"loss": 0.0575,
"step": 770
},
{
"epoch": 0.8652246256239601,
"grad_norm": 0.39164137840270996,
"learning_rate": 1.8270810210876806e-05,
"loss": 0.0458,
"step": 780
},
{
"epoch": 0.8763172490293955,
"grad_norm": 0.4663292169570923,
"learning_rate": 1.8248612652608216e-05,
"loss": 0.048,
"step": 790
},
{
"epoch": 0.8874098724348308,
"grad_norm": 0.5804581642150879,
"learning_rate": 1.8226415094339626e-05,
"loss": 0.0557,
"step": 800
},
{
"epoch": 0.8985024958402662,
"grad_norm": 0.4279440939426422,
"learning_rate": 1.8204217536071032e-05,
"loss": 0.051,
"step": 810
},
{
"epoch": 0.9095951192457016,
"grad_norm": 0.5384302735328674,
"learning_rate": 1.8182019977802442e-05,
"loss": 0.0558,
"step": 820
},
{
"epoch": 0.920687742651137,
"grad_norm": 0.5049973726272583,
"learning_rate": 1.8159822419533852e-05,
"loss": 0.047,
"step": 830
},
{
"epoch": 0.9317803660565723,
"grad_norm": 0.8061177134513855,
"learning_rate": 1.8137624861265262e-05,
"loss": 0.0572,
"step": 840
},
{
"epoch": 0.9428729894620078,
"grad_norm": 0.6056540012359619,
"learning_rate": 1.8115427302996672e-05,
"loss": 0.0539,
"step": 850
},
{
"epoch": 0.9539656128674432,
"grad_norm": 0.3001384139060974,
"learning_rate": 1.8093229744728082e-05,
"loss": 0.053,
"step": 860
},
{
"epoch": 0.9650582362728786,
"grad_norm": 0.6709749102592468,
"learning_rate": 1.807103218645949e-05,
"loss": 0.0526,
"step": 870
},
{
"epoch": 0.9761508596783139,
"grad_norm": 0.8233507871627808,
"learning_rate": 1.80488346281909e-05,
"loss": 0.0621,
"step": 880
},
{
"epoch": 0.9872434830837493,
"grad_norm": 0.5757150650024414,
"learning_rate": 1.802663706992231e-05,
"loss": 0.0546,
"step": 890
},
{
"epoch": 0.9983361064891847,
"grad_norm": 0.6834889054298401,
"learning_rate": 1.800443951165372e-05,
"loss": 0.0462,
"step": 900
},
{
"epoch": 1.0,
"eval_accuracy": 0.8720027017899359,
"eval_f1": 0.5678449258836944,
"eval_loss": 0.34414270520210266,
"eval_precision": 0.7929936305732485,
"eval_recall": 0.4422735346358792,
"eval_runtime": 2.727,
"eval_samples_per_second": 361.937,
"eval_steps_per_second": 11.368,
"step": 902
}
],
"logging_steps": 10,
"max_steps": 9010,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 9622905843747798.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}