pii-sensitive-ner-german / trainer_state.json
HuggingLil's picture
Upload folder using huggingface_hub
d124f35 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 24564,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012212994626282364,
"grad_norm": 2.7971887588500977,
"learning_rate": 4.9796450089561964e-05,
"loss": 0.3257,
"step": 100
},
{
"epoch": 0.024425989252564728,
"grad_norm": 2.295069456100464,
"learning_rate": 4.9592900179123925e-05,
"loss": 0.1159,
"step": 200
},
{
"epoch": 0.03663898387884709,
"grad_norm": 1.1914141178131104,
"learning_rate": 4.9389350268685886e-05,
"loss": 0.1073,
"step": 300
},
{
"epoch": 0.048851978505129456,
"grad_norm": 0.8838738203048706,
"learning_rate": 4.918580035824785e-05,
"loss": 0.1076,
"step": 400
},
{
"epoch": 0.06106497313141182,
"grad_norm": 1.8016488552093506,
"learning_rate": 4.898225044780981e-05,
"loss": 0.0851,
"step": 500
},
{
"epoch": 0.07327796775769418,
"grad_norm": 2.5403218269348145,
"learning_rate": 4.8778700537371764e-05,
"loss": 0.1119,
"step": 600
},
{
"epoch": 0.08549096238397655,
"grad_norm": 1.6208165884017944,
"learning_rate": 4.8575150626933725e-05,
"loss": 0.0968,
"step": 700
},
{
"epoch": 0.09770395701025891,
"grad_norm": 1.7685565948486328,
"learning_rate": 4.8371600716495686e-05,
"loss": 0.0848,
"step": 800
},
{
"epoch": 0.10991695163654128,
"grad_norm": 5.9024882316589355,
"learning_rate": 4.816805080605765e-05,
"loss": 0.0816,
"step": 900
},
{
"epoch": 0.12212994626282364,
"grad_norm": 1.4809324741363525,
"learning_rate": 4.796450089561961e-05,
"loss": 0.0815,
"step": 1000
},
{
"epoch": 0.134342940889106,
"grad_norm": 1.8953092098236084,
"learning_rate": 4.776095098518157e-05,
"loss": 0.0835,
"step": 1100
},
{
"epoch": 0.14655593551538837,
"grad_norm": 0.2310028374195099,
"learning_rate": 4.755740107474353e-05,
"loss": 0.0768,
"step": 1200
},
{
"epoch": 0.15876893014167073,
"grad_norm": 4.047482013702393,
"learning_rate": 4.735385116430549e-05,
"loss": 0.0785,
"step": 1300
},
{
"epoch": 0.1709819247679531,
"grad_norm": 5.049030303955078,
"learning_rate": 4.7150301253867454e-05,
"loss": 0.0609,
"step": 1400
},
{
"epoch": 0.18319491939423546,
"grad_norm": 4.447434902191162,
"learning_rate": 4.6946751343429415e-05,
"loss": 0.0625,
"step": 1500
},
{
"epoch": 0.19540791402051783,
"grad_norm": 0.3569432199001312,
"learning_rate": 4.674320143299138e-05,
"loss": 0.0612,
"step": 1600
},
{
"epoch": 0.2076209086468002,
"grad_norm": 0.6527674794197083,
"learning_rate": 4.653965152255334e-05,
"loss": 0.07,
"step": 1700
},
{
"epoch": 0.21983390327308255,
"grad_norm": 5.448887825012207,
"learning_rate": 4.633610161211529e-05,
"loss": 0.0669,
"step": 1800
},
{
"epoch": 0.23204689789936492,
"grad_norm": 1.804388165473938,
"learning_rate": 4.6132551701677254e-05,
"loss": 0.0636,
"step": 1900
},
{
"epoch": 0.24425989252564728,
"grad_norm": 1.574012279510498,
"learning_rate": 4.5929001791239215e-05,
"loss": 0.0798,
"step": 2000
},
{
"epoch": 0.2564728871519297,
"grad_norm": 0.8235198259353638,
"learning_rate": 4.5725451880801177e-05,
"loss": 0.054,
"step": 2100
},
{
"epoch": 0.268685881778212,
"grad_norm": 0.6802400350570679,
"learning_rate": 4.552190197036313e-05,
"loss": 0.0613,
"step": 2200
},
{
"epoch": 0.2808988764044944,
"grad_norm": 0.5732834935188293,
"learning_rate": 4.531835205992509e-05,
"loss": 0.0544,
"step": 2300
},
{
"epoch": 0.29311187103077674,
"grad_norm": 0.9544196128845215,
"learning_rate": 4.5114802149487054e-05,
"loss": 0.0735,
"step": 2400
},
{
"epoch": 0.30532486565705913,
"grad_norm": 0.8141427040100098,
"learning_rate": 4.4911252239049015e-05,
"loss": 0.0586,
"step": 2500
},
{
"epoch": 0.31753786028334147,
"grad_norm": 3.740103006362915,
"learning_rate": 4.4707702328610976e-05,
"loss": 0.0498,
"step": 2600
},
{
"epoch": 0.32975085490962386,
"grad_norm": 0.24550916254520416,
"learning_rate": 4.450415241817294e-05,
"loss": 0.056,
"step": 2700
},
{
"epoch": 0.3419638495359062,
"grad_norm": 0.7254294157028198,
"learning_rate": 4.43006025077349e-05,
"loss": 0.053,
"step": 2800
},
{
"epoch": 0.3541768441621886,
"grad_norm": 0.5358878970146179,
"learning_rate": 4.409705259729686e-05,
"loss": 0.0561,
"step": 2900
},
{
"epoch": 0.3663898387884709,
"grad_norm": 0.3604901432991028,
"learning_rate": 4.3893502686858815e-05,
"loss": 0.059,
"step": 3000
},
{
"epoch": 0.3786028334147533,
"grad_norm": 0.19227269291877747,
"learning_rate": 4.3689952776420776e-05,
"loss": 0.0548,
"step": 3100
},
{
"epoch": 0.39081582804103565,
"grad_norm": 0.3620028495788574,
"learning_rate": 4.348640286598274e-05,
"loss": 0.0535,
"step": 3200
},
{
"epoch": 0.40302882266731804,
"grad_norm": 4.794760704040527,
"learning_rate": 4.32828529555447e-05,
"loss": 0.0549,
"step": 3300
},
{
"epoch": 0.4152418172936004,
"grad_norm": 0.5320255160331726,
"learning_rate": 4.307930304510666e-05,
"loss": 0.051,
"step": 3400
},
{
"epoch": 0.42745481191988277,
"grad_norm": 2.048164129257202,
"learning_rate": 4.287575313466862e-05,
"loss": 0.067,
"step": 3500
},
{
"epoch": 0.4396678065461651,
"grad_norm": 3.6915972232818604,
"learning_rate": 4.267220322423058e-05,
"loss": 0.0494,
"step": 3600
},
{
"epoch": 0.4518808011724475,
"grad_norm": 0.7861614227294922,
"learning_rate": 4.2468653313792544e-05,
"loss": 0.0652,
"step": 3700
},
{
"epoch": 0.46409379579872984,
"grad_norm": 1.9440407752990723,
"learning_rate": 4.2265103403354505e-05,
"loss": 0.0699,
"step": 3800
},
{
"epoch": 0.4763067904250122,
"grad_norm": 0.15847598016262054,
"learning_rate": 4.206155349291647e-05,
"loss": 0.0575,
"step": 3900
},
{
"epoch": 0.48851978505129456,
"grad_norm": 0.2988128960132599,
"learning_rate": 4.185800358247843e-05,
"loss": 0.0468,
"step": 4000
},
{
"epoch": 0.5007327796775769,
"grad_norm": 1.6092756986618042,
"learning_rate": 4.165445367204039e-05,
"loss": 0.0555,
"step": 4100
},
{
"epoch": 0.5129457743038593,
"grad_norm": 13.008635520935059,
"learning_rate": 4.1450903761602344e-05,
"loss": 0.0604,
"step": 4200
},
{
"epoch": 0.5251587689301417,
"grad_norm": 2.7357659339904785,
"learning_rate": 4.1247353851164305e-05,
"loss": 0.0478,
"step": 4300
},
{
"epoch": 0.537371763556424,
"grad_norm": 0.4085894823074341,
"learning_rate": 4.104380394072627e-05,
"loss": 0.0478,
"step": 4400
},
{
"epoch": 0.5495847581827064,
"grad_norm": 1.5472468137741089,
"learning_rate": 4.084025403028823e-05,
"loss": 0.0559,
"step": 4500
},
{
"epoch": 0.5617977528089888,
"grad_norm": 2.616894006729126,
"learning_rate": 4.063670411985019e-05,
"loss": 0.0444,
"step": 4600
},
{
"epoch": 0.5740107474352711,
"grad_norm": 0.7861430644989014,
"learning_rate": 4.043315420941215e-05,
"loss": 0.0431,
"step": 4700
},
{
"epoch": 0.5862237420615535,
"grad_norm": 1.3745653629302979,
"learning_rate": 4.022960429897411e-05,
"loss": 0.0422,
"step": 4800
},
{
"epoch": 0.5984367366878358,
"grad_norm": 1.471048355102539,
"learning_rate": 4.002605438853607e-05,
"loss": 0.052,
"step": 4900
},
{
"epoch": 0.6106497313141183,
"grad_norm": 0.3034818470478058,
"learning_rate": 3.9822504478098035e-05,
"loss": 0.0481,
"step": 5000
},
{
"epoch": 0.6228627259404006,
"grad_norm": 0.5265262722969055,
"learning_rate": 3.9618954567659996e-05,
"loss": 0.0592,
"step": 5100
},
{
"epoch": 0.6350757205666829,
"grad_norm": 1.226517915725708,
"learning_rate": 3.941540465722196e-05,
"loss": 0.0554,
"step": 5200
},
{
"epoch": 0.6472887151929653,
"grad_norm": 0.365315705537796,
"learning_rate": 3.921185474678392e-05,
"loss": 0.0466,
"step": 5300
},
{
"epoch": 0.6595017098192477,
"grad_norm": 0.4508240818977356,
"learning_rate": 3.900830483634587e-05,
"loss": 0.0468,
"step": 5400
},
{
"epoch": 0.67171470444553,
"grad_norm": 0.51467365026474,
"learning_rate": 3.8804754925907834e-05,
"loss": 0.054,
"step": 5500
},
{
"epoch": 0.6839276990718124,
"grad_norm": 0.6186398267745972,
"learning_rate": 3.8601205015469796e-05,
"loss": 0.0554,
"step": 5600
},
{
"epoch": 0.6961406936980947,
"grad_norm": 0.6723864674568176,
"learning_rate": 3.839765510503176e-05,
"loss": 0.0556,
"step": 5700
},
{
"epoch": 0.7083536883243772,
"grad_norm": 3.6368353366851807,
"learning_rate": 3.819410519459372e-05,
"loss": 0.0512,
"step": 5800
},
{
"epoch": 0.7205666829506595,
"grad_norm": 3.5719096660614014,
"learning_rate": 3.799055528415568e-05,
"loss": 0.0545,
"step": 5900
},
{
"epoch": 0.7327796775769418,
"grad_norm": 1.1756514310836792,
"learning_rate": 3.778700537371764e-05,
"loss": 0.0465,
"step": 6000
},
{
"epoch": 0.7449926722032242,
"grad_norm": 1.2159337997436523,
"learning_rate": 3.7583455463279596e-05,
"loss": 0.0463,
"step": 6100
},
{
"epoch": 0.7572056668295066,
"grad_norm": 1.0632232427597046,
"learning_rate": 3.737990555284156e-05,
"loss": 0.0444,
"step": 6200
},
{
"epoch": 0.769418661455789,
"grad_norm": 0.669765293598175,
"learning_rate": 3.717635564240352e-05,
"loss": 0.0433,
"step": 6300
},
{
"epoch": 0.7816316560820713,
"grad_norm": 0.13478492200374603,
"learning_rate": 3.697280573196548e-05,
"loss": 0.0469,
"step": 6400
},
{
"epoch": 0.7938446507083536,
"grad_norm": 0.5963812470436096,
"learning_rate": 3.676925582152744e-05,
"loss": 0.0504,
"step": 6500
},
{
"epoch": 0.8060576453346361,
"grad_norm": 0.5829123258590698,
"learning_rate": 3.6565705911089395e-05,
"loss": 0.0483,
"step": 6600
},
{
"epoch": 0.8182706399609184,
"grad_norm": 2.3114776611328125,
"learning_rate": 3.636215600065136e-05,
"loss": 0.0601,
"step": 6700
},
{
"epoch": 0.8304836345872008,
"grad_norm": 0.27553310990333557,
"learning_rate": 3.615860609021332e-05,
"loss": 0.0527,
"step": 6800
},
{
"epoch": 0.8426966292134831,
"grad_norm": 0.3668135106563568,
"learning_rate": 3.595505617977528e-05,
"loss": 0.0541,
"step": 6900
},
{
"epoch": 0.8549096238397655,
"grad_norm": 3.1305336952209473,
"learning_rate": 3.575150626933724e-05,
"loss": 0.0534,
"step": 7000
},
{
"epoch": 0.8671226184660479,
"grad_norm": 0.08432205021381378,
"learning_rate": 3.55479563588992e-05,
"loss": 0.0523,
"step": 7100
},
{
"epoch": 0.8793356130923302,
"grad_norm": 1.3692104816436768,
"learning_rate": 3.534440644846116e-05,
"loss": 0.0428,
"step": 7200
},
{
"epoch": 0.8915486077186126,
"grad_norm": 1.1145917177200317,
"learning_rate": 3.5140856538023125e-05,
"loss": 0.0528,
"step": 7300
},
{
"epoch": 0.903761602344895,
"grad_norm": 0.07234195619821548,
"learning_rate": 3.4937306627585086e-05,
"loss": 0.0449,
"step": 7400
},
{
"epoch": 0.9159745969711773,
"grad_norm": 0.12755821645259857,
"learning_rate": 3.473375671714705e-05,
"loss": 0.0381,
"step": 7500
},
{
"epoch": 0.9281875915974597,
"grad_norm": 1.066666603088379,
"learning_rate": 3.453020680670901e-05,
"loss": 0.0411,
"step": 7600
},
{
"epoch": 0.940400586223742,
"grad_norm": 2.3837034702301025,
"learning_rate": 3.432665689627097e-05,
"loss": 0.0471,
"step": 7700
},
{
"epoch": 0.9526135808500245,
"grad_norm": 0.2601478397846222,
"learning_rate": 3.4123106985832924e-05,
"loss": 0.0408,
"step": 7800
},
{
"epoch": 0.9648265754763068,
"grad_norm": 1.0532914400100708,
"learning_rate": 3.3919557075394886e-05,
"loss": 0.0422,
"step": 7900
},
{
"epoch": 0.9770395701025891,
"grad_norm": 0.16507047414779663,
"learning_rate": 3.371600716495685e-05,
"loss": 0.0464,
"step": 8000
},
{
"epoch": 0.9892525647288715,
"grad_norm": 0.310465544462204,
"learning_rate": 3.351245725451881e-05,
"loss": 0.056,
"step": 8100
},
{
"epoch": 1.0,
"eval_f1": 0.8987804878048782,
"eval_loss": 0.04277478903532028,
"eval_precision": 0.8910749615300066,
"eval_recall": 0.9066204428539477,
"eval_runtime": 133.6334,
"eval_samples_per_second": 61.272,
"eval_steps_per_second": 7.663,
"step": 8188
},
{
"epoch": 1.0014655593551538,
"grad_norm": 0.8648662567138672,
"learning_rate": 3.330890734408077e-05,
"loss": 0.0441,
"step": 8200
},
{
"epoch": 1.0136785539814364,
"grad_norm": 0.16131815314292908,
"learning_rate": 3.310535743364273e-05,
"loss": 0.0306,
"step": 8300
},
{
"epoch": 1.0258915486077187,
"grad_norm": 4.484282970428467,
"learning_rate": 3.290180752320469e-05,
"loss": 0.0383,
"step": 8400
},
{
"epoch": 1.038104543234001,
"grad_norm": 0.5343158841133118,
"learning_rate": 3.2698257612766654e-05,
"loss": 0.0331,
"step": 8500
},
{
"epoch": 1.0503175378602834,
"grad_norm": 0.028084266930818558,
"learning_rate": 3.2494707702328615e-05,
"loss": 0.0381,
"step": 8600
},
{
"epoch": 1.0625305324865657,
"grad_norm": 0.483477920293808,
"learning_rate": 3.2291157791890576e-05,
"loss": 0.0462,
"step": 8700
},
{
"epoch": 1.074743527112848,
"grad_norm": 0.8194773197174072,
"learning_rate": 3.208760788145254e-05,
"loss": 0.0346,
"step": 8800
},
{
"epoch": 1.0869565217391304,
"grad_norm": 0.11062140762805939,
"learning_rate": 3.188405797101449e-05,
"loss": 0.0369,
"step": 8900
},
{
"epoch": 1.0991695163654127,
"grad_norm": 0.300889790058136,
"learning_rate": 3.1680508060576454e-05,
"loss": 0.0297,
"step": 9000
},
{
"epoch": 1.111382510991695,
"grad_norm": 0.6278924345970154,
"learning_rate": 3.1476958150138415e-05,
"loss": 0.0349,
"step": 9100
},
{
"epoch": 1.1235955056179776,
"grad_norm": 0.542029082775116,
"learning_rate": 3.1273408239700376e-05,
"loss": 0.0473,
"step": 9200
},
{
"epoch": 1.13580850024426,
"grad_norm": 0.6147358417510986,
"learning_rate": 3.106985832926234e-05,
"loss": 0.0356,
"step": 9300
},
{
"epoch": 1.1480214948705423,
"grad_norm": 1.301965355873108,
"learning_rate": 3.08663084188243e-05,
"loss": 0.0371,
"step": 9400
},
{
"epoch": 1.1602344894968246,
"grad_norm": 0.026711974292993546,
"learning_rate": 3.066275850838626e-05,
"loss": 0.0346,
"step": 9500
},
{
"epoch": 1.172447484123107,
"grad_norm": 1.258608102798462,
"learning_rate": 3.0459208597948218e-05,
"loss": 0.0334,
"step": 9600
},
{
"epoch": 1.1846604787493893,
"grad_norm": 0.36877045035362244,
"learning_rate": 3.025565868751018e-05,
"loss": 0.032,
"step": 9700
},
{
"epoch": 1.1968734733756716,
"grad_norm": 0.2519334852695465,
"learning_rate": 3.005210877707214e-05,
"loss": 0.0416,
"step": 9800
},
{
"epoch": 1.2090864680019542,
"grad_norm": 0.5204672813415527,
"learning_rate": 2.9848558866634102e-05,
"loss": 0.0366,
"step": 9900
},
{
"epoch": 1.2212994626282365,
"grad_norm": 0.43101000785827637,
"learning_rate": 2.9645008956196063e-05,
"loss": 0.0379,
"step": 10000
},
{
"epoch": 1.2335124572545189,
"grad_norm": 0.681117057800293,
"learning_rate": 2.9441459045758018e-05,
"loss": 0.0353,
"step": 10100
},
{
"epoch": 1.2457254518808012,
"grad_norm": 0.49889543652534485,
"learning_rate": 2.923790913531998e-05,
"loss": 0.0394,
"step": 10200
},
{
"epoch": 1.2579384465070835,
"grad_norm": 0.8064567446708679,
"learning_rate": 2.903435922488194e-05,
"loss": 0.034,
"step": 10300
},
{
"epoch": 1.2701514411333659,
"grad_norm": 0.21315552294254303,
"learning_rate": 2.8830809314443902e-05,
"loss": 0.0292,
"step": 10400
},
{
"epoch": 1.2823644357596482,
"grad_norm": 1.6466035842895508,
"learning_rate": 2.8627259404005863e-05,
"loss": 0.0333,
"step": 10500
},
{
"epoch": 1.2945774303859308,
"grad_norm": 1.1944749355316162,
"learning_rate": 2.8423709493567825e-05,
"loss": 0.0357,
"step": 10600
},
{
"epoch": 1.3067904250122129,
"grad_norm": 0.6488074064254761,
"learning_rate": 2.8220159583129786e-05,
"loss": 0.0315,
"step": 10700
},
{
"epoch": 1.3190034196384954,
"grad_norm": 0.030384689569473267,
"learning_rate": 2.8016609672691747e-05,
"loss": 0.0422,
"step": 10800
},
{
"epoch": 1.3312164142647778,
"grad_norm": 0.2363937795162201,
"learning_rate": 2.781305976225371e-05,
"loss": 0.0335,
"step": 10900
},
{
"epoch": 1.34342940889106,
"grad_norm": 0.04548358544707298,
"learning_rate": 2.7609509851815666e-05,
"loss": 0.0371,
"step": 11000
},
{
"epoch": 1.3556424035173424,
"grad_norm": 0.6259112358093262,
"learning_rate": 2.7405959941377628e-05,
"loss": 0.0412,
"step": 11100
},
{
"epoch": 1.3678553981436248,
"grad_norm": 0.42487379908561707,
"learning_rate": 2.720241003093959e-05,
"loss": 0.0252,
"step": 11200
},
{
"epoch": 1.3800683927699071,
"grad_norm": 0.9125863313674927,
"learning_rate": 2.6998860120501547e-05,
"loss": 0.0367,
"step": 11300
},
{
"epoch": 1.3922813873961895,
"grad_norm": 0.7670263051986694,
"learning_rate": 2.679531021006351e-05,
"loss": 0.0288,
"step": 11400
},
{
"epoch": 1.404494382022472,
"grad_norm": 0.1614452451467514,
"learning_rate": 2.6591760299625466e-05,
"loss": 0.0399,
"step": 11500
},
{
"epoch": 1.4167073766487543,
"grad_norm": 3.3551249504089355,
"learning_rate": 2.6388210389187428e-05,
"loss": 0.0391,
"step": 11600
},
{
"epoch": 1.4289203712750367,
"grad_norm": 0.7188284397125244,
"learning_rate": 2.618466047874939e-05,
"loss": 0.0315,
"step": 11700
},
{
"epoch": 1.441133365901319,
"grad_norm": 0.48031413555145264,
"learning_rate": 2.598111056831135e-05,
"loss": 0.0244,
"step": 11800
},
{
"epoch": 1.4533463605276014,
"grad_norm": 0.7492583394050598,
"learning_rate": 2.577756065787331e-05,
"loss": 0.0359,
"step": 11900
},
{
"epoch": 1.4655593551538837,
"grad_norm": 0.6593573689460754,
"learning_rate": 2.5574010747435273e-05,
"loss": 0.0327,
"step": 12000
},
{
"epoch": 1.477772349780166,
"grad_norm": 0.2940855026245117,
"learning_rate": 2.5370460836997234e-05,
"loss": 0.0336,
"step": 12100
},
{
"epoch": 1.4899853444064486,
"grad_norm": 0.45900267362594604,
"learning_rate": 2.5166910926559195e-05,
"loss": 0.0242,
"step": 12200
},
{
"epoch": 1.5021983390327307,
"grad_norm": 2.2023909091949463,
"learning_rate": 2.4963361016121153e-05,
"loss": 0.0348,
"step": 12300
},
{
"epoch": 1.5144113336590133,
"grad_norm": 0.12489739805459976,
"learning_rate": 2.4759811105683115e-05,
"loss": 0.0335,
"step": 12400
},
{
"epoch": 1.5266243282852956,
"grad_norm": 1.0575867891311646,
"learning_rate": 2.4556261195245076e-05,
"loss": 0.0199,
"step": 12500
},
{
"epoch": 1.538837322911578,
"grad_norm": 1.7309564352035522,
"learning_rate": 2.4352711284807037e-05,
"loss": 0.0316,
"step": 12600
},
{
"epoch": 1.5510503175378603,
"grad_norm": 0.925658643245697,
"learning_rate": 2.4149161374369e-05,
"loss": 0.0445,
"step": 12700
},
{
"epoch": 1.5632633121641426,
"grad_norm": 0.48667579889297485,
"learning_rate": 2.3945611463930957e-05,
"loss": 0.0437,
"step": 12800
},
{
"epoch": 1.5754763067904252,
"grad_norm": 0.11213243752717972,
"learning_rate": 2.3742061553492918e-05,
"loss": 0.0387,
"step": 12900
},
{
"epoch": 1.5876893014167073,
"grad_norm": 0.14116732776165009,
"learning_rate": 2.353851164305488e-05,
"loss": 0.033,
"step": 13000
},
{
"epoch": 1.5999022960429898,
"grad_norm": 0.686268150806427,
"learning_rate": 2.333496173261684e-05,
"loss": 0.0281,
"step": 13100
},
{
"epoch": 1.612115290669272,
"grad_norm": 0.4795430898666382,
"learning_rate": 2.31314118221788e-05,
"loss": 0.0436,
"step": 13200
},
{
"epoch": 1.6243282852955545,
"grad_norm": 0.026416413486003876,
"learning_rate": 2.292786191174076e-05,
"loss": 0.0343,
"step": 13300
},
{
"epoch": 1.6365412799218368,
"grad_norm": 0.582073986530304,
"learning_rate": 2.2724312001302718e-05,
"loss": 0.0312,
"step": 13400
},
{
"epoch": 1.6487542745481192,
"grad_norm": 1.669487476348877,
"learning_rate": 2.252076209086468e-05,
"loss": 0.0384,
"step": 13500
},
{
"epoch": 1.6609672691744015,
"grad_norm": 0.19379857182502747,
"learning_rate": 2.231721218042664e-05,
"loss": 0.0322,
"step": 13600
},
{
"epoch": 1.6731802638006839,
"grad_norm": 4.540911674499512,
"learning_rate": 2.2113662269988602e-05,
"loss": 0.0363,
"step": 13700
},
{
"epoch": 1.6853932584269664,
"grad_norm": 0.24804505705833435,
"learning_rate": 2.1910112359550563e-05,
"loss": 0.0326,
"step": 13800
},
{
"epoch": 1.6976062530532485,
"grad_norm": 1.8535521030426025,
"learning_rate": 2.1706562449112524e-05,
"loss": 0.0316,
"step": 13900
},
{
"epoch": 1.709819247679531,
"grad_norm": 0.04862889647483826,
"learning_rate": 2.1503012538674482e-05,
"loss": 0.0248,
"step": 14000
},
{
"epoch": 1.7220322423058134,
"grad_norm": 0.3953320384025574,
"learning_rate": 2.1299462628236444e-05,
"loss": 0.0393,
"step": 14100
},
{
"epoch": 1.7342452369320958,
"grad_norm": 0.5966042876243591,
"learning_rate": 2.1095912717798405e-05,
"loss": 0.0358,
"step": 14200
},
{
"epoch": 1.746458231558378,
"grad_norm": 0.1555975377559662,
"learning_rate": 2.0892362807360366e-05,
"loss": 0.0425,
"step": 14300
},
{
"epoch": 1.7586712261846604,
"grad_norm": 0.8556230068206787,
"learning_rate": 2.0688812896922328e-05,
"loss": 0.0267,
"step": 14400
},
{
"epoch": 1.770884220810943,
"grad_norm": 0.03833279386162758,
"learning_rate": 2.048526298648429e-05,
"loss": 0.034,
"step": 14500
},
{
"epoch": 1.783097215437225,
"grad_norm": 0.043861281126737595,
"learning_rate": 2.0281713076046247e-05,
"loss": 0.0288,
"step": 14600
},
{
"epoch": 1.7953102100635077,
"grad_norm": 0.28712257742881775,
"learning_rate": 2.0078163165608208e-05,
"loss": 0.0285,
"step": 14700
},
{
"epoch": 1.8075232046897898,
"grad_norm": 1.3535864353179932,
"learning_rate": 1.987461325517017e-05,
"loss": 0.0377,
"step": 14800
},
{
"epoch": 1.8197361993160723,
"grad_norm": 3.164818048477173,
"learning_rate": 1.967106334473213e-05,
"loss": 0.0334,
"step": 14900
},
{
"epoch": 1.8319491939423547,
"grad_norm": 0.08736918866634369,
"learning_rate": 1.9467513434294092e-05,
"loss": 0.0294,
"step": 15000
},
{
"epoch": 1.844162188568637,
"grad_norm": 1.25545072555542,
"learning_rate": 1.926396352385605e-05,
"loss": 0.0285,
"step": 15100
},
{
"epoch": 1.8563751831949193,
"grad_norm": 0.030480826273560524,
"learning_rate": 1.906041361341801e-05,
"loss": 0.0328,
"step": 15200
},
{
"epoch": 1.8685881778212017,
"grad_norm": 1.6334197521209717,
"learning_rate": 1.8856863702979973e-05,
"loss": 0.037,
"step": 15300
},
{
"epoch": 1.8808011724474842,
"grad_norm": 1.2553733587265015,
"learning_rate": 1.865331379254193e-05,
"loss": 0.0256,
"step": 15400
},
{
"epoch": 1.8930141670737664,
"grad_norm": 0.061297524720430374,
"learning_rate": 1.8449763882103892e-05,
"loss": 0.0276,
"step": 15500
},
{
"epoch": 1.905227161700049,
"grad_norm": 1.0915943384170532,
"learning_rate": 1.8246213971665853e-05,
"loss": 0.0362,
"step": 15600
},
{
"epoch": 1.9174401563263312,
"grad_norm": 0.020990842953324318,
"learning_rate": 1.8042664061227815e-05,
"loss": 0.025,
"step": 15700
},
{
"epoch": 1.9296531509526136,
"grad_norm": 0.09211856126785278,
"learning_rate": 1.7839114150789773e-05,
"loss": 0.0265,
"step": 15800
},
{
"epoch": 1.941866145578896,
"grad_norm": 1.5800979137420654,
"learning_rate": 1.7635564240351734e-05,
"loss": 0.0256,
"step": 15900
},
{
"epoch": 1.9540791402051783,
"grad_norm": 0.39250850677490234,
"learning_rate": 1.7432014329913695e-05,
"loss": 0.0249,
"step": 16000
},
{
"epoch": 1.9662921348314608,
"grad_norm": 0.8597753047943115,
"learning_rate": 1.7228464419475657e-05,
"loss": 0.0355,
"step": 16100
},
{
"epoch": 1.978505129457743,
"grad_norm": 0.16734100878238678,
"learning_rate": 1.7024914509037618e-05,
"loss": 0.032,
"step": 16200
},
{
"epoch": 1.9907181240840255,
"grad_norm": 0.11750225722789764,
"learning_rate": 1.682136459859958e-05,
"loss": 0.0227,
"step": 16300
},
{
"epoch": 2.0,
"eval_f1": 0.9197428223035141,
"eval_loss": 0.036899276077747345,
"eval_precision": 0.9117582417582417,
"eval_recall": 0.9278684857973608,
"eval_runtime": 75.3931,
"eval_samples_per_second": 108.604,
"eval_steps_per_second": 13.582,
"step": 16376
},
{
"epoch": 2.0029311187103076,
"grad_norm": 0.6276179552078247,
"learning_rate": 1.6617814688161537e-05,
"loss": 0.0326,
"step": 16400
},
{
"epoch": 2.01514411333659,
"grad_norm": 0.27882876992225647,
"learning_rate": 1.64142647777235e-05,
"loss": 0.0206,
"step": 16500
},
{
"epoch": 2.0273571079628727,
"grad_norm": 0.9930168986320496,
"learning_rate": 1.621071486728546e-05,
"loss": 0.0135,
"step": 16600
},
{
"epoch": 2.039570102589155,
"grad_norm": 0.21392406523227692,
"learning_rate": 1.600716495684742e-05,
"loss": 0.028,
"step": 16700
},
{
"epoch": 2.0517830972154374,
"grad_norm": 2.1995363235473633,
"learning_rate": 1.5803615046409382e-05,
"loss": 0.0273,
"step": 16800
},
{
"epoch": 2.0639960918417195,
"grad_norm": 1.91357421875,
"learning_rate": 1.560006513597134e-05,
"loss": 0.0152,
"step": 16900
},
{
"epoch": 2.076209086468002,
"grad_norm": 0.057265687733888626,
"learning_rate": 1.53965152255333e-05,
"loss": 0.0206,
"step": 17000
},
{
"epoch": 2.088422081094284,
"grad_norm": 0.05291162431240082,
"learning_rate": 1.5192965315095261e-05,
"loss": 0.022,
"step": 17100
},
{
"epoch": 2.1006350757205667,
"grad_norm": 2.424394369125366,
"learning_rate": 1.4989415404657223e-05,
"loss": 0.0178,
"step": 17200
},
{
"epoch": 2.112848070346849,
"grad_norm": 8.053882598876953,
"learning_rate": 1.4785865494219184e-05,
"loss": 0.0256,
"step": 17300
},
{
"epoch": 2.1250610649731314,
"grad_norm": 1.606079339981079,
"learning_rate": 1.4582315583781145e-05,
"loss": 0.017,
"step": 17400
},
{
"epoch": 2.137274059599414,
"grad_norm": 0.26984503865242004,
"learning_rate": 1.4378765673343103e-05,
"loss": 0.0202,
"step": 17500
},
{
"epoch": 2.149487054225696,
"grad_norm": 0.044966306537389755,
"learning_rate": 1.4175215762905064e-05,
"loss": 0.0234,
"step": 17600
},
{
"epoch": 2.1617000488519786,
"grad_norm": 0.05067300796508789,
"learning_rate": 1.3971665852467026e-05,
"loss": 0.0263,
"step": 17700
},
{
"epoch": 2.1739130434782608,
"grad_norm": 0.5125128030776978,
"learning_rate": 1.3768115942028985e-05,
"loss": 0.0216,
"step": 17800
},
{
"epoch": 2.1861260381045433,
"grad_norm": 0.04719540849328041,
"learning_rate": 1.3564566031590947e-05,
"loss": 0.0256,
"step": 17900
},
{
"epoch": 2.1983390327308254,
"grad_norm": 0.11627175658941269,
"learning_rate": 1.3361016121152908e-05,
"loss": 0.0185,
"step": 18000
},
{
"epoch": 2.210552027357108,
"grad_norm": 0.2016720473766327,
"learning_rate": 1.3157466210714866e-05,
"loss": 0.0111,
"step": 18100
},
{
"epoch": 2.22276502198339,
"grad_norm": 1.6914150714874268,
"learning_rate": 1.2953916300276827e-05,
"loss": 0.0237,
"step": 18200
},
{
"epoch": 2.2349780166096727,
"grad_norm": 0.3582985997200012,
"learning_rate": 1.2750366389838789e-05,
"loss": 0.0188,
"step": 18300
},
{
"epoch": 2.247191011235955,
"grad_norm": 0.9769508838653564,
"learning_rate": 1.254681647940075e-05,
"loss": 0.024,
"step": 18400
},
{
"epoch": 2.2594040058622373,
"grad_norm": 0.03454025089740753,
"learning_rate": 1.2343266568962711e-05,
"loss": 0.0307,
"step": 18500
},
{
"epoch": 2.27161700048852,
"grad_norm": 0.0919230654835701,
"learning_rate": 1.2139716658524671e-05,
"loss": 0.0183,
"step": 18600
},
{
"epoch": 2.283829995114802,
"grad_norm": 0.05342525988817215,
"learning_rate": 1.1936166748086632e-05,
"loss": 0.0295,
"step": 18700
},
{
"epoch": 2.2960429897410846,
"grad_norm": 0.11520762741565704,
"learning_rate": 1.1732616837648592e-05,
"loss": 0.0187,
"step": 18800
},
{
"epoch": 2.308255984367367,
"grad_norm": 1.8612200021743774,
"learning_rate": 1.1529066927210551e-05,
"loss": 0.0228,
"step": 18900
},
{
"epoch": 2.3204689789936492,
"grad_norm": 0.9779945611953735,
"learning_rate": 1.1325517016772513e-05,
"loss": 0.0182,
"step": 19000
},
{
"epoch": 2.332681973619932,
"grad_norm": 1.9669654369354248,
"learning_rate": 1.1121967106334474e-05,
"loss": 0.0247,
"step": 19100
},
{
"epoch": 2.344894968246214,
"grad_norm": 0.1722841113805771,
"learning_rate": 1.0918417195896434e-05,
"loss": 0.0206,
"step": 19200
},
{
"epoch": 2.3571079628724965,
"grad_norm": 0.1652793437242508,
"learning_rate": 1.0714867285458395e-05,
"loss": 0.0146,
"step": 19300
},
{
"epoch": 2.3693209574987786,
"grad_norm": 0.07285087555646896,
"learning_rate": 1.0511317375020356e-05,
"loss": 0.0151,
"step": 19400
},
{
"epoch": 2.381533952125061,
"grad_norm": 2.59061861038208,
"learning_rate": 1.0307767464582316e-05,
"loss": 0.0192,
"step": 19500
},
{
"epoch": 2.3937469467513433,
"grad_norm": 0.02776254341006279,
"learning_rate": 1.0104217554144277e-05,
"loss": 0.0245,
"step": 19600
},
{
"epoch": 2.405959941377626,
"grad_norm": 0.48207101225852966,
"learning_rate": 9.900667643706239e-06,
"loss": 0.0147,
"step": 19700
},
{
"epoch": 2.4181729360039084,
"grad_norm": 0.7725105285644531,
"learning_rate": 9.697117733268198e-06,
"loss": 0.0206,
"step": 19800
},
{
"epoch": 2.4303859306301905,
"grad_norm": 1.8201816082000732,
"learning_rate": 9.493567822830158e-06,
"loss": 0.0205,
"step": 19900
},
{
"epoch": 2.442598925256473,
"grad_norm": 0.2930428385734558,
"learning_rate": 9.29001791239212e-06,
"loss": 0.0163,
"step": 20000
},
{
"epoch": 2.454811919882755,
"grad_norm": 0.7441920638084412,
"learning_rate": 9.086468001954079e-06,
"loss": 0.0181,
"step": 20100
},
{
"epoch": 2.4670249145090377,
"grad_norm": 0.5970872640609741,
"learning_rate": 8.88291809151604e-06,
"loss": 0.0172,
"step": 20200
},
{
"epoch": 2.47923790913532,
"grad_norm": 0.17312058806419373,
"learning_rate": 8.679368181078002e-06,
"loss": 0.0163,
"step": 20300
},
{
"epoch": 2.4914509037616024,
"grad_norm": 0.26520836353302,
"learning_rate": 8.475818270639961e-06,
"loss": 0.016,
"step": 20400
},
{
"epoch": 2.5036638983878845,
"grad_norm": 0.08623456209897995,
"learning_rate": 8.272268360201922e-06,
"loss": 0.018,
"step": 20500
},
{
"epoch": 2.515876893014167,
"grad_norm": 0.16404370963573456,
"learning_rate": 8.068718449763882e-06,
"loss": 0.0164,
"step": 20600
},
{
"epoch": 2.5280898876404496,
"grad_norm": 0.051970474421978,
"learning_rate": 7.865168539325843e-06,
"loss": 0.0203,
"step": 20700
},
{
"epoch": 2.5403028822667317,
"grad_norm": 0.08457406610250473,
"learning_rate": 7.661618628887805e-06,
"loss": 0.0211,
"step": 20800
},
{
"epoch": 2.5525158768930143,
"grad_norm": 0.35134220123291016,
"learning_rate": 7.4580687184497635e-06,
"loss": 0.018,
"step": 20900
},
{
"epoch": 2.5647288715192964,
"grad_norm": 0.487570196390152,
"learning_rate": 7.254518808011725e-06,
"loss": 0.0281,
"step": 21000
},
{
"epoch": 2.576941866145579,
"grad_norm": 2.1460368633270264,
"learning_rate": 7.050968897573685e-06,
"loss": 0.0196,
"step": 21100
},
{
"epoch": 2.5891548607718615,
"grad_norm": 0.3036395311355591,
"learning_rate": 6.847418987135645e-06,
"loss": 0.0191,
"step": 21200
},
{
"epoch": 2.6013678553981436,
"grad_norm": 0.3689348101615906,
"learning_rate": 6.643869076697606e-06,
"loss": 0.0173,
"step": 21300
},
{
"epoch": 2.6135808500244258,
"grad_norm": 1.0098440647125244,
"learning_rate": 6.440319166259568e-06,
"loss": 0.0162,
"step": 21400
},
{
"epoch": 2.6257938446507083,
"grad_norm": 0.30733248591423035,
"learning_rate": 6.236769255821528e-06,
"loss": 0.0194,
"step": 21500
},
{
"epoch": 2.638006839276991,
"grad_norm": 0.4835430085659027,
"learning_rate": 6.0332193453834885e-06,
"loss": 0.0295,
"step": 21600
},
{
"epoch": 2.650219833903273,
"grad_norm": 0.041551847010850906,
"learning_rate": 5.829669434945449e-06,
"loss": 0.0209,
"step": 21700
},
{
"epoch": 2.6624328285295555,
"grad_norm": 1.990522027015686,
"learning_rate": 5.6261195245074095e-06,
"loss": 0.0269,
"step": 21800
},
{
"epoch": 2.6746458231558377,
"grad_norm": 0.04139232635498047,
"learning_rate": 5.42256961406937e-06,
"loss": 0.0226,
"step": 21900
},
{
"epoch": 2.68685881778212,
"grad_norm": 0.9341286420822144,
"learning_rate": 5.219019703631331e-06,
"loss": 0.0201,
"step": 22000
},
{
"epoch": 2.6990718124084028,
"grad_norm": 0.11153418570756912,
"learning_rate": 5.015469793193292e-06,
"loss": 0.0222,
"step": 22100
},
{
"epoch": 2.711284807034685,
"grad_norm": 1.0574121475219727,
"learning_rate": 4.811919882755251e-06,
"loss": 0.0212,
"step": 22200
},
{
"epoch": 2.723497801660967,
"grad_norm": 0.9357222318649292,
"learning_rate": 4.608369972317213e-06,
"loss": 0.0219,
"step": 22300
},
{
"epoch": 2.7357107962872496,
"grad_norm": 0.18769215047359467,
"learning_rate": 4.404820061879173e-06,
"loss": 0.0192,
"step": 22400
},
{
"epoch": 2.747923790913532,
"grad_norm": 1.0952208042144775,
"learning_rate": 4.201270151441134e-06,
"loss": 0.0165,
"step": 22500
},
{
"epoch": 2.7601367855398142,
"grad_norm": 0.046009525656700134,
"learning_rate": 3.997720241003094e-06,
"loss": 0.0161,
"step": 22600
},
{
"epoch": 2.772349780166097,
"grad_norm": 0.3359615206718445,
"learning_rate": 3.794170330565055e-06,
"loss": 0.0198,
"step": 22700
},
{
"epoch": 2.784562774792379,
"grad_norm": 0.03583957999944687,
"learning_rate": 3.590620420127015e-06,
"loss": 0.0157,
"step": 22800
},
{
"epoch": 2.7967757694186615,
"grad_norm": 2.4570398330688477,
"learning_rate": 3.3870705096889755e-06,
"loss": 0.0183,
"step": 22900
},
{
"epoch": 2.808988764044944,
"grad_norm": 0.0799492597579956,
"learning_rate": 3.1835205992509364e-06,
"loss": 0.0154,
"step": 23000
},
{
"epoch": 2.821201758671226,
"grad_norm": 0.17097431421279907,
"learning_rate": 2.979970688812897e-06,
"loss": 0.0208,
"step": 23100
},
{
"epoch": 2.8334147532975087,
"grad_norm": 0.042323142290115356,
"learning_rate": 2.776420778374858e-06,
"loss": 0.0107,
"step": 23200
},
{
"epoch": 2.845627747923791,
"grad_norm": 0.6305797100067139,
"learning_rate": 2.5728708679368183e-06,
"loss": 0.0225,
"step": 23300
},
{
"epoch": 2.8578407425500734,
"grad_norm": 0.05080363526940346,
"learning_rate": 2.3693209574987788e-06,
"loss": 0.0238,
"step": 23400
},
{
"epoch": 2.8700537371763555,
"grad_norm": 0.04388800263404846,
"learning_rate": 2.1657710470607397e-06,
"loss": 0.0184,
"step": 23500
},
{
"epoch": 2.882266731802638,
"grad_norm": 2.4991371631622314,
"learning_rate": 1.9622211366226997e-06,
"loss": 0.0196,
"step": 23600
},
{
"epoch": 2.89447972642892,
"grad_norm": 0.059519946575164795,
"learning_rate": 1.7586712261846606e-06,
"loss": 0.0156,
"step": 23700
},
{
"epoch": 2.9066927210552027,
"grad_norm": 0.044085703790187836,
"learning_rate": 1.5551213157466213e-06,
"loss": 0.0161,
"step": 23800
},
{
"epoch": 2.9189057156814853,
"grad_norm": 0.024006502702832222,
"learning_rate": 1.3515714053085818e-06,
"loss": 0.0172,
"step": 23900
},
{
"epoch": 2.9311187103077674,
"grad_norm": 0.7654680609703064,
"learning_rate": 1.1480214948705422e-06,
"loss": 0.0165,
"step": 24000
},
{
"epoch": 2.94333170493405,
"grad_norm": 0.8878483772277832,
"learning_rate": 9.444715844325028e-07,
"loss": 0.0174,
"step": 24100
},
{
"epoch": 2.955544699560332,
"grad_norm": 3.2117550373077393,
"learning_rate": 7.409216739944635e-07,
"loss": 0.0171,
"step": 24200
},
{
"epoch": 2.9677576941866146,
"grad_norm": 0.6114596128463745,
"learning_rate": 5.373717635564241e-07,
"loss": 0.0136,
"step": 24300
},
{
"epoch": 2.979970688812897,
"grad_norm": 0.0984087809920311,
"learning_rate": 3.3382185311838467e-07,
"loss": 0.0166,
"step": 24400
},
{
"epoch": 2.9921836834391793,
"grad_norm": 0.01637178845703602,
"learning_rate": 1.3027194268034525e-07,
"loss": 0.0133,
"step": 24500
},
{
"epoch": 3.0,
"eval_f1": 0.9273861231763003,
"eval_loss": 0.03446226194500923,
"eval_precision": 0.9235803016858918,
"eval_recall": 0.9312234399463207,
"eval_runtime": 62.6474,
"eval_samples_per_second": 130.7,
"eval_steps_per_second": 16.345,
"step": 24564
}
],
"logging_steps": 100,
"max_steps": 24564,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.283942398980096e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}