|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 24564, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.012212994626282364, |
|
"grad_norm": 2.7971887588500977, |
|
"learning_rate": 4.9796450089561964e-05, |
|
"loss": 0.3257, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.024425989252564728, |
|
"grad_norm": 2.295069456100464, |
|
"learning_rate": 4.9592900179123925e-05, |
|
"loss": 0.1159, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03663898387884709, |
|
"grad_norm": 1.1914141178131104, |
|
"learning_rate": 4.9389350268685886e-05, |
|
"loss": 0.1073, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.048851978505129456, |
|
"grad_norm": 0.8838738203048706, |
|
"learning_rate": 4.918580035824785e-05, |
|
"loss": 0.1076, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06106497313141182, |
|
"grad_norm": 1.8016488552093506, |
|
"learning_rate": 4.898225044780981e-05, |
|
"loss": 0.0851, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07327796775769418, |
|
"grad_norm": 2.5403218269348145, |
|
"learning_rate": 4.8778700537371764e-05, |
|
"loss": 0.1119, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.08549096238397655, |
|
"grad_norm": 1.6208165884017944, |
|
"learning_rate": 4.8575150626933725e-05, |
|
"loss": 0.0968, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.09770395701025891, |
|
"grad_norm": 1.7685565948486328, |
|
"learning_rate": 4.8371600716495686e-05, |
|
"loss": 0.0848, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.10991695163654128, |
|
"grad_norm": 5.9024882316589355, |
|
"learning_rate": 4.816805080605765e-05, |
|
"loss": 0.0816, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.12212994626282364, |
|
"grad_norm": 1.4809324741363525, |
|
"learning_rate": 4.796450089561961e-05, |
|
"loss": 0.0815, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.134342940889106, |
|
"grad_norm": 1.8953092098236084, |
|
"learning_rate": 4.776095098518157e-05, |
|
"loss": 0.0835, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.14655593551538837, |
|
"grad_norm": 0.2310028374195099, |
|
"learning_rate": 4.755740107474353e-05, |
|
"loss": 0.0768, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.15876893014167073, |
|
"grad_norm": 4.047482013702393, |
|
"learning_rate": 4.735385116430549e-05, |
|
"loss": 0.0785, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.1709819247679531, |
|
"grad_norm": 5.049030303955078, |
|
"learning_rate": 4.7150301253867454e-05, |
|
"loss": 0.0609, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.18319491939423546, |
|
"grad_norm": 4.447434902191162, |
|
"learning_rate": 4.6946751343429415e-05, |
|
"loss": 0.0625, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.19540791402051783, |
|
"grad_norm": 0.3569432199001312, |
|
"learning_rate": 4.674320143299138e-05, |
|
"loss": 0.0612, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2076209086468002, |
|
"grad_norm": 0.6527674794197083, |
|
"learning_rate": 4.653965152255334e-05, |
|
"loss": 0.07, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.21983390327308255, |
|
"grad_norm": 5.448887825012207, |
|
"learning_rate": 4.633610161211529e-05, |
|
"loss": 0.0669, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.23204689789936492, |
|
"grad_norm": 1.804388165473938, |
|
"learning_rate": 4.6132551701677254e-05, |
|
"loss": 0.0636, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.24425989252564728, |
|
"grad_norm": 1.574012279510498, |
|
"learning_rate": 4.5929001791239215e-05, |
|
"loss": 0.0798, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2564728871519297, |
|
"grad_norm": 0.8235198259353638, |
|
"learning_rate": 4.5725451880801177e-05, |
|
"loss": 0.054, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.268685881778212, |
|
"grad_norm": 0.6802400350570679, |
|
"learning_rate": 4.552190197036313e-05, |
|
"loss": 0.0613, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.2808988764044944, |
|
"grad_norm": 0.5732834935188293, |
|
"learning_rate": 4.531835205992509e-05, |
|
"loss": 0.0544, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.29311187103077674, |
|
"grad_norm": 0.9544196128845215, |
|
"learning_rate": 4.5114802149487054e-05, |
|
"loss": 0.0735, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.30532486565705913, |
|
"grad_norm": 0.8141427040100098, |
|
"learning_rate": 4.4911252239049015e-05, |
|
"loss": 0.0586, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.31753786028334147, |
|
"grad_norm": 3.740103006362915, |
|
"learning_rate": 4.4707702328610976e-05, |
|
"loss": 0.0498, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.32975085490962386, |
|
"grad_norm": 0.24550916254520416, |
|
"learning_rate": 4.450415241817294e-05, |
|
"loss": 0.056, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.3419638495359062, |
|
"grad_norm": 0.7254294157028198, |
|
"learning_rate": 4.43006025077349e-05, |
|
"loss": 0.053, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.3541768441621886, |
|
"grad_norm": 0.5358878970146179, |
|
"learning_rate": 4.409705259729686e-05, |
|
"loss": 0.0561, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.3663898387884709, |
|
"grad_norm": 0.3604901432991028, |
|
"learning_rate": 4.3893502686858815e-05, |
|
"loss": 0.059, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.3786028334147533, |
|
"grad_norm": 0.19227269291877747, |
|
"learning_rate": 4.3689952776420776e-05, |
|
"loss": 0.0548, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.39081582804103565, |
|
"grad_norm": 0.3620028495788574, |
|
"learning_rate": 4.348640286598274e-05, |
|
"loss": 0.0535, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.40302882266731804, |
|
"grad_norm": 4.794760704040527, |
|
"learning_rate": 4.32828529555447e-05, |
|
"loss": 0.0549, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.4152418172936004, |
|
"grad_norm": 0.5320255160331726, |
|
"learning_rate": 4.307930304510666e-05, |
|
"loss": 0.051, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.42745481191988277, |
|
"grad_norm": 2.048164129257202, |
|
"learning_rate": 4.287575313466862e-05, |
|
"loss": 0.067, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.4396678065461651, |
|
"grad_norm": 3.6915972232818604, |
|
"learning_rate": 4.267220322423058e-05, |
|
"loss": 0.0494, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.4518808011724475, |
|
"grad_norm": 0.7861614227294922, |
|
"learning_rate": 4.2468653313792544e-05, |
|
"loss": 0.0652, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.46409379579872984, |
|
"grad_norm": 1.9440407752990723, |
|
"learning_rate": 4.2265103403354505e-05, |
|
"loss": 0.0699, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.4763067904250122, |
|
"grad_norm": 0.15847598016262054, |
|
"learning_rate": 4.206155349291647e-05, |
|
"loss": 0.0575, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.48851978505129456, |
|
"grad_norm": 0.2988128960132599, |
|
"learning_rate": 4.185800358247843e-05, |
|
"loss": 0.0468, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5007327796775769, |
|
"grad_norm": 1.6092756986618042, |
|
"learning_rate": 4.165445367204039e-05, |
|
"loss": 0.0555, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.5129457743038593, |
|
"grad_norm": 13.008635520935059, |
|
"learning_rate": 4.1450903761602344e-05, |
|
"loss": 0.0604, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.5251587689301417, |
|
"grad_norm": 2.7357659339904785, |
|
"learning_rate": 4.1247353851164305e-05, |
|
"loss": 0.0478, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.537371763556424, |
|
"grad_norm": 0.4085894823074341, |
|
"learning_rate": 4.104380394072627e-05, |
|
"loss": 0.0478, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.5495847581827064, |
|
"grad_norm": 1.5472468137741089, |
|
"learning_rate": 4.084025403028823e-05, |
|
"loss": 0.0559, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.5617977528089888, |
|
"grad_norm": 2.616894006729126, |
|
"learning_rate": 4.063670411985019e-05, |
|
"loss": 0.0444, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.5740107474352711, |
|
"grad_norm": 0.7861430644989014, |
|
"learning_rate": 4.043315420941215e-05, |
|
"loss": 0.0431, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.5862237420615535, |
|
"grad_norm": 1.3745653629302979, |
|
"learning_rate": 4.022960429897411e-05, |
|
"loss": 0.0422, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.5984367366878358, |
|
"grad_norm": 1.471048355102539, |
|
"learning_rate": 4.002605438853607e-05, |
|
"loss": 0.052, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.6106497313141183, |
|
"grad_norm": 0.3034818470478058, |
|
"learning_rate": 3.9822504478098035e-05, |
|
"loss": 0.0481, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.6228627259404006, |
|
"grad_norm": 0.5265262722969055, |
|
"learning_rate": 3.9618954567659996e-05, |
|
"loss": 0.0592, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.6350757205666829, |
|
"grad_norm": 1.226517915725708, |
|
"learning_rate": 3.941540465722196e-05, |
|
"loss": 0.0554, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.6472887151929653, |
|
"grad_norm": 0.365315705537796, |
|
"learning_rate": 3.921185474678392e-05, |
|
"loss": 0.0466, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.6595017098192477, |
|
"grad_norm": 0.4508240818977356, |
|
"learning_rate": 3.900830483634587e-05, |
|
"loss": 0.0468, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.67171470444553, |
|
"grad_norm": 0.51467365026474, |
|
"learning_rate": 3.8804754925907834e-05, |
|
"loss": 0.054, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.6839276990718124, |
|
"grad_norm": 0.6186398267745972, |
|
"learning_rate": 3.8601205015469796e-05, |
|
"loss": 0.0554, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.6961406936980947, |
|
"grad_norm": 0.6723864674568176, |
|
"learning_rate": 3.839765510503176e-05, |
|
"loss": 0.0556, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.7083536883243772, |
|
"grad_norm": 3.6368353366851807, |
|
"learning_rate": 3.819410519459372e-05, |
|
"loss": 0.0512, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.7205666829506595, |
|
"grad_norm": 3.5719096660614014, |
|
"learning_rate": 3.799055528415568e-05, |
|
"loss": 0.0545, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.7327796775769418, |
|
"grad_norm": 1.1756514310836792, |
|
"learning_rate": 3.778700537371764e-05, |
|
"loss": 0.0465, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.7449926722032242, |
|
"grad_norm": 1.2159337997436523, |
|
"learning_rate": 3.7583455463279596e-05, |
|
"loss": 0.0463, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.7572056668295066, |
|
"grad_norm": 1.0632232427597046, |
|
"learning_rate": 3.737990555284156e-05, |
|
"loss": 0.0444, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.769418661455789, |
|
"grad_norm": 0.669765293598175, |
|
"learning_rate": 3.717635564240352e-05, |
|
"loss": 0.0433, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.7816316560820713, |
|
"grad_norm": 0.13478492200374603, |
|
"learning_rate": 3.697280573196548e-05, |
|
"loss": 0.0469, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.7938446507083536, |
|
"grad_norm": 0.5963812470436096, |
|
"learning_rate": 3.676925582152744e-05, |
|
"loss": 0.0504, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.8060576453346361, |
|
"grad_norm": 0.5829123258590698, |
|
"learning_rate": 3.6565705911089395e-05, |
|
"loss": 0.0483, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.8182706399609184, |
|
"grad_norm": 2.3114776611328125, |
|
"learning_rate": 3.636215600065136e-05, |
|
"loss": 0.0601, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.8304836345872008, |
|
"grad_norm": 0.27553310990333557, |
|
"learning_rate": 3.615860609021332e-05, |
|
"loss": 0.0527, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.8426966292134831, |
|
"grad_norm": 0.3668135106563568, |
|
"learning_rate": 3.595505617977528e-05, |
|
"loss": 0.0541, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.8549096238397655, |
|
"grad_norm": 3.1305336952209473, |
|
"learning_rate": 3.575150626933724e-05, |
|
"loss": 0.0534, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.8671226184660479, |
|
"grad_norm": 0.08432205021381378, |
|
"learning_rate": 3.55479563588992e-05, |
|
"loss": 0.0523, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.8793356130923302, |
|
"grad_norm": 1.3692104816436768, |
|
"learning_rate": 3.534440644846116e-05, |
|
"loss": 0.0428, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.8915486077186126, |
|
"grad_norm": 1.1145917177200317, |
|
"learning_rate": 3.5140856538023125e-05, |
|
"loss": 0.0528, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.903761602344895, |
|
"grad_norm": 0.07234195619821548, |
|
"learning_rate": 3.4937306627585086e-05, |
|
"loss": 0.0449, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.9159745969711773, |
|
"grad_norm": 0.12755821645259857, |
|
"learning_rate": 3.473375671714705e-05, |
|
"loss": 0.0381, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.9281875915974597, |
|
"grad_norm": 1.066666603088379, |
|
"learning_rate": 3.453020680670901e-05, |
|
"loss": 0.0411, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.940400586223742, |
|
"grad_norm": 2.3837034702301025, |
|
"learning_rate": 3.432665689627097e-05, |
|
"loss": 0.0471, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.9526135808500245, |
|
"grad_norm": 0.2601478397846222, |
|
"learning_rate": 3.4123106985832924e-05, |
|
"loss": 0.0408, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.9648265754763068, |
|
"grad_norm": 1.0532914400100708, |
|
"learning_rate": 3.3919557075394886e-05, |
|
"loss": 0.0422, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.9770395701025891, |
|
"grad_norm": 0.16507047414779663, |
|
"learning_rate": 3.371600716495685e-05, |
|
"loss": 0.0464, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.9892525647288715, |
|
"grad_norm": 0.310465544462204, |
|
"learning_rate": 3.351245725451881e-05, |
|
"loss": 0.056, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.8987804878048782, |
|
"eval_loss": 0.04277478903532028, |
|
"eval_precision": 0.8910749615300066, |
|
"eval_recall": 0.9066204428539477, |
|
"eval_runtime": 133.6334, |
|
"eval_samples_per_second": 61.272, |
|
"eval_steps_per_second": 7.663, |
|
"step": 8188 |
|
}, |
|
{ |
|
"epoch": 1.0014655593551538, |
|
"grad_norm": 0.8648662567138672, |
|
"learning_rate": 3.330890734408077e-05, |
|
"loss": 0.0441, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.0136785539814364, |
|
"grad_norm": 0.16131815314292908, |
|
"learning_rate": 3.310535743364273e-05, |
|
"loss": 0.0306, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.0258915486077187, |
|
"grad_norm": 4.484282970428467, |
|
"learning_rate": 3.290180752320469e-05, |
|
"loss": 0.0383, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.038104543234001, |
|
"grad_norm": 0.5343158841133118, |
|
"learning_rate": 3.2698257612766654e-05, |
|
"loss": 0.0331, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.0503175378602834, |
|
"grad_norm": 0.028084266930818558, |
|
"learning_rate": 3.2494707702328615e-05, |
|
"loss": 0.0381, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.0625305324865657, |
|
"grad_norm": 0.483477920293808, |
|
"learning_rate": 3.2291157791890576e-05, |
|
"loss": 0.0462, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.074743527112848, |
|
"grad_norm": 0.8194773197174072, |
|
"learning_rate": 3.208760788145254e-05, |
|
"loss": 0.0346, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.0869565217391304, |
|
"grad_norm": 0.11062140762805939, |
|
"learning_rate": 3.188405797101449e-05, |
|
"loss": 0.0369, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.0991695163654127, |
|
"grad_norm": 0.300889790058136, |
|
"learning_rate": 3.1680508060576454e-05, |
|
"loss": 0.0297, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.111382510991695, |
|
"grad_norm": 0.6278924345970154, |
|
"learning_rate": 3.1476958150138415e-05, |
|
"loss": 0.0349, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.1235955056179776, |
|
"grad_norm": 0.542029082775116, |
|
"learning_rate": 3.1273408239700376e-05, |
|
"loss": 0.0473, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.13580850024426, |
|
"grad_norm": 0.6147358417510986, |
|
"learning_rate": 3.106985832926234e-05, |
|
"loss": 0.0356, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.1480214948705423, |
|
"grad_norm": 1.301965355873108, |
|
"learning_rate": 3.08663084188243e-05, |
|
"loss": 0.0371, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.1602344894968246, |
|
"grad_norm": 0.026711974292993546, |
|
"learning_rate": 3.066275850838626e-05, |
|
"loss": 0.0346, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.172447484123107, |
|
"grad_norm": 1.258608102798462, |
|
"learning_rate": 3.0459208597948218e-05, |
|
"loss": 0.0334, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.1846604787493893, |
|
"grad_norm": 0.36877045035362244, |
|
"learning_rate": 3.025565868751018e-05, |
|
"loss": 0.032, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.1968734733756716, |
|
"grad_norm": 0.2519334852695465, |
|
"learning_rate": 3.005210877707214e-05, |
|
"loss": 0.0416, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.2090864680019542, |
|
"grad_norm": 0.5204672813415527, |
|
"learning_rate": 2.9848558866634102e-05, |
|
"loss": 0.0366, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.2212994626282365, |
|
"grad_norm": 0.43101000785827637, |
|
"learning_rate": 2.9645008956196063e-05, |
|
"loss": 0.0379, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.2335124572545189, |
|
"grad_norm": 0.681117057800293, |
|
"learning_rate": 2.9441459045758018e-05, |
|
"loss": 0.0353, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.2457254518808012, |
|
"grad_norm": 0.49889543652534485, |
|
"learning_rate": 2.923790913531998e-05, |
|
"loss": 0.0394, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.2579384465070835, |
|
"grad_norm": 0.8064567446708679, |
|
"learning_rate": 2.903435922488194e-05, |
|
"loss": 0.034, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.2701514411333659, |
|
"grad_norm": 0.21315552294254303, |
|
"learning_rate": 2.8830809314443902e-05, |
|
"loss": 0.0292, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.2823644357596482, |
|
"grad_norm": 1.6466035842895508, |
|
"learning_rate": 2.8627259404005863e-05, |
|
"loss": 0.0333, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.2945774303859308, |
|
"grad_norm": 1.1944749355316162, |
|
"learning_rate": 2.8423709493567825e-05, |
|
"loss": 0.0357, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.3067904250122129, |
|
"grad_norm": 0.6488074064254761, |
|
"learning_rate": 2.8220159583129786e-05, |
|
"loss": 0.0315, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.3190034196384954, |
|
"grad_norm": 0.030384689569473267, |
|
"learning_rate": 2.8016609672691747e-05, |
|
"loss": 0.0422, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.3312164142647778, |
|
"grad_norm": 0.2363937795162201, |
|
"learning_rate": 2.781305976225371e-05, |
|
"loss": 0.0335, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.34342940889106, |
|
"grad_norm": 0.04548358544707298, |
|
"learning_rate": 2.7609509851815666e-05, |
|
"loss": 0.0371, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.3556424035173424, |
|
"grad_norm": 0.6259112358093262, |
|
"learning_rate": 2.7405959941377628e-05, |
|
"loss": 0.0412, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.3678553981436248, |
|
"grad_norm": 0.42487379908561707, |
|
"learning_rate": 2.720241003093959e-05, |
|
"loss": 0.0252, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.3800683927699071, |
|
"grad_norm": 0.9125863313674927, |
|
"learning_rate": 2.6998860120501547e-05, |
|
"loss": 0.0367, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 1.3922813873961895, |
|
"grad_norm": 0.7670263051986694, |
|
"learning_rate": 2.679531021006351e-05, |
|
"loss": 0.0288, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.404494382022472, |
|
"grad_norm": 0.1614452451467514, |
|
"learning_rate": 2.6591760299625466e-05, |
|
"loss": 0.0399, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.4167073766487543, |
|
"grad_norm": 3.3551249504089355, |
|
"learning_rate": 2.6388210389187428e-05, |
|
"loss": 0.0391, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.4289203712750367, |
|
"grad_norm": 0.7188284397125244, |
|
"learning_rate": 2.618466047874939e-05, |
|
"loss": 0.0315, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 1.441133365901319, |
|
"grad_norm": 0.48031413555145264, |
|
"learning_rate": 2.598111056831135e-05, |
|
"loss": 0.0244, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.4533463605276014, |
|
"grad_norm": 0.7492583394050598, |
|
"learning_rate": 2.577756065787331e-05, |
|
"loss": 0.0359, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 1.4655593551538837, |
|
"grad_norm": 0.6593573689460754, |
|
"learning_rate": 2.5574010747435273e-05, |
|
"loss": 0.0327, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.477772349780166, |
|
"grad_norm": 0.2940855026245117, |
|
"learning_rate": 2.5370460836997234e-05, |
|
"loss": 0.0336, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 1.4899853444064486, |
|
"grad_norm": 0.45900267362594604, |
|
"learning_rate": 2.5166910926559195e-05, |
|
"loss": 0.0242, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.5021983390327307, |
|
"grad_norm": 2.2023909091949463, |
|
"learning_rate": 2.4963361016121153e-05, |
|
"loss": 0.0348, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 1.5144113336590133, |
|
"grad_norm": 0.12489739805459976, |
|
"learning_rate": 2.4759811105683115e-05, |
|
"loss": 0.0335, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.5266243282852956, |
|
"grad_norm": 1.0575867891311646, |
|
"learning_rate": 2.4556261195245076e-05, |
|
"loss": 0.0199, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.538837322911578, |
|
"grad_norm": 1.7309564352035522, |
|
"learning_rate": 2.4352711284807037e-05, |
|
"loss": 0.0316, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.5510503175378603, |
|
"grad_norm": 0.925658643245697, |
|
"learning_rate": 2.4149161374369e-05, |
|
"loss": 0.0445, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 1.5632633121641426, |
|
"grad_norm": 0.48667579889297485, |
|
"learning_rate": 2.3945611463930957e-05, |
|
"loss": 0.0437, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.5754763067904252, |
|
"grad_norm": 0.11213243752717972, |
|
"learning_rate": 2.3742061553492918e-05, |
|
"loss": 0.0387, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 1.5876893014167073, |
|
"grad_norm": 0.14116732776165009, |
|
"learning_rate": 2.353851164305488e-05, |
|
"loss": 0.033, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.5999022960429898, |
|
"grad_norm": 0.686268150806427, |
|
"learning_rate": 2.333496173261684e-05, |
|
"loss": 0.0281, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 1.612115290669272, |
|
"grad_norm": 0.4795430898666382, |
|
"learning_rate": 2.31314118221788e-05, |
|
"loss": 0.0436, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.6243282852955545, |
|
"grad_norm": 0.026416413486003876, |
|
"learning_rate": 2.292786191174076e-05, |
|
"loss": 0.0343, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 1.6365412799218368, |
|
"grad_norm": 0.582073986530304, |
|
"learning_rate": 2.2724312001302718e-05, |
|
"loss": 0.0312, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.6487542745481192, |
|
"grad_norm": 1.669487476348877, |
|
"learning_rate": 2.252076209086468e-05, |
|
"loss": 0.0384, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.6609672691744015, |
|
"grad_norm": 0.19379857182502747, |
|
"learning_rate": 2.231721218042664e-05, |
|
"loss": 0.0322, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.6731802638006839, |
|
"grad_norm": 4.540911674499512, |
|
"learning_rate": 2.2113662269988602e-05, |
|
"loss": 0.0363, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 1.6853932584269664, |
|
"grad_norm": 0.24804505705833435, |
|
"learning_rate": 2.1910112359550563e-05, |
|
"loss": 0.0326, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.6976062530532485, |
|
"grad_norm": 1.8535521030426025, |
|
"learning_rate": 2.1706562449112524e-05, |
|
"loss": 0.0316, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 1.709819247679531, |
|
"grad_norm": 0.04862889647483826, |
|
"learning_rate": 2.1503012538674482e-05, |
|
"loss": 0.0248, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.7220322423058134, |
|
"grad_norm": 0.3953320384025574, |
|
"learning_rate": 2.1299462628236444e-05, |
|
"loss": 0.0393, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 1.7342452369320958, |
|
"grad_norm": 0.5966042876243591, |
|
"learning_rate": 2.1095912717798405e-05, |
|
"loss": 0.0358, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.746458231558378, |
|
"grad_norm": 0.1555975377559662, |
|
"learning_rate": 2.0892362807360366e-05, |
|
"loss": 0.0425, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 1.7586712261846604, |
|
"grad_norm": 0.8556230068206787, |
|
"learning_rate": 2.0688812896922328e-05, |
|
"loss": 0.0267, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.770884220810943, |
|
"grad_norm": 0.03833279386162758, |
|
"learning_rate": 2.048526298648429e-05, |
|
"loss": 0.034, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.783097215437225, |
|
"grad_norm": 0.043861281126737595, |
|
"learning_rate": 2.0281713076046247e-05, |
|
"loss": 0.0288, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.7953102100635077, |
|
"grad_norm": 0.28712257742881775, |
|
"learning_rate": 2.0078163165608208e-05, |
|
"loss": 0.0285, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 1.8075232046897898, |
|
"grad_norm": 1.3535864353179932, |
|
"learning_rate": 1.987461325517017e-05, |
|
"loss": 0.0377, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.8197361993160723, |
|
"grad_norm": 3.164818048477173, |
|
"learning_rate": 1.967106334473213e-05, |
|
"loss": 0.0334, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 1.8319491939423547, |
|
"grad_norm": 0.08736918866634369, |
|
"learning_rate": 1.9467513434294092e-05, |
|
"loss": 0.0294, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.844162188568637, |
|
"grad_norm": 1.25545072555542, |
|
"learning_rate": 1.926396352385605e-05, |
|
"loss": 0.0285, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 1.8563751831949193, |
|
"grad_norm": 0.030480826273560524, |
|
"learning_rate": 1.906041361341801e-05, |
|
"loss": 0.0328, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 1.8685881778212017, |
|
"grad_norm": 1.6334197521209717, |
|
"learning_rate": 1.8856863702979973e-05, |
|
"loss": 0.037, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 1.8808011724474842, |
|
"grad_norm": 1.2553733587265015, |
|
"learning_rate": 1.865331379254193e-05, |
|
"loss": 0.0256, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 1.8930141670737664, |
|
"grad_norm": 0.061297524720430374, |
|
"learning_rate": 1.8449763882103892e-05, |
|
"loss": 0.0276, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.905227161700049, |
|
"grad_norm": 1.0915943384170532, |
|
"learning_rate": 1.8246213971665853e-05, |
|
"loss": 0.0362, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 1.9174401563263312, |
|
"grad_norm": 0.020990842953324318, |
|
"learning_rate": 1.8042664061227815e-05, |
|
"loss": 0.025, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 1.9296531509526136, |
|
"grad_norm": 0.09211856126785278, |
|
"learning_rate": 1.7839114150789773e-05, |
|
"loss": 0.0265, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 1.941866145578896, |
|
"grad_norm": 1.5800979137420654, |
|
"learning_rate": 1.7635564240351734e-05, |
|
"loss": 0.0256, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 1.9540791402051783, |
|
"grad_norm": 0.39250850677490234, |
|
"learning_rate": 1.7432014329913695e-05, |
|
"loss": 0.0249, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.9662921348314608, |
|
"grad_norm": 0.8597753047943115, |
|
"learning_rate": 1.7228464419475657e-05, |
|
"loss": 0.0355, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 1.978505129457743, |
|
"grad_norm": 0.16734100878238678, |
|
"learning_rate": 1.7024914509037618e-05, |
|
"loss": 0.032, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 1.9907181240840255, |
|
"grad_norm": 0.11750225722789764, |
|
"learning_rate": 1.682136459859958e-05, |
|
"loss": 0.0227, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.9197428223035141, |
|
"eval_loss": 0.036899276077747345, |
|
"eval_precision": 0.9117582417582417, |
|
"eval_recall": 0.9278684857973608, |
|
"eval_runtime": 75.3931, |
|
"eval_samples_per_second": 108.604, |
|
"eval_steps_per_second": 13.582, |
|
"step": 16376 |
|
}, |
|
{ |
|
"epoch": 2.0029311187103076, |
|
"grad_norm": 0.6276179552078247, |
|
"learning_rate": 1.6617814688161537e-05, |
|
"loss": 0.0326, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 2.01514411333659, |
|
"grad_norm": 0.27882876992225647, |
|
"learning_rate": 1.64142647777235e-05, |
|
"loss": 0.0206, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.0273571079628727, |
|
"grad_norm": 0.9930168986320496, |
|
"learning_rate": 1.621071486728546e-05, |
|
"loss": 0.0135, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.039570102589155, |
|
"grad_norm": 0.21392406523227692, |
|
"learning_rate": 1.600716495684742e-05, |
|
"loss": 0.028, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 2.0517830972154374, |
|
"grad_norm": 2.1995363235473633, |
|
"learning_rate": 1.5803615046409382e-05, |
|
"loss": 0.0273, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 2.0639960918417195, |
|
"grad_norm": 1.91357421875, |
|
"learning_rate": 1.560006513597134e-05, |
|
"loss": 0.0152, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 2.076209086468002, |
|
"grad_norm": 0.057265687733888626, |
|
"learning_rate": 1.53965152255333e-05, |
|
"loss": 0.0206, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.088422081094284, |
|
"grad_norm": 0.05291162431240082, |
|
"learning_rate": 1.5192965315095261e-05, |
|
"loss": 0.022, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 2.1006350757205667, |
|
"grad_norm": 2.424394369125366, |
|
"learning_rate": 1.4989415404657223e-05, |
|
"loss": 0.0178, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 2.112848070346849, |
|
"grad_norm": 8.053882598876953, |
|
"learning_rate": 1.4785865494219184e-05, |
|
"loss": 0.0256, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 2.1250610649731314, |
|
"grad_norm": 1.606079339981079, |
|
"learning_rate": 1.4582315583781145e-05, |
|
"loss": 0.017, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 2.137274059599414, |
|
"grad_norm": 0.26984503865242004, |
|
"learning_rate": 1.4378765673343103e-05, |
|
"loss": 0.0202, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.149487054225696, |
|
"grad_norm": 0.044966306537389755, |
|
"learning_rate": 1.4175215762905064e-05, |
|
"loss": 0.0234, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.1617000488519786, |
|
"grad_norm": 0.05067300796508789, |
|
"learning_rate": 1.3971665852467026e-05, |
|
"loss": 0.0263, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 2.1739130434782608, |
|
"grad_norm": 0.5125128030776978, |
|
"learning_rate": 1.3768115942028985e-05, |
|
"loss": 0.0216, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.1861260381045433, |
|
"grad_norm": 0.04719540849328041, |
|
"learning_rate": 1.3564566031590947e-05, |
|
"loss": 0.0256, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 2.1983390327308254, |
|
"grad_norm": 0.11627175658941269, |
|
"learning_rate": 1.3361016121152908e-05, |
|
"loss": 0.0185, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.210552027357108, |
|
"grad_norm": 0.2016720473766327, |
|
"learning_rate": 1.3157466210714866e-05, |
|
"loss": 0.0111, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 2.22276502198339, |
|
"grad_norm": 1.6914150714874268, |
|
"learning_rate": 1.2953916300276827e-05, |
|
"loss": 0.0237, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.2349780166096727, |
|
"grad_norm": 0.3582985997200012, |
|
"learning_rate": 1.2750366389838789e-05, |
|
"loss": 0.0188, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 2.247191011235955, |
|
"grad_norm": 0.9769508838653564, |
|
"learning_rate": 1.254681647940075e-05, |
|
"loss": 0.024, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.2594040058622373, |
|
"grad_norm": 0.03454025089740753, |
|
"learning_rate": 1.2343266568962711e-05, |
|
"loss": 0.0307, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.27161700048852, |
|
"grad_norm": 0.0919230654835701, |
|
"learning_rate": 1.2139716658524671e-05, |
|
"loss": 0.0183, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 2.283829995114802, |
|
"grad_norm": 0.05342525988817215, |
|
"learning_rate": 1.1936166748086632e-05, |
|
"loss": 0.0295, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 2.2960429897410846, |
|
"grad_norm": 0.11520762741565704, |
|
"learning_rate": 1.1732616837648592e-05, |
|
"loss": 0.0187, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 2.308255984367367, |
|
"grad_norm": 1.8612200021743774, |
|
"learning_rate": 1.1529066927210551e-05, |
|
"loss": 0.0228, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 2.3204689789936492, |
|
"grad_norm": 0.9779945611953735, |
|
"learning_rate": 1.1325517016772513e-05, |
|
"loss": 0.0182, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.332681973619932, |
|
"grad_norm": 1.9669654369354248, |
|
"learning_rate": 1.1121967106334474e-05, |
|
"loss": 0.0247, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 2.344894968246214, |
|
"grad_norm": 0.1722841113805771, |
|
"learning_rate": 1.0918417195896434e-05, |
|
"loss": 0.0206, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 2.3571079628724965, |
|
"grad_norm": 0.1652793437242508, |
|
"learning_rate": 1.0714867285458395e-05, |
|
"loss": 0.0146, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 2.3693209574987786, |
|
"grad_norm": 0.07285087555646896, |
|
"learning_rate": 1.0511317375020356e-05, |
|
"loss": 0.0151, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 2.381533952125061, |
|
"grad_norm": 2.59061861038208, |
|
"learning_rate": 1.0307767464582316e-05, |
|
"loss": 0.0192, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.3937469467513433, |
|
"grad_norm": 0.02776254341006279, |
|
"learning_rate": 1.0104217554144277e-05, |
|
"loss": 0.0245, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 2.405959941377626, |
|
"grad_norm": 0.48207101225852966, |
|
"learning_rate": 9.900667643706239e-06, |
|
"loss": 0.0147, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 2.4181729360039084, |
|
"grad_norm": 0.7725105285644531, |
|
"learning_rate": 9.697117733268198e-06, |
|
"loss": 0.0206, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 2.4303859306301905, |
|
"grad_norm": 1.8201816082000732, |
|
"learning_rate": 9.493567822830158e-06, |
|
"loss": 0.0205, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 2.442598925256473, |
|
"grad_norm": 0.2930428385734558, |
|
"learning_rate": 9.29001791239212e-06, |
|
"loss": 0.0163, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.454811919882755, |
|
"grad_norm": 0.7441920638084412, |
|
"learning_rate": 9.086468001954079e-06, |
|
"loss": 0.0181, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 2.4670249145090377, |
|
"grad_norm": 0.5970872640609741, |
|
"learning_rate": 8.88291809151604e-06, |
|
"loss": 0.0172, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 2.47923790913532, |
|
"grad_norm": 0.17312058806419373, |
|
"learning_rate": 8.679368181078002e-06, |
|
"loss": 0.0163, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 2.4914509037616024, |
|
"grad_norm": 0.26520836353302, |
|
"learning_rate": 8.475818270639961e-06, |
|
"loss": 0.016, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 2.5036638983878845, |
|
"grad_norm": 0.08623456209897995, |
|
"learning_rate": 8.272268360201922e-06, |
|
"loss": 0.018, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.515876893014167, |
|
"grad_norm": 0.16404370963573456, |
|
"learning_rate": 8.068718449763882e-06, |
|
"loss": 0.0164, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 2.5280898876404496, |
|
"grad_norm": 0.051970474421978, |
|
"learning_rate": 7.865168539325843e-06, |
|
"loss": 0.0203, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 2.5403028822667317, |
|
"grad_norm": 0.08457406610250473, |
|
"learning_rate": 7.661618628887805e-06, |
|
"loss": 0.0211, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 2.5525158768930143, |
|
"grad_norm": 0.35134220123291016, |
|
"learning_rate": 7.4580687184497635e-06, |
|
"loss": 0.018, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 2.5647288715192964, |
|
"grad_norm": 0.487570196390152, |
|
"learning_rate": 7.254518808011725e-06, |
|
"loss": 0.0281, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.576941866145579, |
|
"grad_norm": 2.1460368633270264, |
|
"learning_rate": 7.050968897573685e-06, |
|
"loss": 0.0196, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 2.5891548607718615, |
|
"grad_norm": 0.3036395311355591, |
|
"learning_rate": 6.847418987135645e-06, |
|
"loss": 0.0191, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 2.6013678553981436, |
|
"grad_norm": 0.3689348101615906, |
|
"learning_rate": 6.643869076697606e-06, |
|
"loss": 0.0173, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 2.6135808500244258, |
|
"grad_norm": 1.0098440647125244, |
|
"learning_rate": 6.440319166259568e-06, |
|
"loss": 0.0162, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 2.6257938446507083, |
|
"grad_norm": 0.30733248591423035, |
|
"learning_rate": 6.236769255821528e-06, |
|
"loss": 0.0194, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.638006839276991, |
|
"grad_norm": 0.4835430085659027, |
|
"learning_rate": 6.0332193453834885e-06, |
|
"loss": 0.0295, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 2.650219833903273, |
|
"grad_norm": 0.041551847010850906, |
|
"learning_rate": 5.829669434945449e-06, |
|
"loss": 0.0209, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 2.6624328285295555, |
|
"grad_norm": 1.990522027015686, |
|
"learning_rate": 5.6261195245074095e-06, |
|
"loss": 0.0269, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 2.6746458231558377, |
|
"grad_norm": 0.04139232635498047, |
|
"learning_rate": 5.42256961406937e-06, |
|
"loss": 0.0226, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 2.68685881778212, |
|
"grad_norm": 0.9341286420822144, |
|
"learning_rate": 5.219019703631331e-06, |
|
"loss": 0.0201, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.6990718124084028, |
|
"grad_norm": 0.11153418570756912, |
|
"learning_rate": 5.015469793193292e-06, |
|
"loss": 0.0222, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 2.711284807034685, |
|
"grad_norm": 1.0574121475219727, |
|
"learning_rate": 4.811919882755251e-06, |
|
"loss": 0.0212, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 2.723497801660967, |
|
"grad_norm": 0.9357222318649292, |
|
"learning_rate": 4.608369972317213e-06, |
|
"loss": 0.0219, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 2.7357107962872496, |
|
"grad_norm": 0.18769215047359467, |
|
"learning_rate": 4.404820061879173e-06, |
|
"loss": 0.0192, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 2.747923790913532, |
|
"grad_norm": 1.0952208042144775, |
|
"learning_rate": 4.201270151441134e-06, |
|
"loss": 0.0165, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.7601367855398142, |
|
"grad_norm": 0.046009525656700134, |
|
"learning_rate": 3.997720241003094e-06, |
|
"loss": 0.0161, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 2.772349780166097, |
|
"grad_norm": 0.3359615206718445, |
|
"learning_rate": 3.794170330565055e-06, |
|
"loss": 0.0198, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 2.784562774792379, |
|
"grad_norm": 0.03583957999944687, |
|
"learning_rate": 3.590620420127015e-06, |
|
"loss": 0.0157, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 2.7967757694186615, |
|
"grad_norm": 2.4570398330688477, |
|
"learning_rate": 3.3870705096889755e-06, |
|
"loss": 0.0183, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 2.808988764044944, |
|
"grad_norm": 0.0799492597579956, |
|
"learning_rate": 3.1835205992509364e-06, |
|
"loss": 0.0154, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.821201758671226, |
|
"grad_norm": 0.17097431421279907, |
|
"learning_rate": 2.979970688812897e-06, |
|
"loss": 0.0208, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 2.8334147532975087, |
|
"grad_norm": 0.042323142290115356, |
|
"learning_rate": 2.776420778374858e-06, |
|
"loss": 0.0107, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 2.845627747923791, |
|
"grad_norm": 0.6305797100067139, |
|
"learning_rate": 2.5728708679368183e-06, |
|
"loss": 0.0225, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 2.8578407425500734, |
|
"grad_norm": 0.05080363526940346, |
|
"learning_rate": 2.3693209574987788e-06, |
|
"loss": 0.0238, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 2.8700537371763555, |
|
"grad_norm": 0.04388800263404846, |
|
"learning_rate": 2.1657710470607397e-06, |
|
"loss": 0.0184, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.882266731802638, |
|
"grad_norm": 2.4991371631622314, |
|
"learning_rate": 1.9622211366226997e-06, |
|
"loss": 0.0196, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 2.89447972642892, |
|
"grad_norm": 0.059519946575164795, |
|
"learning_rate": 1.7586712261846606e-06, |
|
"loss": 0.0156, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 2.9066927210552027, |
|
"grad_norm": 0.044085703790187836, |
|
"learning_rate": 1.5551213157466213e-06, |
|
"loss": 0.0161, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 2.9189057156814853, |
|
"grad_norm": 0.024006502702832222, |
|
"learning_rate": 1.3515714053085818e-06, |
|
"loss": 0.0172, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 2.9311187103077674, |
|
"grad_norm": 0.7654680609703064, |
|
"learning_rate": 1.1480214948705422e-06, |
|
"loss": 0.0165, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.94333170493405, |
|
"grad_norm": 0.8878483772277832, |
|
"learning_rate": 9.444715844325028e-07, |
|
"loss": 0.0174, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 2.955544699560332, |
|
"grad_norm": 3.2117550373077393, |
|
"learning_rate": 7.409216739944635e-07, |
|
"loss": 0.0171, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 2.9677576941866146, |
|
"grad_norm": 0.6114596128463745, |
|
"learning_rate": 5.373717635564241e-07, |
|
"loss": 0.0136, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 2.979970688812897, |
|
"grad_norm": 0.0984087809920311, |
|
"learning_rate": 3.3382185311838467e-07, |
|
"loss": 0.0166, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 2.9921836834391793, |
|
"grad_norm": 0.01637178845703602, |
|
"learning_rate": 1.3027194268034525e-07, |
|
"loss": 0.0133, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.9273861231763003, |
|
"eval_loss": 0.03446226194500923, |
|
"eval_precision": 0.9235803016858918, |
|
"eval_recall": 0.9312234399463207, |
|
"eval_runtime": 62.6474, |
|
"eval_samples_per_second": 130.7, |
|
"eval_steps_per_second": 16.345, |
|
"step": 24564 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 24564, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.283942398980096e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|