|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 20, |
|
"global_step": 12178, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0016424069473813874, |
|
"grad_norm": 0.4667005240917206, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9661, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.003284813894762775, |
|
"grad_norm": 0.5031771063804626, |
|
"learning_rate": 0.0002, |
|
"loss": 1.602, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.004927220842144162, |
|
"grad_norm": 0.4090685546398163, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4703, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.00656962778952555, |
|
"grad_norm": 0.4099690020084381, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3652, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.008212034736906937, |
|
"grad_norm": 0.4610142111778259, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4386, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.009854441684288324, |
|
"grad_norm": 0.3908289968967438, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3151, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.011496848631669712, |
|
"grad_norm": 0.4541659951210022, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1233, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0131392555790511, |
|
"grad_norm": 0.43324407935142517, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1266, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.014781662526432487, |
|
"grad_norm": 0.3396519720554352, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1004, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.016424069473813873, |
|
"grad_norm": 0.5125846266746521, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1258, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01806647642119526, |
|
"grad_norm": 0.4572688937187195, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1796, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.01970888336857665, |
|
"grad_norm": 0.434186190366745, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1016, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.021351290315958036, |
|
"grad_norm": 0.5205552577972412, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0419, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.022993697263339424, |
|
"grad_norm": 0.3958785831928253, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9515, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.02463610421072081, |
|
"grad_norm": 0.46327391266822815, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0079, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0262785111581022, |
|
"grad_norm": 0.39861008524894714, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9755, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.027920918105483587, |
|
"grad_norm": 0.42074650526046753, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9435, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.029563325052864974, |
|
"grad_norm": 0.41754183173179626, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9376, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.031205732000246362, |
|
"grad_norm": 0.3933572769165039, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9489, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.032848138947627746, |
|
"grad_norm": 0.4244033992290497, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9759, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.034490545895009134, |
|
"grad_norm": 0.3638761639595032, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9371, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.03613295284239052, |
|
"grad_norm": 0.4706399738788605, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8464, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.03777535978977191, |
|
"grad_norm": 0.4349803328514099, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8918, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.0394177667371533, |
|
"grad_norm": 0.3831111490726471, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8366, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.041060173684534684, |
|
"grad_norm": 0.4122432470321655, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8444, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04270258063191607, |
|
"grad_norm": 0.3296256959438324, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8301, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.04434498757929746, |
|
"grad_norm": 0.3447166979312897, |
|
"learning_rate": 0.0002, |
|
"loss": 0.857, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.04598739452667885, |
|
"grad_norm": 0.4408610761165619, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8356, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.047629801474060235, |
|
"grad_norm": 0.4657248854637146, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7525, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.04927220842144162, |
|
"grad_norm": 0.35138434171676636, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7486, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05091461536882301, |
|
"grad_norm": 0.4687822461128235, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8169, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.0525570223162044, |
|
"grad_norm": 0.465108186006546, |
|
"learning_rate": 0.0002, |
|
"loss": 0.738, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.054199429263585785, |
|
"grad_norm": 0.3954925835132599, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7627, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.05584183621096717, |
|
"grad_norm": 0.5010778307914734, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7273, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.05748424315834856, |
|
"grad_norm": 0.6221648454666138, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7506, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.05912665010572995, |
|
"grad_norm": 0.4075715243816376, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7587, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.060769057053111336, |
|
"grad_norm": 0.4346787631511688, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7627, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.062411464000492724, |
|
"grad_norm": 0.4146323800086975, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6642, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.06405387094787411, |
|
"grad_norm": 0.4093219041824341, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7148, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.06569627789525549, |
|
"grad_norm": 0.4016498327255249, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6522, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06733868484263689, |
|
"grad_norm": 0.436252236366272, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6884, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.06898109179001827, |
|
"grad_norm": 0.4362093508243561, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7185, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.07062349873739966, |
|
"grad_norm": 0.42092448472976685, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6702, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.07226590568478104, |
|
"grad_norm": 0.4649953842163086, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6753, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.07390831263216244, |
|
"grad_norm": 0.4321405589580536, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6578, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.07555071957954382, |
|
"grad_norm": 0.5045340657234192, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6993, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.07719312652692521, |
|
"grad_norm": 0.5063377022743225, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6654, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.0788355334743066, |
|
"grad_norm": 0.41710513830184937, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6264, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.08047794042168799, |
|
"grad_norm": 0.4204249083995819, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6683, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.08212034736906937, |
|
"grad_norm": 0.44983726739883423, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6592, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08376275431645076, |
|
"grad_norm": 0.5991094708442688, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6197, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.08540516126383214, |
|
"grad_norm": 0.3672972619533539, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5656, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.08704756821121354, |
|
"grad_norm": 0.503656804561615, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6017, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.08868997515859492, |
|
"grad_norm": 0.49204686284065247, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6421, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.09033238210597631, |
|
"grad_norm": 0.45617127418518066, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6176, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.0919747890533577, |
|
"grad_norm": 0.49607595801353455, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5595, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.09361719600073909, |
|
"grad_norm": 0.39171984791755676, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5479, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.09525960294812047, |
|
"grad_norm": 0.4964667558670044, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5937, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.09690200989550186, |
|
"grad_norm": 0.40392565727233887, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5888, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.09854441684288325, |
|
"grad_norm": 0.4721887409687042, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5345, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.10018682379026464, |
|
"grad_norm": 0.4130144417285919, |
|
"learning_rate": 0.0002, |
|
"loss": 0.599, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.10182923073764602, |
|
"grad_norm": 0.4222985506057739, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5762, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.1034716376850274, |
|
"grad_norm": 0.47171750664711, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5619, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.1051140446324088, |
|
"grad_norm": 0.40906137228012085, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5137, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.10675645157979018, |
|
"grad_norm": 0.43774527311325073, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5888, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.10839885852717157, |
|
"grad_norm": 0.5423911213874817, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5409, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.11004126547455295, |
|
"grad_norm": 0.4405030906200409, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5248, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.11168367242193435, |
|
"grad_norm": 0.4299491345882416, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5196, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.11332607936931573, |
|
"grad_norm": 0.5445800423622131, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5524, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.11496848631669712, |
|
"grad_norm": 0.42257580161094666, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5266, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.1166108932640785, |
|
"grad_norm": 0.4614318907260895, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5593, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.1182533002114599, |
|
"grad_norm": 0.5021907687187195, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5183, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.11989570715884128, |
|
"grad_norm": 0.39399659633636475, |
|
"learning_rate": 0.0002, |
|
"loss": 0.516, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.12153811410622267, |
|
"grad_norm": 0.5128427743911743, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5067, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.12318052105360405, |
|
"grad_norm": 0.41359153389930725, |
|
"learning_rate": 0.0002, |
|
"loss": 0.508, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.12482292800098545, |
|
"grad_norm": 0.5723029375076294, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4955, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.12646533494836684, |
|
"grad_norm": 0.4619792699813843, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5398, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.12810774189574822, |
|
"grad_norm": 0.5200566649436951, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5213, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.1297501488431296, |
|
"grad_norm": 0.4156297445297241, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4895, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.13139255579051098, |
|
"grad_norm": 0.43649184703826904, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4809, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.1330349627378924, |
|
"grad_norm": 0.38926875591278076, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4819, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.13467736968527377, |
|
"grad_norm": 0.45897549390792847, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4619, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.13631977663265515, |
|
"grad_norm": 0.4487549364566803, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4737, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.13796218358003653, |
|
"grad_norm": 0.36948007345199585, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4576, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.13960459052741794, |
|
"grad_norm": 0.38834378123283386, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4464, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.14124699747479932, |
|
"grad_norm": 0.5436655879020691, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4616, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.1428894044221807, |
|
"grad_norm": 0.3576355278491974, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4669, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.14453181136956209, |
|
"grad_norm": 0.4736698269844055, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4788, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.1461742183169435, |
|
"grad_norm": 0.4074772596359253, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4214, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.14781662526432487, |
|
"grad_norm": 0.4454910457134247, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4407, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.14945903221170626, |
|
"grad_norm": 0.4039610028266907, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4585, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.15110143915908764, |
|
"grad_norm": 0.4431604743003845, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4483, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.15274384610646902, |
|
"grad_norm": 0.4190782606601715, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4516, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.15438625305385043, |
|
"grad_norm": 0.2951456606388092, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4584, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.1560286600012318, |
|
"grad_norm": 0.4400006830692291, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4533, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.1576710669486132, |
|
"grad_norm": 0.3839446008205414, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4489, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.15931347389599457, |
|
"grad_norm": 0.41484808921813965, |
|
"learning_rate": 0.0002, |
|
"loss": 0.422, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.16095588084337598, |
|
"grad_norm": 0.5211725831031799, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4379, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.16259828779075736, |
|
"grad_norm": 0.3866327106952667, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4279, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.16424069473813874, |
|
"grad_norm": 0.3327186107635498, |
|
"learning_rate": 0.0002, |
|
"loss": 0.417, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16588310168552012, |
|
"grad_norm": 0.46427205204963684, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4411, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.16752550863290153, |
|
"grad_norm": 0.4826524257659912, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4359, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.1691679155802829, |
|
"grad_norm": 0.4641328454017639, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4691, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.1708103225276643, |
|
"grad_norm": 0.525749683380127, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4297, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.17245272947504567, |
|
"grad_norm": 0.45604804158210754, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4411, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.17409513642242708, |
|
"grad_norm": 0.3894326984882355, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4098, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.17573754336980846, |
|
"grad_norm": 0.34401944279670715, |
|
"learning_rate": 0.0002, |
|
"loss": 0.406, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.17737995031718984, |
|
"grad_norm": 0.3576812148094177, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4024, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.17902235726457122, |
|
"grad_norm": 0.4276871979236603, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4085, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.18066476421195263, |
|
"grad_norm": 0.49007973074913025, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4104, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.182307171159334, |
|
"grad_norm": 0.4573257267475128, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4041, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.1839495781067154, |
|
"grad_norm": 0.4118468463420868, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3984, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.18559198505409677, |
|
"grad_norm": 0.357284277677536, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4212, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.18723439200147818, |
|
"grad_norm": 0.4252781867980957, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3924, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.18887679894885956, |
|
"grad_norm": 0.40546557307243347, |
|
"learning_rate": 0.0002, |
|
"loss": 0.398, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.19051920589624094, |
|
"grad_norm": 0.4305673837661743, |
|
"learning_rate": 0.0002, |
|
"loss": 0.398, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.19216161284362232, |
|
"grad_norm": 0.40348726511001587, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4031, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.19380401979100373, |
|
"grad_norm": 0.48159924149513245, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3926, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.1954464267383851, |
|
"grad_norm": 0.5939348936080933, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3963, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.1970888336857665, |
|
"grad_norm": 0.42593804001808167, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3925, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.19873124063314787, |
|
"grad_norm": 0.515277624130249, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3753, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.20037364758052928, |
|
"grad_norm": 0.43423864245414734, |
|
"learning_rate": 0.0002, |
|
"loss": 0.396, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.20201605452791066, |
|
"grad_norm": 0.3857817053794861, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3834, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.20365846147529204, |
|
"grad_norm": 0.3945648670196533, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3768, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.20530086842267342, |
|
"grad_norm": 0.46411946415901184, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3852, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.2069432753700548, |
|
"grad_norm": 0.3779551684856415, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3767, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.2085856823174362, |
|
"grad_norm": 0.4743368625640869, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4253, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.2102280892648176, |
|
"grad_norm": 0.4278275668621063, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3558, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.21187049621219897, |
|
"grad_norm": 0.42412903904914856, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3934, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.21351290315958035, |
|
"grad_norm": 7.02437162399292, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3972, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.21515531010696176, |
|
"grad_norm": 0.46447402238845825, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3742, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.21679771705434314, |
|
"grad_norm": 0.4078330993652344, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3954, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.21844012400172452, |
|
"grad_norm": 0.39751455187797546, |
|
"learning_rate": 0.0002, |
|
"loss": 0.36, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.2200825309491059, |
|
"grad_norm": 0.4075968265533447, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3894, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.2217249378964873, |
|
"grad_norm": 0.39630162715911865, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3748, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.2233673448438687, |
|
"grad_norm": 0.42885056138038635, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3496, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.22500975179125007, |
|
"grad_norm": 0.4635525941848755, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3494, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.22665215873863145, |
|
"grad_norm": 0.48458898067474365, |
|
"learning_rate": 0.0002, |
|
"loss": 0.387, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.22829456568601286, |
|
"grad_norm": 0.49742501974105835, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3717, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.22993697263339424, |
|
"grad_norm": 0.4279645085334778, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3537, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.23157937958077562, |
|
"grad_norm": 0.5221889615058899, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3676, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.233221786528157, |
|
"grad_norm": 0.5390656590461731, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3439, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.2348641934755384, |
|
"grad_norm": 0.4269630014896393, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3663, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.2365066004229198, |
|
"grad_norm": 0.37411990761756897, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3779, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.23814900737030117, |
|
"grad_norm": 0.3186222016811371, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3513, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.23979141431768256, |
|
"grad_norm": 0.33270496129989624, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3534, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.24143382126506396, |
|
"grad_norm": 0.4496273100376129, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3588, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.24307622821244534, |
|
"grad_norm": 0.35411253571510315, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3466, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.24471863515982673, |
|
"grad_norm": 0.4333256185054779, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3555, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.2463610421072081, |
|
"grad_norm": 0.3264130651950836, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3345, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.24800344905458951, |
|
"grad_norm": 0.3925504684448242, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3559, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.2496458560019709, |
|
"grad_norm": 0.4186360836029053, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3458, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.2512882629493523, |
|
"grad_norm": 0.4656223952770233, |
|
"learning_rate": 0.0002, |
|
"loss": 0.349, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.2529306698967337, |
|
"grad_norm": 0.4535064399242401, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3474, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.25457307684411506, |
|
"grad_norm": 0.37564146518707275, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3454, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.25621548379149645, |
|
"grad_norm": 0.36363497376441956, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3515, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.2578578907388778, |
|
"grad_norm": 0.380750447511673, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3653, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.2595002976862592, |
|
"grad_norm": 0.3188472092151642, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3596, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.2611427046336406, |
|
"grad_norm": 0.4478905200958252, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3567, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.26278511158102197, |
|
"grad_norm": 0.4925800859928131, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3466, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.26442751852840335, |
|
"grad_norm": 0.3702840209007263, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3327, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.2660699254757848, |
|
"grad_norm": 0.35024309158325195, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3524, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.26771233242316617, |
|
"grad_norm": 0.4079764783382416, |
|
"learning_rate": 0.0002, |
|
"loss": 0.338, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.26935473937054755, |
|
"grad_norm": 0.4466266632080078, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3465, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.2709971463179289, |
|
"grad_norm": 0.4438311457633972, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3396, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.2726395532653103, |
|
"grad_norm": 0.37101468443870544, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3392, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.2742819602126917, |
|
"grad_norm": 0.41411712765693665, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3341, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.27592436716007307, |
|
"grad_norm": 0.47411611676216125, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3355, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.27756677410745445, |
|
"grad_norm": 0.4871801733970642, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3627, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.2792091810548359, |
|
"grad_norm": 0.47128844261169434, |
|
"learning_rate": 0.0002, |
|
"loss": 0.324, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.28085158800221727, |
|
"grad_norm": 0.4556843042373657, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3443, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.28249399494959865, |
|
"grad_norm": 0.3775945007801056, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3401, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.28413640189698003, |
|
"grad_norm": 0.377316415309906, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3478, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.2857788088443614, |
|
"grad_norm": 0.336944580078125, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3382, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.2874212157917428, |
|
"grad_norm": 0.4296940863132477, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3361, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.28906362273912417, |
|
"grad_norm": 0.4638020396232605, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3583, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.29070602968650555, |
|
"grad_norm": 0.4074634313583374, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3601, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.292348436633887, |
|
"grad_norm": 0.3634164035320282, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3216, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.29399084358126837, |
|
"grad_norm": 0.43480202555656433, |
|
"learning_rate": 0.0002, |
|
"loss": 0.33, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.29563325052864975, |
|
"grad_norm": 0.42778658866882324, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3408, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.29727565747603113, |
|
"grad_norm": 0.3778844177722931, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3309, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.2989180644234125, |
|
"grad_norm": 0.33491814136505127, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3011, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.3005604713707939, |
|
"grad_norm": 0.5079118609428406, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3079, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.30220287831817527, |
|
"grad_norm": 0.3751799166202545, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3286, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.30384528526555665, |
|
"grad_norm": 0.4447515904903412, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2991, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.30548769221293803, |
|
"grad_norm": 0.33741819858551025, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3169, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.30713009916031947, |
|
"grad_norm": 0.3624327480792999, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3213, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.30877250610770085, |
|
"grad_norm": 0.5299442410469055, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3476, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.31041491305508223, |
|
"grad_norm": 0.3178050220012665, |
|
"learning_rate": 0.0002, |
|
"loss": 0.329, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.3120573200024636, |
|
"grad_norm": 0.3178127408027649, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3046, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.313699726949845, |
|
"grad_norm": 0.4366089403629303, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3179, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.3153421338972264, |
|
"grad_norm": 0.47534024715423584, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3377, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.31698454084460775, |
|
"grad_norm": 0.4247181713581085, |
|
"learning_rate": 0.0002, |
|
"loss": 0.311, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.31862694779198913, |
|
"grad_norm": 0.5085952877998352, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3197, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.32026935473937057, |
|
"grad_norm": 0.3649958372116089, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3243, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.32191176168675195, |
|
"grad_norm": 0.43816304206848145, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3232, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.32355416863413333, |
|
"grad_norm": 0.32603034377098083, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3155, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.3251965755815147, |
|
"grad_norm": 0.4867421090602875, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3102, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.3268389825288961, |
|
"grad_norm": 0.3843926191329956, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3035, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.3284813894762775, |
|
"grad_norm": 0.49313676357269287, |
|
"learning_rate": 0.0002, |
|
"loss": 0.322, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.33012379642365886, |
|
"grad_norm": 0.4102085530757904, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3206, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.33176620337104024, |
|
"grad_norm": 0.47901496291160583, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3131, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.33340861031842167, |
|
"grad_norm": 0.40674644708633423, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3091, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.33505101726580305, |
|
"grad_norm": 0.44038107991218567, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3116, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.33669342421318443, |
|
"grad_norm": 0.3919316828250885, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3077, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.3383358311605658, |
|
"grad_norm": 0.38622769713401794, |
|
"learning_rate": 0.0002, |
|
"loss": 0.302, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.3399782381079472, |
|
"grad_norm": 0.4685916602611542, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3234, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.3416206450553286, |
|
"grad_norm": 0.3348797559738159, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3205, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.34326305200270996, |
|
"grad_norm": 0.4265504777431488, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3101, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.34490545895009134, |
|
"grad_norm": 0.4005930423736572, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3096, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.3465478658974728, |
|
"grad_norm": 0.4154227674007416, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3188, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.34819027284485415, |
|
"grad_norm": 0.30359068512916565, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2966, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.34983267979223553, |
|
"grad_norm": 0.35363709926605225, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3189, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.3514750867396169, |
|
"grad_norm": 0.43156126141548157, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2951, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.3531174936869983, |
|
"grad_norm": 0.4593096077442169, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3048, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.3547599006343797, |
|
"grad_norm": 0.49352073669433594, |
|
"learning_rate": 0.0002, |
|
"loss": 0.301, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.35640230758176106, |
|
"grad_norm": 0.4053367078304291, |
|
"learning_rate": 0.0002, |
|
"loss": 0.311, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.35804471452914244, |
|
"grad_norm": 0.3465437889099121, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3186, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.3596871214765238, |
|
"grad_norm": 0.4525587558746338, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3126, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.36132952842390526, |
|
"grad_norm": 0.4213342070579529, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3041, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.36297193537128664, |
|
"grad_norm": 0.37421244382858276, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3295, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.364614342318668, |
|
"grad_norm": 0.4033282697200775, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3031, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.3662567492660494, |
|
"grad_norm": 0.45873841643333435, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2819, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.3678991562134308, |
|
"grad_norm": 0.36195841431617737, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2908, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.36954156316081216, |
|
"grad_norm": 0.39707615971565247, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3023, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.37118397010819354, |
|
"grad_norm": 0.3999727666378021, |
|
"learning_rate": 0.0002, |
|
"loss": 0.31, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.3728263770555749, |
|
"grad_norm": 0.36880913376808167, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3017, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.37446878400295636, |
|
"grad_norm": 0.36656180024147034, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3129, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.37611119095033774, |
|
"grad_norm": 0.4566299021244049, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3039, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.3777535978977191, |
|
"grad_norm": 0.3202304542064667, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2827, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.3793960048451005, |
|
"grad_norm": 0.4553089439868927, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3401, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.3810384117924819, |
|
"grad_norm": 0.40536269545555115, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3038, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.38268081873986326, |
|
"grad_norm": 0.36675453186035156, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3198, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.38432322568724464, |
|
"grad_norm": 0.41660359501838684, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2904, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.385965632634626, |
|
"grad_norm": 0.2889881134033203, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3076, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.38760803958200746, |
|
"grad_norm": 0.3077252507209778, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3087, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.38925044652938884, |
|
"grad_norm": 0.43053752183914185, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2994, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.3908928534767702, |
|
"grad_norm": 0.39978402853012085, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2825, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.3925352604241516, |
|
"grad_norm": 0.39721283316612244, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3002, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.394177667371533, |
|
"grad_norm": 0.4234716296195984, |
|
"learning_rate": 0.0002, |
|
"loss": 0.281, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.39582007431891436, |
|
"grad_norm": 0.41390299797058105, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3015, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.39746248126629574, |
|
"grad_norm": 0.8412930369377136, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3034, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.3991048882136771, |
|
"grad_norm": 0.4165583848953247, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2844, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.40074729516105856, |
|
"grad_norm": 0.4212113618850708, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2847, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.40238970210843994, |
|
"grad_norm": 0.46880143880844116, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2877, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.4040321090558213, |
|
"grad_norm": 0.33470281958580017, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3006, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.4056745160032027, |
|
"grad_norm": 0.41939905285835266, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3014, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.4073169229505841, |
|
"grad_norm": 0.4031718671321869, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2959, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.40895932989796546, |
|
"grad_norm": 0.3611488938331604, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3175, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.41060173684534684, |
|
"grad_norm": 0.38445645570755005, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2897, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.4122441437927282, |
|
"grad_norm": 0.3903651833534241, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2716, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.4138865507401096, |
|
"grad_norm": 0.39842015504837036, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2987, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.41552895768749104, |
|
"grad_norm": 0.4211498200893402, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3027, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.4171713646348724, |
|
"grad_norm": 0.4767220914363861, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2897, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.4188137715822538, |
|
"grad_norm": 0.4871378242969513, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2874, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.4204561785296352, |
|
"grad_norm": 0.3960734009742737, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2903, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.42209858547701656, |
|
"grad_norm": 0.3350552022457123, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2835, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.42374099242439794, |
|
"grad_norm": 0.34975695610046387, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3025, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.4253833993717793, |
|
"grad_norm": 0.3886794149875641, |
|
"learning_rate": 0.0002, |
|
"loss": 0.289, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.4270258063191607, |
|
"grad_norm": 0.4114588797092438, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2802, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.42866821326654214, |
|
"grad_norm": 0.4368172585964203, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2918, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.4303106202139235, |
|
"grad_norm": 0.2889314889907837, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2854, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.4319530271613049, |
|
"grad_norm": 0.3999134600162506, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2955, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.4335954341086863, |
|
"grad_norm": 0.32143938541412354, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2836, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.43523784105606766, |
|
"grad_norm": 0.4069638252258301, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2854, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.43688024800344905, |
|
"grad_norm": 0.46609416604042053, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2777, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.4385226549508304, |
|
"grad_norm": 0.35112160444259644, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2896, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.4401650618982118, |
|
"grad_norm": 0.4243420660495758, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2743, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.44180746884559324, |
|
"grad_norm": 0.45615971088409424, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2699, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.4434498757929746, |
|
"grad_norm": 0.4836295247077942, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2932, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.445092282740356, |
|
"grad_norm": 0.41774359345436096, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2869, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.4467346896877374, |
|
"grad_norm": 0.3904239535331726, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2798, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.44837709663511877, |
|
"grad_norm": 0.3867247700691223, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2668, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.45001950358250015, |
|
"grad_norm": 0.33975329995155334, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2805, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.4516619105298815, |
|
"grad_norm": 0.30403727293014526, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2747, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.4533043174772629, |
|
"grad_norm": 0.4227672219276428, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2699, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.4549467244246443, |
|
"grad_norm": 0.38823801279067993, |
|
"learning_rate": 0.0002, |
|
"loss": 0.256, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.4565891313720257, |
|
"grad_norm": 0.3460341691970825, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2768, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.4582315383194071, |
|
"grad_norm": 0.40843436121940613, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2829, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.4598739452667885, |
|
"grad_norm": 0.411004900932312, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2849, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.46151635221416987, |
|
"grad_norm": 0.5354210138320923, |
|
"learning_rate": 0.0002, |
|
"loss": 0.298, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.46315875916155125, |
|
"grad_norm": 0.3296845555305481, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2571, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.46480116610893263, |
|
"grad_norm": 0.404950350522995, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2843, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.466443573056314, |
|
"grad_norm": 0.3697005808353424, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2655, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.4680859800036954, |
|
"grad_norm": 0.3465549945831299, |
|
"learning_rate": 0.0002, |
|
"loss": 0.282, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.4697283869510768, |
|
"grad_norm": 0.4802212119102478, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2672, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.4713707938984582, |
|
"grad_norm": 0.3909721076488495, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2704, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.4730132008458396, |
|
"grad_norm": 0.41303369402885437, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2797, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.47465560779322097, |
|
"grad_norm": 0.32934170961380005, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2903, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.47629801474060235, |
|
"grad_norm": 0.375072181224823, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2752, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.47794042168798373, |
|
"grad_norm": 0.35390418767929077, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2755, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.4795828286353651, |
|
"grad_norm": 0.3856378197669983, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2699, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.4812252355827465, |
|
"grad_norm": 0.2624310851097107, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2654, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.4828676425301279, |
|
"grad_norm": 0.43709930777549744, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2768, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.4845100494775093, |
|
"grad_norm": 0.3971209228038788, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2728, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.4861524564248907, |
|
"grad_norm": 0.3937450647354126, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2836, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.48779486337227207, |
|
"grad_norm": 0.3925333023071289, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2653, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.48943727031965345, |
|
"grad_norm": 0.3056396245956421, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2593, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.49107967726703483, |
|
"grad_norm": 0.349110871553421, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2872, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.4927220842144162, |
|
"grad_norm": 0.37678685784339905, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2779, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.4943644911617976, |
|
"grad_norm": 0.37364938855171204, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2612, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.49600689810917903, |
|
"grad_norm": 0.3885985016822815, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2701, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.4976493050565604, |
|
"grad_norm": 0.4726998507976532, |
|
"learning_rate": 0.0002, |
|
"loss": 0.258, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.4992917120039418, |
|
"grad_norm": 0.3752720355987549, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2873, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.5009341189513231, |
|
"grad_norm": 0.5174003839492798, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2677, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.5025765258987046, |
|
"grad_norm": 0.39343810081481934, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2498, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.504218932846086, |
|
"grad_norm": 0.3367049992084503, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2555, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.5058613397934674, |
|
"grad_norm": 0.3384205400943756, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2865, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.5075037467408487, |
|
"grad_norm": 0.37642723321914673, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2677, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.5091461536882301, |
|
"grad_norm": 0.31989771127700806, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2675, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.5107885606356115, |
|
"grad_norm": 0.30809977650642395, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2562, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.5124309675829929, |
|
"grad_norm": 0.3463954031467438, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2576, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.5140733745303743, |
|
"grad_norm": 0.3789072036743164, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2679, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.5157157814777557, |
|
"grad_norm": 0.458978533744812, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2596, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.517358188425137, |
|
"grad_norm": 0.3515280783176422, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2629, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.5190005953725184, |
|
"grad_norm": 0.42611977458000183, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2674, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.5206430023198998, |
|
"grad_norm": 0.3865070641040802, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2714, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.5222854092672812, |
|
"grad_norm": 0.3559401333332062, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2751, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.5239278162146626, |
|
"grad_norm": 0.3181537389755249, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2724, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.5255702231620439, |
|
"grad_norm": 0.37673598527908325, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2711, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.5272126301094253, |
|
"grad_norm": 0.44122573733329773, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2617, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.5288550370568067, |
|
"grad_norm": 0.4779141843318939, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2602, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.5304974440041882, |
|
"grad_norm": 0.3975127339363098, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2472, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.5321398509515696, |
|
"grad_norm": 0.3808406591415405, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2623, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.533782257898951, |
|
"grad_norm": 0.340666264295578, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2806, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.5354246648463323, |
|
"grad_norm": 0.41233885288238525, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2458, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.5370670717937137, |
|
"grad_norm": 0.28576114773750305, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2638, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.5387094787410951, |
|
"grad_norm": 0.4704492688179016, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2735, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.5403518856884765, |
|
"grad_norm": 0.43339604139328003, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2667, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.5419942926358579, |
|
"grad_norm": 0.332878440618515, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2513, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.5436366995832392, |
|
"grad_norm": 0.34620800614356995, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2768, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.5452791065306206, |
|
"grad_norm": 0.46673691272735596, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2597, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.546921513478002, |
|
"grad_norm": 0.36888402700424194, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2453, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.5485639204253834, |
|
"grad_norm": 0.363007515668869, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2545, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.5502063273727648, |
|
"grad_norm": 0.3927077353000641, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2597, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.5518487343201461, |
|
"grad_norm": 0.36897674202919006, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2571, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.5534911412675275, |
|
"grad_norm": 0.3425733149051666, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2624, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.5551335482149089, |
|
"grad_norm": 0.3315962553024292, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2656, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.5567759551622903, |
|
"grad_norm": 0.4456098675727844, |
|
"learning_rate": 0.0002, |
|
"loss": 0.266, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.5584183621096718, |
|
"grad_norm": 0.4146248996257782, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2631, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.5600607690570532, |
|
"grad_norm": 0.3591421842575073, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2475, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.5617031760044345, |
|
"grad_norm": 0.4540598690509796, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2667, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.5633455829518159, |
|
"grad_norm": 0.4394567906856537, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2673, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.5649879898991973, |
|
"grad_norm": 0.3273297846317291, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2631, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.5666303968465787, |
|
"grad_norm": 0.3828592896461487, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2601, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.5682728037939601, |
|
"grad_norm": 0.24124163389205933, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2507, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.5699152107413414, |
|
"grad_norm": 0.4403514564037323, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2686, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.5715576176887228, |
|
"grad_norm": 0.39177918434143066, |
|
"learning_rate": 0.0002, |
|
"loss": 0.255, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.5732000246361042, |
|
"grad_norm": 0.41621333360671997, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2472, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.5748424315834856, |
|
"grad_norm": 0.4051215648651123, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2692, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.576484838530867, |
|
"grad_norm": 0.9351252317428589, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2519, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.5781272454782483, |
|
"grad_norm": 0.38004037737846375, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2683, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.5797696524256297, |
|
"grad_norm": 0.31271103024482727, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2554, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.5814120593730111, |
|
"grad_norm": 0.3766959607601166, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2555, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.5830544663203925, |
|
"grad_norm": 2.4575226306915283, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2673, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.584696873267774, |
|
"grad_norm": 0.3419061005115509, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2484, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.5863392802151554, |
|
"grad_norm": 0.3647725284099579, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2614, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.5879816871625367, |
|
"grad_norm": 0.39643993973731995, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2583, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.5896240941099181, |
|
"grad_norm": 0.37024736404418945, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2605, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.5912665010572995, |
|
"grad_norm": 0.4551810324192047, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2512, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.5929089080046809, |
|
"grad_norm": 0.2843814492225647, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2504, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.5945513149520623, |
|
"grad_norm": 0.3765452206134796, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2557, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.5961937218994436, |
|
"grad_norm": 0.4625066816806793, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2433, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.597836128846825, |
|
"grad_norm": 0.4870743453502655, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2494, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.5994785357942064, |
|
"grad_norm": 0.4229605197906494, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2553, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.6011209427415878, |
|
"grad_norm": 0.37593892216682434, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2523, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.6027633496889692, |
|
"grad_norm": 0.36149609088897705, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2582, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.6044057566363505, |
|
"grad_norm": 0.3866046071052551, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2534, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.6060481635837319, |
|
"grad_norm": 0.4623259902000427, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2542, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.6076905705311133, |
|
"grad_norm": 0.32349276542663574, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2437, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.6093329774784947, |
|
"grad_norm": 0.386561781167984, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2494, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.6109753844258761, |
|
"grad_norm": 0.36509180068969727, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2559, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.6126177913732576, |
|
"grad_norm": 0.3628571331501007, |
|
"learning_rate": 0.0002, |
|
"loss": 0.26, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.6142601983206389, |
|
"grad_norm": 0.3218732476234436, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2487, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.6159026052680203, |
|
"grad_norm": 0.3551442623138428, |
|
"learning_rate": 0.0002, |
|
"loss": 0.231, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.6175450122154017, |
|
"grad_norm": 0.40962496399879456, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2486, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.6191874191627831, |
|
"grad_norm": 0.48531442880630493, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2547, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.6208298261101645, |
|
"grad_norm": 0.387851357460022, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2655, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.6224722330575458, |
|
"grad_norm": 0.3165546953678131, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2499, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.6241146400049272, |
|
"grad_norm": 0.3393017649650574, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2546, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.6257570469523086, |
|
"grad_norm": 0.3975006639957428, |
|
"learning_rate": 0.0002, |
|
"loss": 0.255, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.62739945389969, |
|
"grad_norm": 0.4458036720752716, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2671, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.6290418608470714, |
|
"grad_norm": 0.34977594017982483, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2438, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.6306842677944527, |
|
"grad_norm": 0.4126521646976471, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2473, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.6323266747418341, |
|
"grad_norm": 0.35712817311286926, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2568, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.6339690816892155, |
|
"grad_norm": 0.3464488983154297, |
|
"learning_rate": 0.0002, |
|
"loss": 0.26, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.6356114886365969, |
|
"grad_norm": 0.40559422969818115, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2531, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.6372538955839783, |
|
"grad_norm": 0.3709222972393036, |
|
"learning_rate": 0.0002, |
|
"loss": 0.257, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.6388963025313598, |
|
"grad_norm": 0.3671443462371826, |
|
"learning_rate": 0.0002, |
|
"loss": 0.243, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.6405387094787411, |
|
"grad_norm": 0.39361605048179626, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2569, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.6421811164261225, |
|
"grad_norm": 0.41323602199554443, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2465, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.6438235233735039, |
|
"grad_norm": 0.4266330301761627, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2495, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.6454659303208853, |
|
"grad_norm": 0.3892604112625122, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2505, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.6471083372682667, |
|
"grad_norm": 0.43539443612098694, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2643, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.648750744215648, |
|
"grad_norm": 0.3637757897377014, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2557, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.6503931511630294, |
|
"grad_norm": 0.42761602997779846, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2578, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.6520355581104108, |
|
"grad_norm": 0.38917163014411926, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2593, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.6536779650577922, |
|
"grad_norm": 0.42814767360687256, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2412, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.6553203720051736, |
|
"grad_norm": 0.3543958365917206, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2485, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.656962778952555, |
|
"grad_norm": 0.3452099859714508, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2519, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.6586051858999363, |
|
"grad_norm": 0.38600897789001465, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2443, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.6602475928473177, |
|
"grad_norm": 0.35474061965942383, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2435, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.6618899997946991, |
|
"grad_norm": 0.48493891954421997, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2564, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.6635324067420805, |
|
"grad_norm": 0.40137720108032227, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2592, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.6651748136894619, |
|
"grad_norm": 0.38460877537727356, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2387, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.6668172206368433, |
|
"grad_norm": 0.3780753016471863, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2517, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.6684596275842247, |
|
"grad_norm": 0.30384665727615356, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2442, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.6701020345316061, |
|
"grad_norm": 0.34080567955970764, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2443, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.6717444414789875, |
|
"grad_norm": 0.3789510130882263, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2462, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.6733868484263689, |
|
"grad_norm": 0.3566538989543915, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2418, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.6750292553737502, |
|
"grad_norm": 0.3436945676803589, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2353, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.6766716623211316, |
|
"grad_norm": 0.35046547651290894, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2521, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.678314069268513, |
|
"grad_norm": 0.3671397566795349, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2505, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.6799564762158944, |
|
"grad_norm": 0.33368802070617676, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2663, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.6815988831632758, |
|
"grad_norm": 0.35810762643814087, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2467, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.6832412901106572, |
|
"grad_norm": 0.3913412094116211, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2544, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.6848836970580385, |
|
"grad_norm": 0.3313830494880676, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2551, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.6865261040054199, |
|
"grad_norm": 0.3506488502025604, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2416, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.6881685109528013, |
|
"grad_norm": 0.3841126561164856, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2531, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.6898109179001827, |
|
"grad_norm": 0.38030919432640076, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2374, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.691453324847564, |
|
"grad_norm": 0.3643128573894501, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2616, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.6930957317949455, |
|
"grad_norm": 0.37401241064071655, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2424, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.6947381387423269, |
|
"grad_norm": 0.42304474115371704, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2491, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.6963805456897083, |
|
"grad_norm": 0.3441920280456543, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2429, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.6980229526370897, |
|
"grad_norm": 0.33383867144584656, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2361, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.6996653595844711, |
|
"grad_norm": 0.42935657501220703, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2598, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.7013077665318525, |
|
"grad_norm": 0.5143205523490906, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2348, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.7029501734792338, |
|
"grad_norm": 0.37915435433387756, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2277, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.7045925804266152, |
|
"grad_norm": 0.3202255666255951, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2474, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.7062349873739966, |
|
"grad_norm": 0.3681676387786865, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2417, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.707877394321378, |
|
"grad_norm": 0.41214585304260254, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2356, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.7095198012687594, |
|
"grad_norm": 0.35259029269218445, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2394, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.7111622082161407, |
|
"grad_norm": 0.47768017649650574, |
|
"learning_rate": 0.0002, |
|
"loss": 0.248, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.7128046151635221, |
|
"grad_norm": 0.3282839059829712, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2336, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.7144470221109035, |
|
"grad_norm": 0.441099613904953, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2631, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.7160894290582849, |
|
"grad_norm": 0.3486292362213135, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2531, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.7177318360056663, |
|
"grad_norm": 0.33037880063056946, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2405, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.7193742429530476, |
|
"grad_norm": 0.47114354372024536, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2665, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.7210166499004291, |
|
"grad_norm": 0.34797531366348267, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2481, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.7226590568478105, |
|
"grad_norm": 0.43183642625808716, |
|
"learning_rate": 0.0002, |
|
"loss": 0.242, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.7243014637951919, |
|
"grad_norm": 0.4230342507362366, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2363, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.7259438707425733, |
|
"grad_norm": 0.40553364157676697, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2422, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.7275862776899547, |
|
"grad_norm": 0.34155145287513733, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2422, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.729228684637336, |
|
"grad_norm": 0.4095294773578644, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2605, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.7308710915847174, |
|
"grad_norm": 0.36541318893432617, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2516, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.7325134985320988, |
|
"grad_norm": 0.40149998664855957, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2515, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.7341559054794802, |
|
"grad_norm": 0.3220469653606415, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2361, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.7357983124268616, |
|
"grad_norm": 0.3153376579284668, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2325, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.7374407193742429, |
|
"grad_norm": 0.3046116530895233, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2502, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.7390831263216243, |
|
"grad_norm": 0.502663791179657, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2471, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.7407255332690057, |
|
"grad_norm": 0.35168886184692383, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2309, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.7423679402163871, |
|
"grad_norm": 0.43629148602485657, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2423, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.7440103471637685, |
|
"grad_norm": 0.35909175872802734, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2453, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.7456527541111498, |
|
"grad_norm": 0.3052688539028168, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2413, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.7472951610585313, |
|
"grad_norm": 0.2708439230918884, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2237, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.7489375680059127, |
|
"grad_norm": 0.3965560495853424, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2423, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.7505799749532941, |
|
"grad_norm": 0.3895662724971771, |
|
"learning_rate": 0.0002, |
|
"loss": 0.249, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.7522223819006755, |
|
"grad_norm": 0.32124513387680054, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2376, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.7538647888480569, |
|
"grad_norm": 0.716029167175293, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2529, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.7555071957954382, |
|
"grad_norm": 0.3812948167324066, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2269, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.7571496027428196, |
|
"grad_norm": 0.37073054909706116, |
|
"learning_rate": 0.0002, |
|
"loss": 0.235, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.758792009690201, |
|
"grad_norm": 0.4043092727661133, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2345, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.7604344166375824, |
|
"grad_norm": 0.3160434365272522, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2412, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.7620768235849638, |
|
"grad_norm": 0.35415521264076233, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2358, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.7637192305323451, |
|
"grad_norm": 0.41371211409568787, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2317, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.7653616374797265, |
|
"grad_norm": 0.4175126850605011, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2547, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.7670040444271079, |
|
"grad_norm": 0.39811649918556213, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2462, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.7686464513744893, |
|
"grad_norm": 0.33596447110176086, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2368, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.7702888583218707, |
|
"grad_norm": 0.36754104495048523, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2484, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.771931265269252, |
|
"grad_norm": 0.38244250416755676, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2364, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.7735736722166334, |
|
"grad_norm": 0.3366243839263916, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2194, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.7752160791640149, |
|
"grad_norm": 0.39877885580062866, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2469, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.7768584861113963, |
|
"grad_norm": 0.2690157890319824, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2459, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.7785008930587777, |
|
"grad_norm": 0.3678382337093353, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2192, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.7801433000061591, |
|
"grad_norm": 0.3121150732040405, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2438, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.7817857069535404, |
|
"grad_norm": 0.3517535626888275, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2495, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.7834281139009218, |
|
"grad_norm": 0.434817910194397, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2532, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.7850705208483032, |
|
"grad_norm": 0.35570958256721497, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2467, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.7867129277956846, |
|
"grad_norm": 0.4270517826080322, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2337, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.788355334743066, |
|
"grad_norm": 0.2827800214290619, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2309, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.7899977416904473, |
|
"grad_norm": 0.39158400893211365, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2366, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.7916401486378287, |
|
"grad_norm": 0.32538673281669617, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2389, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.7932825555852101, |
|
"grad_norm": 0.3370015323162079, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2377, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.7949249625325915, |
|
"grad_norm": 0.3779650032520294, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2339, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.7965673694799729, |
|
"grad_norm": 0.36034300923347473, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2427, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.7982097764273542, |
|
"grad_norm": 0.3154286742210388, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2338, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.7998521833747356, |
|
"grad_norm": 0.3282501697540283, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2408, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.8014945903221171, |
|
"grad_norm": 0.41291025280952454, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2507, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.8031369972694985, |
|
"grad_norm": 0.3961363136768341, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2281, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.8047794042168799, |
|
"grad_norm": 0.47485384345054626, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2349, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.8064218111642613, |
|
"grad_norm": 0.3284982740879059, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2288, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.8080642181116426, |
|
"grad_norm": 0.38867270946502686, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2328, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.809706625059024, |
|
"grad_norm": 0.44371268153190613, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2416, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.8113490320064054, |
|
"grad_norm": 0.2462434470653534, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2391, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.8129914389537868, |
|
"grad_norm": 0.31762421131134033, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2467, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.8146338459011682, |
|
"grad_norm": 0.40011724829673767, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2351, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.8162762528485495, |
|
"grad_norm": 0.2972090542316437, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2469, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.8179186597959309, |
|
"grad_norm": 0.4047238230705261, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2257, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.8195610667433123, |
|
"grad_norm": 0.36663326621055603, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2302, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.8212034736906937, |
|
"grad_norm": 0.49191904067993164, |
|
"learning_rate": 0.0002, |
|
"loss": 0.242, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.8228458806380751, |
|
"grad_norm": 0.4621546268463135, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2324, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 0.8244882875854564, |
|
"grad_norm": 0.4055505394935608, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2373, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 0.8261306945328378, |
|
"grad_norm": 0.34892845153808594, |
|
"learning_rate": 0.0002, |
|
"loss": 0.23, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 0.8277731014802192, |
|
"grad_norm": 0.33453091979026794, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2348, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 0.8294155084276007, |
|
"grad_norm": 0.3283565640449524, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2314, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.8310579153749821, |
|
"grad_norm": 0.35970717668533325, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2336, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 0.8327003223223635, |
|
"grad_norm": 0.3093232810497284, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2363, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 0.8343427292697448, |
|
"grad_norm": 0.4389066696166992, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2422, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 0.8359851362171262, |
|
"grad_norm": 0.44654580950737, |
|
"learning_rate": 0.0002, |
|
"loss": 0.232, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 0.8376275431645076, |
|
"grad_norm": 0.2830391526222229, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2476, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.839269950111889, |
|
"grad_norm": 0.31547674536705017, |
|
"learning_rate": 0.0002, |
|
"loss": 0.231, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 0.8409123570592704, |
|
"grad_norm": 0.45748040080070496, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2372, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.8425547640066517, |
|
"grad_norm": 0.34882062673568726, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2376, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 0.8441971709540331, |
|
"grad_norm": 0.3529532849788666, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2323, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 0.8458395779014145, |
|
"grad_norm": 0.33054473996162415, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2376, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.8474819848487959, |
|
"grad_norm": 0.3015061616897583, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2243, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 0.8491243917961773, |
|
"grad_norm": 0.3048664629459381, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2318, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 0.8507667987435586, |
|
"grad_norm": 0.31459841132164, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2307, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 0.85240920569094, |
|
"grad_norm": 0.39160168170928955, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2407, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 0.8540516126383214, |
|
"grad_norm": 0.30392590165138245, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2206, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.8556940195857029, |
|
"grad_norm": 0.3656589686870575, |
|
"learning_rate": 0.0002, |
|
"loss": 0.229, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 0.8573364265330843, |
|
"grad_norm": 0.35856541991233826, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2361, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 0.8589788334804657, |
|
"grad_norm": 0.3591729402542114, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2232, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 0.860621240427847, |
|
"grad_norm": 0.36023178696632385, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2495, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 0.8622636473752284, |
|
"grad_norm": 0.38790059089660645, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2288, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.8639060543226098, |
|
"grad_norm": 0.39627397060394287, |
|
"learning_rate": 0.0002, |
|
"loss": 0.24, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 0.8655484612699912, |
|
"grad_norm": 0.32167407870292664, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2365, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.8671908682173726, |
|
"grad_norm": 0.34265172481536865, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2419, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.868833275164754, |
|
"grad_norm": 0.3236486613750458, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2326, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 0.8704756821121353, |
|
"grad_norm": 0.3700607120990753, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2361, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.8721180890595167, |
|
"grad_norm": 0.33969688415527344, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2236, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 0.8737604960068981, |
|
"grad_norm": 0.2824096083641052, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2415, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 0.8754029029542795, |
|
"grad_norm": 0.3842727243900299, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2223, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 0.8770453099016609, |
|
"grad_norm": 0.36808887124061584, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2253, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 0.8786877168490422, |
|
"grad_norm": 0.4065176844596863, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2274, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.8803301237964236, |
|
"grad_norm": 0.3421749174594879, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2309, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 0.881972530743805, |
|
"grad_norm": 0.30610519647598267, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2213, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 0.8836149376911865, |
|
"grad_norm": 0.40341177582740784, |
|
"learning_rate": 0.0002, |
|
"loss": 0.229, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 0.8852573446385679, |
|
"grad_norm": 0.43038755655288696, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2312, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 0.8868997515859492, |
|
"grad_norm": 0.26736319065093994, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2375, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.8885421585333306, |
|
"grad_norm": 0.34479281306266785, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2342, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 0.890184565480712, |
|
"grad_norm": 0.32857152819633484, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2352, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 0.8918269724280934, |
|
"grad_norm": 0.30919578671455383, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2133, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 0.8934693793754748, |
|
"grad_norm": 0.3049899637699127, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2374, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.8951117863228562, |
|
"grad_norm": 0.4088539779186249, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2377, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.8967541932702375, |
|
"grad_norm": 0.3318689167499542, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2459, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 0.8983966002176189, |
|
"grad_norm": 0.38051754236221313, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2305, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 0.9000390071650003, |
|
"grad_norm": 0.401080846786499, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2297, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 0.9016814141123817, |
|
"grad_norm": 0.30713602900505066, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2254, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 0.903323821059763, |
|
"grad_norm": 0.37888234853744507, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2346, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.9049662280071444, |
|
"grad_norm": 0.3106231689453125, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2206, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 0.9066086349545258, |
|
"grad_norm": 0.44297677278518677, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2218, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 0.9082510419019072, |
|
"grad_norm": 0.3375784456729889, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2273, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 0.9098934488492886, |
|
"grad_norm": 0.4860747158527374, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2317, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 0.9115358557966701, |
|
"grad_norm": 0.2880633771419525, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2398, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.9131782627440514, |
|
"grad_norm": 0.4085402190685272, |
|
"learning_rate": 0.0002, |
|
"loss": 0.234, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 0.9148206696914328, |
|
"grad_norm": 0.38998520374298096, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2402, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 0.9164630766388142, |
|
"grad_norm": 0.40508535504341125, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2136, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.9181054835861956, |
|
"grad_norm": 0.3789615035057068, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2267, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 0.919747890533577, |
|
"grad_norm": 0.3882130980491638, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2276, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.9213902974809584, |
|
"grad_norm": 0.3001303970813751, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2313, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 0.9230327044283397, |
|
"grad_norm": 0.4514042139053345, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2204, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 0.9246751113757211, |
|
"grad_norm": 0.43372517824172974, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2294, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 0.9263175183231025, |
|
"grad_norm": 0.2934057414531708, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2308, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 0.9279599252704839, |
|
"grad_norm": 0.4067831337451935, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2329, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.9296023322178653, |
|
"grad_norm": 0.3299509584903717, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2214, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 0.9312447391652466, |
|
"grad_norm": 0.35204941034317017, |
|
"learning_rate": 0.0002, |
|
"loss": 0.239, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 0.932887146112628, |
|
"grad_norm": 0.30878013372421265, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2248, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 0.9345295530600094, |
|
"grad_norm": 0.392170786857605, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2274, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 0.9361719600073908, |
|
"grad_norm": 0.4151529371738434, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2186, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.9378143669547723, |
|
"grad_norm": 0.3535741865634918, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2285, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 0.9394567739021537, |
|
"grad_norm": 0.3477960526943207, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2313, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 0.941099180849535, |
|
"grad_norm": 0.3621846139431, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2317, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 0.9427415877969164, |
|
"grad_norm": 0.3844580352306366, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2345, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 0.9443839947442978, |
|
"grad_norm": 0.3395872116088867, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2233, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.9460264016916792, |
|
"grad_norm": 0.4554111063480377, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2324, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.9476688086390606, |
|
"grad_norm": 0.34367838501930237, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2157, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 0.9493112155864419, |
|
"grad_norm": 0.2760342061519623, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2278, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 0.9509536225338233, |
|
"grad_norm": 0.4382875859737396, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2361, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 0.9525960294812047, |
|
"grad_norm": 0.3573220670223236, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2241, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.9542384364285861, |
|
"grad_norm": 0.3491596579551697, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2258, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 0.9558808433759675, |
|
"grad_norm": 0.42366743087768555, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2406, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 0.9575232503233488, |
|
"grad_norm": 0.3748779892921448, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2305, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 0.9591656572707302, |
|
"grad_norm": 0.40864527225494385, |
|
"learning_rate": 0.0002, |
|
"loss": 0.235, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 0.9608080642181116, |
|
"grad_norm": 0.41164445877075195, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2195, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.962450471165493, |
|
"grad_norm": 0.46402692794799805, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2266, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 0.9640928781128744, |
|
"grad_norm": 0.32727622985839844, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2324, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 0.9657352850602559, |
|
"grad_norm": 0.4346349537372589, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2257, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 0.9673776920076372, |
|
"grad_norm": 0.3470235764980316, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2333, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.9690200989550186, |
|
"grad_norm": 0.48941469192504883, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2336, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.9706625059024, |
|
"grad_norm": 0.3959124982357025, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2221, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 0.9723049128497814, |
|
"grad_norm": 0.40877676010131836, |
|
"learning_rate": 0.0002, |
|
"loss": 0.232, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.9739473197971628, |
|
"grad_norm": 0.4087940454483032, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2195, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 0.9755897267445441, |
|
"grad_norm": 0.3967040181159973, |
|
"learning_rate": 0.0002, |
|
"loss": 0.234, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 0.9772321336919255, |
|
"grad_norm": 0.41639575362205505, |
|
"learning_rate": 0.0002, |
|
"loss": 0.221, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.9788745406393069, |
|
"grad_norm": 0.304775595664978, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2283, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 0.9805169475866883, |
|
"grad_norm": 0.41931501030921936, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2263, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 0.9821593545340697, |
|
"grad_norm": 0.34010422229766846, |
|
"learning_rate": 0.0002, |
|
"loss": 0.222, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 0.983801761481451, |
|
"grad_norm": 0.3099174499511719, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2221, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 0.9854441684288324, |
|
"grad_norm": 0.3627716600894928, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2419, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.9870865753762138, |
|
"grad_norm": 0.3797793388366699, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2289, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 0.9887289823235952, |
|
"grad_norm": 0.34914806485176086, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2211, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 0.9903713892709766, |
|
"grad_norm": 0.35985666513442993, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2271, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 0.9920137962183581, |
|
"grad_norm": 0.3159051835536957, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2364, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 0.9936562031657394, |
|
"grad_norm": 0.29203563928604126, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2429, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.9952986101131208, |
|
"grad_norm": 0.32187801599502563, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2386, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 0.9969410170605022, |
|
"grad_norm": 0.35564154386520386, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2349, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 0.9985834240078836, |
|
"grad_norm": 0.3589749336242676, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2275, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.28475141525268555, |
|
"eval_runtime": 907.1315, |
|
"eval_samples_per_second": 4.174, |
|
"eval_steps_per_second": 0.523, |
|
"step": 12178 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 16000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 77, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.843715322728153e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|