|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 20, |
|
"global_step": 12178, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0016424069473813874, |
|
"grad_norm": 0.5388180017471313, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8932, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.003284813894762775, |
|
"grad_norm": 0.46543794870376587, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6701, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.004927220842144162, |
|
"grad_norm": 0.45620647072792053, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5541, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.00656962778952555, |
|
"grad_norm": 0.4583057761192322, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5777, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.008212034736906937, |
|
"grad_norm": 0.5295430421829224, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3046, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.009854441684288324, |
|
"grad_norm": 0.44552722573280334, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3053, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.011496848631669712, |
|
"grad_norm": 0.45540332794189453, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1971, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0131392555790511, |
|
"grad_norm": 0.4302205443382263, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2143, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.014781662526432487, |
|
"grad_norm": 0.4064156413078308, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1695, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.016424069473813873, |
|
"grad_norm": 0.43175607919692993, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1836, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01806647642119526, |
|
"grad_norm": 0.5280532240867615, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1627, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.01970888336857665, |
|
"grad_norm": 0.4442996382713318, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2294, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.021351290315958036, |
|
"grad_norm": 0.4584205448627472, |
|
"learning_rate": 0.0002, |
|
"loss": 1.058, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.022993697263339424, |
|
"grad_norm": 0.40979012846946716, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0436, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.02463610421072081, |
|
"grad_norm": 0.4241325557231903, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1414, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0262785111581022, |
|
"grad_norm": 0.4106293022632599, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0744, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.027920918105483587, |
|
"grad_norm": 0.46253764629364014, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0589, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.029563325052864974, |
|
"grad_norm": 0.4244967997074127, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0263, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.031205732000246362, |
|
"grad_norm": 0.35677096247673035, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0447, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.032848138947627746, |
|
"grad_norm": 0.4948490262031555, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0826, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.034490545895009134, |
|
"grad_norm": 0.5756106972694397, |
|
"learning_rate": 0.0002, |
|
"loss": 0.948, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.03613295284239052, |
|
"grad_norm": 0.5383228063583374, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0025, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.03777535978977191, |
|
"grad_norm": 0.3955784738063812, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9027, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.0394177667371533, |
|
"grad_norm": 0.37915533781051636, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9936, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.041060173684534684, |
|
"grad_norm": 0.5413188934326172, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9077, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04270258063191607, |
|
"grad_norm": 0.5334627032279968, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9009, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.04434498757929746, |
|
"grad_norm": 0.5394805073738098, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9542, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.04598739452667885, |
|
"grad_norm": 0.532177746295929, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8743, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.047629801474060235, |
|
"grad_norm": 0.5266315937042236, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8931, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.04927220842144162, |
|
"grad_norm": 0.4725072979927063, |
|
"learning_rate": 0.0002, |
|
"loss": 0.908, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05091461536882301, |
|
"grad_norm": 0.6026243567466736, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7898, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.0525570223162044, |
|
"grad_norm": 0.4928111732006073, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8406, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.054199429263585785, |
|
"grad_norm": 0.4555020332336426, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8222, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.05584183621096717, |
|
"grad_norm": 0.6445655822753906, |
|
"learning_rate": 0.0002, |
|
"loss": 0.832, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.05748424315834856, |
|
"grad_norm": 0.5854527950286865, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8435, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.05912665010572995, |
|
"grad_norm": 0.4609089195728302, |
|
"learning_rate": 0.0002, |
|
"loss": 0.748, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.060769057053111336, |
|
"grad_norm": 0.5567362904548645, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7777, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.062411464000492724, |
|
"grad_norm": 0.5161166191101074, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7597, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.06405387094787411, |
|
"grad_norm": 0.5450626611709595, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7337, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.06569627789525549, |
|
"grad_norm": 0.6034521460533142, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7668, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06733868484263689, |
|
"grad_norm": 0.4653383493423462, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7417, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.06898109179001827, |
|
"grad_norm": 0.4846251308917999, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7506, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.07062349873739966, |
|
"grad_norm": 0.4887784719467163, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7115, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.07226590568478104, |
|
"grad_norm": 0.5024611949920654, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7402, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.07390831263216244, |
|
"grad_norm": 0.5007764101028442, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6529, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.07555071957954382, |
|
"grad_norm": 0.5097551345825195, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7776, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.07719312652692521, |
|
"grad_norm": 0.5517822504043579, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6609, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.0788355334743066, |
|
"grad_norm": 0.5290623307228088, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7015, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.08047794042168799, |
|
"grad_norm": 0.576545000076294, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6752, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.08212034736906937, |
|
"grad_norm": 0.4689784049987793, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7047, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08376275431645076, |
|
"grad_norm": 0.455814003944397, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6378, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.08540516126383214, |
|
"grad_norm": 0.6452861428260803, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6962, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.08704756821121354, |
|
"grad_norm": 0.5699702501296997, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6508, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.08868997515859492, |
|
"grad_norm": 0.5086561441421509, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6174, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.09033238210597631, |
|
"grad_norm": 0.48543211817741394, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6261, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.0919747890533577, |
|
"grad_norm": 0.6361482739448547, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6336, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.09361719600073909, |
|
"grad_norm": 0.5558167695999146, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6678, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.09525960294812047, |
|
"grad_norm": 0.5599238872528076, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6169, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.09690200989550186, |
|
"grad_norm": 0.5939186215400696, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6059, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.09854441684288325, |
|
"grad_norm": 0.5663330554962158, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5737, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.10018682379026464, |
|
"grad_norm": 0.49742865562438965, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6013, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.10182923073764602, |
|
"grad_norm": 0.520782470703125, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5929, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.1034716376850274, |
|
"grad_norm": 0.45269444584846497, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5981, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.1051140446324088, |
|
"grad_norm": 0.5428550243377686, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5814, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.10675645157979018, |
|
"grad_norm": 0.4782160818576813, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5858, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.10839885852717157, |
|
"grad_norm": 0.5338163375854492, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6255, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.11004126547455295, |
|
"grad_norm": 0.4596363306045532, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5974, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.11168367242193435, |
|
"grad_norm": 0.5203448534011841, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5452, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.11332607936931573, |
|
"grad_norm": 0.44463276863098145, |
|
"learning_rate": 0.0002, |
|
"loss": 0.576, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.11496848631669712, |
|
"grad_norm": 0.5106232762336731, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5679, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.1166108932640785, |
|
"grad_norm": 0.5451502799987793, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5673, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.1182533002114599, |
|
"grad_norm": 0.6638749837875366, |
|
"learning_rate": 0.0002, |
|
"loss": 0.543, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.11989570715884128, |
|
"grad_norm": 0.5045977830886841, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5803, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.12153811410622267, |
|
"grad_norm": 0.5385071635246277, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5357, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.12318052105360405, |
|
"grad_norm": 0.43107932806015015, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5378, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.12482292800098545, |
|
"grad_norm": 0.5887011885643005, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5594, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.12646533494836684, |
|
"grad_norm": 0.547126829624176, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5574, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.12810774189574822, |
|
"grad_norm": 0.532454788684845, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5506, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.1297501488431296, |
|
"grad_norm": 0.592251718044281, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5206, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.13139255579051098, |
|
"grad_norm": 0.6189798712730408, |
|
"learning_rate": 0.0002, |
|
"loss": 0.516, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.1330349627378924, |
|
"grad_norm": 0.4614121913909912, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4948, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.13467736968527377, |
|
"grad_norm": 0.6192139983177185, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4924, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.13631977663265515, |
|
"grad_norm": 0.5383406281471252, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4955, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.13796218358003653, |
|
"grad_norm": 0.681564450263977, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5224, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.13960459052741794, |
|
"grad_norm": 0.51935875415802, |
|
"learning_rate": 0.0002, |
|
"loss": 0.508, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.14124699747479932, |
|
"grad_norm": 0.532661497592926, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5362, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.1428894044221807, |
|
"grad_norm": 0.40774333477020264, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4908, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.14453181136956209, |
|
"grad_norm": 0.6406064033508301, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4891, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.1461742183169435, |
|
"grad_norm": 0.41497862339019775, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5234, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.14781662526432487, |
|
"grad_norm": 0.502389132976532, |
|
"learning_rate": 0.0002, |
|
"loss": 0.459, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.14945903221170626, |
|
"grad_norm": 0.5248283743858337, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4659, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.15110143915908764, |
|
"grad_norm": 0.5587234497070312, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4877, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.15274384610646902, |
|
"grad_norm": 0.479913592338562, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4598, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.15438625305385043, |
|
"grad_norm": 0.5423480272293091, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4754, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.1560286600012318, |
|
"grad_norm": 0.5485461354255676, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4681, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.1576710669486132, |
|
"grad_norm": 0.48511844873428345, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4672, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.15931347389599457, |
|
"grad_norm": 0.49132347106933594, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4694, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.16095588084337598, |
|
"grad_norm": 0.5654798746109009, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5047, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.16259828779075736, |
|
"grad_norm": 0.571369469165802, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4486, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.16424069473813874, |
|
"grad_norm": 0.5438801646232605, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4756, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16588310168552012, |
|
"grad_norm": 0.5384829044342041, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4404, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.16752550863290153, |
|
"grad_norm": 0.5565232634544373, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4672, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.1691679155802829, |
|
"grad_norm": 0.5227774381637573, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4452, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.1708103225276643, |
|
"grad_norm": 0.47740334272384644, |
|
"learning_rate": 0.0002, |
|
"loss": 0.492, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.17245272947504567, |
|
"grad_norm": 0.4206157326698303, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4517, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.17409513642242708, |
|
"grad_norm": 0.5148787498474121, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4801, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.17573754336980846, |
|
"grad_norm": 0.4815204441547394, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4415, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.17737995031718984, |
|
"grad_norm": 0.5302825570106506, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4558, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.17902235726457122, |
|
"grad_norm": 0.574350118637085, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4709, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.18066476421195263, |
|
"grad_norm": 0.5393965244293213, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4528, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.182307171159334, |
|
"grad_norm": 0.43285471200942993, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4294, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.1839495781067154, |
|
"grad_norm": 0.4550113081932068, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4395, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.18559198505409677, |
|
"grad_norm": 0.586071789264679, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4456, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.18723439200147818, |
|
"grad_norm": 0.5634139776229858, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4295, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.18887679894885956, |
|
"grad_norm": 0.5095311403274536, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4347, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.19051920589624094, |
|
"grad_norm": 0.6051989793777466, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4278, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.19216161284362232, |
|
"grad_norm": 0.45743292570114136, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4191, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.19380401979100373, |
|
"grad_norm": 0.6048611402511597, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4512, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.1954464267383851, |
|
"grad_norm": 0.495731920003891, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4087, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.1970888336857665, |
|
"grad_norm": 0.5746319890022278, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4112, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.19873124063314787, |
|
"grad_norm": 0.4899024963378906, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4403, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.20037364758052928, |
|
"grad_norm": 0.40732160210609436, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4281, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.20201605452791066, |
|
"grad_norm": 0.4896198809146881, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4533, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.20365846147529204, |
|
"grad_norm": 0.5733948349952698, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4113, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.20530086842267342, |
|
"grad_norm": 0.4565046429634094, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4237, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.2069432753700548, |
|
"grad_norm": 0.5932797789573669, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4367, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.2085856823174362, |
|
"grad_norm": 0.5838333368301392, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4331, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.2102280892648176, |
|
"grad_norm": 0.5022397637367249, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4004, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.21187049621219897, |
|
"grad_norm": 0.5949686765670776, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4119, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.21351290315958035, |
|
"grad_norm": 0.45230528712272644, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4217, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.21515531010696176, |
|
"grad_norm": 0.4186144471168518, |
|
"learning_rate": 0.0002, |
|
"loss": 0.428, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.21679771705434314, |
|
"grad_norm": 0.5562434196472168, |
|
"learning_rate": 0.0002, |
|
"loss": 0.394, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.21844012400172452, |
|
"grad_norm": 0.5947513580322266, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3998, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.2200825309491059, |
|
"grad_norm": 0.4886711835861206, |
|
"learning_rate": 0.0002, |
|
"loss": 0.389, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.2217249378964873, |
|
"grad_norm": 0.551491379737854, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3952, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.2233673448438687, |
|
"grad_norm": 0.383627712726593, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3733, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.22500975179125007, |
|
"grad_norm": 0.45694270730018616, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4075, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.22665215873863145, |
|
"grad_norm": 0.46876367926597595, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4135, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.22829456568601286, |
|
"grad_norm": 0.9062886238098145, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3891, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.22993697263339424, |
|
"grad_norm": 0.47902002930641174, |
|
"learning_rate": 0.0002, |
|
"loss": 0.405, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.23157937958077562, |
|
"grad_norm": 0.6828575134277344, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3985, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.233221786528157, |
|
"grad_norm": 0.5411036610603333, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3658, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.2348641934755384, |
|
"grad_norm": 0.6698014736175537, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4003, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.2365066004229198, |
|
"grad_norm": 0.5779656171798706, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4003, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.23814900737030117, |
|
"grad_norm": 0.5321545004844666, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3667, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.23979141431768256, |
|
"grad_norm": 0.43935510516166687, |
|
"learning_rate": 0.0002, |
|
"loss": 0.375, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.24143382126506396, |
|
"grad_norm": 0.67582768201828, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3814, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.24307622821244534, |
|
"grad_norm": 0.6373169422149658, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4079, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.24471863515982673, |
|
"grad_norm": 0.4568232595920563, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3821, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.2463610421072081, |
|
"grad_norm": 0.5706847310066223, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3745, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.24800344905458951, |
|
"grad_norm": 0.5293543338775635, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3945, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.2496458560019709, |
|
"grad_norm": 0.5566920042037964, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3739, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.2512882629493523, |
|
"grad_norm": 0.5758338570594788, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4115, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.2529306698967337, |
|
"grad_norm": 0.5503116250038147, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3841, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.25457307684411506, |
|
"grad_norm": 0.5829768776893616, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3679, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.25621548379149645, |
|
"grad_norm": 0.4771459400653839, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3787, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.2578578907388778, |
|
"grad_norm": 0.508679986000061, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3424, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.2595002976862592, |
|
"grad_norm": 0.5478394031524658, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3616, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.2611427046336406, |
|
"grad_norm": 0.48918816447257996, |
|
"learning_rate": 0.0002, |
|
"loss": 0.364, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.26278511158102197, |
|
"grad_norm": 0.6158058047294617, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3563, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.26442751852840335, |
|
"grad_norm": 0.6302765607833862, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3472, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.2660699254757848, |
|
"grad_norm": 0.42650097608566284, |
|
"learning_rate": 0.0002, |
|
"loss": 0.374, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.26771233242316617, |
|
"grad_norm": 0.5517419576644897, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3747, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.26935473937054755, |
|
"grad_norm": 0.5887686014175415, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3655, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.2709971463179289, |
|
"grad_norm": 0.5252538323402405, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3864, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.2726395532653103, |
|
"grad_norm": 0.4829944968223572, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3526, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.2742819602126917, |
|
"grad_norm": 0.4375133216381073, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3536, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.27592436716007307, |
|
"grad_norm": 0.5371789336204529, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3501, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.27756677410745445, |
|
"grad_norm": 0.44075456261634827, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3584, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.2792091810548359, |
|
"grad_norm": 0.53825443983078, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3304, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.28085158800221727, |
|
"grad_norm": 0.48521581292152405, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3588, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.28249399494959865, |
|
"grad_norm": 0.4189339578151703, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3556, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.28413640189698003, |
|
"grad_norm": 0.4011813700199127, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3403, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.2857788088443614, |
|
"grad_norm": 0.4910661280155182, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3897, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.2874212157917428, |
|
"grad_norm": 0.5664734840393066, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3503, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.28906362273912417, |
|
"grad_norm": 0.45044422149658203, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3357, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.29070602968650555, |
|
"grad_norm": 0.6162013411521912, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3827, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.292348436633887, |
|
"grad_norm": 0.428659588098526, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3418, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.29399084358126837, |
|
"grad_norm": 0.48843899369239807, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3695, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.29563325052864975, |
|
"grad_norm": 0.5662574768066406, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3418, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.29727565747603113, |
|
"grad_norm": 0.5488101243972778, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3619, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.2989180644234125, |
|
"grad_norm": 0.4078102111816406, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3339, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.3005604713707939, |
|
"grad_norm": 0.6991748213768005, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3653, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.30220287831817527, |
|
"grad_norm": 0.4532040059566498, |
|
"learning_rate": 0.0002, |
|
"loss": 0.343, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.30384528526555665, |
|
"grad_norm": 0.47306913137435913, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3551, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.30548769221293803, |
|
"grad_norm": 0.4408378303050995, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3441, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.30713009916031947, |
|
"grad_norm": 0.5125454068183899, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3578, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.30877250610770085, |
|
"grad_norm": 0.5483905076980591, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3344, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.31041491305508223, |
|
"grad_norm": 0.3780999779701233, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3491, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.3120573200024636, |
|
"grad_norm": 0.4443167746067047, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3406, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.313699726949845, |
|
"grad_norm": 0.5337740182876587, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3369, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.3153421338972264, |
|
"grad_norm": 0.5371155738830566, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3579, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.31698454084460775, |
|
"grad_norm": 0.49183839559555054, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3359, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.31862694779198913, |
|
"grad_norm": 0.5076944828033447, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3604, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.32026935473937057, |
|
"grad_norm": 0.5076488256454468, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3373, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.32191176168675195, |
|
"grad_norm": 0.519506573677063, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3529, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.32355416863413333, |
|
"grad_norm": 0.3967176079750061, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3203, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.3251965755815147, |
|
"grad_norm": 0.5084711313247681, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3323, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.3268389825288961, |
|
"grad_norm": 0.5324501991271973, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3351, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.3284813894762775, |
|
"grad_norm": 0.4679279923439026, |
|
"learning_rate": 0.0002, |
|
"loss": 0.322, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.33012379642365886, |
|
"grad_norm": 0.5273401737213135, |
|
"learning_rate": 0.0002, |
|
"loss": 0.358, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.33176620337104024, |
|
"grad_norm": 0.560130774974823, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3252, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.33340861031842167, |
|
"grad_norm": 0.7334967851638794, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3125, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.33505101726580305, |
|
"grad_norm": 0.448902428150177, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3337, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.33669342421318443, |
|
"grad_norm": 0.42839765548706055, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3332, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.3383358311605658, |
|
"grad_norm": 0.43117448687553406, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3204, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.3399782381079472, |
|
"grad_norm": 0.4213992953300476, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3421, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.3416206450553286, |
|
"grad_norm": 0.40054526925086975, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3115, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.34326305200270996, |
|
"grad_norm": 0.5090795159339905, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3324, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.34490545895009134, |
|
"grad_norm": 0.5156223177909851, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3186, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.3465478658974728, |
|
"grad_norm": 0.4297846555709839, |
|
"learning_rate": 0.0002, |
|
"loss": 0.312, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.34819027284485415, |
|
"grad_norm": 0.4857240617275238, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3202, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.34983267979223553, |
|
"grad_norm": 0.6078678965568542, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3329, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.3514750867396169, |
|
"grad_norm": 0.5576339364051819, |
|
"learning_rate": 0.0002, |
|
"loss": 0.333, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.3531174936869983, |
|
"grad_norm": 0.5340404510498047, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3367, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.3547599006343797, |
|
"grad_norm": 0.5187095999717712, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3579, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.35640230758176106, |
|
"grad_norm": 0.4246378540992737, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3281, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.35804471452914244, |
|
"grad_norm": 0.6137174963951111, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3248, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.3596871214765238, |
|
"grad_norm": 0.44220972061157227, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3267, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.36132952842390526, |
|
"grad_norm": 0.4254567325115204, |
|
"learning_rate": 0.0002, |
|
"loss": 0.315, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.36297193537128664, |
|
"grad_norm": 0.66693115234375, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3354, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.364614342318668, |
|
"grad_norm": 0.5646852254867554, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3275, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.3662567492660494, |
|
"grad_norm": 0.525794506072998, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3095, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.3678991562134308, |
|
"grad_norm": 0.5454958081245422, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3177, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.36954156316081216, |
|
"grad_norm": 0.5054097771644592, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3291, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.37118397010819354, |
|
"grad_norm": 0.45259889960289, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3309, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.3728263770555749, |
|
"grad_norm": 0.4160098135471344, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3416, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.37446878400295636, |
|
"grad_norm": 0.36465033888816833, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3244, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.37611119095033774, |
|
"grad_norm": 0.3822501301765442, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3163, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.3777535978977191, |
|
"grad_norm": 0.4484947621822357, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3186, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.3793960048451005, |
|
"grad_norm": 0.481303334236145, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3202, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.3810384117924819, |
|
"grad_norm": 0.5275722742080688, |
|
"learning_rate": 0.0002, |
|
"loss": 0.319, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.38268081873986326, |
|
"grad_norm": 0.5782263278961182, |
|
"learning_rate": 0.0002, |
|
"loss": 0.327, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.38432322568724464, |
|
"grad_norm": 0.511466920375824, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3176, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.385965632634626, |
|
"grad_norm": 0.5383144617080688, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3215, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.38760803958200746, |
|
"grad_norm": 0.47731462121009827, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3184, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.38925044652938884, |
|
"grad_norm": 0.43928396701812744, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2998, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.3908928534767702, |
|
"grad_norm": 0.47170737385749817, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3211, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.3925352604241516, |
|
"grad_norm": 0.39744389057159424, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3119, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.394177667371533, |
|
"grad_norm": 0.4669509828090668, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2965, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.39582007431891436, |
|
"grad_norm": 0.4926499128341675, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2996, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.39746248126629574, |
|
"grad_norm": 0.4818594455718994, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3116, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.3991048882136771, |
|
"grad_norm": 0.4344610571861267, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2884, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.40074729516105856, |
|
"grad_norm": 0.3993249535560608, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3096, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.40238970210843994, |
|
"grad_norm": 0.4467979967594147, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2976, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.4040321090558213, |
|
"grad_norm": 0.5102105736732483, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3005, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.4056745160032027, |
|
"grad_norm": 0.49601197242736816, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2983, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.4073169229505841, |
|
"grad_norm": 0.39463695883750916, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3071, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.40895932989796546, |
|
"grad_norm": 0.5963265299797058, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3017, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.41060173684534684, |
|
"grad_norm": 0.5571741461753845, |
|
"learning_rate": 0.0002, |
|
"loss": 0.312, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.4122441437927282, |
|
"grad_norm": 0.430397629737854, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3077, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.4138865507401096, |
|
"grad_norm": 0.5038132667541504, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3065, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.41552895768749104, |
|
"grad_norm": 0.41420304775238037, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3061, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.4171713646348724, |
|
"grad_norm": 0.6602872610092163, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3101, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.4188137715822538, |
|
"grad_norm": 0.46677547693252563, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3097, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.4204561785296352, |
|
"grad_norm": 0.5312944054603577, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3136, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.42209858547701656, |
|
"grad_norm": 0.4542620778083801, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3177, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.42374099242439794, |
|
"grad_norm": 0.5240755081176758, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3121, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.4253833993717793, |
|
"grad_norm": 0.49393558502197266, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3145, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.4270258063191607, |
|
"grad_norm": 0.3480128347873688, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3047, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.42866821326654214, |
|
"grad_norm": 0.4269355833530426, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3128, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.4303106202139235, |
|
"grad_norm": 0.46620428562164307, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2892, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.4319530271613049, |
|
"grad_norm": 0.502040684223175, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2977, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.4335954341086863, |
|
"grad_norm": 0.4725840091705322, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2926, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.43523784105606766, |
|
"grad_norm": 0.4031844735145569, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2931, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.43688024800344905, |
|
"grad_norm": 0.5044718384742737, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2925, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.4385226549508304, |
|
"grad_norm": 0.43350791931152344, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3064, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.4401650618982118, |
|
"grad_norm": 0.4503776431083679, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2935, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.44180746884559324, |
|
"grad_norm": 0.4562300145626068, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2908, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.4434498757929746, |
|
"grad_norm": 0.4543699026107788, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2971, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.445092282740356, |
|
"grad_norm": 0.45582354068756104, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3039, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.4467346896877374, |
|
"grad_norm": 0.535355269908905, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3023, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.44837709663511877, |
|
"grad_norm": 0.6104617118835449, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3001, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.45001950358250015, |
|
"grad_norm": 0.5111253261566162, |
|
"learning_rate": 0.0002, |
|
"loss": 0.281, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.4516619105298815, |
|
"grad_norm": 0.49691838026046753, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3043, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.4533043174772629, |
|
"grad_norm": 0.5030774474143982, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2963, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.4549467244246443, |
|
"grad_norm": 0.4874095320701599, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3063, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.4565891313720257, |
|
"grad_norm": 0.4713788330554962, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2997, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.4582315383194071, |
|
"grad_norm": 0.48497167229652405, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2936, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.4598739452667885, |
|
"grad_norm": 0.5291727185249329, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2863, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.46151635221416987, |
|
"grad_norm": 0.5845544934272766, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2834, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.46315875916155125, |
|
"grad_norm": 0.5052700638771057, |
|
"learning_rate": 0.0002, |
|
"loss": 0.281, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.46480116610893263, |
|
"grad_norm": 0.47813382744789124, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2859, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.466443573056314, |
|
"grad_norm": 0.4913572072982788, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2765, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.4680859800036954, |
|
"grad_norm": 0.5044130086898804, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3068, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.4697283869510768, |
|
"grad_norm": 0.45967990159988403, |
|
"learning_rate": 0.0002, |
|
"loss": 0.294, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.4713707938984582, |
|
"grad_norm": 0.4834402799606323, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2902, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.4730132008458396, |
|
"grad_norm": 0.4889473617076874, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2931, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.47465560779322097, |
|
"grad_norm": 0.37159985303878784, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2836, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.47629801474060235, |
|
"grad_norm": 0.44428759813308716, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2994, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.47794042168798373, |
|
"grad_norm": 0.5093443989753723, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2943, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.4795828286353651, |
|
"grad_norm": 0.539089024066925, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2968, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.4812252355827465, |
|
"grad_norm": 0.33726248145103455, |
|
"learning_rate": 0.0002, |
|
"loss": 0.283, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.4828676425301279, |
|
"grad_norm": 0.451824426651001, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2824, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.4845100494775093, |
|
"grad_norm": 0.4333132207393646, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2908, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.4861524564248907, |
|
"grad_norm": 0.4399010241031647, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2857, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.48779486337227207, |
|
"grad_norm": 0.46633288264274597, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2796, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.48943727031965345, |
|
"grad_norm": 0.6088176965713501, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2868, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.49107967726703483, |
|
"grad_norm": 0.5191177129745483, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2713, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.4927220842144162, |
|
"grad_norm": 0.6080117225646973, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2925, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.4943644911617976, |
|
"grad_norm": 0.4405871629714966, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2827, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.49600689810917903, |
|
"grad_norm": 0.44443821907043457, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2641, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.4976493050565604, |
|
"grad_norm": 0.401265025138855, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2908, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.4992917120039418, |
|
"grad_norm": 0.4125641882419586, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2717, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.5009341189513231, |
|
"grad_norm": 0.4346245229244232, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2706, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.5025765258987046, |
|
"grad_norm": 0.47208690643310547, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2851, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.504218932846086, |
|
"grad_norm": 0.4369046986103058, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2809, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.5058613397934674, |
|
"grad_norm": 0.5451960563659668, |
|
"learning_rate": 0.0002, |
|
"loss": 0.293, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.5075037467408487, |
|
"grad_norm": 0.6085506677627563, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2748, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.5091461536882301, |
|
"grad_norm": 0.3898778259754181, |
|
"learning_rate": 0.0002, |
|
"loss": 0.276, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.5107885606356115, |
|
"grad_norm": 0.5069212317466736, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2925, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.5124309675829929, |
|
"grad_norm": 0.48736870288848877, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2718, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.5140733745303743, |
|
"grad_norm": 0.5182287693023682, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2783, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.5157157814777557, |
|
"grad_norm": 0.5157051086425781, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2828, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.517358188425137, |
|
"grad_norm": 0.4653798043727875, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2802, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.5190005953725184, |
|
"grad_norm": 0.4838721454143524, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2758, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.5206430023198998, |
|
"grad_norm": 0.47830331325531006, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2999, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.5222854092672812, |
|
"grad_norm": 0.45021089911460876, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2673, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.5239278162146626, |
|
"grad_norm": 0.4527071714401245, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2624, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.5255702231620439, |
|
"grad_norm": 0.508590817451477, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2555, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.5272126301094253, |
|
"grad_norm": 0.38745129108428955, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2863, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.5288550370568067, |
|
"grad_norm": 0.6669766902923584, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2813, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.5304974440041882, |
|
"grad_norm": 0.5111877918243408, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2712, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.5321398509515696, |
|
"grad_norm": 0.5499460697174072, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2656, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.533782257898951, |
|
"grad_norm": 0.5004873275756836, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2873, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.5354246648463323, |
|
"grad_norm": 0.6010814309120178, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3005, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.5370670717937137, |
|
"grad_norm": 0.4720690846443176, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2675, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.5387094787410951, |
|
"grad_norm": 0.47902727127075195, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2715, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.5403518856884765, |
|
"grad_norm": 0.46664199233055115, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2713, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.5419942926358579, |
|
"grad_norm": 0.5385149121284485, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2867, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.5436366995832392, |
|
"grad_norm": 0.3878926932811737, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2802, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.5452791065306206, |
|
"grad_norm": 0.390656054019928, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2676, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.546921513478002, |
|
"grad_norm": 0.4342198669910431, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2874, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.5485639204253834, |
|
"grad_norm": 0.42557764053344727, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2829, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.5502063273727648, |
|
"grad_norm": 0.5569108128547668, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2929, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.5518487343201461, |
|
"grad_norm": 0.38765788078308105, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2804, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.5534911412675275, |
|
"grad_norm": 0.5068329572677612, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2629, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.5551335482149089, |
|
"grad_norm": 0.5097832083702087, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2846, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.5567759551622903, |
|
"grad_norm": 0.37154141068458557, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2625, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.5584183621096718, |
|
"grad_norm": 0.41640445590019226, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2669, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.5600607690570532, |
|
"grad_norm": 0.45431575179100037, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2644, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.5617031760044345, |
|
"grad_norm": 0.46759283542633057, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2742, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.5633455829518159, |
|
"grad_norm": 0.4959569275379181, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2746, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.5649879898991973, |
|
"grad_norm": 0.44646400213241577, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2803, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.5666303968465787, |
|
"grad_norm": 0.5323026180267334, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2685, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.5682728037939601, |
|
"grad_norm": 0.5455038547515869, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2737, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.5699152107413414, |
|
"grad_norm": 0.429975301027298, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2826, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.5715576176887228, |
|
"grad_norm": 0.5396720170974731, |
|
"learning_rate": 0.0002, |
|
"loss": 0.266, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.5732000246361042, |
|
"grad_norm": 0.45468002557754517, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2676, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.5748424315834856, |
|
"grad_norm": 0.4196678698062897, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2786, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.576484838530867, |
|
"grad_norm": 0.4681088328361511, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2731, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.5781272454782483, |
|
"grad_norm": 0.4538247287273407, |
|
"learning_rate": 0.0002, |
|
"loss": 0.287, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.5797696524256297, |
|
"grad_norm": 0.4834930896759033, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2808, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.5814120593730111, |
|
"grad_norm": 0.5876035690307617, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2631, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.5830544663203925, |
|
"grad_norm": 0.5164270401000977, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2502, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.584696873267774, |
|
"grad_norm": 0.46229973435401917, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2575, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.5863392802151554, |
|
"grad_norm": 0.438803106546402, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2625, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.5879816871625367, |
|
"grad_norm": 0.5476749539375305, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2706, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.5896240941099181, |
|
"grad_norm": 0.5194425582885742, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2766, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.5912665010572995, |
|
"grad_norm": 0.4764098525047302, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2784, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.5929089080046809, |
|
"grad_norm": 0.4703931510448456, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2652, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.5945513149520623, |
|
"grad_norm": 0.43372678756713867, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2644, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.5961937218994436, |
|
"grad_norm": 0.40813469886779785, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2721, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.597836128846825, |
|
"grad_norm": 0.5182124376296997, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2741, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.5994785357942064, |
|
"grad_norm": 0.4767136573791504, |
|
"learning_rate": 0.0002, |
|
"loss": 0.277, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.6011209427415878, |
|
"grad_norm": 0.43762916326522827, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2645, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.6027633496889692, |
|
"grad_norm": 0.44736623764038086, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2639, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.6044057566363505, |
|
"grad_norm": 0.44404810667037964, |
|
"learning_rate": 0.0002, |
|
"loss": 0.269, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.6060481635837319, |
|
"grad_norm": 0.4380868673324585, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2615, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.6076905705311133, |
|
"grad_norm": 0.4491208791732788, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2462, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.6093329774784947, |
|
"grad_norm": 0.5080710053443909, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2823, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.6109753844258761, |
|
"grad_norm": 0.47498422861099243, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2706, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.6126177913732576, |
|
"grad_norm": 0.4133289158344269, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2684, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.6142601983206389, |
|
"grad_norm": 0.4456469416618347, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2542, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.6159026052680203, |
|
"grad_norm": 0.5421611070632935, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2737, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.6175450122154017, |
|
"grad_norm": 0.4131532609462738, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2507, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.6191874191627831, |
|
"grad_norm": 0.47127702832221985, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2819, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.6208298261101645, |
|
"grad_norm": 0.43743231892585754, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2822, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.6224722330575458, |
|
"grad_norm": 0.42425501346588135, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2654, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.6241146400049272, |
|
"grad_norm": 0.4609832763671875, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2466, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.6257570469523086, |
|
"grad_norm": 0.42701244354248047, |
|
"learning_rate": 0.0002, |
|
"loss": 0.255, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.62739945389969, |
|
"grad_norm": 0.5154401063919067, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2705, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.6290418608470714, |
|
"grad_norm": 0.451377809047699, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2586, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.6306842677944527, |
|
"grad_norm": 0.47166112065315247, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2605, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.6323266747418341, |
|
"grad_norm": 0.3716096878051758, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2539, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.6339690816892155, |
|
"grad_norm": 0.45413604378700256, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2633, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.6356114886365969, |
|
"grad_norm": 0.48580700159072876, |
|
"learning_rate": 0.0002, |
|
"loss": 0.256, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.6372538955839783, |
|
"grad_norm": 0.40647098422050476, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2655, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.6388963025313598, |
|
"grad_norm": 0.4718053638935089, |
|
"learning_rate": 0.0002, |
|
"loss": 0.261, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.6405387094787411, |
|
"grad_norm": 0.5230545401573181, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2464, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.6421811164261225, |
|
"grad_norm": 0.5010546445846558, |
|
"learning_rate": 0.0002, |
|
"loss": 0.261, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.6438235233735039, |
|
"grad_norm": 0.41263461112976074, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2626, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.6454659303208853, |
|
"grad_norm": 0.538346529006958, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2557, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.6471083372682667, |
|
"grad_norm": 0.4800877869129181, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2742, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.648750744215648, |
|
"grad_norm": 0.5247358083724976, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2608, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.6503931511630294, |
|
"grad_norm": 0.5625537037849426, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2445, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.6520355581104108, |
|
"grad_norm": 0.44077080488204956, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2572, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.6536779650577922, |
|
"grad_norm": 0.4610736072063446, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2645, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.6553203720051736, |
|
"grad_norm": 0.4790017008781433, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2556, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.656962778952555, |
|
"grad_norm": 0.45367711782455444, |
|
"learning_rate": 0.0002, |
|
"loss": 0.253, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.6586051858999363, |
|
"grad_norm": 0.4644503593444824, |
|
"learning_rate": 0.0002, |
|
"loss": 0.25, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.6602475928473177, |
|
"grad_norm": 0.3938300311565399, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2524, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.6618899997946991, |
|
"grad_norm": 0.4796749949455261, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2643, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.6635324067420805, |
|
"grad_norm": 0.3965921700000763, |
|
"learning_rate": 0.0002, |
|
"loss": 0.252, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.6651748136894619, |
|
"grad_norm": 0.4033324420452118, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2469, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.6668172206368433, |
|
"grad_norm": 0.5205174088478088, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2479, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.6684596275842247, |
|
"grad_norm": 0.4026409685611725, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2482, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.6701020345316061, |
|
"grad_norm": 0.33538395166397095, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2452, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.6717444414789875, |
|
"grad_norm": 0.43549609184265137, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2548, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.6733868484263689, |
|
"grad_norm": 0.5167241096496582, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2664, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.6750292553737502, |
|
"grad_norm": 0.4824913740158081, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2668, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.6766716623211316, |
|
"grad_norm": 0.49560844898223877, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2639, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.678314069268513, |
|
"grad_norm": 0.43627840280532837, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2536, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.6799564762158944, |
|
"grad_norm": 0.4371199905872345, |
|
"learning_rate": 0.0002, |
|
"loss": 0.259, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.6815988831632758, |
|
"grad_norm": 0.43210867047309875, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2413, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.6832412901106572, |
|
"grad_norm": 0.4612789750099182, |
|
"learning_rate": 0.0002, |
|
"loss": 0.257, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.6848836970580385, |
|
"grad_norm": 0.5780384540557861, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2497, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.6865261040054199, |
|
"grad_norm": 0.3581444323062897, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2542, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.6881685109528013, |
|
"grad_norm": 0.5276636481285095, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2482, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.6898109179001827, |
|
"grad_norm": 0.419548362493515, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2778, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.691453324847564, |
|
"grad_norm": 0.5594448447227478, |
|
"learning_rate": 0.0002, |
|
"loss": 0.271, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.6930957317949455, |
|
"grad_norm": 0.4505052864551544, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2531, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.6947381387423269, |
|
"grad_norm": 0.4273683726787567, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2687, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.6963805456897083, |
|
"grad_norm": 0.41312068700790405, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2535, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.6980229526370897, |
|
"grad_norm": 0.3998921811580658, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2507, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.6996653595844711, |
|
"grad_norm": 0.4063471257686615, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2604, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.7013077665318525, |
|
"grad_norm": 0.4816170036792755, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2563, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.7029501734792338, |
|
"grad_norm": 0.47880151867866516, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2582, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.7045925804266152, |
|
"grad_norm": 0.43934714794158936, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2588, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.7062349873739966, |
|
"grad_norm": 0.5664840340614319, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2361, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.707877394321378, |
|
"grad_norm": 0.4387499690055847, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2784, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.7095198012687594, |
|
"grad_norm": 0.4497361183166504, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2419, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.7111622082161407, |
|
"grad_norm": 0.36037716269493103, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2479, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.7128046151635221, |
|
"grad_norm": 0.5163317918777466, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2535, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.7144470221109035, |
|
"grad_norm": 0.466194748878479, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2533, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.7160894290582849, |
|
"grad_norm": 0.328848272562027, |
|
"learning_rate": 0.0002, |
|
"loss": 0.254, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.7177318360056663, |
|
"grad_norm": 0.5417701005935669, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2544, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.7193742429530476, |
|
"grad_norm": 0.5538254976272583, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2453, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.7210166499004291, |
|
"grad_norm": 0.4739200174808502, |
|
"learning_rate": 0.0002, |
|
"loss": 0.258, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.7226590568478105, |
|
"grad_norm": 0.40133044123649597, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2684, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.7243014637951919, |
|
"grad_norm": 0.4493289291858673, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2565, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.7259438707425733, |
|
"grad_norm": 0.4970559775829315, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2506, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.7275862776899547, |
|
"grad_norm": 0.5687580108642578, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2511, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.729228684637336, |
|
"grad_norm": 0.5328338742256165, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2428, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.7308710915847174, |
|
"grad_norm": 0.47104090452194214, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2491, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.7325134985320988, |
|
"grad_norm": 0.4887702167034149, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2532, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.7341559054794802, |
|
"grad_norm": 0.3589889705181122, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2587, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.7357983124268616, |
|
"grad_norm": 0.4665176570415497, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2407, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.7374407193742429, |
|
"grad_norm": 0.2580777108669281, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2501, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.7390831263216243, |
|
"grad_norm": 0.5562865734100342, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2589, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.7407255332690057, |
|
"grad_norm": 0.36843666434288025, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2639, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.7423679402163871, |
|
"grad_norm": 0.433339387178421, |
|
"learning_rate": 0.0002, |
|
"loss": 0.239, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.7440103471637685, |
|
"grad_norm": 0.5565098524093628, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2528, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.7456527541111498, |
|
"grad_norm": 0.39954161643981934, |
|
"learning_rate": 0.0002, |
|
"loss": 0.24, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.7472951610585313, |
|
"grad_norm": 0.43612274527549744, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2373, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.7489375680059127, |
|
"grad_norm": 0.4511432945728302, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2564, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.7505799749532941, |
|
"grad_norm": 0.3895890414714813, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2469, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.7522223819006755, |
|
"grad_norm": 0.4349375069141388, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2582, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.7538647888480569, |
|
"grad_norm": 0.39693930745124817, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2576, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.7555071957954382, |
|
"grad_norm": 0.35806095600128174, |
|
"learning_rate": 0.0002, |
|
"loss": 0.235, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.7571496027428196, |
|
"grad_norm": 0.5650025010108948, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2541, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.758792009690201, |
|
"grad_norm": 0.45522645115852356, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2323, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.7604344166375824, |
|
"grad_norm": 0.45849525928497314, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2459, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.7620768235849638, |
|
"grad_norm": 0.5666941404342651, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2634, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.7637192305323451, |
|
"grad_norm": 0.43697381019592285, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2482, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.7653616374797265, |
|
"grad_norm": 0.5133718848228455, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2631, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.7670040444271079, |
|
"grad_norm": 0.5440112352371216, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2593, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.7686464513744893, |
|
"grad_norm": 0.5012624263763428, |
|
"learning_rate": 0.0002, |
|
"loss": 0.243, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.7702888583218707, |
|
"grad_norm": 0.4387590289115906, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2448, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.771931265269252, |
|
"grad_norm": 0.4327554702758789, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2514, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.7735736722166334, |
|
"grad_norm": 0.4909968078136444, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2503, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.7752160791640149, |
|
"grad_norm": 0.4279715120792389, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2558, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.7768584861113963, |
|
"grad_norm": 0.4973134994506836, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2412, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.7785008930587777, |
|
"grad_norm": 0.3873676359653473, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2409, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.7801433000061591, |
|
"grad_norm": 0.40915995836257935, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2322, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.7817857069535404, |
|
"grad_norm": 0.5738871693611145, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2408, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.7834281139009218, |
|
"grad_norm": 0.49270549416542053, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2477, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.7850705208483032, |
|
"grad_norm": 0.4603147804737091, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2402, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.7867129277956846, |
|
"grad_norm": 0.47675642371177673, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2528, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.788355334743066, |
|
"grad_norm": 0.41800156235694885, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2571, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.7899977416904473, |
|
"grad_norm": 0.42527106404304504, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2452, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.7916401486378287, |
|
"grad_norm": 0.5056847333908081, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2511, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.7932825555852101, |
|
"grad_norm": 0.2951577305793762, |
|
"learning_rate": 0.0002, |
|
"loss": 0.233, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.7949249625325915, |
|
"grad_norm": 0.4254283010959625, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2474, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.7965673694799729, |
|
"grad_norm": 0.5127973556518555, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2655, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.7982097764273542, |
|
"grad_norm": 0.3507694900035858, |
|
"learning_rate": 0.0002, |
|
"loss": 0.227, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.7998521833747356, |
|
"grad_norm": 0.4255737364292145, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2591, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.8014945903221171, |
|
"grad_norm": 0.44822582602500916, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2287, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.8031369972694985, |
|
"grad_norm": 0.4737776517868042, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2412, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.8047794042168799, |
|
"grad_norm": 0.4281519651412964, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2559, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.8064218111642613, |
|
"grad_norm": 0.3413679301738739, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2479, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.8080642181116426, |
|
"grad_norm": 0.4361155033111572, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2539, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.809706625059024, |
|
"grad_norm": 0.48523005843162537, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2534, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.8113490320064054, |
|
"grad_norm": 0.4045993685722351, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2455, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.8129914389537868, |
|
"grad_norm": 0.5103000998497009, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2535, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.8146338459011682, |
|
"grad_norm": 0.3670307397842407, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2337, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.8162762528485495, |
|
"grad_norm": 0.3149369955062866, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2586, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.8179186597959309, |
|
"grad_norm": 0.5316740274429321, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2373, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.8195610667433123, |
|
"grad_norm": 0.5300164222717285, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2399, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.8212034736906937, |
|
"grad_norm": 0.48414990305900574, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2331, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.8228458806380751, |
|
"grad_norm": 0.41733840107917786, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2454, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 0.8244882875854564, |
|
"grad_norm": 0.5048840045928955, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2421, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 0.8261306945328378, |
|
"grad_norm": 0.4444895386695862, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2537, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 0.8277731014802192, |
|
"grad_norm": 0.45051780343055725, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2462, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 0.8294155084276007, |
|
"grad_norm": 0.3937041163444519, |
|
"learning_rate": 0.0002, |
|
"loss": 0.243, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.8310579153749821, |
|
"grad_norm": 0.45621591806411743, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2469, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 0.8327003223223635, |
|
"grad_norm": 0.5431267619132996, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2425, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 0.8343427292697448, |
|
"grad_norm": 0.5039596557617188, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2379, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 0.8359851362171262, |
|
"grad_norm": 0.3915367126464844, |
|
"learning_rate": 0.0002, |
|
"loss": 0.241, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 0.8376275431645076, |
|
"grad_norm": 0.46073317527770996, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2485, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.839269950111889, |
|
"grad_norm": 0.47057440876960754, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2452, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 0.8409123570592704, |
|
"grad_norm": 0.6143821477890015, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2394, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.8425547640066517, |
|
"grad_norm": 0.41434940695762634, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2332, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 0.8441971709540331, |
|
"grad_norm": 0.467459499835968, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2439, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 0.8458395779014145, |
|
"grad_norm": 0.49404439330101013, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2378, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.8474819848487959, |
|
"grad_norm": 0.4313650131225586, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2455, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 0.8491243917961773, |
|
"grad_norm": 0.34277698397636414, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2396, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 0.8507667987435586, |
|
"grad_norm": 0.3649916350841522, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2348, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 0.85240920569094, |
|
"grad_norm": 0.4841578006744385, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2488, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 0.8540516126383214, |
|
"grad_norm": 0.5488325953483582, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2399, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.8556940195857029, |
|
"grad_norm": 0.41103577613830566, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2371, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 0.8573364265330843, |
|
"grad_norm": 0.42253378033638, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2478, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 0.8589788334804657, |
|
"grad_norm": 0.43092676997184753, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2316, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 0.860621240427847, |
|
"grad_norm": 0.5474075078964233, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2734, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 0.8622636473752284, |
|
"grad_norm": 0.474618524312973, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2378, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.8639060543226098, |
|
"grad_norm": 0.44008612632751465, |
|
"learning_rate": 0.0002, |
|
"loss": 0.236, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 0.8655484612699912, |
|
"grad_norm": 0.4194040894508362, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2433, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.8671908682173726, |
|
"grad_norm": 0.3890872597694397, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2308, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.868833275164754, |
|
"grad_norm": 0.41979917883872986, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2417, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 0.8704756821121353, |
|
"grad_norm": 0.3800947666168213, |
|
"learning_rate": 0.0002, |
|
"loss": 0.244, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.8721180890595167, |
|
"grad_norm": 0.38609811663627625, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2477, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 0.8737604960068981, |
|
"grad_norm": 0.514067530632019, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2382, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 0.8754029029542795, |
|
"grad_norm": 0.47742265462875366, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2298, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 0.8770453099016609, |
|
"grad_norm": 0.45849281549453735, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2332, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 0.8786877168490422, |
|
"grad_norm": 0.39788320660591125, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2363, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.8803301237964236, |
|
"grad_norm": 0.5124650597572327, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2292, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 0.881972530743805, |
|
"grad_norm": 0.48688754439353943, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2444, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 0.8836149376911865, |
|
"grad_norm": 0.46146026253700256, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2473, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 0.8852573446385679, |
|
"grad_norm": 0.38401076197624207, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2441, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 0.8868997515859492, |
|
"grad_norm": 0.4642081558704376, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2338, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.8885421585333306, |
|
"grad_norm": 0.378845751285553, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2203, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 0.890184565480712, |
|
"grad_norm": 0.3785631060600281, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2474, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 0.8918269724280934, |
|
"grad_norm": 0.4151659309864044, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2361, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 0.8934693793754748, |
|
"grad_norm": 0.3314524292945862, |
|
"learning_rate": 0.0002, |
|
"loss": 0.241, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.8951117863228562, |
|
"grad_norm": 0.4619898200035095, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2426, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.8967541932702375, |
|
"grad_norm": 0.5724550485610962, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2455, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 0.8983966002176189, |
|
"grad_norm": 0.3766199052333832, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2319, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 0.9000390071650003, |
|
"grad_norm": 0.4241611659526825, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2316, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 0.9016814141123817, |
|
"grad_norm": 0.35726866126060486, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2343, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 0.903323821059763, |
|
"grad_norm": 0.5252423882484436, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2431, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.9049662280071444, |
|
"grad_norm": 0.47167885303497314, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2512, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 0.9066086349545258, |
|
"grad_norm": 0.4106541872024536, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2397, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 0.9082510419019072, |
|
"grad_norm": 0.4804975390434265, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2445, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 0.9098934488492886, |
|
"grad_norm": 0.4177796542644501, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2302, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 0.9115358557966701, |
|
"grad_norm": 0.34781017899513245, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2285, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.9131782627440514, |
|
"grad_norm": 0.34392043948173523, |
|
"learning_rate": 0.0002, |
|
"loss": 0.232, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 0.9148206696914328, |
|
"grad_norm": 0.46544018387794495, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2332, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 0.9164630766388142, |
|
"grad_norm": 0.47958704829216003, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2481, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.9181054835861956, |
|
"grad_norm": 0.4493333697319031, |
|
"learning_rate": 0.0002, |
|
"loss": 0.238, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 0.919747890533577, |
|
"grad_norm": 0.47599494457244873, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2416, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.9213902974809584, |
|
"grad_norm": 0.39547592401504517, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2456, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 0.9230327044283397, |
|
"grad_norm": 0.42187511920928955, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2425, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 0.9246751113757211, |
|
"grad_norm": 0.3870528042316437, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2366, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 0.9263175183231025, |
|
"grad_norm": 0.40943118929862976, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2088, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 0.9279599252704839, |
|
"grad_norm": 0.3936561346054077, |
|
"learning_rate": 0.0002, |
|
"loss": 0.239, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.9296023322178653, |
|
"grad_norm": 0.4154857397079468, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2413, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 0.9312447391652466, |
|
"grad_norm": 0.5544102191925049, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2565, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 0.932887146112628, |
|
"grad_norm": 0.5494611263275146, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2469, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 0.9345295530600094, |
|
"grad_norm": 0.41848114132881165, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2333, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 0.9361719600073908, |
|
"grad_norm": 0.41343703866004944, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2342, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.9378143669547723, |
|
"grad_norm": 0.6060330867767334, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2507, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 0.9394567739021537, |
|
"grad_norm": 0.42079275846481323, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2322, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 0.941099180849535, |
|
"grad_norm": 0.43053537607192993, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2257, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 0.9427415877969164, |
|
"grad_norm": 0.41895121335983276, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2501, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 0.9443839947442978, |
|
"grad_norm": 0.467018723487854, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2282, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.9460264016916792, |
|
"grad_norm": 0.5707799196243286, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2319, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.9476688086390606, |
|
"grad_norm": 0.4575120806694031, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2291, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 0.9493112155864419, |
|
"grad_norm": 0.38349372148513794, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2263, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 0.9509536225338233, |
|
"grad_norm": 0.4487491846084595, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2505, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 0.9525960294812047, |
|
"grad_norm": 0.39065688848495483, |
|
"learning_rate": 0.0002, |
|
"loss": 0.239, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.9542384364285861, |
|
"grad_norm": 0.4473966658115387, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2409, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 0.9558808433759675, |
|
"grad_norm": 0.39066895842552185, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2431, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 0.9575232503233488, |
|
"grad_norm": 0.470277339220047, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2419, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 0.9591656572707302, |
|
"grad_norm": 0.405834436416626, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2408, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 0.9608080642181116, |
|
"grad_norm": 0.5717544555664062, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2352, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.962450471165493, |
|
"grad_norm": 0.4837093651294708, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2435, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 0.9640928781128744, |
|
"grad_norm": 0.4689130187034607, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2324, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 0.9657352850602559, |
|
"grad_norm": 0.511249840259552, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2394, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 0.9673776920076372, |
|
"grad_norm": 0.43555593490600586, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2377, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.9690200989550186, |
|
"grad_norm": 0.41933077573776245, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2355, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.9706625059024, |
|
"grad_norm": 0.41573819518089294, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2345, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 0.9723049128497814, |
|
"grad_norm": 0.3951037526130676, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2399, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.9739473197971628, |
|
"grad_norm": 0.477756142616272, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2425, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 0.9755897267445441, |
|
"grad_norm": 0.5147901773452759, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2354, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 0.9772321336919255, |
|
"grad_norm": 0.40053385496139526, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2325, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.9788745406393069, |
|
"grad_norm": 0.4459463953971863, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2492, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 0.9805169475866883, |
|
"grad_norm": 0.42749595642089844, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2308, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 0.9821593545340697, |
|
"grad_norm": 0.4053783714771271, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2263, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 0.983801761481451, |
|
"grad_norm": 0.43342533707618713, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2348, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 0.9854441684288324, |
|
"grad_norm": 0.43272313475608826, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2234, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.9870865753762138, |
|
"grad_norm": 0.3550325036048889, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2186, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 0.9887289823235952, |
|
"grad_norm": 0.35271936655044556, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2326, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 0.9903713892709766, |
|
"grad_norm": 0.37404924631118774, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2483, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 0.9920137962183581, |
|
"grad_norm": 0.46686896681785583, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2213, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 0.9936562031657394, |
|
"grad_norm": 0.37012913823127747, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2415, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.9952986101131208, |
|
"grad_norm": 0.4403967559337616, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2261, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 0.9969410170605022, |
|
"grad_norm": 0.36877259612083435, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2295, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 0.9985834240078836, |
|
"grad_norm": 0.34526777267456055, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2236, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.30336490273475647, |
|
"eval_runtime": 533.8677, |
|
"eval_samples_per_second": 7.092, |
|
"eval_steps_per_second": 0.888, |
|
"step": 12178 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 14000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 77, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.0518674601423667e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|