CodeLlama-7B-QML / trainer_state.json
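This file is the Trainer state produced by the Hugging Face transformers Trainer during the CodeLlama-7B-QML fine-tuning run: log_history records the training loss, gradient norm, and learning rate every 20 optimizer steps over a single epoch of 12,178 steps. The snippet below is a minimal sketch, not part of the repository, of how the log could be inspected locally; it assumes the file has been downloaded as trainer_state.json and that matplotlib is installed.

import json
import matplotlib.pyplot as plt

# Load the exported Trainer state.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only log entries that report a training loss (logged every 20 steps).
entries = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in entries]
losses = [e["loss"] for e in entries]

# Plot training loss against the global step.
plt.plot(steps, losses)
plt.xlabel("global step")
plt.ylabel("training loss")
plt.title("CodeLlama-7B-QML fine-tuning loss")
plt.show()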
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 20,
"global_step": 12178,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0016424069473813874,
"grad_norm": 0.5388180017471313,
"learning_rate": 0.0002,
"loss": 1.8932,
"step": 20
},
{
"epoch": 0.003284813894762775,
"grad_norm": 0.46543794870376587,
"learning_rate": 0.0002,
"loss": 1.6701,
"step": 40
},
{
"epoch": 0.004927220842144162,
"grad_norm": 0.45620647072792053,
"learning_rate": 0.0002,
"loss": 1.5541,
"step": 60
},
{
"epoch": 0.00656962778952555,
"grad_norm": 0.4583057761192322,
"learning_rate": 0.0002,
"loss": 1.5777,
"step": 80
},
{
"epoch": 0.008212034736906937,
"grad_norm": 0.5295430421829224,
"learning_rate": 0.0002,
"loss": 1.3046,
"step": 100
},
{
"epoch": 0.009854441684288324,
"grad_norm": 0.44552722573280334,
"learning_rate": 0.0002,
"loss": 1.3053,
"step": 120
},
{
"epoch": 0.011496848631669712,
"grad_norm": 0.45540332794189453,
"learning_rate": 0.0002,
"loss": 1.1971,
"step": 140
},
{
"epoch": 0.0131392555790511,
"grad_norm": 0.4302205443382263,
"learning_rate": 0.0002,
"loss": 1.2143,
"step": 160
},
{
"epoch": 0.014781662526432487,
"grad_norm": 0.4064156413078308,
"learning_rate": 0.0002,
"loss": 1.1695,
"step": 180
},
{
"epoch": 0.016424069473813873,
"grad_norm": 0.43175607919692993,
"learning_rate": 0.0002,
"loss": 1.1836,
"step": 200
},
{
"epoch": 0.01806647642119526,
"grad_norm": 0.5280532240867615,
"learning_rate": 0.0002,
"loss": 1.1627,
"step": 220
},
{
"epoch": 0.01970888336857665,
"grad_norm": 0.4442996382713318,
"learning_rate": 0.0002,
"loss": 1.2294,
"step": 240
},
{
"epoch": 0.021351290315958036,
"grad_norm": 0.4584205448627472,
"learning_rate": 0.0002,
"loss": 1.058,
"step": 260
},
{
"epoch": 0.022993697263339424,
"grad_norm": 0.40979012846946716,
"learning_rate": 0.0002,
"loss": 1.0436,
"step": 280
},
{
"epoch": 0.02463610421072081,
"grad_norm": 0.4241325557231903,
"learning_rate": 0.0002,
"loss": 1.1414,
"step": 300
},
{
"epoch": 0.0262785111581022,
"grad_norm": 0.4106293022632599,
"learning_rate": 0.0002,
"loss": 1.0744,
"step": 320
},
{
"epoch": 0.027920918105483587,
"grad_norm": 0.46253764629364014,
"learning_rate": 0.0002,
"loss": 1.0589,
"step": 340
},
{
"epoch": 0.029563325052864974,
"grad_norm": 0.4244967997074127,
"learning_rate": 0.0002,
"loss": 1.0263,
"step": 360
},
{
"epoch": 0.031205732000246362,
"grad_norm": 0.35677096247673035,
"learning_rate": 0.0002,
"loss": 1.0447,
"step": 380
},
{
"epoch": 0.032848138947627746,
"grad_norm": 0.4948490262031555,
"learning_rate": 0.0002,
"loss": 1.0826,
"step": 400
},
{
"epoch": 0.034490545895009134,
"grad_norm": 0.5756106972694397,
"learning_rate": 0.0002,
"loss": 0.948,
"step": 420
},
{
"epoch": 0.03613295284239052,
"grad_norm": 0.5383228063583374,
"learning_rate": 0.0002,
"loss": 1.0025,
"step": 440
},
{
"epoch": 0.03777535978977191,
"grad_norm": 0.3955784738063812,
"learning_rate": 0.0002,
"loss": 0.9027,
"step": 460
},
{
"epoch": 0.0394177667371533,
"grad_norm": 0.37915533781051636,
"learning_rate": 0.0002,
"loss": 0.9936,
"step": 480
},
{
"epoch": 0.041060173684534684,
"grad_norm": 0.5413188934326172,
"learning_rate": 0.0002,
"loss": 0.9077,
"step": 500
},
{
"epoch": 0.04270258063191607,
"grad_norm": 0.5334627032279968,
"learning_rate": 0.0002,
"loss": 0.9009,
"step": 520
},
{
"epoch": 0.04434498757929746,
"grad_norm": 0.5394805073738098,
"learning_rate": 0.0002,
"loss": 0.9542,
"step": 540
},
{
"epoch": 0.04598739452667885,
"grad_norm": 0.532177746295929,
"learning_rate": 0.0002,
"loss": 0.8743,
"step": 560
},
{
"epoch": 0.047629801474060235,
"grad_norm": 0.5266315937042236,
"learning_rate": 0.0002,
"loss": 0.8931,
"step": 580
},
{
"epoch": 0.04927220842144162,
"grad_norm": 0.4725072979927063,
"learning_rate": 0.0002,
"loss": 0.908,
"step": 600
},
{
"epoch": 0.05091461536882301,
"grad_norm": 0.6026243567466736,
"learning_rate": 0.0002,
"loss": 0.7898,
"step": 620
},
{
"epoch": 0.0525570223162044,
"grad_norm": 0.4928111732006073,
"learning_rate": 0.0002,
"loss": 0.8406,
"step": 640
},
{
"epoch": 0.054199429263585785,
"grad_norm": 0.4555020332336426,
"learning_rate": 0.0002,
"loss": 0.8222,
"step": 660
},
{
"epoch": 0.05584183621096717,
"grad_norm": 0.6445655822753906,
"learning_rate": 0.0002,
"loss": 0.832,
"step": 680
},
{
"epoch": 0.05748424315834856,
"grad_norm": 0.5854527950286865,
"learning_rate": 0.0002,
"loss": 0.8435,
"step": 700
},
{
"epoch": 0.05912665010572995,
"grad_norm": 0.4609089195728302,
"learning_rate": 0.0002,
"loss": 0.748,
"step": 720
},
{
"epoch": 0.060769057053111336,
"grad_norm": 0.5567362904548645,
"learning_rate": 0.0002,
"loss": 0.7777,
"step": 740
},
{
"epoch": 0.062411464000492724,
"grad_norm": 0.5161166191101074,
"learning_rate": 0.0002,
"loss": 0.7597,
"step": 760
},
{
"epoch": 0.06405387094787411,
"grad_norm": 0.5450626611709595,
"learning_rate": 0.0002,
"loss": 0.7337,
"step": 780
},
{
"epoch": 0.06569627789525549,
"grad_norm": 0.6034521460533142,
"learning_rate": 0.0002,
"loss": 0.7668,
"step": 800
},
{
"epoch": 0.06733868484263689,
"grad_norm": 0.4653383493423462,
"learning_rate": 0.0002,
"loss": 0.7417,
"step": 820
},
{
"epoch": 0.06898109179001827,
"grad_norm": 0.4846251308917999,
"learning_rate": 0.0002,
"loss": 0.7506,
"step": 840
},
{
"epoch": 0.07062349873739966,
"grad_norm": 0.4887784719467163,
"learning_rate": 0.0002,
"loss": 0.7115,
"step": 860
},
{
"epoch": 0.07226590568478104,
"grad_norm": 0.5024611949920654,
"learning_rate": 0.0002,
"loss": 0.7402,
"step": 880
},
{
"epoch": 0.07390831263216244,
"grad_norm": 0.5007764101028442,
"learning_rate": 0.0002,
"loss": 0.6529,
"step": 900
},
{
"epoch": 0.07555071957954382,
"grad_norm": 0.5097551345825195,
"learning_rate": 0.0002,
"loss": 0.7776,
"step": 920
},
{
"epoch": 0.07719312652692521,
"grad_norm": 0.5517822504043579,
"learning_rate": 0.0002,
"loss": 0.6609,
"step": 940
},
{
"epoch": 0.0788355334743066,
"grad_norm": 0.5290623307228088,
"learning_rate": 0.0002,
"loss": 0.7015,
"step": 960
},
{
"epoch": 0.08047794042168799,
"grad_norm": 0.576545000076294,
"learning_rate": 0.0002,
"loss": 0.6752,
"step": 980
},
{
"epoch": 0.08212034736906937,
"grad_norm": 0.4689784049987793,
"learning_rate": 0.0002,
"loss": 0.7047,
"step": 1000
},
{
"epoch": 0.08376275431645076,
"grad_norm": 0.455814003944397,
"learning_rate": 0.0002,
"loss": 0.6378,
"step": 1020
},
{
"epoch": 0.08540516126383214,
"grad_norm": 0.6452861428260803,
"learning_rate": 0.0002,
"loss": 0.6962,
"step": 1040
},
{
"epoch": 0.08704756821121354,
"grad_norm": 0.5699702501296997,
"learning_rate": 0.0002,
"loss": 0.6508,
"step": 1060
},
{
"epoch": 0.08868997515859492,
"grad_norm": 0.5086561441421509,
"learning_rate": 0.0002,
"loss": 0.6174,
"step": 1080
},
{
"epoch": 0.09033238210597631,
"grad_norm": 0.48543211817741394,
"learning_rate": 0.0002,
"loss": 0.6261,
"step": 1100
},
{
"epoch": 0.0919747890533577,
"grad_norm": 0.6361482739448547,
"learning_rate": 0.0002,
"loss": 0.6336,
"step": 1120
},
{
"epoch": 0.09361719600073909,
"grad_norm": 0.5558167695999146,
"learning_rate": 0.0002,
"loss": 0.6678,
"step": 1140
},
{
"epoch": 0.09525960294812047,
"grad_norm": 0.5599238872528076,
"learning_rate": 0.0002,
"loss": 0.6169,
"step": 1160
},
{
"epoch": 0.09690200989550186,
"grad_norm": 0.5939186215400696,
"learning_rate": 0.0002,
"loss": 0.6059,
"step": 1180
},
{
"epoch": 0.09854441684288325,
"grad_norm": 0.5663330554962158,
"learning_rate": 0.0002,
"loss": 0.5737,
"step": 1200
},
{
"epoch": 0.10018682379026464,
"grad_norm": 0.49742865562438965,
"learning_rate": 0.0002,
"loss": 0.6013,
"step": 1220
},
{
"epoch": 0.10182923073764602,
"grad_norm": 0.520782470703125,
"learning_rate": 0.0002,
"loss": 0.5929,
"step": 1240
},
{
"epoch": 0.1034716376850274,
"grad_norm": 0.45269444584846497,
"learning_rate": 0.0002,
"loss": 0.5981,
"step": 1260
},
{
"epoch": 0.1051140446324088,
"grad_norm": 0.5428550243377686,
"learning_rate": 0.0002,
"loss": 0.5814,
"step": 1280
},
{
"epoch": 0.10675645157979018,
"grad_norm": 0.4782160818576813,
"learning_rate": 0.0002,
"loss": 0.5858,
"step": 1300
},
{
"epoch": 0.10839885852717157,
"grad_norm": 0.5338163375854492,
"learning_rate": 0.0002,
"loss": 0.6255,
"step": 1320
},
{
"epoch": 0.11004126547455295,
"grad_norm": 0.4596363306045532,
"learning_rate": 0.0002,
"loss": 0.5974,
"step": 1340
},
{
"epoch": 0.11168367242193435,
"grad_norm": 0.5203448534011841,
"learning_rate": 0.0002,
"loss": 0.5452,
"step": 1360
},
{
"epoch": 0.11332607936931573,
"grad_norm": 0.44463276863098145,
"learning_rate": 0.0002,
"loss": 0.576,
"step": 1380
},
{
"epoch": 0.11496848631669712,
"grad_norm": 0.5106232762336731,
"learning_rate": 0.0002,
"loss": 0.5679,
"step": 1400
},
{
"epoch": 0.1166108932640785,
"grad_norm": 0.5451502799987793,
"learning_rate": 0.0002,
"loss": 0.5673,
"step": 1420
},
{
"epoch": 0.1182533002114599,
"grad_norm": 0.6638749837875366,
"learning_rate": 0.0002,
"loss": 0.543,
"step": 1440
},
{
"epoch": 0.11989570715884128,
"grad_norm": 0.5045977830886841,
"learning_rate": 0.0002,
"loss": 0.5803,
"step": 1460
},
{
"epoch": 0.12153811410622267,
"grad_norm": 0.5385071635246277,
"learning_rate": 0.0002,
"loss": 0.5357,
"step": 1480
},
{
"epoch": 0.12318052105360405,
"grad_norm": 0.43107932806015015,
"learning_rate": 0.0002,
"loss": 0.5378,
"step": 1500
},
{
"epoch": 0.12482292800098545,
"grad_norm": 0.5887011885643005,
"learning_rate": 0.0002,
"loss": 0.5594,
"step": 1520
},
{
"epoch": 0.12646533494836684,
"grad_norm": 0.547126829624176,
"learning_rate": 0.0002,
"loss": 0.5574,
"step": 1540
},
{
"epoch": 0.12810774189574822,
"grad_norm": 0.532454788684845,
"learning_rate": 0.0002,
"loss": 0.5506,
"step": 1560
},
{
"epoch": 0.1297501488431296,
"grad_norm": 0.592251718044281,
"learning_rate": 0.0002,
"loss": 0.5206,
"step": 1580
},
{
"epoch": 0.13139255579051098,
"grad_norm": 0.6189798712730408,
"learning_rate": 0.0002,
"loss": 0.516,
"step": 1600
},
{
"epoch": 0.1330349627378924,
"grad_norm": 0.4614121913909912,
"learning_rate": 0.0002,
"loss": 0.4948,
"step": 1620
},
{
"epoch": 0.13467736968527377,
"grad_norm": 0.6192139983177185,
"learning_rate": 0.0002,
"loss": 0.4924,
"step": 1640
},
{
"epoch": 0.13631977663265515,
"grad_norm": 0.5383406281471252,
"learning_rate": 0.0002,
"loss": 0.4955,
"step": 1660
},
{
"epoch": 0.13796218358003653,
"grad_norm": 0.681564450263977,
"learning_rate": 0.0002,
"loss": 0.5224,
"step": 1680
},
{
"epoch": 0.13960459052741794,
"grad_norm": 0.51935875415802,
"learning_rate": 0.0002,
"loss": 0.508,
"step": 1700
},
{
"epoch": 0.14124699747479932,
"grad_norm": 0.532661497592926,
"learning_rate": 0.0002,
"loss": 0.5362,
"step": 1720
},
{
"epoch": 0.1428894044221807,
"grad_norm": 0.40774333477020264,
"learning_rate": 0.0002,
"loss": 0.4908,
"step": 1740
},
{
"epoch": 0.14453181136956209,
"grad_norm": 0.6406064033508301,
"learning_rate": 0.0002,
"loss": 0.4891,
"step": 1760
},
{
"epoch": 0.1461742183169435,
"grad_norm": 0.41497862339019775,
"learning_rate": 0.0002,
"loss": 0.5234,
"step": 1780
},
{
"epoch": 0.14781662526432487,
"grad_norm": 0.502389132976532,
"learning_rate": 0.0002,
"loss": 0.459,
"step": 1800
},
{
"epoch": 0.14945903221170626,
"grad_norm": 0.5248283743858337,
"learning_rate": 0.0002,
"loss": 0.4659,
"step": 1820
},
{
"epoch": 0.15110143915908764,
"grad_norm": 0.5587234497070312,
"learning_rate": 0.0002,
"loss": 0.4877,
"step": 1840
},
{
"epoch": 0.15274384610646902,
"grad_norm": 0.479913592338562,
"learning_rate": 0.0002,
"loss": 0.4598,
"step": 1860
},
{
"epoch": 0.15438625305385043,
"grad_norm": 0.5423480272293091,
"learning_rate": 0.0002,
"loss": 0.4754,
"step": 1880
},
{
"epoch": 0.1560286600012318,
"grad_norm": 0.5485461354255676,
"learning_rate": 0.0002,
"loss": 0.4681,
"step": 1900
},
{
"epoch": 0.1576710669486132,
"grad_norm": 0.48511844873428345,
"learning_rate": 0.0002,
"loss": 0.4672,
"step": 1920
},
{
"epoch": 0.15931347389599457,
"grad_norm": 0.49132347106933594,
"learning_rate": 0.0002,
"loss": 0.4694,
"step": 1940
},
{
"epoch": 0.16095588084337598,
"grad_norm": 0.5654798746109009,
"learning_rate": 0.0002,
"loss": 0.5047,
"step": 1960
},
{
"epoch": 0.16259828779075736,
"grad_norm": 0.571369469165802,
"learning_rate": 0.0002,
"loss": 0.4486,
"step": 1980
},
{
"epoch": 0.16424069473813874,
"grad_norm": 0.5438801646232605,
"learning_rate": 0.0002,
"loss": 0.4756,
"step": 2000
},
{
"epoch": 0.16588310168552012,
"grad_norm": 0.5384829044342041,
"learning_rate": 0.0002,
"loss": 0.4404,
"step": 2020
},
{
"epoch": 0.16752550863290153,
"grad_norm": 0.5565232634544373,
"learning_rate": 0.0002,
"loss": 0.4672,
"step": 2040
},
{
"epoch": 0.1691679155802829,
"grad_norm": 0.5227774381637573,
"learning_rate": 0.0002,
"loss": 0.4452,
"step": 2060
},
{
"epoch": 0.1708103225276643,
"grad_norm": 0.47740334272384644,
"learning_rate": 0.0002,
"loss": 0.492,
"step": 2080
},
{
"epoch": 0.17245272947504567,
"grad_norm": 0.4206157326698303,
"learning_rate": 0.0002,
"loss": 0.4517,
"step": 2100
},
{
"epoch": 0.17409513642242708,
"grad_norm": 0.5148787498474121,
"learning_rate": 0.0002,
"loss": 0.4801,
"step": 2120
},
{
"epoch": 0.17573754336980846,
"grad_norm": 0.4815204441547394,
"learning_rate": 0.0002,
"loss": 0.4415,
"step": 2140
},
{
"epoch": 0.17737995031718984,
"grad_norm": 0.5302825570106506,
"learning_rate": 0.0002,
"loss": 0.4558,
"step": 2160
},
{
"epoch": 0.17902235726457122,
"grad_norm": 0.574350118637085,
"learning_rate": 0.0002,
"loss": 0.4709,
"step": 2180
},
{
"epoch": 0.18066476421195263,
"grad_norm": 0.5393965244293213,
"learning_rate": 0.0002,
"loss": 0.4528,
"step": 2200
},
{
"epoch": 0.182307171159334,
"grad_norm": 0.43285471200942993,
"learning_rate": 0.0002,
"loss": 0.4294,
"step": 2220
},
{
"epoch": 0.1839495781067154,
"grad_norm": 0.4550113081932068,
"learning_rate": 0.0002,
"loss": 0.4395,
"step": 2240
},
{
"epoch": 0.18559198505409677,
"grad_norm": 0.586071789264679,
"learning_rate": 0.0002,
"loss": 0.4456,
"step": 2260
},
{
"epoch": 0.18723439200147818,
"grad_norm": 0.5634139776229858,
"learning_rate": 0.0002,
"loss": 0.4295,
"step": 2280
},
{
"epoch": 0.18887679894885956,
"grad_norm": 0.5095311403274536,
"learning_rate": 0.0002,
"loss": 0.4347,
"step": 2300
},
{
"epoch": 0.19051920589624094,
"grad_norm": 0.6051989793777466,
"learning_rate": 0.0002,
"loss": 0.4278,
"step": 2320
},
{
"epoch": 0.19216161284362232,
"grad_norm": 0.45743292570114136,
"learning_rate": 0.0002,
"loss": 0.4191,
"step": 2340
},
{
"epoch": 0.19380401979100373,
"grad_norm": 0.6048611402511597,
"learning_rate": 0.0002,
"loss": 0.4512,
"step": 2360
},
{
"epoch": 0.1954464267383851,
"grad_norm": 0.495731920003891,
"learning_rate": 0.0002,
"loss": 0.4087,
"step": 2380
},
{
"epoch": 0.1970888336857665,
"grad_norm": 0.5746319890022278,
"learning_rate": 0.0002,
"loss": 0.4112,
"step": 2400
},
{
"epoch": 0.19873124063314787,
"grad_norm": 0.4899024963378906,
"learning_rate": 0.0002,
"loss": 0.4403,
"step": 2420
},
{
"epoch": 0.20037364758052928,
"grad_norm": 0.40732160210609436,
"learning_rate": 0.0002,
"loss": 0.4281,
"step": 2440
},
{
"epoch": 0.20201605452791066,
"grad_norm": 0.4896198809146881,
"learning_rate": 0.0002,
"loss": 0.4533,
"step": 2460
},
{
"epoch": 0.20365846147529204,
"grad_norm": 0.5733948349952698,
"learning_rate": 0.0002,
"loss": 0.4113,
"step": 2480
},
{
"epoch": 0.20530086842267342,
"grad_norm": 0.4565046429634094,
"learning_rate": 0.0002,
"loss": 0.4237,
"step": 2500
},
{
"epoch": 0.2069432753700548,
"grad_norm": 0.5932797789573669,
"learning_rate": 0.0002,
"loss": 0.4367,
"step": 2520
},
{
"epoch": 0.2085856823174362,
"grad_norm": 0.5838333368301392,
"learning_rate": 0.0002,
"loss": 0.4331,
"step": 2540
},
{
"epoch": 0.2102280892648176,
"grad_norm": 0.5022397637367249,
"learning_rate": 0.0002,
"loss": 0.4004,
"step": 2560
},
{
"epoch": 0.21187049621219897,
"grad_norm": 0.5949686765670776,
"learning_rate": 0.0002,
"loss": 0.4119,
"step": 2580
},
{
"epoch": 0.21351290315958035,
"grad_norm": 0.45230528712272644,
"learning_rate": 0.0002,
"loss": 0.4217,
"step": 2600
},
{
"epoch": 0.21515531010696176,
"grad_norm": 0.4186144471168518,
"learning_rate": 0.0002,
"loss": 0.428,
"step": 2620
},
{
"epoch": 0.21679771705434314,
"grad_norm": 0.5562434196472168,
"learning_rate": 0.0002,
"loss": 0.394,
"step": 2640
},
{
"epoch": 0.21844012400172452,
"grad_norm": 0.5947513580322266,
"learning_rate": 0.0002,
"loss": 0.3998,
"step": 2660
},
{
"epoch": 0.2200825309491059,
"grad_norm": 0.4886711835861206,
"learning_rate": 0.0002,
"loss": 0.389,
"step": 2680
},
{
"epoch": 0.2217249378964873,
"grad_norm": 0.551491379737854,
"learning_rate": 0.0002,
"loss": 0.3952,
"step": 2700
},
{
"epoch": 0.2233673448438687,
"grad_norm": 0.383627712726593,
"learning_rate": 0.0002,
"loss": 0.3733,
"step": 2720
},
{
"epoch": 0.22500975179125007,
"grad_norm": 0.45694270730018616,
"learning_rate": 0.0002,
"loss": 0.4075,
"step": 2740
},
{
"epoch": 0.22665215873863145,
"grad_norm": 0.46876367926597595,
"learning_rate": 0.0002,
"loss": 0.4135,
"step": 2760
},
{
"epoch": 0.22829456568601286,
"grad_norm": 0.9062886238098145,
"learning_rate": 0.0002,
"loss": 0.3891,
"step": 2780
},
{
"epoch": 0.22993697263339424,
"grad_norm": 0.47902002930641174,
"learning_rate": 0.0002,
"loss": 0.405,
"step": 2800
},
{
"epoch": 0.23157937958077562,
"grad_norm": 0.6828575134277344,
"learning_rate": 0.0002,
"loss": 0.3985,
"step": 2820
},
{
"epoch": 0.233221786528157,
"grad_norm": 0.5411036610603333,
"learning_rate": 0.0002,
"loss": 0.3658,
"step": 2840
},
{
"epoch": 0.2348641934755384,
"grad_norm": 0.6698014736175537,
"learning_rate": 0.0002,
"loss": 0.4003,
"step": 2860
},
{
"epoch": 0.2365066004229198,
"grad_norm": 0.5779656171798706,
"learning_rate": 0.0002,
"loss": 0.4003,
"step": 2880
},
{
"epoch": 0.23814900737030117,
"grad_norm": 0.5321545004844666,
"learning_rate": 0.0002,
"loss": 0.3667,
"step": 2900
},
{
"epoch": 0.23979141431768256,
"grad_norm": 0.43935510516166687,
"learning_rate": 0.0002,
"loss": 0.375,
"step": 2920
},
{
"epoch": 0.24143382126506396,
"grad_norm": 0.67582768201828,
"learning_rate": 0.0002,
"loss": 0.3814,
"step": 2940
},
{
"epoch": 0.24307622821244534,
"grad_norm": 0.6373169422149658,
"learning_rate": 0.0002,
"loss": 0.4079,
"step": 2960
},
{
"epoch": 0.24471863515982673,
"grad_norm": 0.4568232595920563,
"learning_rate": 0.0002,
"loss": 0.3821,
"step": 2980
},
{
"epoch": 0.2463610421072081,
"grad_norm": 0.5706847310066223,
"learning_rate": 0.0002,
"loss": 0.3745,
"step": 3000
},
{
"epoch": 0.24800344905458951,
"grad_norm": 0.5293543338775635,
"learning_rate": 0.0002,
"loss": 0.3945,
"step": 3020
},
{
"epoch": 0.2496458560019709,
"grad_norm": 0.5566920042037964,
"learning_rate": 0.0002,
"loss": 0.3739,
"step": 3040
},
{
"epoch": 0.2512882629493523,
"grad_norm": 0.5758338570594788,
"learning_rate": 0.0002,
"loss": 0.4115,
"step": 3060
},
{
"epoch": 0.2529306698967337,
"grad_norm": 0.5503116250038147,
"learning_rate": 0.0002,
"loss": 0.3841,
"step": 3080
},
{
"epoch": 0.25457307684411506,
"grad_norm": 0.5829768776893616,
"learning_rate": 0.0002,
"loss": 0.3679,
"step": 3100
},
{
"epoch": 0.25621548379149645,
"grad_norm": 0.4771459400653839,
"learning_rate": 0.0002,
"loss": 0.3787,
"step": 3120
},
{
"epoch": 0.2578578907388778,
"grad_norm": 0.508679986000061,
"learning_rate": 0.0002,
"loss": 0.3424,
"step": 3140
},
{
"epoch": 0.2595002976862592,
"grad_norm": 0.5478394031524658,
"learning_rate": 0.0002,
"loss": 0.3616,
"step": 3160
},
{
"epoch": 0.2611427046336406,
"grad_norm": 0.48918816447257996,
"learning_rate": 0.0002,
"loss": 0.364,
"step": 3180
},
{
"epoch": 0.26278511158102197,
"grad_norm": 0.6158058047294617,
"learning_rate": 0.0002,
"loss": 0.3563,
"step": 3200
},
{
"epoch": 0.26442751852840335,
"grad_norm": 0.6302765607833862,
"learning_rate": 0.0002,
"loss": 0.3472,
"step": 3220
},
{
"epoch": 0.2660699254757848,
"grad_norm": 0.42650097608566284,
"learning_rate": 0.0002,
"loss": 0.374,
"step": 3240
},
{
"epoch": 0.26771233242316617,
"grad_norm": 0.5517419576644897,
"learning_rate": 0.0002,
"loss": 0.3747,
"step": 3260
},
{
"epoch": 0.26935473937054755,
"grad_norm": 0.5887686014175415,
"learning_rate": 0.0002,
"loss": 0.3655,
"step": 3280
},
{
"epoch": 0.2709971463179289,
"grad_norm": 0.5252538323402405,
"learning_rate": 0.0002,
"loss": 0.3864,
"step": 3300
},
{
"epoch": 0.2726395532653103,
"grad_norm": 0.4829944968223572,
"learning_rate": 0.0002,
"loss": 0.3526,
"step": 3320
},
{
"epoch": 0.2742819602126917,
"grad_norm": 0.4375133216381073,
"learning_rate": 0.0002,
"loss": 0.3536,
"step": 3340
},
{
"epoch": 0.27592436716007307,
"grad_norm": 0.5371789336204529,
"learning_rate": 0.0002,
"loss": 0.3501,
"step": 3360
},
{
"epoch": 0.27756677410745445,
"grad_norm": 0.44075456261634827,
"learning_rate": 0.0002,
"loss": 0.3584,
"step": 3380
},
{
"epoch": 0.2792091810548359,
"grad_norm": 0.53825443983078,
"learning_rate": 0.0002,
"loss": 0.3304,
"step": 3400
},
{
"epoch": 0.28085158800221727,
"grad_norm": 0.48521581292152405,
"learning_rate": 0.0002,
"loss": 0.3588,
"step": 3420
},
{
"epoch": 0.28249399494959865,
"grad_norm": 0.4189339578151703,
"learning_rate": 0.0002,
"loss": 0.3556,
"step": 3440
},
{
"epoch": 0.28413640189698003,
"grad_norm": 0.4011813700199127,
"learning_rate": 0.0002,
"loss": 0.3403,
"step": 3460
},
{
"epoch": 0.2857788088443614,
"grad_norm": 0.4910661280155182,
"learning_rate": 0.0002,
"loss": 0.3897,
"step": 3480
},
{
"epoch": 0.2874212157917428,
"grad_norm": 0.5664734840393066,
"learning_rate": 0.0002,
"loss": 0.3503,
"step": 3500
},
{
"epoch": 0.28906362273912417,
"grad_norm": 0.45044422149658203,
"learning_rate": 0.0002,
"loss": 0.3357,
"step": 3520
},
{
"epoch": 0.29070602968650555,
"grad_norm": 0.6162013411521912,
"learning_rate": 0.0002,
"loss": 0.3827,
"step": 3540
},
{
"epoch": 0.292348436633887,
"grad_norm": 0.428659588098526,
"learning_rate": 0.0002,
"loss": 0.3418,
"step": 3560
},
{
"epoch": 0.29399084358126837,
"grad_norm": 0.48843899369239807,
"learning_rate": 0.0002,
"loss": 0.3695,
"step": 3580
},
{
"epoch": 0.29563325052864975,
"grad_norm": 0.5662574768066406,
"learning_rate": 0.0002,
"loss": 0.3418,
"step": 3600
},
{
"epoch": 0.29727565747603113,
"grad_norm": 0.5488101243972778,
"learning_rate": 0.0002,
"loss": 0.3619,
"step": 3620
},
{
"epoch": 0.2989180644234125,
"grad_norm": 0.4078102111816406,
"learning_rate": 0.0002,
"loss": 0.3339,
"step": 3640
},
{
"epoch": 0.3005604713707939,
"grad_norm": 0.6991748213768005,
"learning_rate": 0.0002,
"loss": 0.3653,
"step": 3660
},
{
"epoch": 0.30220287831817527,
"grad_norm": 0.4532040059566498,
"learning_rate": 0.0002,
"loss": 0.343,
"step": 3680
},
{
"epoch": 0.30384528526555665,
"grad_norm": 0.47306913137435913,
"learning_rate": 0.0002,
"loss": 0.3551,
"step": 3700
},
{
"epoch": 0.30548769221293803,
"grad_norm": 0.4408378303050995,
"learning_rate": 0.0002,
"loss": 0.3441,
"step": 3720
},
{
"epoch": 0.30713009916031947,
"grad_norm": 0.5125454068183899,
"learning_rate": 0.0002,
"loss": 0.3578,
"step": 3740
},
{
"epoch": 0.30877250610770085,
"grad_norm": 0.5483905076980591,
"learning_rate": 0.0002,
"loss": 0.3344,
"step": 3760
},
{
"epoch": 0.31041491305508223,
"grad_norm": 0.3780999779701233,
"learning_rate": 0.0002,
"loss": 0.3491,
"step": 3780
},
{
"epoch": 0.3120573200024636,
"grad_norm": 0.4443167746067047,
"learning_rate": 0.0002,
"loss": 0.3406,
"step": 3800
},
{
"epoch": 0.313699726949845,
"grad_norm": 0.5337740182876587,
"learning_rate": 0.0002,
"loss": 0.3369,
"step": 3820
},
{
"epoch": 0.3153421338972264,
"grad_norm": 0.5371155738830566,
"learning_rate": 0.0002,
"loss": 0.3579,
"step": 3840
},
{
"epoch": 0.31698454084460775,
"grad_norm": 0.49183839559555054,
"learning_rate": 0.0002,
"loss": 0.3359,
"step": 3860
},
{
"epoch": 0.31862694779198913,
"grad_norm": 0.5076944828033447,
"learning_rate": 0.0002,
"loss": 0.3604,
"step": 3880
},
{
"epoch": 0.32026935473937057,
"grad_norm": 0.5076488256454468,
"learning_rate": 0.0002,
"loss": 0.3373,
"step": 3900
},
{
"epoch": 0.32191176168675195,
"grad_norm": 0.519506573677063,
"learning_rate": 0.0002,
"loss": 0.3529,
"step": 3920
},
{
"epoch": 0.32355416863413333,
"grad_norm": 0.3967176079750061,
"learning_rate": 0.0002,
"loss": 0.3203,
"step": 3940
},
{
"epoch": 0.3251965755815147,
"grad_norm": 0.5084711313247681,
"learning_rate": 0.0002,
"loss": 0.3323,
"step": 3960
},
{
"epoch": 0.3268389825288961,
"grad_norm": 0.5324501991271973,
"learning_rate": 0.0002,
"loss": 0.3351,
"step": 3980
},
{
"epoch": 0.3284813894762775,
"grad_norm": 0.4679279923439026,
"learning_rate": 0.0002,
"loss": 0.322,
"step": 4000
},
{
"epoch": 0.33012379642365886,
"grad_norm": 0.5273401737213135,
"learning_rate": 0.0002,
"loss": 0.358,
"step": 4020
},
{
"epoch": 0.33176620337104024,
"grad_norm": 0.560130774974823,
"learning_rate": 0.0002,
"loss": 0.3252,
"step": 4040
},
{
"epoch": 0.33340861031842167,
"grad_norm": 0.7334967851638794,
"learning_rate": 0.0002,
"loss": 0.3125,
"step": 4060
},
{
"epoch": 0.33505101726580305,
"grad_norm": 0.448902428150177,
"learning_rate": 0.0002,
"loss": 0.3337,
"step": 4080
},
{
"epoch": 0.33669342421318443,
"grad_norm": 0.42839765548706055,
"learning_rate": 0.0002,
"loss": 0.3332,
"step": 4100
},
{
"epoch": 0.3383358311605658,
"grad_norm": 0.43117448687553406,
"learning_rate": 0.0002,
"loss": 0.3204,
"step": 4120
},
{
"epoch": 0.3399782381079472,
"grad_norm": 0.4213992953300476,
"learning_rate": 0.0002,
"loss": 0.3421,
"step": 4140
},
{
"epoch": 0.3416206450553286,
"grad_norm": 0.40054526925086975,
"learning_rate": 0.0002,
"loss": 0.3115,
"step": 4160
},
{
"epoch": 0.34326305200270996,
"grad_norm": 0.5090795159339905,
"learning_rate": 0.0002,
"loss": 0.3324,
"step": 4180
},
{
"epoch": 0.34490545895009134,
"grad_norm": 0.5156223177909851,
"learning_rate": 0.0002,
"loss": 0.3186,
"step": 4200
},
{
"epoch": 0.3465478658974728,
"grad_norm": 0.4297846555709839,
"learning_rate": 0.0002,
"loss": 0.312,
"step": 4220
},
{
"epoch": 0.34819027284485415,
"grad_norm": 0.4857240617275238,
"learning_rate": 0.0002,
"loss": 0.3202,
"step": 4240
},
{
"epoch": 0.34983267979223553,
"grad_norm": 0.6078678965568542,
"learning_rate": 0.0002,
"loss": 0.3329,
"step": 4260
},
{
"epoch": 0.3514750867396169,
"grad_norm": 0.5576339364051819,
"learning_rate": 0.0002,
"loss": 0.333,
"step": 4280
},
{
"epoch": 0.3531174936869983,
"grad_norm": 0.5340404510498047,
"learning_rate": 0.0002,
"loss": 0.3367,
"step": 4300
},
{
"epoch": 0.3547599006343797,
"grad_norm": 0.5187095999717712,
"learning_rate": 0.0002,
"loss": 0.3579,
"step": 4320
},
{
"epoch": 0.35640230758176106,
"grad_norm": 0.4246378540992737,
"learning_rate": 0.0002,
"loss": 0.3281,
"step": 4340
},
{
"epoch": 0.35804471452914244,
"grad_norm": 0.6137174963951111,
"learning_rate": 0.0002,
"loss": 0.3248,
"step": 4360
},
{
"epoch": 0.3596871214765238,
"grad_norm": 0.44220972061157227,
"learning_rate": 0.0002,
"loss": 0.3267,
"step": 4380
},
{
"epoch": 0.36132952842390526,
"grad_norm": 0.4254567325115204,
"learning_rate": 0.0002,
"loss": 0.315,
"step": 4400
},
{
"epoch": 0.36297193537128664,
"grad_norm": 0.66693115234375,
"learning_rate": 0.0002,
"loss": 0.3354,
"step": 4420
},
{
"epoch": 0.364614342318668,
"grad_norm": 0.5646852254867554,
"learning_rate": 0.0002,
"loss": 0.3275,
"step": 4440
},
{
"epoch": 0.3662567492660494,
"grad_norm": 0.525794506072998,
"learning_rate": 0.0002,
"loss": 0.3095,
"step": 4460
},
{
"epoch": 0.3678991562134308,
"grad_norm": 0.5454958081245422,
"learning_rate": 0.0002,
"loss": 0.3177,
"step": 4480
},
{
"epoch": 0.36954156316081216,
"grad_norm": 0.5054097771644592,
"learning_rate": 0.0002,
"loss": 0.3291,
"step": 4500
},
{
"epoch": 0.37118397010819354,
"grad_norm": 0.45259889960289,
"learning_rate": 0.0002,
"loss": 0.3309,
"step": 4520
},
{
"epoch": 0.3728263770555749,
"grad_norm": 0.4160098135471344,
"learning_rate": 0.0002,
"loss": 0.3416,
"step": 4540
},
{
"epoch": 0.37446878400295636,
"grad_norm": 0.36465033888816833,
"learning_rate": 0.0002,
"loss": 0.3244,
"step": 4560
},
{
"epoch": 0.37611119095033774,
"grad_norm": 0.3822501301765442,
"learning_rate": 0.0002,
"loss": 0.3163,
"step": 4580
},
{
"epoch": 0.3777535978977191,
"grad_norm": 0.4484947621822357,
"learning_rate": 0.0002,
"loss": 0.3186,
"step": 4600
},
{
"epoch": 0.3793960048451005,
"grad_norm": 0.481303334236145,
"learning_rate": 0.0002,
"loss": 0.3202,
"step": 4620
},
{
"epoch": 0.3810384117924819,
"grad_norm": 0.5275722742080688,
"learning_rate": 0.0002,
"loss": 0.319,
"step": 4640
},
{
"epoch": 0.38268081873986326,
"grad_norm": 0.5782263278961182,
"learning_rate": 0.0002,
"loss": 0.327,
"step": 4660
},
{
"epoch": 0.38432322568724464,
"grad_norm": 0.511466920375824,
"learning_rate": 0.0002,
"loss": 0.3176,
"step": 4680
},
{
"epoch": 0.385965632634626,
"grad_norm": 0.5383144617080688,
"learning_rate": 0.0002,
"loss": 0.3215,
"step": 4700
},
{
"epoch": 0.38760803958200746,
"grad_norm": 0.47731462121009827,
"learning_rate": 0.0002,
"loss": 0.3184,
"step": 4720
},
{
"epoch": 0.38925044652938884,
"grad_norm": 0.43928396701812744,
"learning_rate": 0.0002,
"loss": 0.2998,
"step": 4740
},
{
"epoch": 0.3908928534767702,
"grad_norm": 0.47170737385749817,
"learning_rate": 0.0002,
"loss": 0.3211,
"step": 4760
},
{
"epoch": 0.3925352604241516,
"grad_norm": 0.39744389057159424,
"learning_rate": 0.0002,
"loss": 0.3119,
"step": 4780
},
{
"epoch": 0.394177667371533,
"grad_norm": 0.4669509828090668,
"learning_rate": 0.0002,
"loss": 0.2965,
"step": 4800
},
{
"epoch": 0.39582007431891436,
"grad_norm": 0.4926499128341675,
"learning_rate": 0.0002,
"loss": 0.2996,
"step": 4820
},
{
"epoch": 0.39746248126629574,
"grad_norm": 0.4818594455718994,
"learning_rate": 0.0002,
"loss": 0.3116,
"step": 4840
},
{
"epoch": 0.3991048882136771,
"grad_norm": 0.4344610571861267,
"learning_rate": 0.0002,
"loss": 0.2884,
"step": 4860
},
{
"epoch": 0.40074729516105856,
"grad_norm": 0.3993249535560608,
"learning_rate": 0.0002,
"loss": 0.3096,
"step": 4880
},
{
"epoch": 0.40238970210843994,
"grad_norm": 0.4467979967594147,
"learning_rate": 0.0002,
"loss": 0.2976,
"step": 4900
},
{
"epoch": 0.4040321090558213,
"grad_norm": 0.5102105736732483,
"learning_rate": 0.0002,
"loss": 0.3005,
"step": 4920
},
{
"epoch": 0.4056745160032027,
"grad_norm": 0.49601197242736816,
"learning_rate": 0.0002,
"loss": 0.2983,
"step": 4940
},
{
"epoch": 0.4073169229505841,
"grad_norm": 0.39463695883750916,
"learning_rate": 0.0002,
"loss": 0.3071,
"step": 4960
},
{
"epoch": 0.40895932989796546,
"grad_norm": 0.5963265299797058,
"learning_rate": 0.0002,
"loss": 0.3017,
"step": 4980
},
{
"epoch": 0.41060173684534684,
"grad_norm": 0.5571741461753845,
"learning_rate": 0.0002,
"loss": 0.312,
"step": 5000
},
{
"epoch": 0.4122441437927282,
"grad_norm": 0.430397629737854,
"learning_rate": 0.0002,
"loss": 0.3077,
"step": 5020
},
{
"epoch": 0.4138865507401096,
"grad_norm": 0.5038132667541504,
"learning_rate": 0.0002,
"loss": 0.3065,
"step": 5040
},
{
"epoch": 0.41552895768749104,
"grad_norm": 0.41420304775238037,
"learning_rate": 0.0002,
"loss": 0.3061,
"step": 5060
},
{
"epoch": 0.4171713646348724,
"grad_norm": 0.6602872610092163,
"learning_rate": 0.0002,
"loss": 0.3101,
"step": 5080
},
{
"epoch": 0.4188137715822538,
"grad_norm": 0.46677547693252563,
"learning_rate": 0.0002,
"loss": 0.3097,
"step": 5100
},
{
"epoch": 0.4204561785296352,
"grad_norm": 0.5312944054603577,
"learning_rate": 0.0002,
"loss": 0.3136,
"step": 5120
},
{
"epoch": 0.42209858547701656,
"grad_norm": 0.4542620778083801,
"learning_rate": 0.0002,
"loss": 0.3177,
"step": 5140
},
{
"epoch": 0.42374099242439794,
"grad_norm": 0.5240755081176758,
"learning_rate": 0.0002,
"loss": 0.3121,
"step": 5160
},
{
"epoch": 0.4253833993717793,
"grad_norm": 0.49393558502197266,
"learning_rate": 0.0002,
"loss": 0.3145,
"step": 5180
},
{
"epoch": 0.4270258063191607,
"grad_norm": 0.3480128347873688,
"learning_rate": 0.0002,
"loss": 0.3047,
"step": 5200
},
{
"epoch": 0.42866821326654214,
"grad_norm": 0.4269355833530426,
"learning_rate": 0.0002,
"loss": 0.3128,
"step": 5220
},
{
"epoch": 0.4303106202139235,
"grad_norm": 0.46620428562164307,
"learning_rate": 0.0002,
"loss": 0.2892,
"step": 5240
},
{
"epoch": 0.4319530271613049,
"grad_norm": 0.502040684223175,
"learning_rate": 0.0002,
"loss": 0.2977,
"step": 5260
},
{
"epoch": 0.4335954341086863,
"grad_norm": 0.4725840091705322,
"learning_rate": 0.0002,
"loss": 0.2926,
"step": 5280
},
{
"epoch": 0.43523784105606766,
"grad_norm": 0.4031844735145569,
"learning_rate": 0.0002,
"loss": 0.2931,
"step": 5300
},
{
"epoch": 0.43688024800344905,
"grad_norm": 0.5044718384742737,
"learning_rate": 0.0002,
"loss": 0.2925,
"step": 5320
},
{
"epoch": 0.4385226549508304,
"grad_norm": 0.43350791931152344,
"learning_rate": 0.0002,
"loss": 0.3064,
"step": 5340
},
{
"epoch": 0.4401650618982118,
"grad_norm": 0.4503776431083679,
"learning_rate": 0.0002,
"loss": 0.2935,
"step": 5360
},
{
"epoch": 0.44180746884559324,
"grad_norm": 0.4562300145626068,
"learning_rate": 0.0002,
"loss": 0.2908,
"step": 5380
},
{
"epoch": 0.4434498757929746,
"grad_norm": 0.4543699026107788,
"learning_rate": 0.0002,
"loss": 0.2971,
"step": 5400
},
{
"epoch": 0.445092282740356,
"grad_norm": 0.45582354068756104,
"learning_rate": 0.0002,
"loss": 0.3039,
"step": 5420
},
{
"epoch": 0.4467346896877374,
"grad_norm": 0.535355269908905,
"learning_rate": 0.0002,
"loss": 0.3023,
"step": 5440
},
{
"epoch": 0.44837709663511877,
"grad_norm": 0.6104617118835449,
"learning_rate": 0.0002,
"loss": 0.3001,
"step": 5460
},
{
"epoch": 0.45001950358250015,
"grad_norm": 0.5111253261566162,
"learning_rate": 0.0002,
"loss": 0.281,
"step": 5480
},
{
"epoch": 0.4516619105298815,
"grad_norm": 0.49691838026046753,
"learning_rate": 0.0002,
"loss": 0.3043,
"step": 5500
},
{
"epoch": 0.4533043174772629,
"grad_norm": 0.5030774474143982,
"learning_rate": 0.0002,
"loss": 0.2963,
"step": 5520
},
{
"epoch": 0.4549467244246443,
"grad_norm": 0.4874095320701599,
"learning_rate": 0.0002,
"loss": 0.3063,
"step": 5540
},
{
"epoch": 0.4565891313720257,
"grad_norm": 0.4713788330554962,
"learning_rate": 0.0002,
"loss": 0.2997,
"step": 5560
},
{
"epoch": 0.4582315383194071,
"grad_norm": 0.48497167229652405,
"learning_rate": 0.0002,
"loss": 0.2936,
"step": 5580
},
{
"epoch": 0.4598739452667885,
"grad_norm": 0.5291727185249329,
"learning_rate": 0.0002,
"loss": 0.2863,
"step": 5600
},
{
"epoch": 0.46151635221416987,
"grad_norm": 0.5845544934272766,
"learning_rate": 0.0002,
"loss": 0.2834,
"step": 5620
},
{
"epoch": 0.46315875916155125,
"grad_norm": 0.5052700638771057,
"learning_rate": 0.0002,
"loss": 0.281,
"step": 5640
},
{
"epoch": 0.46480116610893263,
"grad_norm": 0.47813382744789124,
"learning_rate": 0.0002,
"loss": 0.2859,
"step": 5660
},
{
"epoch": 0.466443573056314,
"grad_norm": 0.4913572072982788,
"learning_rate": 0.0002,
"loss": 0.2765,
"step": 5680
},
{
"epoch": 0.4680859800036954,
"grad_norm": 0.5044130086898804,
"learning_rate": 0.0002,
"loss": 0.3068,
"step": 5700
},
{
"epoch": 0.4697283869510768,
"grad_norm": 0.45967990159988403,
"learning_rate": 0.0002,
"loss": 0.294,
"step": 5720
},
{
"epoch": 0.4713707938984582,
"grad_norm": 0.4834402799606323,
"learning_rate": 0.0002,
"loss": 0.2902,
"step": 5740
},
{
"epoch": 0.4730132008458396,
"grad_norm": 0.4889473617076874,
"learning_rate": 0.0002,
"loss": 0.2931,
"step": 5760
},
{
"epoch": 0.47465560779322097,
"grad_norm": 0.37159985303878784,
"learning_rate": 0.0002,
"loss": 0.2836,
"step": 5780
},
{
"epoch": 0.47629801474060235,
"grad_norm": 0.44428759813308716,
"learning_rate": 0.0002,
"loss": 0.2994,
"step": 5800
},
{
"epoch": 0.47794042168798373,
"grad_norm": 0.5093443989753723,
"learning_rate": 0.0002,
"loss": 0.2943,
"step": 5820
},
{
"epoch": 0.4795828286353651,
"grad_norm": 0.539089024066925,
"learning_rate": 0.0002,
"loss": 0.2968,
"step": 5840
},
{
"epoch": 0.4812252355827465,
"grad_norm": 0.33726248145103455,
"learning_rate": 0.0002,
"loss": 0.283,
"step": 5860
},
{
"epoch": 0.4828676425301279,
"grad_norm": 0.451824426651001,
"learning_rate": 0.0002,
"loss": 0.2824,
"step": 5880
},
{
"epoch": 0.4845100494775093,
"grad_norm": 0.4333132207393646,
"learning_rate": 0.0002,
"loss": 0.2908,
"step": 5900
},
{
"epoch": 0.4861524564248907,
"grad_norm": 0.4399010241031647,
"learning_rate": 0.0002,
"loss": 0.2857,
"step": 5920
},
{
"epoch": 0.48779486337227207,
"grad_norm": 0.46633288264274597,
"learning_rate": 0.0002,
"loss": 0.2796,
"step": 5940
},
{
"epoch": 0.48943727031965345,
"grad_norm": 0.6088176965713501,
"learning_rate": 0.0002,
"loss": 0.2868,
"step": 5960
},
{
"epoch": 0.49107967726703483,
"grad_norm": 0.5191177129745483,
"learning_rate": 0.0002,
"loss": 0.2713,
"step": 5980
},
{
"epoch": 0.4927220842144162,
"grad_norm": 0.6080117225646973,
"learning_rate": 0.0002,
"loss": 0.2925,
"step": 6000
},
{
"epoch": 0.4943644911617976,
"grad_norm": 0.4405871629714966,
"learning_rate": 0.0002,
"loss": 0.2827,
"step": 6020
},
{
"epoch": 0.49600689810917903,
"grad_norm": 0.44443821907043457,
"learning_rate": 0.0002,
"loss": 0.2641,
"step": 6040
},
{
"epoch": 0.4976493050565604,
"grad_norm": 0.401265025138855,
"learning_rate": 0.0002,
"loss": 0.2908,
"step": 6060
},
{
"epoch": 0.4992917120039418,
"grad_norm": 0.4125641882419586,
"learning_rate": 0.0002,
"loss": 0.2717,
"step": 6080
},
{
"epoch": 0.5009341189513231,
"grad_norm": 0.4346245229244232,
"learning_rate": 0.0002,
"loss": 0.2706,
"step": 6100
},
{
"epoch": 0.5025765258987046,
"grad_norm": 0.47208690643310547,
"learning_rate": 0.0002,
"loss": 0.2851,
"step": 6120
},
{
"epoch": 0.504218932846086,
"grad_norm": 0.4369046986103058,
"learning_rate": 0.0002,
"loss": 0.2809,
"step": 6140
},
{
"epoch": 0.5058613397934674,
"grad_norm": 0.5451960563659668,
"learning_rate": 0.0002,
"loss": 0.293,
"step": 6160
},
{
"epoch": 0.5075037467408487,
"grad_norm": 0.6085506677627563,
"learning_rate": 0.0002,
"loss": 0.2748,
"step": 6180
},
{
"epoch": 0.5091461536882301,
"grad_norm": 0.3898778259754181,
"learning_rate": 0.0002,
"loss": 0.276,
"step": 6200
},
{
"epoch": 0.5107885606356115,
"grad_norm": 0.5069212317466736,
"learning_rate": 0.0002,
"loss": 0.2925,
"step": 6220
},
{
"epoch": 0.5124309675829929,
"grad_norm": 0.48736870288848877,
"learning_rate": 0.0002,
"loss": 0.2718,
"step": 6240
},
{
"epoch": 0.5140733745303743,
"grad_norm": 0.5182287693023682,
"learning_rate": 0.0002,
"loss": 0.2783,
"step": 6260
},
{
"epoch": 0.5157157814777557,
"grad_norm": 0.5157051086425781,
"learning_rate": 0.0002,
"loss": 0.2828,
"step": 6280
},
{
"epoch": 0.517358188425137,
"grad_norm": 0.4653798043727875,
"learning_rate": 0.0002,
"loss": 0.2802,
"step": 6300
},
{
"epoch": 0.5190005953725184,
"grad_norm": 0.4838721454143524,
"learning_rate": 0.0002,
"loss": 0.2758,
"step": 6320
},
{
"epoch": 0.5206430023198998,
"grad_norm": 0.47830331325531006,
"learning_rate": 0.0002,
"loss": 0.2999,
"step": 6340
},
{
"epoch": 0.5222854092672812,
"grad_norm": 0.45021089911460876,
"learning_rate": 0.0002,
"loss": 0.2673,
"step": 6360
},
{
"epoch": 0.5239278162146626,
"grad_norm": 0.4527071714401245,
"learning_rate": 0.0002,
"loss": 0.2624,
"step": 6380
},
{
"epoch": 0.5255702231620439,
"grad_norm": 0.508590817451477,
"learning_rate": 0.0002,
"loss": 0.2555,
"step": 6400
},
{
"epoch": 0.5272126301094253,
"grad_norm": 0.38745129108428955,
"learning_rate": 0.0002,
"loss": 0.2863,
"step": 6420
},
{
"epoch": 0.5288550370568067,
"grad_norm": 0.6669766902923584,
"learning_rate": 0.0002,
"loss": 0.2813,
"step": 6440
},
{
"epoch": 0.5304974440041882,
"grad_norm": 0.5111877918243408,
"learning_rate": 0.0002,
"loss": 0.2712,
"step": 6460
},
{
"epoch": 0.5321398509515696,
"grad_norm": 0.5499460697174072,
"learning_rate": 0.0002,
"loss": 0.2656,
"step": 6480
},
{
"epoch": 0.533782257898951,
"grad_norm": 0.5004873275756836,
"learning_rate": 0.0002,
"loss": 0.2873,
"step": 6500
},
{
"epoch": 0.5354246648463323,
"grad_norm": 0.6010814309120178,
"learning_rate": 0.0002,
"loss": 0.3005,
"step": 6520
},
{
"epoch": 0.5370670717937137,
"grad_norm": 0.4720690846443176,
"learning_rate": 0.0002,
"loss": 0.2675,
"step": 6540
},
{
"epoch": 0.5387094787410951,
"grad_norm": 0.47902727127075195,
"learning_rate": 0.0002,
"loss": 0.2715,
"step": 6560
},
{
"epoch": 0.5403518856884765,
"grad_norm": 0.46664199233055115,
"learning_rate": 0.0002,
"loss": 0.2713,
"step": 6580
},
{
"epoch": 0.5419942926358579,
"grad_norm": 0.5385149121284485,
"learning_rate": 0.0002,
"loss": 0.2867,
"step": 6600
},
{
"epoch": 0.5436366995832392,
"grad_norm": 0.3878926932811737,
"learning_rate": 0.0002,
"loss": 0.2802,
"step": 6620
},
{
"epoch": 0.5452791065306206,
"grad_norm": 0.390656054019928,
"learning_rate": 0.0002,
"loss": 0.2676,
"step": 6640
},
{
"epoch": 0.546921513478002,
"grad_norm": 0.4342198669910431,
"learning_rate": 0.0002,
"loss": 0.2874,
"step": 6660
},
{
"epoch": 0.5485639204253834,
"grad_norm": 0.42557764053344727,
"learning_rate": 0.0002,
"loss": 0.2829,
"step": 6680
},
{
"epoch": 0.5502063273727648,
"grad_norm": 0.5569108128547668,
"learning_rate": 0.0002,
"loss": 0.2929,
"step": 6700
},
{
"epoch": 0.5518487343201461,
"grad_norm": 0.38765788078308105,
"learning_rate": 0.0002,
"loss": 0.2804,
"step": 6720
},
{
"epoch": 0.5534911412675275,
"grad_norm": 0.5068329572677612,
"learning_rate": 0.0002,
"loss": 0.2629,
"step": 6740
},
{
"epoch": 0.5551335482149089,
"grad_norm": 0.5097832083702087,
"learning_rate": 0.0002,
"loss": 0.2846,
"step": 6760
},
{
"epoch": 0.5567759551622903,
"grad_norm": 0.37154141068458557,
"learning_rate": 0.0002,
"loss": 0.2625,
"step": 6780
},
{
"epoch": 0.5584183621096718,
"grad_norm": 0.41640445590019226,
"learning_rate": 0.0002,
"loss": 0.2669,
"step": 6800
},
{
"epoch": 0.5600607690570532,
"grad_norm": 0.45431575179100037,
"learning_rate": 0.0002,
"loss": 0.2644,
"step": 6820
},
{
"epoch": 0.5617031760044345,
"grad_norm": 0.46759283542633057,
"learning_rate": 0.0002,
"loss": 0.2742,
"step": 6840
},
{
"epoch": 0.5633455829518159,
"grad_norm": 0.4959569275379181,
"learning_rate": 0.0002,
"loss": 0.2746,
"step": 6860
},
{
"epoch": 0.5649879898991973,
"grad_norm": 0.44646400213241577,
"learning_rate": 0.0002,
"loss": 0.2803,
"step": 6880
},
{
"epoch": 0.5666303968465787,
"grad_norm": 0.5323026180267334,
"learning_rate": 0.0002,
"loss": 0.2685,
"step": 6900
},
{
"epoch": 0.5682728037939601,
"grad_norm": 0.5455038547515869,
"learning_rate": 0.0002,
"loss": 0.2737,
"step": 6920
},
{
"epoch": 0.5699152107413414,
"grad_norm": 0.429975301027298,
"learning_rate": 0.0002,
"loss": 0.2826,
"step": 6940
},
{
"epoch": 0.5715576176887228,
"grad_norm": 0.5396720170974731,
"learning_rate": 0.0002,
"loss": 0.266,
"step": 6960
},
{
"epoch": 0.5732000246361042,
"grad_norm": 0.45468002557754517,
"learning_rate": 0.0002,
"loss": 0.2676,
"step": 6980
},
{
"epoch": 0.5748424315834856,
"grad_norm": 0.4196678698062897,
"learning_rate": 0.0002,
"loss": 0.2786,
"step": 7000
},
{
"epoch": 0.576484838530867,
"grad_norm": 0.4681088328361511,
"learning_rate": 0.0002,
"loss": 0.2731,
"step": 7020
},
{
"epoch": 0.5781272454782483,
"grad_norm": 0.4538247287273407,
"learning_rate": 0.0002,
"loss": 0.287,
"step": 7040
},
{
"epoch": 0.5797696524256297,
"grad_norm": 0.4834930896759033,
"learning_rate": 0.0002,
"loss": 0.2808,
"step": 7060
},
{
"epoch": 0.5814120593730111,
"grad_norm": 0.5876035690307617,
"learning_rate": 0.0002,
"loss": 0.2631,
"step": 7080
},
{
"epoch": 0.5830544663203925,
"grad_norm": 0.5164270401000977,
"learning_rate": 0.0002,
"loss": 0.2502,
"step": 7100
},
{
"epoch": 0.584696873267774,
"grad_norm": 0.46229973435401917,
"learning_rate": 0.0002,
"loss": 0.2575,
"step": 7120
},
{
"epoch": 0.5863392802151554,
"grad_norm": 0.438803106546402,
"learning_rate": 0.0002,
"loss": 0.2625,
"step": 7140
},
{
"epoch": 0.5879816871625367,
"grad_norm": 0.5476749539375305,
"learning_rate": 0.0002,
"loss": 0.2706,
"step": 7160
},
{
"epoch": 0.5896240941099181,
"grad_norm": 0.5194425582885742,
"learning_rate": 0.0002,
"loss": 0.2766,
"step": 7180
},
{
"epoch": 0.5912665010572995,
"grad_norm": 0.4764098525047302,
"learning_rate": 0.0002,
"loss": 0.2784,
"step": 7200
},
{
"epoch": 0.5929089080046809,
"grad_norm": 0.4703931510448456,
"learning_rate": 0.0002,
"loss": 0.2652,
"step": 7220
},
{
"epoch": 0.5945513149520623,
"grad_norm": 0.43372678756713867,
"learning_rate": 0.0002,
"loss": 0.2644,
"step": 7240
},
{
"epoch": 0.5961937218994436,
"grad_norm": 0.40813469886779785,
"learning_rate": 0.0002,
"loss": 0.2721,
"step": 7260
},
{
"epoch": 0.597836128846825,
"grad_norm": 0.5182124376296997,
"learning_rate": 0.0002,
"loss": 0.2741,
"step": 7280
},
{
"epoch": 0.5994785357942064,
"grad_norm": 0.4767136573791504,
"learning_rate": 0.0002,
"loss": 0.277,
"step": 7300
},
{
"epoch": 0.6011209427415878,
"grad_norm": 0.43762916326522827,
"learning_rate": 0.0002,
"loss": 0.2645,
"step": 7320
},
{
"epoch": 0.6027633496889692,
"grad_norm": 0.44736623764038086,
"learning_rate": 0.0002,
"loss": 0.2639,
"step": 7340
},
{
"epoch": 0.6044057566363505,
"grad_norm": 0.44404810667037964,
"learning_rate": 0.0002,
"loss": 0.269,
"step": 7360
},
{
"epoch": 0.6060481635837319,
"grad_norm": 0.4380868673324585,
"learning_rate": 0.0002,
"loss": 0.2615,
"step": 7380
},
{
"epoch": 0.6076905705311133,
"grad_norm": 0.4491208791732788,
"learning_rate": 0.0002,
"loss": 0.2462,
"step": 7400
},
{
"epoch": 0.6093329774784947,
"grad_norm": 0.5080710053443909,
"learning_rate": 0.0002,
"loss": 0.2823,
"step": 7420
},
{
"epoch": 0.6109753844258761,
"grad_norm": 0.47498422861099243,
"learning_rate": 0.0002,
"loss": 0.2706,
"step": 7440
},
{
"epoch": 0.6126177913732576,
"grad_norm": 0.4133289158344269,
"learning_rate": 0.0002,
"loss": 0.2684,
"step": 7460
},
{
"epoch": 0.6142601983206389,
"grad_norm": 0.4456469416618347,
"learning_rate": 0.0002,
"loss": 0.2542,
"step": 7480
},
{
"epoch": 0.6159026052680203,
"grad_norm": 0.5421611070632935,
"learning_rate": 0.0002,
"loss": 0.2737,
"step": 7500
},
{
"epoch": 0.6175450122154017,
"grad_norm": 0.4131532609462738,
"learning_rate": 0.0002,
"loss": 0.2507,
"step": 7520
},
{
"epoch": 0.6191874191627831,
"grad_norm": 0.47127702832221985,
"learning_rate": 0.0002,
"loss": 0.2819,
"step": 7540
},
{
"epoch": 0.6208298261101645,
"grad_norm": 0.43743231892585754,
"learning_rate": 0.0002,
"loss": 0.2822,
"step": 7560
},
{
"epoch": 0.6224722330575458,
"grad_norm": 0.42425501346588135,
"learning_rate": 0.0002,
"loss": 0.2654,
"step": 7580
},
{
"epoch": 0.6241146400049272,
"grad_norm": 0.4609832763671875,
"learning_rate": 0.0002,
"loss": 0.2466,
"step": 7600
},
{
"epoch": 0.6257570469523086,
"grad_norm": 0.42701244354248047,
"learning_rate": 0.0002,
"loss": 0.255,
"step": 7620
},
{
"epoch": 0.62739945389969,
"grad_norm": 0.5154401063919067,
"learning_rate": 0.0002,
"loss": 0.2705,
"step": 7640
},
{
"epoch": 0.6290418608470714,
"grad_norm": 0.451377809047699,
"learning_rate": 0.0002,
"loss": 0.2586,
"step": 7660
},
{
"epoch": 0.6306842677944527,
"grad_norm": 0.47166112065315247,
"learning_rate": 0.0002,
"loss": 0.2605,
"step": 7680
},
{
"epoch": 0.6323266747418341,
"grad_norm": 0.3716096878051758,
"learning_rate": 0.0002,
"loss": 0.2539,
"step": 7700
},
{
"epoch": 0.6339690816892155,
"grad_norm": 0.45413604378700256,
"learning_rate": 0.0002,
"loss": 0.2633,
"step": 7720
},
{
"epoch": 0.6356114886365969,
"grad_norm": 0.48580700159072876,
"learning_rate": 0.0002,
"loss": 0.256,
"step": 7740
},
{
"epoch": 0.6372538955839783,
"grad_norm": 0.40647098422050476,
"learning_rate": 0.0002,
"loss": 0.2655,
"step": 7760
},
{
"epoch": 0.6388963025313598,
"grad_norm": 0.4718053638935089,
"learning_rate": 0.0002,
"loss": 0.261,
"step": 7780
},
{
"epoch": 0.6405387094787411,
"grad_norm": 0.5230545401573181,
"learning_rate": 0.0002,
"loss": 0.2464,
"step": 7800
},
{
"epoch": 0.6421811164261225,
"grad_norm": 0.5010546445846558,
"learning_rate": 0.0002,
"loss": 0.261,
"step": 7820
},
{
"epoch": 0.6438235233735039,
"grad_norm": 0.41263461112976074,
"learning_rate": 0.0002,
"loss": 0.2626,
"step": 7840
},
{
"epoch": 0.6454659303208853,
"grad_norm": 0.538346529006958,
"learning_rate": 0.0002,
"loss": 0.2557,
"step": 7860
},
{
"epoch": 0.6471083372682667,
"grad_norm": 0.4800877869129181,
"learning_rate": 0.0002,
"loss": 0.2742,
"step": 7880
},
{
"epoch": 0.648750744215648,
"grad_norm": 0.5247358083724976,
"learning_rate": 0.0002,
"loss": 0.2608,
"step": 7900
},
{
"epoch": 0.6503931511630294,
"grad_norm": 0.5625537037849426,
"learning_rate": 0.0002,
"loss": 0.2445,
"step": 7920
},
{
"epoch": 0.6520355581104108,
"grad_norm": 0.44077080488204956,
"learning_rate": 0.0002,
"loss": 0.2572,
"step": 7940
},
{
"epoch": 0.6536779650577922,
"grad_norm": 0.4610736072063446,
"learning_rate": 0.0002,
"loss": 0.2645,
"step": 7960
},
{
"epoch": 0.6553203720051736,
"grad_norm": 0.4790017008781433,
"learning_rate": 0.0002,
"loss": 0.2556,
"step": 7980
},
{
"epoch": 0.656962778952555,
"grad_norm": 0.45367711782455444,
"learning_rate": 0.0002,
"loss": 0.253,
"step": 8000
},
{
"epoch": 0.6586051858999363,
"grad_norm": 0.4644503593444824,
"learning_rate": 0.0002,
"loss": 0.25,
"step": 8020
},
{
"epoch": 0.6602475928473177,
"grad_norm": 0.3938300311565399,
"learning_rate": 0.0002,
"loss": 0.2524,
"step": 8040
},
{
"epoch": 0.6618899997946991,
"grad_norm": 0.4796749949455261,
"learning_rate": 0.0002,
"loss": 0.2643,
"step": 8060
},
{
"epoch": 0.6635324067420805,
"grad_norm": 0.3965921700000763,
"learning_rate": 0.0002,
"loss": 0.252,
"step": 8080
},
{
"epoch": 0.6651748136894619,
"grad_norm": 0.4033324420452118,
"learning_rate": 0.0002,
"loss": 0.2469,
"step": 8100
},
{
"epoch": 0.6668172206368433,
"grad_norm": 0.5205174088478088,
"learning_rate": 0.0002,
"loss": 0.2479,
"step": 8120
},
{
"epoch": 0.6684596275842247,
"grad_norm": 0.4026409685611725,
"learning_rate": 0.0002,
"loss": 0.2482,
"step": 8140
},
{
"epoch": 0.6701020345316061,
"grad_norm": 0.33538395166397095,
"learning_rate": 0.0002,
"loss": 0.2452,
"step": 8160
},
{
"epoch": 0.6717444414789875,
"grad_norm": 0.43549609184265137,
"learning_rate": 0.0002,
"loss": 0.2548,
"step": 8180
},
{
"epoch": 0.6733868484263689,
"grad_norm": 0.5167241096496582,
"learning_rate": 0.0002,
"loss": 0.2664,
"step": 8200
},
{
"epoch": 0.6750292553737502,
"grad_norm": 0.4824913740158081,
"learning_rate": 0.0002,
"loss": 0.2668,
"step": 8220
},
{
"epoch": 0.6766716623211316,
"grad_norm": 0.49560844898223877,
"learning_rate": 0.0002,
"loss": 0.2639,
"step": 8240
},
{
"epoch": 0.678314069268513,
"grad_norm": 0.43627840280532837,
"learning_rate": 0.0002,
"loss": 0.2536,
"step": 8260
},
{
"epoch": 0.6799564762158944,
"grad_norm": 0.4371199905872345,
"learning_rate": 0.0002,
"loss": 0.259,
"step": 8280
},
{
"epoch": 0.6815988831632758,
"grad_norm": 0.43210867047309875,
"learning_rate": 0.0002,
"loss": 0.2413,
"step": 8300
},
{
"epoch": 0.6832412901106572,
"grad_norm": 0.4612789750099182,
"learning_rate": 0.0002,
"loss": 0.257,
"step": 8320
},
{
"epoch": 0.6848836970580385,
"grad_norm": 0.5780384540557861,
"learning_rate": 0.0002,
"loss": 0.2497,
"step": 8340
},
{
"epoch": 0.6865261040054199,
"grad_norm": 0.3581444323062897,
"learning_rate": 0.0002,
"loss": 0.2542,
"step": 8360
},
{
"epoch": 0.6881685109528013,
"grad_norm": 0.5276636481285095,
"learning_rate": 0.0002,
"loss": 0.2482,
"step": 8380
},
{
"epoch": 0.6898109179001827,
"grad_norm": 0.419548362493515,
"learning_rate": 0.0002,
"loss": 0.2778,
"step": 8400
},
{
"epoch": 0.691453324847564,
"grad_norm": 0.5594448447227478,
"learning_rate": 0.0002,
"loss": 0.271,
"step": 8420
},
{
"epoch": 0.6930957317949455,
"grad_norm": 0.4505052864551544,
"learning_rate": 0.0002,
"loss": 0.2531,
"step": 8440
},
{
"epoch": 0.6947381387423269,
"grad_norm": 0.4273683726787567,
"learning_rate": 0.0002,
"loss": 0.2687,
"step": 8460
},
{
"epoch": 0.6963805456897083,
"grad_norm": 0.41312068700790405,
"learning_rate": 0.0002,
"loss": 0.2535,
"step": 8480
},
{
"epoch": 0.6980229526370897,
"grad_norm": 0.3998921811580658,
"learning_rate": 0.0002,
"loss": 0.2507,
"step": 8500
},
{
"epoch": 0.6996653595844711,
"grad_norm": 0.4063471257686615,
"learning_rate": 0.0002,
"loss": 0.2604,
"step": 8520
},
{
"epoch": 0.7013077665318525,
"grad_norm": 0.4816170036792755,
"learning_rate": 0.0002,
"loss": 0.2563,
"step": 8540
},
{
"epoch": 0.7029501734792338,
"grad_norm": 0.47880151867866516,
"learning_rate": 0.0002,
"loss": 0.2582,
"step": 8560
},
{
"epoch": 0.7045925804266152,
"grad_norm": 0.43934714794158936,
"learning_rate": 0.0002,
"loss": 0.2588,
"step": 8580
},
{
"epoch": 0.7062349873739966,
"grad_norm": 0.5664840340614319,
"learning_rate": 0.0002,
"loss": 0.2361,
"step": 8600
},
{
"epoch": 0.707877394321378,
"grad_norm": 0.4387499690055847,
"learning_rate": 0.0002,
"loss": 0.2784,
"step": 8620
},
{
"epoch": 0.7095198012687594,
"grad_norm": 0.4497361183166504,
"learning_rate": 0.0002,
"loss": 0.2419,
"step": 8640
},
{
"epoch": 0.7111622082161407,
"grad_norm": 0.36037716269493103,
"learning_rate": 0.0002,
"loss": 0.2479,
"step": 8660
},
{
"epoch": 0.7128046151635221,
"grad_norm": 0.5163317918777466,
"learning_rate": 0.0002,
"loss": 0.2535,
"step": 8680
},
{
"epoch": 0.7144470221109035,
"grad_norm": 0.466194748878479,
"learning_rate": 0.0002,
"loss": 0.2533,
"step": 8700
},
{
"epoch": 0.7160894290582849,
"grad_norm": 0.328848272562027,
"learning_rate": 0.0002,
"loss": 0.254,
"step": 8720
},
{
"epoch": 0.7177318360056663,
"grad_norm": 0.5417701005935669,
"learning_rate": 0.0002,
"loss": 0.2544,
"step": 8740
},
{
"epoch": 0.7193742429530476,
"grad_norm": 0.5538254976272583,
"learning_rate": 0.0002,
"loss": 0.2453,
"step": 8760
},
{
"epoch": 0.7210166499004291,
"grad_norm": 0.4739200174808502,
"learning_rate": 0.0002,
"loss": 0.258,
"step": 8780
},
{
"epoch": 0.7226590568478105,
"grad_norm": 0.40133044123649597,
"learning_rate": 0.0002,
"loss": 0.2684,
"step": 8800
},
{
"epoch": 0.7243014637951919,
"grad_norm": 0.4493289291858673,
"learning_rate": 0.0002,
"loss": 0.2565,
"step": 8820
},
{
"epoch": 0.7259438707425733,
"grad_norm": 0.4970559775829315,
"learning_rate": 0.0002,
"loss": 0.2506,
"step": 8840
},
{
"epoch": 0.7275862776899547,
"grad_norm": 0.5687580108642578,
"learning_rate": 0.0002,
"loss": 0.2511,
"step": 8860
},
{
"epoch": 0.729228684637336,
"grad_norm": 0.5328338742256165,
"learning_rate": 0.0002,
"loss": 0.2428,
"step": 8880
},
{
"epoch": 0.7308710915847174,
"grad_norm": 0.47104090452194214,
"learning_rate": 0.0002,
"loss": 0.2491,
"step": 8900
},
{
"epoch": 0.7325134985320988,
"grad_norm": 0.4887702167034149,
"learning_rate": 0.0002,
"loss": 0.2532,
"step": 8920
},
{
"epoch": 0.7341559054794802,
"grad_norm": 0.3589889705181122,
"learning_rate": 0.0002,
"loss": 0.2587,
"step": 8940
},
{
"epoch": 0.7357983124268616,
"grad_norm": 0.4665176570415497,
"learning_rate": 0.0002,
"loss": 0.2407,
"step": 8960
},
{
"epoch": 0.7374407193742429,
"grad_norm": 0.2580777108669281,
"learning_rate": 0.0002,
"loss": 0.2501,
"step": 8980
},
{
"epoch": 0.7390831263216243,
"grad_norm": 0.5562865734100342,
"learning_rate": 0.0002,
"loss": 0.2589,
"step": 9000
},
{
"epoch": 0.7407255332690057,
"grad_norm": 0.36843666434288025,
"learning_rate": 0.0002,
"loss": 0.2639,
"step": 9020
},
{
"epoch": 0.7423679402163871,
"grad_norm": 0.433339387178421,
"learning_rate": 0.0002,
"loss": 0.239,
"step": 9040
},
{
"epoch": 0.7440103471637685,
"grad_norm": 0.5565098524093628,
"learning_rate": 0.0002,
"loss": 0.2528,
"step": 9060
},
{
"epoch": 0.7456527541111498,
"grad_norm": 0.39954161643981934,
"learning_rate": 0.0002,
"loss": 0.24,
"step": 9080
},
{
"epoch": 0.7472951610585313,
"grad_norm": 0.43612274527549744,
"learning_rate": 0.0002,
"loss": 0.2373,
"step": 9100
},
{
"epoch": 0.7489375680059127,
"grad_norm": 0.4511432945728302,
"learning_rate": 0.0002,
"loss": 0.2564,
"step": 9120
},
{
"epoch": 0.7505799749532941,
"grad_norm": 0.3895890414714813,
"learning_rate": 0.0002,
"loss": 0.2469,
"step": 9140
},
{
"epoch": 0.7522223819006755,
"grad_norm": 0.4349375069141388,
"learning_rate": 0.0002,
"loss": 0.2582,
"step": 9160
},
{
"epoch": 0.7538647888480569,
"grad_norm": 0.39693930745124817,
"learning_rate": 0.0002,
"loss": 0.2576,
"step": 9180
},
{
"epoch": 0.7555071957954382,
"grad_norm": 0.35806095600128174,
"learning_rate": 0.0002,
"loss": 0.235,
"step": 9200
},
{
"epoch": 0.7571496027428196,
"grad_norm": 0.5650025010108948,
"learning_rate": 0.0002,
"loss": 0.2541,
"step": 9220
},
{
"epoch": 0.758792009690201,
"grad_norm": 0.45522645115852356,
"learning_rate": 0.0002,
"loss": 0.2323,
"step": 9240
},
{
"epoch": 0.7604344166375824,
"grad_norm": 0.45849525928497314,
"learning_rate": 0.0002,
"loss": 0.2459,
"step": 9260
},
{
"epoch": 0.7620768235849638,
"grad_norm": 0.5666941404342651,
"learning_rate": 0.0002,
"loss": 0.2634,
"step": 9280
},
{
"epoch": 0.7637192305323451,
"grad_norm": 0.43697381019592285,
"learning_rate": 0.0002,
"loss": 0.2482,
"step": 9300
},
{
"epoch": 0.7653616374797265,
"grad_norm": 0.5133718848228455,
"learning_rate": 0.0002,
"loss": 0.2631,
"step": 9320
},
{
"epoch": 0.7670040444271079,
"grad_norm": 0.5440112352371216,
"learning_rate": 0.0002,
"loss": 0.2593,
"step": 9340
},
{
"epoch": 0.7686464513744893,
"grad_norm": 0.5012624263763428,
"learning_rate": 0.0002,
"loss": 0.243,
"step": 9360
},
{
"epoch": 0.7702888583218707,
"grad_norm": 0.4387590289115906,
"learning_rate": 0.0002,
"loss": 0.2448,
"step": 9380
},
{
"epoch": 0.771931265269252,
"grad_norm": 0.4327554702758789,
"learning_rate": 0.0002,
"loss": 0.2514,
"step": 9400
},
{
"epoch": 0.7735736722166334,
"grad_norm": 0.4909968078136444,
"learning_rate": 0.0002,
"loss": 0.2503,
"step": 9420
},
{
"epoch": 0.7752160791640149,
"grad_norm": 0.4279715120792389,
"learning_rate": 0.0002,
"loss": 0.2558,
"step": 9440
},
{
"epoch": 0.7768584861113963,
"grad_norm": 0.4973134994506836,
"learning_rate": 0.0002,
"loss": 0.2412,
"step": 9460
},
{
"epoch": 0.7785008930587777,
"grad_norm": 0.3873676359653473,
"learning_rate": 0.0002,
"loss": 0.2409,
"step": 9480
},
{
"epoch": 0.7801433000061591,
"grad_norm": 0.40915995836257935,
"learning_rate": 0.0002,
"loss": 0.2322,
"step": 9500
},
{
"epoch": 0.7817857069535404,
"grad_norm": 0.5738871693611145,
"learning_rate": 0.0002,
"loss": 0.2408,
"step": 9520
},
{
"epoch": 0.7834281139009218,
"grad_norm": 0.49270549416542053,
"learning_rate": 0.0002,
"loss": 0.2477,
"step": 9540
},
{
"epoch": 0.7850705208483032,
"grad_norm": 0.4603147804737091,
"learning_rate": 0.0002,
"loss": 0.2402,
"step": 9560
},
{
"epoch": 0.7867129277956846,
"grad_norm": 0.47675642371177673,
"learning_rate": 0.0002,
"loss": 0.2528,
"step": 9580
},
{
"epoch": 0.788355334743066,
"grad_norm": 0.41800156235694885,
"learning_rate": 0.0002,
"loss": 0.2571,
"step": 9600
},
{
"epoch": 0.7899977416904473,
"grad_norm": 0.42527106404304504,
"learning_rate": 0.0002,
"loss": 0.2452,
"step": 9620
},
{
"epoch": 0.7916401486378287,
"grad_norm": 0.5056847333908081,
"learning_rate": 0.0002,
"loss": 0.2511,
"step": 9640
},
{
"epoch": 0.7932825555852101,
"grad_norm": 0.2951577305793762,
"learning_rate": 0.0002,
"loss": 0.233,
"step": 9660
},
{
"epoch": 0.7949249625325915,
"grad_norm": 0.4254283010959625,
"learning_rate": 0.0002,
"loss": 0.2474,
"step": 9680
},
{
"epoch": 0.7965673694799729,
"grad_norm": 0.5127973556518555,
"learning_rate": 0.0002,
"loss": 0.2655,
"step": 9700
},
{
"epoch": 0.7982097764273542,
"grad_norm": 0.3507694900035858,
"learning_rate": 0.0002,
"loss": 0.227,
"step": 9720
},
{
"epoch": 0.7998521833747356,
"grad_norm": 0.4255737364292145,
"learning_rate": 0.0002,
"loss": 0.2591,
"step": 9740
},
{
"epoch": 0.8014945903221171,
"grad_norm": 0.44822582602500916,
"learning_rate": 0.0002,
"loss": 0.2287,
"step": 9760
},
{
"epoch": 0.8031369972694985,
"grad_norm": 0.4737776517868042,
"learning_rate": 0.0002,
"loss": 0.2412,
"step": 9780
},
{
"epoch": 0.8047794042168799,
"grad_norm": 0.4281519651412964,
"learning_rate": 0.0002,
"loss": 0.2559,
"step": 9800
},
{
"epoch": 0.8064218111642613,
"grad_norm": 0.3413679301738739,
"learning_rate": 0.0002,
"loss": 0.2479,
"step": 9820
},
{
"epoch": 0.8080642181116426,
"grad_norm": 0.4361155033111572,
"learning_rate": 0.0002,
"loss": 0.2539,
"step": 9840
},
{
"epoch": 0.809706625059024,
"grad_norm": 0.48523005843162537,
"learning_rate": 0.0002,
"loss": 0.2534,
"step": 9860
},
{
"epoch": 0.8113490320064054,
"grad_norm": 0.4045993685722351,
"learning_rate": 0.0002,
"loss": 0.2455,
"step": 9880
},
{
"epoch": 0.8129914389537868,
"grad_norm": 0.5103000998497009,
"learning_rate": 0.0002,
"loss": 0.2535,
"step": 9900
},
{
"epoch": 0.8146338459011682,
"grad_norm": 0.3670307397842407,
"learning_rate": 0.0002,
"loss": 0.2337,
"step": 9920
},
{
"epoch": 0.8162762528485495,
"grad_norm": 0.3149369955062866,
"learning_rate": 0.0002,
"loss": 0.2586,
"step": 9940
},
{
"epoch": 0.8179186597959309,
"grad_norm": 0.5316740274429321,
"learning_rate": 0.0002,
"loss": 0.2373,
"step": 9960
},
{
"epoch": 0.8195610667433123,
"grad_norm": 0.5300164222717285,
"learning_rate": 0.0002,
"loss": 0.2399,
"step": 9980
},
{
"epoch": 0.8212034736906937,
"grad_norm": 0.48414990305900574,
"learning_rate": 0.0002,
"loss": 0.2331,
"step": 10000
},
{
"epoch": 0.8228458806380751,
"grad_norm": 0.41733840107917786,
"learning_rate": 0.0002,
"loss": 0.2454,
"step": 10020
},
{
"epoch": 0.8244882875854564,
"grad_norm": 0.5048840045928955,
"learning_rate": 0.0002,
"loss": 0.2421,
"step": 10040
},
{
"epoch": 0.8261306945328378,
"grad_norm": 0.4444895386695862,
"learning_rate": 0.0002,
"loss": 0.2537,
"step": 10060
},
{
"epoch": 0.8277731014802192,
"grad_norm": 0.45051780343055725,
"learning_rate": 0.0002,
"loss": 0.2462,
"step": 10080
},
{
"epoch": 0.8294155084276007,
"grad_norm": 0.3937041163444519,
"learning_rate": 0.0002,
"loss": 0.243,
"step": 10100
},
{
"epoch": 0.8310579153749821,
"grad_norm": 0.45621591806411743,
"learning_rate": 0.0002,
"loss": 0.2469,
"step": 10120
},
{
"epoch": 0.8327003223223635,
"grad_norm": 0.5431267619132996,
"learning_rate": 0.0002,
"loss": 0.2425,
"step": 10140
},
{
"epoch": 0.8343427292697448,
"grad_norm": 0.5039596557617188,
"learning_rate": 0.0002,
"loss": 0.2379,
"step": 10160
},
{
"epoch": 0.8359851362171262,
"grad_norm": 0.3915367126464844,
"learning_rate": 0.0002,
"loss": 0.241,
"step": 10180
},
{
"epoch": 0.8376275431645076,
"grad_norm": 0.46073317527770996,
"learning_rate": 0.0002,
"loss": 0.2485,
"step": 10200
},
{
"epoch": 0.839269950111889,
"grad_norm": 0.47057440876960754,
"learning_rate": 0.0002,
"loss": 0.2452,
"step": 10220
},
{
"epoch": 0.8409123570592704,
"grad_norm": 0.6143821477890015,
"learning_rate": 0.0002,
"loss": 0.2394,
"step": 10240
},
{
"epoch": 0.8425547640066517,
"grad_norm": 0.41434940695762634,
"learning_rate": 0.0002,
"loss": 0.2332,
"step": 10260
},
{
"epoch": 0.8441971709540331,
"grad_norm": 0.467459499835968,
"learning_rate": 0.0002,
"loss": 0.2439,
"step": 10280
},
{
"epoch": 0.8458395779014145,
"grad_norm": 0.49404439330101013,
"learning_rate": 0.0002,
"loss": 0.2378,
"step": 10300
},
{
"epoch": 0.8474819848487959,
"grad_norm": 0.4313650131225586,
"learning_rate": 0.0002,
"loss": 0.2455,
"step": 10320
},
{
"epoch": 0.8491243917961773,
"grad_norm": 0.34277698397636414,
"learning_rate": 0.0002,
"loss": 0.2396,
"step": 10340
},
{
"epoch": 0.8507667987435586,
"grad_norm": 0.3649916350841522,
"learning_rate": 0.0002,
"loss": 0.2348,
"step": 10360
},
{
"epoch": 0.85240920569094,
"grad_norm": 0.4841578006744385,
"learning_rate": 0.0002,
"loss": 0.2488,
"step": 10380
},
{
"epoch": 0.8540516126383214,
"grad_norm": 0.5488325953483582,
"learning_rate": 0.0002,
"loss": 0.2399,
"step": 10400
},
{
"epoch": 0.8556940195857029,
"grad_norm": 0.41103577613830566,
"learning_rate": 0.0002,
"loss": 0.2371,
"step": 10420
},
{
"epoch": 0.8573364265330843,
"grad_norm": 0.42253378033638,
"learning_rate": 0.0002,
"loss": 0.2478,
"step": 10440
},
{
"epoch": 0.8589788334804657,
"grad_norm": 0.43092676997184753,
"learning_rate": 0.0002,
"loss": 0.2316,
"step": 10460
},
{
"epoch": 0.860621240427847,
"grad_norm": 0.5474075078964233,
"learning_rate": 0.0002,
"loss": 0.2734,
"step": 10480
},
{
"epoch": 0.8622636473752284,
"grad_norm": 0.474618524312973,
"learning_rate": 0.0002,
"loss": 0.2378,
"step": 10500
},
{
"epoch": 0.8639060543226098,
"grad_norm": 0.44008612632751465,
"learning_rate": 0.0002,
"loss": 0.236,
"step": 10520
},
{
"epoch": 0.8655484612699912,
"grad_norm": 0.4194040894508362,
"learning_rate": 0.0002,
"loss": 0.2433,
"step": 10540
},
{
"epoch": 0.8671908682173726,
"grad_norm": 0.3890872597694397,
"learning_rate": 0.0002,
"loss": 0.2308,
"step": 10560
},
{
"epoch": 0.868833275164754,
"grad_norm": 0.41979917883872986,
"learning_rate": 0.0002,
"loss": 0.2417,
"step": 10580
},
{
"epoch": 0.8704756821121353,
"grad_norm": 0.3800947666168213,
"learning_rate": 0.0002,
"loss": 0.244,
"step": 10600
},
{
"epoch": 0.8721180890595167,
"grad_norm": 0.38609811663627625,
"learning_rate": 0.0002,
"loss": 0.2477,
"step": 10620
},
{
"epoch": 0.8737604960068981,
"grad_norm": 0.514067530632019,
"learning_rate": 0.0002,
"loss": 0.2382,
"step": 10640
},
{
"epoch": 0.8754029029542795,
"grad_norm": 0.47742265462875366,
"learning_rate": 0.0002,
"loss": 0.2298,
"step": 10660
},
{
"epoch": 0.8770453099016609,
"grad_norm": 0.45849281549453735,
"learning_rate": 0.0002,
"loss": 0.2332,
"step": 10680
},
{
"epoch": 0.8786877168490422,
"grad_norm": 0.39788320660591125,
"learning_rate": 0.0002,
"loss": 0.2363,
"step": 10700
},
{
"epoch": 0.8803301237964236,
"grad_norm": 0.5124650597572327,
"learning_rate": 0.0002,
"loss": 0.2292,
"step": 10720
},
{
"epoch": 0.881972530743805,
"grad_norm": 0.48688754439353943,
"learning_rate": 0.0002,
"loss": 0.2444,
"step": 10740
},
{
"epoch": 0.8836149376911865,
"grad_norm": 0.46146026253700256,
"learning_rate": 0.0002,
"loss": 0.2473,
"step": 10760
},
{
"epoch": 0.8852573446385679,
"grad_norm": 0.38401076197624207,
"learning_rate": 0.0002,
"loss": 0.2441,
"step": 10780
},
{
"epoch": 0.8868997515859492,
"grad_norm": 0.4642081558704376,
"learning_rate": 0.0002,
"loss": 0.2338,
"step": 10800
},
{
"epoch": 0.8885421585333306,
"grad_norm": 0.378845751285553,
"learning_rate": 0.0002,
"loss": 0.2203,
"step": 10820
},
{
"epoch": 0.890184565480712,
"grad_norm": 0.3785631060600281,
"learning_rate": 0.0002,
"loss": 0.2474,
"step": 10840
},
{
"epoch": 0.8918269724280934,
"grad_norm": 0.4151659309864044,
"learning_rate": 0.0002,
"loss": 0.2361,
"step": 10860
},
{
"epoch": 0.8934693793754748,
"grad_norm": 0.3314524292945862,
"learning_rate": 0.0002,
"loss": 0.241,
"step": 10880
},
{
"epoch": 0.8951117863228562,
"grad_norm": 0.4619898200035095,
"learning_rate": 0.0002,
"loss": 0.2426,
"step": 10900
},
{
"epoch": 0.8967541932702375,
"grad_norm": 0.5724550485610962,
"learning_rate": 0.0002,
"loss": 0.2455,
"step": 10920
},
{
"epoch": 0.8983966002176189,
"grad_norm": 0.3766199052333832,
"learning_rate": 0.0002,
"loss": 0.2319,
"step": 10940
},
{
"epoch": 0.9000390071650003,
"grad_norm": 0.4241611659526825,
"learning_rate": 0.0002,
"loss": 0.2316,
"step": 10960
},
{
"epoch": 0.9016814141123817,
"grad_norm": 0.35726866126060486,
"learning_rate": 0.0002,
"loss": 0.2343,
"step": 10980
},
{
"epoch": 0.903323821059763,
"grad_norm": 0.5252423882484436,
"learning_rate": 0.0002,
"loss": 0.2431,
"step": 11000
},
{
"epoch": 0.9049662280071444,
"grad_norm": 0.47167885303497314,
"learning_rate": 0.0002,
"loss": 0.2512,
"step": 11020
},
{
"epoch": 0.9066086349545258,
"grad_norm": 0.4106541872024536,
"learning_rate": 0.0002,
"loss": 0.2397,
"step": 11040
},
{
"epoch": 0.9082510419019072,
"grad_norm": 0.4804975390434265,
"learning_rate": 0.0002,
"loss": 0.2445,
"step": 11060
},
{
"epoch": 0.9098934488492886,
"grad_norm": 0.4177796542644501,
"learning_rate": 0.0002,
"loss": 0.2302,
"step": 11080
},
{
"epoch": 0.9115358557966701,
"grad_norm": 0.34781017899513245,
"learning_rate": 0.0002,
"loss": 0.2285,
"step": 11100
},
{
"epoch": 0.9131782627440514,
"grad_norm": 0.34392043948173523,
"learning_rate": 0.0002,
"loss": 0.232,
"step": 11120
},
{
"epoch": 0.9148206696914328,
"grad_norm": 0.46544018387794495,
"learning_rate": 0.0002,
"loss": 0.2332,
"step": 11140
},
{
"epoch": 0.9164630766388142,
"grad_norm": 0.47958704829216003,
"learning_rate": 0.0002,
"loss": 0.2481,
"step": 11160
},
{
"epoch": 0.9181054835861956,
"grad_norm": 0.4493333697319031,
"learning_rate": 0.0002,
"loss": 0.238,
"step": 11180
},
{
"epoch": 0.919747890533577,
"grad_norm": 0.47599494457244873,
"learning_rate": 0.0002,
"loss": 0.2416,
"step": 11200
},
{
"epoch": 0.9213902974809584,
"grad_norm": 0.39547592401504517,
"learning_rate": 0.0002,
"loss": 0.2456,
"step": 11220
},
{
"epoch": 0.9230327044283397,
"grad_norm": 0.42187511920928955,
"learning_rate": 0.0002,
"loss": 0.2425,
"step": 11240
},
{
"epoch": 0.9246751113757211,
"grad_norm": 0.3870528042316437,
"learning_rate": 0.0002,
"loss": 0.2366,
"step": 11260
},
{
"epoch": 0.9263175183231025,
"grad_norm": 0.40943118929862976,
"learning_rate": 0.0002,
"loss": 0.2088,
"step": 11280
},
{
"epoch": 0.9279599252704839,
"grad_norm": 0.3936561346054077,
"learning_rate": 0.0002,
"loss": 0.239,
"step": 11300
},
{
"epoch": 0.9296023322178653,
"grad_norm": 0.4154857397079468,
"learning_rate": 0.0002,
"loss": 0.2413,
"step": 11320
},
{
"epoch": 0.9312447391652466,
"grad_norm": 0.5544102191925049,
"learning_rate": 0.0002,
"loss": 0.2565,
"step": 11340
},
{
"epoch": 0.932887146112628,
"grad_norm": 0.5494611263275146,
"learning_rate": 0.0002,
"loss": 0.2469,
"step": 11360
},
{
"epoch": 0.9345295530600094,
"grad_norm": 0.41848114132881165,
"learning_rate": 0.0002,
"loss": 0.2333,
"step": 11380
},
{
"epoch": 0.9361719600073908,
"grad_norm": 0.41343703866004944,
"learning_rate": 0.0002,
"loss": 0.2342,
"step": 11400
},
{
"epoch": 0.9378143669547723,
"grad_norm": 0.6060330867767334,
"learning_rate": 0.0002,
"loss": 0.2507,
"step": 11420
},
{
"epoch": 0.9394567739021537,
"grad_norm": 0.42079275846481323,
"learning_rate": 0.0002,
"loss": 0.2322,
"step": 11440
},
{
"epoch": 0.941099180849535,
"grad_norm": 0.43053537607192993,
"learning_rate": 0.0002,
"loss": 0.2257,
"step": 11460
},
{
"epoch": 0.9427415877969164,
"grad_norm": 0.41895121335983276,
"learning_rate": 0.0002,
"loss": 0.2501,
"step": 11480
},
{
"epoch": 0.9443839947442978,
"grad_norm": 0.467018723487854,
"learning_rate": 0.0002,
"loss": 0.2282,
"step": 11500
},
{
"epoch": 0.9460264016916792,
"grad_norm": 0.5707799196243286,
"learning_rate": 0.0002,
"loss": 0.2319,
"step": 11520
},
{
"epoch": 0.9476688086390606,
"grad_norm": 0.4575120806694031,
"learning_rate": 0.0002,
"loss": 0.2291,
"step": 11540
},
{
"epoch": 0.9493112155864419,
"grad_norm": 0.38349372148513794,
"learning_rate": 0.0002,
"loss": 0.2263,
"step": 11560
},
{
"epoch": 0.9509536225338233,
"grad_norm": 0.4487491846084595,
"learning_rate": 0.0002,
"loss": 0.2505,
"step": 11580
},
{
"epoch": 0.9525960294812047,
"grad_norm": 0.39065688848495483,
"learning_rate": 0.0002,
"loss": 0.239,
"step": 11600
},
{
"epoch": 0.9542384364285861,
"grad_norm": 0.4473966658115387,
"learning_rate": 0.0002,
"loss": 0.2409,
"step": 11620
},
{
"epoch": 0.9558808433759675,
"grad_norm": 0.39066895842552185,
"learning_rate": 0.0002,
"loss": 0.2431,
"step": 11640
},
{
"epoch": 0.9575232503233488,
"grad_norm": 0.470277339220047,
"learning_rate": 0.0002,
"loss": 0.2419,
"step": 11660
},
{
"epoch": 0.9591656572707302,
"grad_norm": 0.405834436416626,
"learning_rate": 0.0002,
"loss": 0.2408,
"step": 11680
},
{
"epoch": 0.9608080642181116,
"grad_norm": 0.5717544555664062,
"learning_rate": 0.0002,
"loss": 0.2352,
"step": 11700
},
{
"epoch": 0.962450471165493,
"grad_norm": 0.4837093651294708,
"learning_rate": 0.0002,
"loss": 0.2435,
"step": 11720
},
{
"epoch": 0.9640928781128744,
"grad_norm": 0.4689130187034607,
"learning_rate": 0.0002,
"loss": 0.2324,
"step": 11740
},
{
"epoch": 0.9657352850602559,
"grad_norm": 0.511249840259552,
"learning_rate": 0.0002,
"loss": 0.2394,
"step": 11760
},
{
"epoch": 0.9673776920076372,
"grad_norm": 0.43555593490600586,
"learning_rate": 0.0002,
"loss": 0.2377,
"step": 11780
},
{
"epoch": 0.9690200989550186,
"grad_norm": 0.41933077573776245,
"learning_rate": 0.0002,
"loss": 0.2355,
"step": 11800
},
{
"epoch": 0.9706625059024,
"grad_norm": 0.41573819518089294,
"learning_rate": 0.0002,
"loss": 0.2345,
"step": 11820
},
{
"epoch": 0.9723049128497814,
"grad_norm": 0.3951037526130676,
"learning_rate": 0.0002,
"loss": 0.2399,
"step": 11840
},
{
"epoch": 0.9739473197971628,
"grad_norm": 0.477756142616272,
"learning_rate": 0.0002,
"loss": 0.2425,
"step": 11860
},
{
"epoch": 0.9755897267445441,
"grad_norm": 0.5147901773452759,
"learning_rate": 0.0002,
"loss": 0.2354,
"step": 11880
},
{
"epoch": 0.9772321336919255,
"grad_norm": 0.40053385496139526,
"learning_rate": 0.0002,
"loss": 0.2325,
"step": 11900
},
{
"epoch": 0.9788745406393069,
"grad_norm": 0.4459463953971863,
"learning_rate": 0.0002,
"loss": 0.2492,
"step": 11920
},
{
"epoch": 0.9805169475866883,
"grad_norm": 0.42749595642089844,
"learning_rate": 0.0002,
"loss": 0.2308,
"step": 11940
},
{
"epoch": 0.9821593545340697,
"grad_norm": 0.4053783714771271,
"learning_rate": 0.0002,
"loss": 0.2263,
"step": 11960
},
{
"epoch": 0.983801761481451,
"grad_norm": 0.43342533707618713,
"learning_rate": 0.0002,
"loss": 0.2348,
"step": 11980
},
{
"epoch": 0.9854441684288324,
"grad_norm": 0.43272313475608826,
"learning_rate": 0.0002,
"loss": 0.2234,
"step": 12000
},
{
"epoch": 0.9870865753762138,
"grad_norm": 0.3550325036048889,
"learning_rate": 0.0002,
"loss": 0.2186,
"step": 12020
},
{
"epoch": 0.9887289823235952,
"grad_norm": 0.35271936655044556,
"learning_rate": 0.0002,
"loss": 0.2326,
"step": 12040
},
{
"epoch": 0.9903713892709766,
"grad_norm": 0.37404924631118774,
"learning_rate": 0.0002,
"loss": 0.2483,
"step": 12060
},
{
"epoch": 0.9920137962183581,
"grad_norm": 0.46686896681785583,
"learning_rate": 0.0002,
"loss": 0.2213,
"step": 12080
},
{
"epoch": 0.9936562031657394,
"grad_norm": 0.37012913823127747,
"learning_rate": 0.0002,
"loss": 0.2415,
"step": 12100
},
{
"epoch": 0.9952986101131208,
"grad_norm": 0.4403967559337616,
"learning_rate": 0.0002,
"loss": 0.2261,
"step": 12120
},
{
"epoch": 0.9969410170605022,
"grad_norm": 0.36877259612083435,
"learning_rate": 0.0002,
"loss": 0.2295,
"step": 12140
},
{
"epoch": 0.9985834240078836,
"grad_norm": 0.34526777267456055,
"learning_rate": 0.0002,
"loss": 0.2236,
"step": 12160
},
{
"epoch": 1.0,
"eval_loss": 0.30336490273475647,
"eval_runtime": 533.8677,
"eval_samples_per_second": 7.092,
"eval_steps_per_second": 0.888,
"step": 12178
}
],
"logging_steps": 20,
"max_steps": 14000,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 77,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.0518674601423667e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}