{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 4.998442795639828, "eval_steps": 500, "global_step": 13645, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003664010259228726, "grad_norm": 27.62790773759725, "learning_rate": 0.0, "loss": 1.9914, "num_tokens": 770427.0, "step": 1 }, { "epoch": 0.0007328020518457452, "grad_norm": 25.4008271065741, "learning_rate": 9.756097560975611e-08, "loss": 1.8806, "num_tokens": 1453707.0, "step": 2 }, { "epoch": 0.0010992030777686177, "grad_norm": 28.306001709451987, "learning_rate": 1.9512195121951221e-07, "loss": 1.9849, "num_tokens": 2285846.0, "step": 3 }, { "epoch": 0.0014656041036914904, "grad_norm": 29.60236560675045, "learning_rate": 2.926829268292683e-07, "loss": 2.0842, "num_tokens": 3188364.0, "step": 4 }, { "epoch": 0.001832005129614363, "grad_norm": 26.828658709537205, "learning_rate": 3.9024390243902443e-07, "loss": 1.9541, "num_tokens": 3880881.0, "step": 5 }, { "epoch": 0.0021984061555372354, "grad_norm": 28.17923965338119, "learning_rate": 4.878048780487805e-07, "loss": 1.9668, "num_tokens": 4620349.0, "step": 6 }, { "epoch": 0.002564807181460108, "grad_norm": 34.35375816384725, "learning_rate": 5.853658536585366e-07, "loss": 1.9991, "num_tokens": 5504038.0, "step": 7 }, { "epoch": 0.002931208207382981, "grad_norm": 38.977649327236165, "learning_rate": 6.829268292682928e-07, "loss": 1.935, "num_tokens": 6346222.0, "step": 8 }, { "epoch": 0.0032976092333058533, "grad_norm": 36.143290811668344, "learning_rate": 7.804878048780489e-07, "loss": 1.9149, "num_tokens": 7121158.0, "step": 9 }, { "epoch": 0.003664010259228726, "grad_norm": 42.87096531105629, "learning_rate": 8.780487804878049e-07, "loss": 1.9164, "num_tokens": 7844564.0, "step": 10 }, { "epoch": 0.004030411285151599, "grad_norm": 47.618730453522765, "learning_rate": 9.75609756097561e-07, "loss": 1.8946, "num_tokens": 8564840.0, "step": 11 }, { "epoch": 0.004396812311074471, "grad_norm": 67.22232353119746, "learning_rate": 1.0731707317073172e-06, "loss": 1.7642, "num_tokens": 9230252.0, "step": 12 }, { "epoch": 0.004763213336997344, "grad_norm": 94.01508730223624, "learning_rate": 1.1707317073170732e-06, "loss": 1.7265, "num_tokens": 9932522.0, "step": 13 }, { "epoch": 0.005129614362920216, "grad_norm": 119.30232653980062, "learning_rate": 1.2682926829268293e-06, "loss": 1.7525, "num_tokens": 10737101.0, "step": 14 }, { "epoch": 0.005496015388843089, "grad_norm": 36.45551107027554, "learning_rate": 1.3658536585365856e-06, "loss": 1.6532, "num_tokens": 11455413.0, "step": 15 }, { "epoch": 0.005862416414765962, "grad_norm": 28.848740253908424, "learning_rate": 1.4634146341463414e-06, "loss": 1.4996, "num_tokens": 12185981.0, "step": 16 }, { "epoch": 0.006228817440688834, "grad_norm": 27.67363979899123, "learning_rate": 1.5609756097560977e-06, "loss": 1.5655, "num_tokens": 12911265.0, "step": 17 }, { "epoch": 0.006595218466611707, "grad_norm": 33.42758658456724, "learning_rate": 1.6585365853658538e-06, "loss": 1.6419, "num_tokens": 13677854.0, "step": 18 }, { "epoch": 0.006961619492534579, "grad_norm": 41.537268379123326, "learning_rate": 1.7560975609756098e-06, "loss": 1.5667, "num_tokens": 14322511.0, "step": 19 }, { "epoch": 0.007328020518457452, "grad_norm": 135.6041812041426, "learning_rate": 1.853658536585366e-06, "loss": 1.4987, "num_tokens": 15252212.0, "step": 20 }, { "epoch": 0.007694421544380325, "grad_norm": 356.15683824601143, "learning_rate": 1.951219512195122e-06, "loss": 1.4239, "num_tokens": 15970048.0, "step": 21 }, { "epoch": 0.008060822570303198, "grad_norm": 241.24381215846853, "learning_rate": 2.048780487804878e-06, "loss": 1.422, "num_tokens": 16724047.0, "step": 22 }, { "epoch": 0.00842722359622607, "grad_norm": 202.41030230729922, "learning_rate": 2.1463414634146343e-06, "loss": 1.3961, "num_tokens": 17695020.0, "step": 23 }, { "epoch": 0.008793624622148942, "grad_norm": 172.3769811093967, "learning_rate": 2.2439024390243906e-06, "loss": 1.3681, "num_tokens": 18635389.0, "step": 24 }, { "epoch": 0.009160025648071815, "grad_norm": 184.58090916496832, "learning_rate": 2.3414634146341465e-06, "loss": 1.3768, "num_tokens": 19392307.0, "step": 25 }, { "epoch": 0.009526426673994687, "grad_norm": 152.08698257257657, "learning_rate": 2.4390243902439027e-06, "loss": 1.3638, "num_tokens": 20065899.0, "step": 26 }, { "epoch": 0.00989282769991756, "grad_norm": 137.18865589363966, "learning_rate": 2.5365853658536586e-06, "loss": 1.3332, "num_tokens": 20826678.0, "step": 27 }, { "epoch": 0.010259228725840432, "grad_norm": 44.57155074400533, "learning_rate": 2.634146341463415e-06, "loss": 1.313, "num_tokens": 21592665.0, "step": 28 }, { "epoch": 0.010625629751763305, "grad_norm": 7.684631480050412, "learning_rate": 2.731707317073171e-06, "loss": 1.313, "num_tokens": 22377933.0, "step": 29 }, { "epoch": 0.010992030777686177, "grad_norm": 5.678449462369119, "learning_rate": 2.8292682926829266e-06, "loss": 1.3001, "num_tokens": 23229624.0, "step": 30 }, { "epoch": 0.01135843180360905, "grad_norm": 4.341962270296106, "learning_rate": 2.926829268292683e-06, "loss": 1.2199, "num_tokens": 24011799.0, "step": 31 }, { "epoch": 0.011724832829531923, "grad_norm": 6.125909658947106, "learning_rate": 3.024390243902439e-06, "loss": 1.2008, "num_tokens": 24786428.0, "step": 32 }, { "epoch": 0.012091233855454795, "grad_norm": 7.539262107641775, "learning_rate": 3.1219512195121954e-06, "loss": 1.18, "num_tokens": 25586562.0, "step": 33 }, { "epoch": 0.012457634881377667, "grad_norm": 3.097941221448077, "learning_rate": 3.2195121951219517e-06, "loss": 1.1214, "num_tokens": 26416153.0, "step": 34 }, { "epoch": 0.012824035907300541, "grad_norm": 3.104618855078638, "learning_rate": 3.3170731707317076e-06, "loss": 1.1047, "num_tokens": 27271343.0, "step": 35 }, { "epoch": 0.013190436933223413, "grad_norm": 2.3231889697197277, "learning_rate": 3.414634146341464e-06, "loss": 1.0856, "num_tokens": 28030728.0, "step": 36 }, { "epoch": 0.013556837959146285, "grad_norm": 3.0096161402275516, "learning_rate": 3.5121951219512197e-06, "loss": 1.0776, "num_tokens": 28711555.0, "step": 37 }, { "epoch": 0.013923238985069157, "grad_norm": 2.757565514659022, "learning_rate": 3.609756097560976e-06, "loss": 1.0195, "num_tokens": 29409188.0, "step": 38 }, { "epoch": 0.014289640010992031, "grad_norm": 7.253736642359323, "learning_rate": 3.707317073170732e-06, "loss": 1.0379, "num_tokens": 30189678.0, "step": 39 }, { "epoch": 0.014656041036914903, "grad_norm": 2.2159904143429343, "learning_rate": 3.804878048780488e-06, "loss": 0.9678, "num_tokens": 30929709.0, "step": 40 }, { "epoch": 0.015022442062837775, "grad_norm": 1.7265531653706707, "learning_rate": 3.902439024390244e-06, "loss": 0.9437, "num_tokens": 31675450.0, "step": 41 }, { "epoch": 0.01538884308876065, "grad_norm": 1.4581395944258027, "learning_rate": 4.000000000000001e-06, "loss": 0.9106, "num_tokens": 32483771.0, "step": 42 }, { "epoch": 0.01575524411468352, "grad_norm": 1.5232060196551382, "learning_rate": 4.097560975609756e-06, "loss": 0.9328, "num_tokens": 33277714.0, "step": 43 }, { "epoch": 0.016121645140606395, "grad_norm": 1.2019836004609399, "learning_rate": 4.195121951219512e-06, "loss": 0.9223, "num_tokens": 34120020.0, "step": 44 }, { "epoch": 0.016488046166529265, "grad_norm": 0.8384641372711278, "learning_rate": 4.292682926829269e-06, "loss": 0.8937, "num_tokens": 34805294.0, "step": 45 }, { "epoch": 0.01685444719245214, "grad_norm": 1.0934053551203926, "learning_rate": 4.390243902439025e-06, "loss": 0.8855, "num_tokens": 35587014.0, "step": 46 }, { "epoch": 0.017220848218375013, "grad_norm": 0.8802384521110628, "learning_rate": 4.487804878048781e-06, "loss": 0.8425, "num_tokens": 36285471.0, "step": 47 }, { "epoch": 0.017587249244297883, "grad_norm": 0.9882398174776565, "learning_rate": 4.5853658536585375e-06, "loss": 0.8811, "num_tokens": 36969932.0, "step": 48 }, { "epoch": 0.017953650270220757, "grad_norm": 0.7040707708705312, "learning_rate": 4.682926829268293e-06, "loss": 0.8826, "num_tokens": 37628677.0, "step": 49 }, { "epoch": 0.01832005129614363, "grad_norm": 0.6053585895740834, "learning_rate": 4.780487804878049e-06, "loss": 0.8641, "num_tokens": 38339324.0, "step": 50 }, { "epoch": 0.0186864523220665, "grad_norm": 0.7757659768000182, "learning_rate": 4.8780487804878055e-06, "loss": 0.8732, "num_tokens": 39204103.0, "step": 51 }, { "epoch": 0.019052853347989375, "grad_norm": 0.5573893722866086, "learning_rate": 4.975609756097562e-06, "loss": 0.831, "num_tokens": 39912863.0, "step": 52 }, { "epoch": 0.019419254373912245, "grad_norm": 0.5509586373595505, "learning_rate": 5.073170731707317e-06, "loss": 0.795, "num_tokens": 40765206.0, "step": 53 }, { "epoch": 0.01978565539983512, "grad_norm": 0.578903718746161, "learning_rate": 5.1707317073170735e-06, "loss": 0.8352, "num_tokens": 41517807.0, "step": 54 }, { "epoch": 0.020152056425757993, "grad_norm": 0.47971186011247613, "learning_rate": 5.26829268292683e-06, "loss": 0.8246, "num_tokens": 42249248.0, "step": 55 }, { "epoch": 0.020518457451680863, "grad_norm": 0.518463702400546, "learning_rate": 5.365853658536586e-06, "loss": 0.8186, "num_tokens": 42957703.0, "step": 56 }, { "epoch": 0.020884858477603737, "grad_norm": 0.4480427991921867, "learning_rate": 5.463414634146342e-06, "loss": 0.8093, "num_tokens": 43701256.0, "step": 57 }, { "epoch": 0.02125125950352661, "grad_norm": 0.4330659948148125, "learning_rate": 5.560975609756099e-06, "loss": 0.8199, "num_tokens": 44467612.0, "step": 58 }, { "epoch": 0.02161766052944948, "grad_norm": 0.5033054451497732, "learning_rate": 5.658536585365853e-06, "loss": 0.7819, "num_tokens": 45211282.0, "step": 59 }, { "epoch": 0.021984061555372355, "grad_norm": 0.4420822569144066, "learning_rate": 5.7560975609756095e-06, "loss": 0.8041, "num_tokens": 46098161.0, "step": 60 }, { "epoch": 0.02235046258129523, "grad_norm": 0.42600153649295375, "learning_rate": 5.853658536585366e-06, "loss": 0.8164, "num_tokens": 47032121.0, "step": 61 }, { "epoch": 0.0227168636072181, "grad_norm": 0.44259987309120535, "learning_rate": 5.951219512195122e-06, "loss": 0.8211, "num_tokens": 47810961.0, "step": 62 }, { "epoch": 0.023083264633140973, "grad_norm": 0.44602641828809686, "learning_rate": 6.048780487804878e-06, "loss": 0.7581, "num_tokens": 48503575.0, "step": 63 }, { "epoch": 0.023449665659063847, "grad_norm": 0.3857262989767124, "learning_rate": 6.1463414634146346e-06, "loss": 0.7892, "num_tokens": 49254316.0, "step": 64 }, { "epoch": 0.023816066684986717, "grad_norm": 0.38075792179365364, "learning_rate": 6.243902439024391e-06, "loss": 0.7659, "num_tokens": 50093927.0, "step": 65 }, { "epoch": 0.02418246771090959, "grad_norm": 0.3696685719520611, "learning_rate": 6.341463414634147e-06, "loss": 0.759, "num_tokens": 50951218.0, "step": 66 }, { "epoch": 0.024548868736832465, "grad_norm": 0.42196823845185033, "learning_rate": 6.439024390243903e-06, "loss": 0.8007, "num_tokens": 51655884.0, "step": 67 }, { "epoch": 0.024915269762755335, "grad_norm": 0.37629592810911183, "learning_rate": 6.53658536585366e-06, "loss": 0.7259, "num_tokens": 52347357.0, "step": 68 }, { "epoch": 0.02528167078867821, "grad_norm": 0.3926135274980088, "learning_rate": 6.634146341463415e-06, "loss": 0.7064, "num_tokens": 53166957.0, "step": 69 }, { "epoch": 0.025648071814601082, "grad_norm": 0.4290804059644357, "learning_rate": 6.731707317073171e-06, "loss": 0.8083, "num_tokens": 53837223.0, "step": 70 }, { "epoch": 0.026014472840523953, "grad_norm": 0.4111185871133666, "learning_rate": 6.829268292682928e-06, "loss": 0.7215, "num_tokens": 54693482.0, "step": 71 }, { "epoch": 0.026380873866446827, "grad_norm": 0.4704034492471897, "learning_rate": 6.926829268292683e-06, "loss": 0.7392, "num_tokens": 55472276.0, "step": 72 }, { "epoch": 0.026747274892369697, "grad_norm": 0.39894150516534055, "learning_rate": 7.024390243902439e-06, "loss": 0.7829, "num_tokens": 56241682.0, "step": 73 }, { "epoch": 0.02711367591829257, "grad_norm": 0.4272600339363003, "learning_rate": 7.121951219512196e-06, "loss": 0.7453, "num_tokens": 57069798.0, "step": 74 }, { "epoch": 0.027480076944215445, "grad_norm": 0.4883957511004874, "learning_rate": 7.219512195121952e-06, "loss": 0.7539, "num_tokens": 57963822.0, "step": 75 }, { "epoch": 0.027846477970138315, "grad_norm": 0.3937816618815139, "learning_rate": 7.317073170731707e-06, "loss": 0.7307, "num_tokens": 58668005.0, "step": 76 }, { "epoch": 0.02821287899606119, "grad_norm": 0.368848572227951, "learning_rate": 7.414634146341464e-06, "loss": 0.7368, "num_tokens": 59479231.0, "step": 77 }, { "epoch": 0.028579280021984062, "grad_norm": 0.333181628852411, "learning_rate": 7.51219512195122e-06, "loss": 0.7417, "num_tokens": 60343640.0, "step": 78 }, { "epoch": 0.028945681047906933, "grad_norm": 0.36077361926245344, "learning_rate": 7.609756097560976e-06, "loss": 0.747, "num_tokens": 61014548.0, "step": 79 }, { "epoch": 0.029312082073829807, "grad_norm": 0.34110950546371366, "learning_rate": 7.707317073170732e-06, "loss": 0.7094, "num_tokens": 61793023.0, "step": 80 }, { "epoch": 0.02967848309975268, "grad_norm": 0.3269617477138762, "learning_rate": 7.804878048780489e-06, "loss": 0.6992, "num_tokens": 62543006.0, "step": 81 }, { "epoch": 0.03004488412567555, "grad_norm": 0.37264479518723564, "learning_rate": 7.902439024390245e-06, "loss": 0.7458, "num_tokens": 63332070.0, "step": 82 }, { "epoch": 0.030411285151598424, "grad_norm": 0.35877602204877274, "learning_rate": 8.000000000000001e-06, "loss": 0.6983, "num_tokens": 64023455.0, "step": 83 }, { "epoch": 0.0307776861775213, "grad_norm": 0.3834245072067245, "learning_rate": 8.097560975609758e-06, "loss": 0.7391, "num_tokens": 64831951.0, "step": 84 }, { "epoch": 0.03114408720344417, "grad_norm": 0.33810263153773684, "learning_rate": 8.195121951219512e-06, "loss": 0.7115, "num_tokens": 65638664.0, "step": 85 }, { "epoch": 0.03151048822936704, "grad_norm": 0.4405918913694983, "learning_rate": 8.292682926829268e-06, "loss": 0.6938, "num_tokens": 66399797.0, "step": 86 }, { "epoch": 0.03187688925528991, "grad_norm": 0.3615234371228791, "learning_rate": 8.390243902439025e-06, "loss": 0.7221, "num_tokens": 67162016.0, "step": 87 }, { "epoch": 0.03224329028121279, "grad_norm": 0.39979838746686985, "learning_rate": 8.487804878048781e-06, "loss": 0.6976, "num_tokens": 67961714.0, "step": 88 }, { "epoch": 0.03260969130713566, "grad_norm": 0.3856687896655199, "learning_rate": 8.585365853658537e-06, "loss": 0.7126, "num_tokens": 68757515.0, "step": 89 }, { "epoch": 0.03297609233305853, "grad_norm": 0.38896693154049805, "learning_rate": 8.682926829268294e-06, "loss": 0.7109, "num_tokens": 69450393.0, "step": 90 }, { "epoch": 0.03334249335898141, "grad_norm": 0.3975253002177152, "learning_rate": 8.78048780487805e-06, "loss": 0.6838, "num_tokens": 70236411.0, "step": 91 }, { "epoch": 0.03370889438490428, "grad_norm": 0.3537984886423977, "learning_rate": 8.878048780487806e-06, "loss": 0.7115, "num_tokens": 70972575.0, "step": 92 }, { "epoch": 0.03407529541082715, "grad_norm": 0.3424877032707451, "learning_rate": 8.975609756097562e-06, "loss": 0.7152, "num_tokens": 71794832.0, "step": 93 }, { "epoch": 0.034441696436750026, "grad_norm": 0.35692407179107855, "learning_rate": 9.073170731707319e-06, "loss": 0.6785, "num_tokens": 72644069.0, "step": 94 }, { "epoch": 0.034808097462672896, "grad_norm": 0.3403020331629499, "learning_rate": 9.170731707317075e-06, "loss": 0.7006, "num_tokens": 73475347.0, "step": 95 }, { "epoch": 0.035174498488595766, "grad_norm": 0.4418462488670592, "learning_rate": 9.268292682926831e-06, "loss": 0.7219, "num_tokens": 74240966.0, "step": 96 }, { "epoch": 0.035540899514518644, "grad_norm": 0.36724630586720747, "learning_rate": 9.365853658536586e-06, "loss": 0.7118, "num_tokens": 74994097.0, "step": 97 }, { "epoch": 0.035907300540441514, "grad_norm": 0.37958957339117694, "learning_rate": 9.463414634146342e-06, "loss": 0.7229, "num_tokens": 75731144.0, "step": 98 }, { "epoch": 0.036273701566364384, "grad_norm": 0.3411803957513947, "learning_rate": 9.560975609756098e-06, "loss": 0.692, "num_tokens": 76547853.0, "step": 99 }, { "epoch": 0.03664010259228726, "grad_norm": 0.39656409899687134, "learning_rate": 9.658536585365855e-06, "loss": 0.6673, "num_tokens": 77412953.0, "step": 100 }, { "epoch": 0.03700650361821013, "grad_norm": 0.35623023683358085, "learning_rate": 9.756097560975611e-06, "loss": 0.6907, "num_tokens": 78230949.0, "step": 101 }, { "epoch": 0.037372904644133, "grad_norm": 0.45053144178144844, "learning_rate": 9.853658536585367e-06, "loss": 0.7025, "num_tokens": 78944682.0, "step": 102 }, { "epoch": 0.03773930567005588, "grad_norm": 0.3969578796954408, "learning_rate": 9.951219512195124e-06, "loss": 0.7075, "num_tokens": 79707043.0, "step": 103 }, { "epoch": 0.03810570669597875, "grad_norm": 0.34859372115875115, "learning_rate": 1.0048780487804878e-05, "loss": 0.7276, "num_tokens": 80434899.0, "step": 104 }, { "epoch": 0.03847210772190162, "grad_norm": 0.32434435763904007, "learning_rate": 1.0146341463414634e-05, "loss": 0.7047, "num_tokens": 81219491.0, "step": 105 }, { "epoch": 0.03883850874782449, "grad_norm": 0.36957078540028326, "learning_rate": 1.024390243902439e-05, "loss": 0.7404, "num_tokens": 82022179.0, "step": 106 }, { "epoch": 0.03920490977374737, "grad_norm": 0.34405254733750407, "learning_rate": 1.0341463414634147e-05, "loss": 0.7142, "num_tokens": 82731656.0, "step": 107 }, { "epoch": 0.03957131079967024, "grad_norm": 0.33951292658625826, "learning_rate": 1.0439024390243903e-05, "loss": 0.7308, "num_tokens": 83498466.0, "step": 108 }, { "epoch": 0.03993771182559311, "grad_norm": 0.35266672872990146, "learning_rate": 1.053658536585366e-05, "loss": 0.7129, "num_tokens": 84259630.0, "step": 109 }, { "epoch": 0.040304112851515986, "grad_norm": 0.30306119423339734, "learning_rate": 1.0634146341463416e-05, "loss": 0.6394, "num_tokens": 85116535.0, "step": 110 }, { "epoch": 0.040670513877438856, "grad_norm": 0.3755948072706643, "learning_rate": 1.0731707317073172e-05, "loss": 0.6958, "num_tokens": 85973360.0, "step": 111 }, { "epoch": 0.041036914903361726, "grad_norm": 0.4103952301963023, "learning_rate": 1.0829268292682928e-05, "loss": 0.7078, "num_tokens": 86738302.0, "step": 112 }, { "epoch": 0.041403315929284604, "grad_norm": 0.47648531810229183, "learning_rate": 1.0926829268292685e-05, "loss": 0.7137, "num_tokens": 87413003.0, "step": 113 }, { "epoch": 0.041769716955207474, "grad_norm": 0.49514880907124914, "learning_rate": 1.1024390243902441e-05, "loss": 0.7219, "num_tokens": 88144632.0, "step": 114 }, { "epoch": 0.042136117981130344, "grad_norm": 0.37795907585792743, "learning_rate": 1.1121951219512197e-05, "loss": 0.6642, "num_tokens": 88904121.0, "step": 115 }, { "epoch": 0.04250251900705322, "grad_norm": 0.3973858911909636, "learning_rate": 1.1219512195121953e-05, "loss": 0.7183, "num_tokens": 89735347.0, "step": 116 }, { "epoch": 0.04286892003297609, "grad_norm": 0.367594672379486, "learning_rate": 1.1317073170731706e-05, "loss": 0.723, "num_tokens": 90510952.0, "step": 117 }, { "epoch": 0.04323532105889896, "grad_norm": 0.32884003332950335, "learning_rate": 1.1414634146341463e-05, "loss": 0.6921, "num_tokens": 91403116.0, "step": 118 }, { "epoch": 0.04360172208482184, "grad_norm": 0.3206586782731594, "learning_rate": 1.1512195121951219e-05, "loss": 0.6811, "num_tokens": 92071175.0, "step": 119 }, { "epoch": 0.04396812311074471, "grad_norm": 0.35267492315146615, "learning_rate": 1.1609756097560975e-05, "loss": 0.6941, "num_tokens": 92845820.0, "step": 120 }, { "epoch": 0.04433452413666758, "grad_norm": 0.34517764981189036, "learning_rate": 1.1707317073170731e-05, "loss": 0.6903, "num_tokens": 93686815.0, "step": 121 }, { "epoch": 0.04470092516259046, "grad_norm": 0.33887418191896207, "learning_rate": 1.1804878048780488e-05, "loss": 0.7014, "num_tokens": 94462478.0, "step": 122 }, { "epoch": 0.04506732618851333, "grad_norm": 0.3942856768234099, "learning_rate": 1.1902439024390244e-05, "loss": 0.6816, "num_tokens": 95242067.0, "step": 123 }, { "epoch": 0.0454337272144362, "grad_norm": 0.3386928420192908, "learning_rate": 1.2e-05, "loss": 0.6779, "num_tokens": 95981576.0, "step": 124 }, { "epoch": 0.045800128240359075, "grad_norm": 0.38826557489295704, "learning_rate": 1.2097560975609757e-05, "loss": 0.6734, "num_tokens": 96726675.0, "step": 125 }, { "epoch": 0.046166529266281946, "grad_norm": 0.4108115303671123, "learning_rate": 1.2195121951219513e-05, "loss": 0.6847, "num_tokens": 97473664.0, "step": 126 }, { "epoch": 0.046532930292204816, "grad_norm": 0.3215128148491068, "learning_rate": 1.2292682926829269e-05, "loss": 0.6679, "num_tokens": 98216332.0, "step": 127 }, { "epoch": 0.04689933131812769, "grad_norm": 0.3522756386772183, "learning_rate": 1.2390243902439025e-05, "loss": 0.7201, "num_tokens": 98938361.0, "step": 128 }, { "epoch": 0.047265732344050564, "grad_norm": 0.3159965173194752, "learning_rate": 1.2487804878048782e-05, "loss": 0.6853, "num_tokens": 99790987.0, "step": 129 }, { "epoch": 0.047632133369973434, "grad_norm": 0.35073772366568856, "learning_rate": 1.2585365853658538e-05, "loss": 0.6923, "num_tokens": 100497783.0, "step": 130 }, { "epoch": 0.04799853439589631, "grad_norm": 0.3568958569503603, "learning_rate": 1.2682926829268294e-05, "loss": 0.6547, "num_tokens": 101338493.0, "step": 131 }, { "epoch": 0.04836493542181918, "grad_norm": 0.34365571918550714, "learning_rate": 1.278048780487805e-05, "loss": 0.6901, "num_tokens": 102143278.0, "step": 132 }, { "epoch": 0.04873133644774205, "grad_norm": 0.3971804452679672, "learning_rate": 1.2878048780487807e-05, "loss": 0.669, "num_tokens": 102970407.0, "step": 133 }, { "epoch": 0.04909773747366493, "grad_norm": 0.32235012970046684, "learning_rate": 1.2975609756097563e-05, "loss": 0.6754, "num_tokens": 103673929.0, "step": 134 }, { "epoch": 0.0494641384995878, "grad_norm": 0.41724433669387734, "learning_rate": 1.307317073170732e-05, "loss": 0.7166, "num_tokens": 104418413.0, "step": 135 }, { "epoch": 0.04983053952551067, "grad_norm": 0.38201008359407157, "learning_rate": 1.3170731707317076e-05, "loss": 0.7083, "num_tokens": 105190502.0, "step": 136 }, { "epoch": 0.05019694055143355, "grad_norm": 0.4098375229505802, "learning_rate": 1.326829268292683e-05, "loss": 0.6794, "num_tokens": 105934624.0, "step": 137 }, { "epoch": 0.05056334157735642, "grad_norm": 0.35041537331753886, "learning_rate": 1.3365853658536587e-05, "loss": 0.6818, "num_tokens": 106740820.0, "step": 138 }, { "epoch": 0.05092974260327929, "grad_norm": 0.3655590977645496, "learning_rate": 1.3463414634146343e-05, "loss": 0.6937, "num_tokens": 107314383.0, "step": 139 }, { "epoch": 0.051296143629202165, "grad_norm": 0.38068531302208114, "learning_rate": 1.3560975609756099e-05, "loss": 0.6637, "num_tokens": 108083433.0, "step": 140 }, { "epoch": 0.051662544655125035, "grad_norm": 0.48177703410354106, "learning_rate": 1.3658536585365855e-05, "loss": 0.6658, "num_tokens": 108940107.0, "step": 141 }, { "epoch": 0.052028945681047906, "grad_norm": 0.4823295402707105, "learning_rate": 1.375609756097561e-05, "loss": 0.6815, "num_tokens": 109619213.0, "step": 142 }, { "epoch": 0.05239534670697078, "grad_norm": 0.3711869071093224, "learning_rate": 1.3853658536585366e-05, "loss": 0.7084, "num_tokens": 110400127.0, "step": 143 }, { "epoch": 0.05276174773289365, "grad_norm": 0.4060478329064451, "learning_rate": 1.3951219512195122e-05, "loss": 0.6758, "num_tokens": 111133262.0, "step": 144 }, { "epoch": 0.053128148758816524, "grad_norm": 0.39915825087596274, "learning_rate": 1.4048780487804879e-05, "loss": 0.6773, "num_tokens": 111920069.0, "step": 145 }, { "epoch": 0.053494549784739394, "grad_norm": 0.37794759798266775, "learning_rate": 1.4146341463414635e-05, "loss": 0.6846, "num_tokens": 112746368.0, "step": 146 }, { "epoch": 0.05386095081066227, "grad_norm": 0.3930607801142797, "learning_rate": 1.4243902439024391e-05, "loss": 0.6919, "num_tokens": 113591180.0, "step": 147 }, { "epoch": 0.05422735183658514, "grad_norm": 0.3312644368017718, "learning_rate": 1.4341463414634148e-05, "loss": 0.703, "num_tokens": 114368445.0, "step": 148 }, { "epoch": 0.05459375286250801, "grad_norm": 0.34706286613909454, "learning_rate": 1.4439024390243904e-05, "loss": 0.6257, "num_tokens": 115154095.0, "step": 149 }, { "epoch": 0.05496015388843089, "grad_norm": 0.36819758880594644, "learning_rate": 1.4536585365853658e-05, "loss": 0.6888, "num_tokens": 115878947.0, "step": 150 }, { "epoch": 0.05532655491435376, "grad_norm": 0.351517017243187, "learning_rate": 1.4634146341463415e-05, "loss": 0.6787, "num_tokens": 116636020.0, "step": 151 }, { "epoch": 0.05569295594027663, "grad_norm": 0.2930926353328973, "learning_rate": 1.4731707317073171e-05, "loss": 0.6315, "num_tokens": 117365442.0, "step": 152 }, { "epoch": 0.05605935696619951, "grad_norm": 0.3216844894461187, "learning_rate": 1.4829268292682927e-05, "loss": 0.6643, "num_tokens": 118248385.0, "step": 153 }, { "epoch": 0.05642575799212238, "grad_norm": 0.4347994171734394, "learning_rate": 1.4926829268292684e-05, "loss": 0.6669, "num_tokens": 119085025.0, "step": 154 }, { "epoch": 0.05679215901804525, "grad_norm": 0.32777749375405674, "learning_rate": 1.502439024390244e-05, "loss": 0.662, "num_tokens": 119904343.0, "step": 155 }, { "epoch": 0.057158560043968125, "grad_norm": 0.48841176664196667, "learning_rate": 1.5121951219512196e-05, "loss": 0.669, "num_tokens": 120628106.0, "step": 156 }, { "epoch": 0.057524961069890995, "grad_norm": 0.43347208801673937, "learning_rate": 1.5219512195121952e-05, "loss": 0.6622, "num_tokens": 121353747.0, "step": 157 }, { "epoch": 0.057891362095813866, "grad_norm": 0.34942437754808187, "learning_rate": 1.531707317073171e-05, "loss": 0.6692, "num_tokens": 122259268.0, "step": 158 }, { "epoch": 0.05825776312173674, "grad_norm": 0.41659518435289833, "learning_rate": 1.5414634146341465e-05, "loss": 0.6635, "num_tokens": 122931335.0, "step": 159 }, { "epoch": 0.05862416414765961, "grad_norm": 0.40009616796504904, "learning_rate": 1.551219512195122e-05, "loss": 0.6726, "num_tokens": 123593685.0, "step": 160 }, { "epoch": 0.058990565173582483, "grad_norm": 0.4424077820939399, "learning_rate": 1.5609756097560978e-05, "loss": 0.6694, "num_tokens": 124424669.0, "step": 161 }, { "epoch": 0.05935696619950536, "grad_norm": 0.34056641113823743, "learning_rate": 1.5707317073170732e-05, "loss": 0.6548, "num_tokens": 125225727.0, "step": 162 }, { "epoch": 0.05972336722542823, "grad_norm": 0.31253381777175865, "learning_rate": 1.580487804878049e-05, "loss": 0.6558, "num_tokens": 125911593.0, "step": 163 }, { "epoch": 0.0600897682513511, "grad_norm": 0.5226498524842904, "learning_rate": 1.5902439024390245e-05, "loss": 0.6726, "num_tokens": 126623079.0, "step": 164 }, { "epoch": 0.06045616927727398, "grad_norm": 0.44901325755647065, "learning_rate": 1.6000000000000003e-05, "loss": 0.7156, "num_tokens": 127376522.0, "step": 165 }, { "epoch": 0.06082257030319685, "grad_norm": 0.3062950584307715, "learning_rate": 1.6097560975609757e-05, "loss": 0.662, "num_tokens": 128073320.0, "step": 166 }, { "epoch": 0.06118897132911972, "grad_norm": 0.40862352531630064, "learning_rate": 1.6195121951219515e-05, "loss": 0.6799, "num_tokens": 128816773.0, "step": 167 }, { "epoch": 0.0615553723550426, "grad_norm": 0.42007166799675305, "learning_rate": 1.629268292682927e-05, "loss": 0.6665, "num_tokens": 129534954.0, "step": 168 }, { "epoch": 0.06192177338096547, "grad_norm": 0.33524728082606725, "learning_rate": 1.6390243902439024e-05, "loss": 0.6453, "num_tokens": 130310533.0, "step": 169 }, { "epoch": 0.06228817440688834, "grad_norm": 0.35151945633918646, "learning_rate": 1.6487804878048782e-05, "loss": 0.6473, "num_tokens": 131075317.0, "step": 170 }, { "epoch": 0.06265457543281121, "grad_norm": 0.3232338528960769, "learning_rate": 1.6585365853658537e-05, "loss": 0.6795, "num_tokens": 131791551.0, "step": 171 }, { "epoch": 0.06302097645873408, "grad_norm": 0.32563639974958686, "learning_rate": 1.6682926829268295e-05, "loss": 0.6665, "num_tokens": 132466310.0, "step": 172 }, { "epoch": 0.06338737748465696, "grad_norm": 0.4117276907467294, "learning_rate": 1.678048780487805e-05, "loss": 0.6584, "num_tokens": 133239725.0, "step": 173 }, { "epoch": 0.06375377851057983, "grad_norm": 0.3764217855776454, "learning_rate": 1.6878048780487804e-05, "loss": 0.6769, "num_tokens": 133967555.0, "step": 174 }, { "epoch": 0.0641201795365027, "grad_norm": 0.39662605387097843, "learning_rate": 1.6975609756097562e-05, "loss": 0.6891, "num_tokens": 134776063.0, "step": 175 }, { "epoch": 0.06448658056242558, "grad_norm": 0.37610160118161995, "learning_rate": 1.7073170731707317e-05, "loss": 0.6651, "num_tokens": 135566432.0, "step": 176 }, { "epoch": 0.06485298158834844, "grad_norm": 0.36734806079139903, "learning_rate": 1.7170731707317075e-05, "loss": 0.675, "num_tokens": 136241785.0, "step": 177 }, { "epoch": 0.06521938261427132, "grad_norm": 0.43384724219812915, "learning_rate": 1.726829268292683e-05, "loss": 0.6661, "num_tokens": 136942842.0, "step": 178 }, { "epoch": 0.0655857836401942, "grad_norm": 0.38008216191904304, "learning_rate": 1.7365853658536587e-05, "loss": 0.6467, "num_tokens": 137721345.0, "step": 179 }, { "epoch": 0.06595218466611706, "grad_norm": 0.38118546973461137, "learning_rate": 1.7463414634146342e-05, "loss": 0.6587, "num_tokens": 138685788.0, "step": 180 }, { "epoch": 0.06631858569203994, "grad_norm": 0.5066146620498043, "learning_rate": 1.75609756097561e-05, "loss": 0.6476, "num_tokens": 139510021.0, "step": 181 }, { "epoch": 0.06668498671796282, "grad_norm": 0.3735368560842598, "learning_rate": 1.7658536585365854e-05, "loss": 0.7097, "num_tokens": 140253533.0, "step": 182 }, { "epoch": 0.06705138774388568, "grad_norm": 0.35970042201477975, "learning_rate": 1.7756097560975612e-05, "loss": 0.6895, "num_tokens": 140970219.0, "step": 183 }, { "epoch": 0.06741778876980856, "grad_norm": 0.40115055155368684, "learning_rate": 1.7853658536585367e-05, "loss": 0.6327, "num_tokens": 141793665.0, "step": 184 }, { "epoch": 0.06778418979573143, "grad_norm": 0.3078762091183435, "learning_rate": 1.7951219512195125e-05, "loss": 0.6561, "num_tokens": 142575885.0, "step": 185 }, { "epoch": 0.0681505908216543, "grad_norm": 0.42698848909227893, "learning_rate": 1.804878048780488e-05, "loss": 0.6839, "num_tokens": 143191161.0, "step": 186 }, { "epoch": 0.06851699184757717, "grad_norm": 0.36109888672090373, "learning_rate": 1.8146341463414637e-05, "loss": 0.6801, "num_tokens": 144070081.0, "step": 187 }, { "epoch": 0.06888339287350005, "grad_norm": 0.33324895355660744, "learning_rate": 1.8243902439024392e-05, "loss": 0.6753, "num_tokens": 144862953.0, "step": 188 }, { "epoch": 0.06924979389942292, "grad_norm": 0.4293141577717878, "learning_rate": 1.834146341463415e-05, "loss": 0.6303, "num_tokens": 145600038.0, "step": 189 }, { "epoch": 0.06961619492534579, "grad_norm": 0.3921397938591637, "learning_rate": 1.8439024390243905e-05, "loss": 0.6834, "num_tokens": 146425398.0, "step": 190 }, { "epoch": 0.06998259595126867, "grad_norm": 0.33210440822960663, "learning_rate": 1.8536585365853663e-05, "loss": 0.6733, "num_tokens": 147156031.0, "step": 191 }, { "epoch": 0.07034899697719153, "grad_norm": 0.4693581270478018, "learning_rate": 1.8634146341463417e-05, "loss": 0.6619, "num_tokens": 147895245.0, "step": 192 }, { "epoch": 0.07071539800311441, "grad_norm": 0.4600535450815989, "learning_rate": 1.8731707317073172e-05, "loss": 0.6349, "num_tokens": 148801683.0, "step": 193 }, { "epoch": 0.07108179902903729, "grad_norm": 0.3493529189044173, "learning_rate": 1.8829268292682926e-05, "loss": 0.6398, "num_tokens": 149626579.0, "step": 194 }, { "epoch": 0.07144820005496015, "grad_norm": 0.42483320121462215, "learning_rate": 1.8926829268292684e-05, "loss": 0.6716, "num_tokens": 150377623.0, "step": 195 }, { "epoch": 0.07181460108088303, "grad_norm": 0.36327858379770483, "learning_rate": 1.902439024390244e-05, "loss": 0.6643, "num_tokens": 151161981.0, "step": 196 }, { "epoch": 0.0721810021068059, "grad_norm": 0.35653981202055907, "learning_rate": 1.9121951219512197e-05, "loss": 0.6722, "num_tokens": 152043446.0, "step": 197 }, { "epoch": 0.07254740313272877, "grad_norm": 0.3998522566334058, "learning_rate": 1.921951219512195e-05, "loss": 0.6725, "num_tokens": 152794598.0, "step": 198 }, { "epoch": 0.07291380415865165, "grad_norm": 0.5145063713801831, "learning_rate": 1.931707317073171e-05, "loss": 0.6846, "num_tokens": 153647436.0, "step": 199 }, { "epoch": 0.07328020518457452, "grad_norm": 0.40435780091314877, "learning_rate": 1.9414634146341464e-05, "loss": 0.6392, "num_tokens": 154472923.0, "step": 200 }, { "epoch": 0.07364660621049739, "grad_norm": 0.4495820453520046, "learning_rate": 1.9512195121951222e-05, "loss": 0.6528, "num_tokens": 155162432.0, "step": 201 }, { "epoch": 0.07401300723642026, "grad_norm": 0.47394765762675595, "learning_rate": 1.9609756097560977e-05, "loss": 0.6729, "num_tokens": 155934301.0, "step": 202 }, { "epoch": 0.07437940826234314, "grad_norm": 0.4839437869650149, "learning_rate": 1.9707317073170734e-05, "loss": 0.6777, "num_tokens": 156822889.0, "step": 203 }, { "epoch": 0.074745809288266, "grad_norm": 0.3733128718765032, "learning_rate": 1.980487804878049e-05, "loss": 0.6474, "num_tokens": 157661100.0, "step": 204 }, { "epoch": 0.07511221031418888, "grad_norm": 0.4692062396978813, "learning_rate": 1.9902439024390247e-05, "loss": 0.7294, "num_tokens": 158272704.0, "step": 205 }, { "epoch": 0.07547861134011176, "grad_norm": 0.48893152374976157, "learning_rate": 2e-05, "loss": 0.6876, "num_tokens": 158995495.0, "step": 206 }, { "epoch": 0.07584501236603462, "grad_norm": 0.31279984952968154, "learning_rate": 2.0097560975609756e-05, "loss": 0.685, "num_tokens": 159614138.0, "step": 207 }, { "epoch": 0.0762114133919575, "grad_norm": 0.4943051177824034, "learning_rate": 2.0195121951219514e-05, "loss": 0.6933, "num_tokens": 160361620.0, "step": 208 }, { "epoch": 0.07657781441788036, "grad_norm": 0.38874042411389753, "learning_rate": 2.029268292682927e-05, "loss": 0.6698, "num_tokens": 161076912.0, "step": 209 }, { "epoch": 0.07694421544380324, "grad_norm": 0.5391241517974322, "learning_rate": 2.0390243902439027e-05, "loss": 0.6608, "num_tokens": 161932162.0, "step": 210 }, { "epoch": 0.07731061646972612, "grad_norm": 0.482502173103103, "learning_rate": 2.048780487804878e-05, "loss": 0.7003, "num_tokens": 162629933.0, "step": 211 }, { "epoch": 0.07767701749564898, "grad_norm": 0.4998591259061329, "learning_rate": 2.058536585365854e-05, "loss": 0.6838, "num_tokens": 163350454.0, "step": 212 }, { "epoch": 0.07804341852157186, "grad_norm": 0.36009818907293156, "learning_rate": 2.0682926829268294e-05, "loss": 0.6567, "num_tokens": 164167298.0, "step": 213 }, { "epoch": 0.07840981954749474, "grad_norm": 0.4531697833209868, "learning_rate": 2.0780487804878052e-05, "loss": 0.6469, "num_tokens": 164980619.0, "step": 214 }, { "epoch": 0.0787762205734176, "grad_norm": 0.4748846843020645, "learning_rate": 2.0878048780487806e-05, "loss": 0.6742, "num_tokens": 165869287.0, "step": 215 }, { "epoch": 0.07914262159934048, "grad_norm": 0.3596116184414484, "learning_rate": 2.0975609756097564e-05, "loss": 0.6522, "num_tokens": 166594070.0, "step": 216 }, { "epoch": 0.07950902262526335, "grad_norm": 0.3867290994859667, "learning_rate": 2.107317073170732e-05, "loss": 0.6538, "num_tokens": 167360103.0, "step": 217 }, { "epoch": 0.07987542365118622, "grad_norm": 0.49455819314413463, "learning_rate": 2.1170731707317077e-05, "loss": 0.6483, "num_tokens": 168017998.0, "step": 218 }, { "epoch": 0.0802418246771091, "grad_norm": 0.3756253504794491, "learning_rate": 2.126829268292683e-05, "loss": 0.6804, "num_tokens": 168729405.0, "step": 219 }, { "epoch": 0.08060822570303197, "grad_norm": 0.3190696940197664, "learning_rate": 2.136585365853659e-05, "loss": 0.6883, "num_tokens": 169504376.0, "step": 220 }, { "epoch": 0.08097462672895483, "grad_norm": 0.44622627723189634, "learning_rate": 2.1463414634146344e-05, "loss": 0.6904, "num_tokens": 170149802.0, "step": 221 }, { "epoch": 0.08134102775487771, "grad_norm": 0.3303999082590185, "learning_rate": 2.1560975609756102e-05, "loss": 0.6937, "num_tokens": 170869067.0, "step": 222 }, { "epoch": 0.08170742878080059, "grad_norm": 0.4712808361887098, "learning_rate": 2.1658536585365857e-05, "loss": 0.6632, "num_tokens": 171532866.0, "step": 223 }, { "epoch": 0.08207382980672345, "grad_norm": 0.41070574629391715, "learning_rate": 2.1756097560975615e-05, "loss": 0.6783, "num_tokens": 172234693.0, "step": 224 }, { "epoch": 0.08244023083264633, "grad_norm": 0.47083623065583285, "learning_rate": 2.185365853658537e-05, "loss": 0.6475, "num_tokens": 172991779.0, "step": 225 }, { "epoch": 0.08280663185856921, "grad_norm": 0.4145853761092914, "learning_rate": 2.1951219512195124e-05, "loss": 0.6301, "num_tokens": 173788518.0, "step": 226 }, { "epoch": 0.08317303288449207, "grad_norm": 0.3852248925380027, "learning_rate": 2.2048780487804882e-05, "loss": 0.6405, "num_tokens": 174583907.0, "step": 227 }, { "epoch": 0.08353943391041495, "grad_norm": 0.31303365930867166, "learning_rate": 2.2146341463414636e-05, "loss": 0.6748, "num_tokens": 175465541.0, "step": 228 }, { "epoch": 0.08390583493633783, "grad_norm": 0.3777130601397071, "learning_rate": 2.2243902439024394e-05, "loss": 0.6557, "num_tokens": 176264844.0, "step": 229 }, { "epoch": 0.08427223596226069, "grad_norm": 0.37552794765532366, "learning_rate": 2.234146341463415e-05, "loss": 0.6348, "num_tokens": 176965260.0, "step": 230 }, { "epoch": 0.08463863698818357, "grad_norm": 0.4572810821220058, "learning_rate": 2.2439024390243907e-05, "loss": 0.6905, "num_tokens": 177681207.0, "step": 231 }, { "epoch": 0.08500503801410644, "grad_norm": 0.3750539764855033, "learning_rate": 2.2536585365853658e-05, "loss": 0.649, "num_tokens": 178463093.0, "step": 232 }, { "epoch": 0.0853714390400293, "grad_norm": 0.4931001099467944, "learning_rate": 2.2634146341463413e-05, "loss": 0.6548, "num_tokens": 179138117.0, "step": 233 }, { "epoch": 0.08573784006595218, "grad_norm": 0.42327111567406644, "learning_rate": 2.273170731707317e-05, "loss": 0.6824, "num_tokens": 179826925.0, "step": 234 }, { "epoch": 0.08610424109187506, "grad_norm": 0.43518710075676637, "learning_rate": 2.2829268292682925e-05, "loss": 0.6748, "num_tokens": 180584200.0, "step": 235 }, { "epoch": 0.08647064211779792, "grad_norm": 0.38436619627431784, "learning_rate": 2.2926829268292683e-05, "loss": 0.6629, "num_tokens": 181310530.0, "step": 236 }, { "epoch": 0.0868370431437208, "grad_norm": 0.3351271724696366, "learning_rate": 2.3024390243902438e-05, "loss": 0.6802, "num_tokens": 181967003.0, "step": 237 }, { "epoch": 0.08720344416964368, "grad_norm": 0.3904142714115971, "learning_rate": 2.3121951219512196e-05, "loss": 0.6372, "num_tokens": 182794724.0, "step": 238 }, { "epoch": 0.08756984519556654, "grad_norm": 0.40138478631464064, "learning_rate": 2.321951219512195e-05, "loss": 0.6869, "num_tokens": 183606960.0, "step": 239 }, { "epoch": 0.08793624622148942, "grad_norm": 0.36353892955821476, "learning_rate": 2.331707317073171e-05, "loss": 0.6881, "num_tokens": 184432245.0, "step": 240 }, { "epoch": 0.0883026472474123, "grad_norm": 0.39594094263734747, "learning_rate": 2.3414634146341463e-05, "loss": 0.6507, "num_tokens": 185304229.0, "step": 241 }, { "epoch": 0.08866904827333516, "grad_norm": 0.3786194096837687, "learning_rate": 2.351219512195122e-05, "loss": 0.6459, "num_tokens": 186101721.0, "step": 242 }, { "epoch": 0.08903544929925804, "grad_norm": 0.34460551622355734, "learning_rate": 2.3609756097560975e-05, "loss": 0.6465, "num_tokens": 186805463.0, "step": 243 }, { "epoch": 0.08940185032518091, "grad_norm": 0.36950793743628574, "learning_rate": 2.3707317073170733e-05, "loss": 0.6793, "num_tokens": 187546338.0, "step": 244 }, { "epoch": 0.08976825135110378, "grad_norm": 0.40494960314843154, "learning_rate": 2.3804878048780488e-05, "loss": 0.6294, "num_tokens": 188328413.0, "step": 245 }, { "epoch": 0.09013465237702666, "grad_norm": 0.3707582103376089, "learning_rate": 2.3902439024390246e-05, "loss": 0.6498, "num_tokens": 189124359.0, "step": 246 }, { "epoch": 0.09050105340294953, "grad_norm": 0.4120709363310188, "learning_rate": 2.4e-05, "loss": 0.6236, "num_tokens": 189959622.0, "step": 247 }, { "epoch": 0.0908674544288724, "grad_norm": 0.4078630666700355, "learning_rate": 2.409756097560976e-05, "loss": 0.6488, "num_tokens": 190806182.0, "step": 248 }, { "epoch": 0.09123385545479527, "grad_norm": 0.3726207264187459, "learning_rate": 2.4195121951219513e-05, "loss": 0.6491, "num_tokens": 191522919.0, "step": 249 }, { "epoch": 0.09160025648071815, "grad_norm": 0.45123505459909163, "learning_rate": 2.429268292682927e-05, "loss": 0.6663, "num_tokens": 192449442.0, "step": 250 }, { "epoch": 0.09196665750664101, "grad_norm": 0.42658652705583816, "learning_rate": 2.4390243902439026e-05, "loss": 0.6986, "num_tokens": 193192314.0, "step": 251 }, { "epoch": 0.09233305853256389, "grad_norm": 0.3595556549669573, "learning_rate": 2.4487804878048784e-05, "loss": 0.646, "num_tokens": 194007983.0, "step": 252 }, { "epoch": 0.09269945955848677, "grad_norm": 0.4318425152698519, "learning_rate": 2.4585365853658538e-05, "loss": 0.6504, "num_tokens": 194700795.0, "step": 253 }, { "epoch": 0.09306586058440963, "grad_norm": 0.3366522409443689, "learning_rate": 2.4682926829268293e-05, "loss": 0.6571, "num_tokens": 195335740.0, "step": 254 }, { "epoch": 0.09343226161033251, "grad_norm": 0.5298486870061851, "learning_rate": 2.478048780487805e-05, "loss": 0.6496, "num_tokens": 196034375.0, "step": 255 }, { "epoch": 0.09379866263625539, "grad_norm": 0.4132759454833951, "learning_rate": 2.4878048780487805e-05, "loss": 0.6297, "num_tokens": 196917101.0, "step": 256 }, { "epoch": 0.09416506366217825, "grad_norm": 0.5676208051322073, "learning_rate": 2.4975609756097563e-05, "loss": 0.6599, "num_tokens": 197713884.0, "step": 257 }, { "epoch": 0.09453146468810113, "grad_norm": 0.524420660705074, "learning_rate": 2.5073170731707318e-05, "loss": 0.656, "num_tokens": 198439016.0, "step": 258 }, { "epoch": 0.094897865714024, "grad_norm": 0.47915961180898636, "learning_rate": 2.5170731707317076e-05, "loss": 0.6506, "num_tokens": 199287988.0, "step": 259 }, { "epoch": 0.09526426673994687, "grad_norm": 0.49501212910549724, "learning_rate": 2.526829268292683e-05, "loss": 0.6178, "num_tokens": 200132688.0, "step": 260 }, { "epoch": 0.09563066776586975, "grad_norm": 0.4561341414254862, "learning_rate": 2.536585365853659e-05, "loss": 0.6128, "num_tokens": 200907549.0, "step": 261 }, { "epoch": 0.09599706879179262, "grad_norm": 0.31021367731575966, "learning_rate": 2.5463414634146343e-05, "loss": 0.6477, "num_tokens": 201747990.0, "step": 262 }, { "epoch": 0.09636346981771549, "grad_norm": 0.5184169081683457, "learning_rate": 2.55609756097561e-05, "loss": 0.6394, "num_tokens": 202470503.0, "step": 263 }, { "epoch": 0.09672987084363836, "grad_norm": 0.3521545636178625, "learning_rate": 2.5658536585365856e-05, "loss": 0.6532, "num_tokens": 203166842.0, "step": 264 }, { "epoch": 0.09709627186956124, "grad_norm": 0.44445947778893174, "learning_rate": 2.5756097560975614e-05, "loss": 0.6343, "num_tokens": 203992040.0, "step": 265 }, { "epoch": 0.0974626728954841, "grad_norm": 0.4537434291828058, "learning_rate": 2.5853658536585368e-05, "loss": 0.6279, "num_tokens": 204840108.0, "step": 266 }, { "epoch": 0.09782907392140698, "grad_norm": 0.5087543349195777, "learning_rate": 2.5951219512195126e-05, "loss": 0.6875, "num_tokens": 205713746.0, "step": 267 }, { "epoch": 0.09819547494732986, "grad_norm": 0.4382723751760939, "learning_rate": 2.604878048780488e-05, "loss": 0.6531, "num_tokens": 206526298.0, "step": 268 }, { "epoch": 0.09856187597325272, "grad_norm": 0.4293609076945157, "learning_rate": 2.614634146341464e-05, "loss": 0.6655, "num_tokens": 207182230.0, "step": 269 }, { "epoch": 0.0989282769991756, "grad_norm": 0.45091472426556223, "learning_rate": 2.6243902439024393e-05, "loss": 0.6435, "num_tokens": 208081987.0, "step": 270 }, { "epoch": 0.09929467802509848, "grad_norm": 0.4055696375628542, "learning_rate": 2.634146341463415e-05, "loss": 0.6787, "num_tokens": 208878486.0, "step": 271 }, { "epoch": 0.09966107905102134, "grad_norm": 0.4915165849506442, "learning_rate": 2.6439024390243906e-05, "loss": 0.6209, "num_tokens": 209572170.0, "step": 272 }, { "epoch": 0.10002748007694422, "grad_norm": 0.41322194033326626, "learning_rate": 2.653658536585366e-05, "loss": 0.6464, "num_tokens": 210513400.0, "step": 273 }, { "epoch": 0.1003938811028671, "grad_norm": 0.41470507058589734, "learning_rate": 2.663414634146342e-05, "loss": 0.6603, "num_tokens": 211237645.0, "step": 274 }, { "epoch": 0.10076028212878996, "grad_norm": 0.4371602716074357, "learning_rate": 2.6731707317073173e-05, "loss": 0.6309, "num_tokens": 211968581.0, "step": 275 }, { "epoch": 0.10112668315471283, "grad_norm": 0.37461230603106493, "learning_rate": 2.682926829268293e-05, "loss": 0.6517, "num_tokens": 212760430.0, "step": 276 }, { "epoch": 0.10149308418063571, "grad_norm": 0.41627842518154873, "learning_rate": 2.6926829268292686e-05, "loss": 0.6799, "num_tokens": 213353564.0, "step": 277 }, { "epoch": 0.10185948520655858, "grad_norm": 0.3997993047339354, "learning_rate": 2.7024390243902444e-05, "loss": 0.6415, "num_tokens": 214074010.0, "step": 278 }, { "epoch": 0.10222588623248145, "grad_norm": 0.35633322293279635, "learning_rate": 2.7121951219512198e-05, "loss": 0.6389, "num_tokens": 214903541.0, "step": 279 }, { "epoch": 0.10259228725840433, "grad_norm": 0.4169947934704077, "learning_rate": 2.7219512195121956e-05, "loss": 0.6622, "num_tokens": 215715112.0, "step": 280 }, { "epoch": 0.1029586882843272, "grad_norm": 0.39768052915737195, "learning_rate": 2.731707317073171e-05, "loss": 0.655, "num_tokens": 216547092.0, "step": 281 }, { "epoch": 0.10332508931025007, "grad_norm": 0.31807670729762666, "learning_rate": 2.741463414634147e-05, "loss": 0.6399, "num_tokens": 217324283.0, "step": 282 }, { "epoch": 0.10369149033617295, "grad_norm": 0.323984165171348, "learning_rate": 2.751219512195122e-05, "loss": 0.6502, "num_tokens": 218066643.0, "step": 283 }, { "epoch": 0.10405789136209581, "grad_norm": 0.3321971649753582, "learning_rate": 2.7609756097560974e-05, "loss": 0.6391, "num_tokens": 218860525.0, "step": 284 }, { "epoch": 0.10442429238801869, "grad_norm": 0.39166982513048393, "learning_rate": 2.7707317073170732e-05, "loss": 0.6562, "num_tokens": 219442555.0, "step": 285 }, { "epoch": 0.10479069341394157, "grad_norm": 0.31514944651626126, "learning_rate": 2.7804878048780487e-05, "loss": 0.6339, "num_tokens": 220196376.0, "step": 286 }, { "epoch": 0.10515709443986443, "grad_norm": 0.45232765564595645, "learning_rate": 2.7902439024390245e-05, "loss": 0.6574, "num_tokens": 220766601.0, "step": 287 }, { "epoch": 0.1055234954657873, "grad_norm": 0.39582093636167137, "learning_rate": 2.8e-05, "loss": 0.6474, "num_tokens": 221552259.0, "step": 288 }, { "epoch": 0.10588989649171018, "grad_norm": 0.37320827628855757, "learning_rate": 2.8097560975609758e-05, "loss": 0.6376, "num_tokens": 222287778.0, "step": 289 }, { "epoch": 0.10625629751763305, "grad_norm": 0.3919088747762516, "learning_rate": 2.8195121951219512e-05, "loss": 0.6185, "num_tokens": 223090332.0, "step": 290 }, { "epoch": 0.10662269854355592, "grad_norm": 0.3199500571135142, "learning_rate": 2.829268292682927e-05, "loss": 0.6084, "num_tokens": 223835005.0, "step": 291 }, { "epoch": 0.10698909956947879, "grad_norm": 0.36577293382239123, "learning_rate": 2.8390243902439025e-05, "loss": 0.6767, "num_tokens": 224510245.0, "step": 292 }, { "epoch": 0.10735550059540167, "grad_norm": 0.3873593996990216, "learning_rate": 2.8487804878048783e-05, "loss": 0.6753, "num_tokens": 225189967.0, "step": 293 }, { "epoch": 0.10772190162132454, "grad_norm": 0.40587425654068293, "learning_rate": 2.8585365853658537e-05, "loss": 0.6742, "num_tokens": 225986923.0, "step": 294 }, { "epoch": 0.1080883026472474, "grad_norm": 0.4447380185035672, "learning_rate": 2.8682926829268295e-05, "loss": 0.6255, "num_tokens": 226833770.0, "step": 295 }, { "epoch": 0.10845470367317028, "grad_norm": 0.3862972655921374, "learning_rate": 2.878048780487805e-05, "loss": 0.6263, "num_tokens": 227642273.0, "step": 296 }, { "epoch": 0.10882110469909316, "grad_norm": 0.362441239648575, "learning_rate": 2.8878048780487808e-05, "loss": 0.64, "num_tokens": 228384697.0, "step": 297 }, { "epoch": 0.10918750572501602, "grad_norm": 0.46291158499082646, "learning_rate": 2.8975609756097562e-05, "loss": 0.6363, "num_tokens": 229189921.0, "step": 298 }, { "epoch": 0.1095539067509389, "grad_norm": 0.3861620265679374, "learning_rate": 2.9073170731707317e-05, "loss": 0.6236, "num_tokens": 230043841.0, "step": 299 }, { "epoch": 0.10992030777686178, "grad_norm": 0.4019304432953735, "learning_rate": 2.9170731707317075e-05, "loss": 0.6465, "num_tokens": 230709647.0, "step": 300 }, { "epoch": 0.11028670880278464, "grad_norm": 0.418994414054167, "learning_rate": 2.926829268292683e-05, "loss": 0.6491, "num_tokens": 231468368.0, "step": 301 }, { "epoch": 0.11065310982870752, "grad_norm": 0.3230767956370336, "learning_rate": 2.9365853658536587e-05, "loss": 0.634, "num_tokens": 232207187.0, "step": 302 }, { "epoch": 0.1110195108546304, "grad_norm": 0.33904971061124145, "learning_rate": 2.9463414634146342e-05, "loss": 0.6358, "num_tokens": 233042966.0, "step": 303 }, { "epoch": 0.11138591188055326, "grad_norm": 0.3113187346056272, "learning_rate": 2.95609756097561e-05, "loss": 0.6465, "num_tokens": 233797518.0, "step": 304 }, { "epoch": 0.11175231290647614, "grad_norm": 0.5032370034335554, "learning_rate": 2.9658536585365855e-05, "loss": 0.6602, "num_tokens": 234544707.0, "step": 305 }, { "epoch": 0.11211871393239901, "grad_norm": 0.39009871554283504, "learning_rate": 2.9756097560975613e-05, "loss": 0.6523, "num_tokens": 235350118.0, "step": 306 }, { "epoch": 0.11248511495832188, "grad_norm": 0.509035056195805, "learning_rate": 2.9853658536585367e-05, "loss": 0.6337, "num_tokens": 236139654.0, "step": 307 }, { "epoch": 0.11285151598424475, "grad_norm": 0.41979245095235695, "learning_rate": 2.9951219512195125e-05, "loss": 0.605, "num_tokens": 236945262.0, "step": 308 }, { "epoch": 0.11321791701016763, "grad_norm": 0.3896687683763755, "learning_rate": 3.004878048780488e-05, "loss": 0.6609, "num_tokens": 237698383.0, "step": 309 }, { "epoch": 0.1135843180360905, "grad_norm": 0.44853009454713705, "learning_rate": 3.0146341463414638e-05, "loss": 0.6255, "num_tokens": 238455338.0, "step": 310 }, { "epoch": 0.11395071906201337, "grad_norm": 0.3215253465515402, "learning_rate": 3.0243902439024392e-05, "loss": 0.6239, "num_tokens": 239321227.0, "step": 311 }, { "epoch": 0.11431712008793625, "grad_norm": 0.4236325929861058, "learning_rate": 3.034146341463415e-05, "loss": 0.602, "num_tokens": 240082917.0, "step": 312 }, { "epoch": 0.11468352111385911, "grad_norm": 0.40174314414950335, "learning_rate": 3.0439024390243905e-05, "loss": 0.6529, "num_tokens": 240913737.0, "step": 313 }, { "epoch": 0.11504992213978199, "grad_norm": 0.39593452127362794, "learning_rate": 3.053658536585366e-05, "loss": 0.6416, "num_tokens": 241656131.0, "step": 314 }, { "epoch": 0.11541632316570487, "grad_norm": 0.46868352801003615, "learning_rate": 3.063414634146342e-05, "loss": 0.6557, "num_tokens": 242398278.0, "step": 315 }, { "epoch": 0.11578272419162773, "grad_norm": 0.5106171512194696, "learning_rate": 3.073170731707317e-05, "loss": 0.6646, "num_tokens": 243147400.0, "step": 316 }, { "epoch": 0.11614912521755061, "grad_norm": 0.37495051212142855, "learning_rate": 3.082926829268293e-05, "loss": 0.6577, "num_tokens": 243790116.0, "step": 317 }, { "epoch": 0.11651552624347349, "grad_norm": 0.5015534864600896, "learning_rate": 3.092682926829269e-05, "loss": 0.6182, "num_tokens": 244768618.0, "step": 318 }, { "epoch": 0.11688192726939635, "grad_norm": 0.3101110875405238, "learning_rate": 3.102439024390244e-05, "loss": 0.6381, "num_tokens": 245667909.0, "step": 319 }, { "epoch": 0.11724832829531923, "grad_norm": 0.38029537701907556, "learning_rate": 3.11219512195122e-05, "loss": 0.6075, "num_tokens": 246462381.0, "step": 320 }, { "epoch": 0.1176147293212421, "grad_norm": 0.3772215610153732, "learning_rate": 3.1219512195121955e-05, "loss": 0.7121, "num_tokens": 247157114.0, "step": 321 }, { "epoch": 0.11798113034716497, "grad_norm": 0.42453822323580426, "learning_rate": 3.131707317073171e-05, "loss": 0.6319, "num_tokens": 247874759.0, "step": 322 }, { "epoch": 0.11834753137308784, "grad_norm": 0.5029747115448135, "learning_rate": 3.1414634146341464e-05, "loss": 0.6541, "num_tokens": 248706318.0, "step": 323 }, { "epoch": 0.11871393239901072, "grad_norm": 0.37654481852748145, "learning_rate": 3.151219512195122e-05, "loss": 0.6553, "num_tokens": 249520058.0, "step": 324 }, { "epoch": 0.11908033342493358, "grad_norm": 0.367524787768601, "learning_rate": 3.160975609756098e-05, "loss": 0.6714, "num_tokens": 250285404.0, "step": 325 }, { "epoch": 0.11944673445085646, "grad_norm": 0.4330658002045, "learning_rate": 3.170731707317074e-05, "loss": 0.6514, "num_tokens": 251000166.0, "step": 326 }, { "epoch": 0.11981313547677934, "grad_norm": 0.3468588114650688, "learning_rate": 3.180487804878049e-05, "loss": 0.6674, "num_tokens": 251839615.0, "step": 327 }, { "epoch": 0.1201795365027022, "grad_norm": 0.47090837583380685, "learning_rate": 3.190243902439025e-05, "loss": 0.6409, "num_tokens": 252621544.0, "step": 328 }, { "epoch": 0.12054593752862508, "grad_norm": 0.3726961682934165, "learning_rate": 3.2000000000000005e-05, "loss": 0.6192, "num_tokens": 253407866.0, "step": 329 }, { "epoch": 0.12091233855454796, "grad_norm": 0.4013569909153049, "learning_rate": 3.209756097560976e-05, "loss": 0.6603, "num_tokens": 254091723.0, "step": 330 }, { "epoch": 0.12127873958047082, "grad_norm": 0.35550474258260567, "learning_rate": 3.2195121951219514e-05, "loss": 0.6117, "num_tokens": 254846321.0, "step": 331 }, { "epoch": 0.1216451406063937, "grad_norm": 0.3781230766684281, "learning_rate": 3.229268292682927e-05, "loss": 0.6248, "num_tokens": 255716105.0, "step": 332 }, { "epoch": 0.12201154163231658, "grad_norm": 0.35991173641962104, "learning_rate": 3.239024390243903e-05, "loss": 0.6582, "num_tokens": 256569577.0, "step": 333 }, { "epoch": 0.12237794265823944, "grad_norm": 0.39519634090124356, "learning_rate": 3.248780487804879e-05, "loss": 0.6321, "num_tokens": 257339333.0, "step": 334 }, { "epoch": 0.12274434368416232, "grad_norm": 0.30790973180833364, "learning_rate": 3.258536585365854e-05, "loss": 0.608, "num_tokens": 258282747.0, "step": 335 }, { "epoch": 0.1231107447100852, "grad_norm": 0.420789732894684, "learning_rate": 3.268292682926829e-05, "loss": 0.624, "num_tokens": 259076201.0, "step": 336 }, { "epoch": 0.12347714573600806, "grad_norm": 0.3875738805233684, "learning_rate": 3.278048780487805e-05, "loss": 0.6794, "num_tokens": 259767263.0, "step": 337 }, { "epoch": 0.12384354676193093, "grad_norm": 0.445911756000939, "learning_rate": 3.287804878048781e-05, "loss": 0.6259, "num_tokens": 260645954.0, "step": 338 }, { "epoch": 0.12420994778785381, "grad_norm": 0.4154095982447656, "learning_rate": 3.2975609756097565e-05, "loss": 0.6258, "num_tokens": 261371553.0, "step": 339 }, { "epoch": 0.12457634881377667, "grad_norm": 0.3952294040805401, "learning_rate": 3.3073170731707316e-05, "loss": 0.621, "num_tokens": 262098591.0, "step": 340 }, { "epoch": 0.12494274983969955, "grad_norm": 0.5154319908546662, "learning_rate": 3.3170731707317074e-05, "loss": 0.6177, "num_tokens": 262870772.0, "step": 341 }, { "epoch": 0.12530915086562242, "grad_norm": 0.4387306125803263, "learning_rate": 3.326829268292683e-05, "loss": 0.6854, "num_tokens": 263550034.0, "step": 342 }, { "epoch": 0.1256755518915453, "grad_norm": 0.5210102086067621, "learning_rate": 3.336585365853659e-05, "loss": 0.6501, "num_tokens": 264220917.0, "step": 343 }, { "epoch": 0.12604195291746817, "grad_norm": 0.5334648604371428, "learning_rate": 3.346341463414634e-05, "loss": 0.6812, "num_tokens": 265035412.0, "step": 344 }, { "epoch": 0.12640835394339103, "grad_norm": 0.36145582126966413, "learning_rate": 3.35609756097561e-05, "loss": 0.6121, "num_tokens": 265775744.0, "step": 345 }, { "epoch": 0.12677475496931392, "grad_norm": 0.4599182145214594, "learning_rate": 3.365853658536586e-05, "loss": 0.6471, "num_tokens": 266469473.0, "step": 346 }, { "epoch": 0.1271411559952368, "grad_norm": 0.33236481274698143, "learning_rate": 3.375609756097561e-05, "loss": 0.6474, "num_tokens": 267223873.0, "step": 347 }, { "epoch": 0.12750755702115965, "grad_norm": 0.375020975012096, "learning_rate": 3.3853658536585366e-05, "loss": 0.7035, "num_tokens": 267863679.0, "step": 348 }, { "epoch": 0.12787395804708254, "grad_norm": 0.4025639591695887, "learning_rate": 3.3951219512195124e-05, "loss": 0.6418, "num_tokens": 268665022.0, "step": 349 }, { "epoch": 0.1282403590730054, "grad_norm": 0.3906690375444682, "learning_rate": 3.404878048780488e-05, "loss": 0.6372, "num_tokens": 269376733.0, "step": 350 }, { "epoch": 0.12860676009892827, "grad_norm": 0.45364400933593213, "learning_rate": 3.414634146341463e-05, "loss": 0.6736, "num_tokens": 270101389.0, "step": 351 }, { "epoch": 0.12897316112485116, "grad_norm": 0.3552417943832252, "learning_rate": 3.424390243902439e-05, "loss": 0.623, "num_tokens": 270777357.0, "step": 352 }, { "epoch": 0.12933956215077402, "grad_norm": 0.40038169010502606, "learning_rate": 3.434146341463415e-05, "loss": 0.6325, "num_tokens": 271631597.0, "step": 353 }, { "epoch": 0.1297059631766969, "grad_norm": 0.4189588548298471, "learning_rate": 3.443902439024391e-05, "loss": 0.6356, "num_tokens": 272354160.0, "step": 354 }, { "epoch": 0.13007236420261978, "grad_norm": 0.34035789089880725, "learning_rate": 3.453658536585366e-05, "loss": 0.6349, "num_tokens": 273027502.0, "step": 355 }, { "epoch": 0.13043876522854264, "grad_norm": 0.4577294273275068, "learning_rate": 3.4634146341463416e-05, "loss": 0.6259, "num_tokens": 273751714.0, "step": 356 }, { "epoch": 0.1308051662544655, "grad_norm": 0.3701909447407691, "learning_rate": 3.4731707317073174e-05, "loss": 0.6187, "num_tokens": 274535559.0, "step": 357 }, { "epoch": 0.1311715672803884, "grad_norm": 0.4615860878472057, "learning_rate": 3.482926829268293e-05, "loss": 0.6353, "num_tokens": 275269149.0, "step": 358 }, { "epoch": 0.13153796830631126, "grad_norm": 0.3836746730301346, "learning_rate": 3.4926829268292684e-05, "loss": 0.6344, "num_tokens": 276106629.0, "step": 359 }, { "epoch": 0.13190436933223412, "grad_norm": 0.3514033267174442, "learning_rate": 3.502439024390244e-05, "loss": 0.6147, "num_tokens": 276907840.0, "step": 360 }, { "epoch": 0.132270770358157, "grad_norm": 0.39861633935217317, "learning_rate": 3.51219512195122e-05, "loss": 0.6319, "num_tokens": 277809276.0, "step": 361 }, { "epoch": 0.13263717138407988, "grad_norm": 0.36066386402336625, "learning_rate": 3.521951219512196e-05, "loss": 0.6323, "num_tokens": 278552298.0, "step": 362 }, { "epoch": 0.13300357241000274, "grad_norm": 0.4249202905587367, "learning_rate": 3.531707317073171e-05, "loss": 0.6387, "num_tokens": 279437428.0, "step": 363 }, { "epoch": 0.13336997343592563, "grad_norm": 0.4922822261227883, "learning_rate": 3.541463414634147e-05, "loss": 0.6191, "num_tokens": 280214484.0, "step": 364 }, { "epoch": 0.1337363744618485, "grad_norm": 0.39911393490538166, "learning_rate": 3.5512195121951225e-05, "loss": 0.674, "num_tokens": 280935533.0, "step": 365 }, { "epoch": 0.13410277548777136, "grad_norm": 0.3790889370333945, "learning_rate": 3.5609756097560976e-05, "loss": 0.6683, "num_tokens": 281691629.0, "step": 366 }, { "epoch": 0.13446917651369425, "grad_norm": 0.45446764987341365, "learning_rate": 3.5707317073170734e-05, "loss": 0.6356, "num_tokens": 282420560.0, "step": 367 }, { "epoch": 0.1348355775396171, "grad_norm": 0.29367181827338246, "learning_rate": 3.580487804878049e-05, "loss": 0.6321, "num_tokens": 283207932.0, "step": 368 }, { "epoch": 0.13520197856553998, "grad_norm": 0.4810163225120418, "learning_rate": 3.590243902439025e-05, "loss": 0.6225, "num_tokens": 283930223.0, "step": 369 }, { "epoch": 0.13556837959146287, "grad_norm": 0.35930772831187263, "learning_rate": 3.6e-05, "loss": 0.6198, "num_tokens": 284732769.0, "step": 370 }, { "epoch": 0.13593478061738573, "grad_norm": 0.526730976092211, "learning_rate": 3.609756097560976e-05, "loss": 0.6695, "num_tokens": 285525189.0, "step": 371 }, { "epoch": 0.1363011816433086, "grad_norm": 0.46694894772066664, "learning_rate": 3.619512195121952e-05, "loss": 0.6284, "num_tokens": 286225800.0, "step": 372 }, { "epoch": 0.13666758266923149, "grad_norm": 0.35775943451663594, "learning_rate": 3.6292682926829275e-05, "loss": 0.5906, "num_tokens": 286929313.0, "step": 373 }, { "epoch": 0.13703398369515435, "grad_norm": 0.4693415723393437, "learning_rate": 3.6390243902439026e-05, "loss": 0.6436, "num_tokens": 287906610.0, "step": 374 }, { "epoch": 0.1374003847210772, "grad_norm": 0.35628496325908343, "learning_rate": 3.6487804878048784e-05, "loss": 0.6524, "num_tokens": 288637046.0, "step": 375 }, { "epoch": 0.1377667857470001, "grad_norm": 0.3540738787328444, "learning_rate": 3.658536585365854e-05, "loss": 0.6618, "num_tokens": 289403992.0, "step": 376 }, { "epoch": 0.13813318677292297, "grad_norm": 0.4139052782089132, "learning_rate": 3.66829268292683e-05, "loss": 0.6668, "num_tokens": 290041266.0, "step": 377 }, { "epoch": 0.13849958779884583, "grad_norm": 0.2778675430956112, "learning_rate": 3.678048780487805e-05, "loss": 0.5814, "num_tokens": 290810890.0, "step": 378 }, { "epoch": 0.13886598882476872, "grad_norm": 0.46813148565042245, "learning_rate": 3.687804878048781e-05, "loss": 0.6242, "num_tokens": 291508371.0, "step": 379 }, { "epoch": 0.13923238985069158, "grad_norm": 0.31816990860897276, "learning_rate": 3.697560975609757e-05, "loss": 0.6526, "num_tokens": 292266179.0, "step": 380 }, { "epoch": 0.13959879087661445, "grad_norm": 0.4533898716976028, "learning_rate": 3.7073170731707325e-05, "loss": 0.6899, "num_tokens": 292962943.0, "step": 381 }, { "epoch": 0.13996519190253734, "grad_norm": 0.42695546333277473, "learning_rate": 3.7170731707317076e-05, "loss": 0.6568, "num_tokens": 293761420.0, "step": 382 }, { "epoch": 0.1403315929284602, "grad_norm": 0.3759057645993536, "learning_rate": 3.7268292682926834e-05, "loss": 0.6302, "num_tokens": 294585615.0, "step": 383 }, { "epoch": 0.14069799395438307, "grad_norm": 0.38045046131273136, "learning_rate": 3.736585365853659e-05, "loss": 0.6181, "num_tokens": 295270568.0, "step": 384 }, { "epoch": 0.14106439498030596, "grad_norm": 0.37629572195081074, "learning_rate": 3.7463414634146343e-05, "loss": 0.6351, "num_tokens": 295950382.0, "step": 385 }, { "epoch": 0.14143079600622882, "grad_norm": 0.4575048743165107, "learning_rate": 3.75609756097561e-05, "loss": 0.6345, "num_tokens": 296755834.0, "step": 386 }, { "epoch": 0.14179719703215168, "grad_norm": 0.3184217540753489, "learning_rate": 3.765853658536585e-05, "loss": 0.6111, "num_tokens": 297473489.0, "step": 387 }, { "epoch": 0.14216359805807458, "grad_norm": 0.4044505138387947, "learning_rate": 3.775609756097561e-05, "loss": 0.6258, "num_tokens": 298330064.0, "step": 388 }, { "epoch": 0.14252999908399744, "grad_norm": 0.4517152340662388, "learning_rate": 3.785365853658537e-05, "loss": 0.6665, "num_tokens": 299146491.0, "step": 389 }, { "epoch": 0.1428964001099203, "grad_norm": 0.36666552113814793, "learning_rate": 3.7951219512195126e-05, "loss": 0.6325, "num_tokens": 299907269.0, "step": 390 }, { "epoch": 0.1432628011358432, "grad_norm": 0.46130184121478024, "learning_rate": 3.804878048780488e-05, "loss": 0.6996, "num_tokens": 300677169.0, "step": 391 }, { "epoch": 0.14362920216176606, "grad_norm": 0.3520124559704097, "learning_rate": 3.8146341463414636e-05, "loss": 0.6346, "num_tokens": 301380955.0, "step": 392 }, { "epoch": 0.14399560318768892, "grad_norm": 0.4345752209183583, "learning_rate": 3.8243902439024394e-05, "loss": 0.6241, "num_tokens": 302200037.0, "step": 393 }, { "epoch": 0.1443620042136118, "grad_norm": 0.43865266974921446, "learning_rate": 3.8341463414634145e-05, "loss": 0.6173, "num_tokens": 303068849.0, "step": 394 }, { "epoch": 0.14472840523953467, "grad_norm": 0.3857652210593535, "learning_rate": 3.84390243902439e-05, "loss": 0.6556, "num_tokens": 303790814.0, "step": 395 }, { "epoch": 0.14509480626545754, "grad_norm": 0.4451646596809127, "learning_rate": 3.853658536585366e-05, "loss": 0.6397, "num_tokens": 304578176.0, "step": 396 }, { "epoch": 0.14546120729138043, "grad_norm": 0.35504131930333876, "learning_rate": 3.863414634146342e-05, "loss": 0.6137, "num_tokens": 305283297.0, "step": 397 }, { "epoch": 0.1458276083173033, "grad_norm": 0.4150593717494499, "learning_rate": 3.873170731707317e-05, "loss": 0.6205, "num_tokens": 306088136.0, "step": 398 }, { "epoch": 0.14619400934322616, "grad_norm": 0.35245988351627183, "learning_rate": 3.882926829268293e-05, "loss": 0.6265, "num_tokens": 306857631.0, "step": 399 }, { "epoch": 0.14656041036914905, "grad_norm": 0.4438272854458112, "learning_rate": 3.8926829268292686e-05, "loss": 0.682, "num_tokens": 307593465.0, "step": 400 }, { "epoch": 0.1469268113950719, "grad_norm": 0.4239715670015994, "learning_rate": 3.9024390243902444e-05, "loss": 0.6468, "num_tokens": 308383045.0, "step": 401 }, { "epoch": 0.14729321242099477, "grad_norm": 0.38669641286865064, "learning_rate": 3.9121951219512195e-05, "loss": 0.6649, "num_tokens": 309225841.0, "step": 402 }, { "epoch": 0.14765961344691766, "grad_norm": 0.40714322327487484, "learning_rate": 3.921951219512195e-05, "loss": 0.6297, "num_tokens": 309943028.0, "step": 403 }, { "epoch": 0.14802601447284053, "grad_norm": 0.42198353804862204, "learning_rate": 3.931707317073171e-05, "loss": 0.6206, "num_tokens": 310682744.0, "step": 404 }, { "epoch": 0.1483924154987634, "grad_norm": 0.366977847551809, "learning_rate": 3.941463414634147e-05, "loss": 0.6491, "num_tokens": 311409677.0, "step": 405 }, { "epoch": 0.14875881652468628, "grad_norm": 0.40883343778960896, "learning_rate": 3.951219512195122e-05, "loss": 0.6483, "num_tokens": 312229519.0, "step": 406 }, { "epoch": 0.14912521755060915, "grad_norm": 0.3520310162541585, "learning_rate": 3.960975609756098e-05, "loss": 0.6279, "num_tokens": 313118850.0, "step": 407 }, { "epoch": 0.149491618576532, "grad_norm": 0.2592088964271546, "learning_rate": 3.9707317073170736e-05, "loss": 0.6204, "num_tokens": 313945276.0, "step": 408 }, { "epoch": 0.1498580196024549, "grad_norm": 0.35500939444623075, "learning_rate": 3.9804878048780494e-05, "loss": 0.6152, "num_tokens": 314709488.0, "step": 409 }, { "epoch": 0.15022442062837776, "grad_norm": 0.27605153803932975, "learning_rate": 3.9902439024390245e-05, "loss": 0.6388, "num_tokens": 315580699.0, "step": 410 }, { "epoch": 0.15059082165430063, "grad_norm": 0.35848389661883634, "learning_rate": 4e-05, "loss": 0.6429, "num_tokens": 316337687.0, "step": 411 }, { "epoch": 0.15095722268022352, "grad_norm": 0.3392574775852065, "learning_rate": 3.9999999492899125e-05, "loss": 0.6281, "num_tokens": 317137421.0, "step": 412 }, { "epoch": 0.15132362370614638, "grad_norm": 0.36020025631551195, "learning_rate": 3.999999797159653e-05, "loss": 0.6348, "num_tokens": 317869088.0, "step": 413 }, { "epoch": 0.15169002473206925, "grad_norm": 0.3498334421618699, "learning_rate": 3.999999543609229e-05, "loss": 0.608, "num_tokens": 318640058.0, "step": 414 }, { "epoch": 0.1520564257579921, "grad_norm": 0.36338471454106586, "learning_rate": 3.9999991886386544e-05, "loss": 0.6322, "num_tokens": 319482139.0, "step": 415 }, { "epoch": 0.152422826783915, "grad_norm": 0.3579299383517862, "learning_rate": 3.9999987322479505e-05, "loss": 0.6207, "num_tokens": 320385816.0, "step": 416 }, { "epoch": 0.15278922780983786, "grad_norm": 0.3409598030759385, "learning_rate": 3.9999981744371434e-05, "loss": 0.6265, "num_tokens": 321199542.0, "step": 417 }, { "epoch": 0.15315562883576073, "grad_norm": 0.3616050986904932, "learning_rate": 3.9999975152062636e-05, "loss": 0.6415, "num_tokens": 321964338.0, "step": 418 }, { "epoch": 0.15352202986168362, "grad_norm": 0.33197765862728473, "learning_rate": 3.9999967545553484e-05, "loss": 0.6405, "num_tokens": 322857365.0, "step": 419 }, { "epoch": 0.15388843088760648, "grad_norm": 0.3905248202556317, "learning_rate": 3.9999958924844404e-05, "loss": 0.6355, "num_tokens": 323676412.0, "step": 420 }, { "epoch": 0.15425483191352934, "grad_norm": 0.3332950959555503, "learning_rate": 3.9999949289935885e-05, "loss": 0.6302, "num_tokens": 324489365.0, "step": 421 }, { "epoch": 0.15462123293945224, "grad_norm": 0.32946798819222384, "learning_rate": 3.9999938640828475e-05, "loss": 0.6104, "num_tokens": 325381011.0, "step": 422 }, { "epoch": 0.1549876339653751, "grad_norm": 0.3463076833361785, "learning_rate": 3.9999926977522765e-05, "loss": 0.6282, "num_tokens": 326245387.0, "step": 423 }, { "epoch": 0.15535403499129796, "grad_norm": 0.2910024308215488, "learning_rate": 3.999991430001942e-05, "loss": 0.6414, "num_tokens": 327017773.0, "step": 424 }, { "epoch": 0.15572043601722085, "grad_norm": 0.35181675397012585, "learning_rate": 3.999990060831914e-05, "loss": 0.6412, "num_tokens": 327763584.0, "step": 425 }, { "epoch": 0.15608683704314372, "grad_norm": 0.3581713129276652, "learning_rate": 3.999988590242272e-05, "loss": 0.6179, "num_tokens": 328561755.0, "step": 426 }, { "epoch": 0.15645323806906658, "grad_norm": 0.29055674643735774, "learning_rate": 3.9999870182330965e-05, "loss": 0.6406, "num_tokens": 329310533.0, "step": 427 }, { "epoch": 0.15681963909498947, "grad_norm": 0.4433852248314601, "learning_rate": 3.999985344804477e-05, "loss": 0.6435, "num_tokens": 330136493.0, "step": 428 }, { "epoch": 0.15718604012091233, "grad_norm": 0.2948291473358385, "learning_rate": 3.9999835699565086e-05, "loss": 0.6105, "num_tokens": 330946993.0, "step": 429 }, { "epoch": 0.1575524411468352, "grad_norm": 0.5072211261567389, "learning_rate": 3.99998169368929e-05, "loss": 0.6275, "num_tokens": 331716913.0, "step": 430 }, { "epoch": 0.1579188421727581, "grad_norm": 0.41053769869728707, "learning_rate": 3.999979716002927e-05, "loss": 0.6094, "num_tokens": 332490061.0, "step": 431 }, { "epoch": 0.15828524319868095, "grad_norm": 0.436229152833258, "learning_rate": 3.999977636897532e-05, "loss": 0.6565, "num_tokens": 333393529.0, "step": 432 }, { "epoch": 0.15865164422460382, "grad_norm": 0.4075282211743251, "learning_rate": 3.9999754563732224e-05, "loss": 0.6578, "num_tokens": 334169806.0, "step": 433 }, { "epoch": 0.1590180452505267, "grad_norm": 0.41035513618651637, "learning_rate": 3.99997317443012e-05, "loss": 0.6234, "num_tokens": 334779192.0, "step": 434 }, { "epoch": 0.15938444627644957, "grad_norm": 0.3413712138397087, "learning_rate": 3.9999707910683524e-05, "loss": 0.6742, "num_tokens": 335408952.0, "step": 435 }, { "epoch": 0.15975084730237243, "grad_norm": 0.34353447165862105, "learning_rate": 3.9999683062880564e-05, "loss": 0.6327, "num_tokens": 336130877.0, "step": 436 }, { "epoch": 0.16011724832829533, "grad_norm": 0.30788629626618164, "learning_rate": 3.999965720089371e-05, "loss": 0.6261, "num_tokens": 336988887.0, "step": 437 }, { "epoch": 0.1604836493542182, "grad_norm": 0.39373580597269464, "learning_rate": 3.999963032472441e-05, "loss": 0.6727, "num_tokens": 337682988.0, "step": 438 }, { "epoch": 0.16085005038014105, "grad_norm": 0.3837459978044248, "learning_rate": 3.999960243437419e-05, "loss": 0.6354, "num_tokens": 338425695.0, "step": 439 }, { "epoch": 0.16121645140606394, "grad_norm": 0.3417012990910778, "learning_rate": 3.9999573529844616e-05, "loss": 0.617, "num_tokens": 339183357.0, "step": 440 }, { "epoch": 0.1615828524319868, "grad_norm": 0.43595532537801174, "learning_rate": 3.999954361113732e-05, "loss": 0.6422, "num_tokens": 339909157.0, "step": 441 }, { "epoch": 0.16194925345790967, "grad_norm": 0.3540409557311808, "learning_rate": 3.9999512678253984e-05, "loss": 0.6773, "num_tokens": 340718846.0, "step": 442 }, { "epoch": 0.16231565448383256, "grad_norm": 0.3404471156338543, "learning_rate": 3.999948073119635e-05, "loss": 0.6518, "num_tokens": 341377386.0, "step": 443 }, { "epoch": 0.16268205550975542, "grad_norm": 0.3625578149305212, "learning_rate": 3.999944776996622e-05, "loss": 0.623, "num_tokens": 342120129.0, "step": 444 }, { "epoch": 0.1630484565356783, "grad_norm": 0.32359975750244135, "learning_rate": 3.999941379456545e-05, "loss": 0.6212, "num_tokens": 342874088.0, "step": 445 }, { "epoch": 0.16341485756160118, "grad_norm": 0.33012673816053384, "learning_rate": 3.999937880499596e-05, "loss": 0.6331, "num_tokens": 343653573.0, "step": 446 }, { "epoch": 0.16378125858752404, "grad_norm": 0.3518046364641611, "learning_rate": 3.999934280125971e-05, "loss": 0.6558, "num_tokens": 344412427.0, "step": 447 }, { "epoch": 0.1641476596134469, "grad_norm": 0.2836684042808191, "learning_rate": 3.9999305783358745e-05, "loss": 0.5996, "num_tokens": 345194864.0, "step": 448 }, { "epoch": 0.1645140606393698, "grad_norm": 0.39261350661826255, "learning_rate": 3.999926775129514e-05, "loss": 0.6392, "num_tokens": 345973153.0, "step": 449 }, { "epoch": 0.16488046166529266, "grad_norm": 0.37760017789578726, "learning_rate": 3.9999228705071037e-05, "loss": 0.6185, "num_tokens": 346806425.0, "step": 450 }, { "epoch": 0.16524686269121552, "grad_norm": 0.3497857224175842, "learning_rate": 3.9999188644688643e-05, "loss": 0.6591, "num_tokens": 347474949.0, "step": 451 }, { "epoch": 0.16561326371713841, "grad_norm": 0.44330741761788833, "learning_rate": 3.99991475701502e-05, "loss": 0.6342, "num_tokens": 348123194.0, "step": 452 }, { "epoch": 0.16597966474306128, "grad_norm": 0.3746144583534662, "learning_rate": 3.999910548145804e-05, "loss": 0.6317, "num_tokens": 348912317.0, "step": 453 }, { "epoch": 0.16634606576898414, "grad_norm": 0.4239865873955771, "learning_rate": 3.999906237861453e-05, "loss": 0.6544, "num_tokens": 349649931.0, "step": 454 }, { "epoch": 0.16671246679490703, "grad_norm": 0.43873391640866327, "learning_rate": 3.9999018261622094e-05, "loss": 0.66, "num_tokens": 350475734.0, "step": 455 }, { "epoch": 0.1670788678208299, "grad_norm": 0.38365105940397787, "learning_rate": 3.999897313048322e-05, "loss": 0.611, "num_tokens": 351096559.0, "step": 456 }, { "epoch": 0.16744526884675276, "grad_norm": 0.34609835048792364, "learning_rate": 3.9998926985200445e-05, "loss": 0.6187, "num_tokens": 351898727.0, "step": 457 }, { "epoch": 0.16781166987267565, "grad_norm": 0.316973924447155, "learning_rate": 3.999887982577638e-05, "loss": 0.6281, "num_tokens": 352619567.0, "step": 458 }, { "epoch": 0.16817807089859851, "grad_norm": 0.36198111162424884, "learning_rate": 3.9998831652213674e-05, "loss": 0.6432, "num_tokens": 353354080.0, "step": 459 }, { "epoch": 0.16854447192452138, "grad_norm": 0.29145037844193095, "learning_rate": 3.999878246451505e-05, "loss": 0.6071, "num_tokens": 354154086.0, "step": 460 }, { "epoch": 0.16891087295044427, "grad_norm": 0.42840993878320527, "learning_rate": 3.999873226268327e-05, "loss": 0.6596, "num_tokens": 354953044.0, "step": 461 }, { "epoch": 0.16927727397636713, "grad_norm": 0.31928113700937183, "learning_rate": 3.999868104672117e-05, "loss": 0.6077, "num_tokens": 355713517.0, "step": 462 }, { "epoch": 0.16964367500229, "grad_norm": 0.37209188278155814, "learning_rate": 3.999862881663163e-05, "loss": 0.6121, "num_tokens": 356483744.0, "step": 463 }, { "epoch": 0.1700100760282129, "grad_norm": 0.31760116187299886, "learning_rate": 3.99985755724176e-05, "loss": 0.6625, "num_tokens": 357250228.0, "step": 464 }, { "epoch": 0.17037647705413575, "grad_norm": 0.35623239850387073, "learning_rate": 3.9998521314082064e-05, "loss": 0.6244, "num_tokens": 358013331.0, "step": 465 }, { "epoch": 0.1707428780800586, "grad_norm": 0.3263492464106942, "learning_rate": 3.99984660416281e-05, "loss": 0.6303, "num_tokens": 358786608.0, "step": 466 }, { "epoch": 0.1711092791059815, "grad_norm": 0.25488656951846356, "learning_rate": 3.99984097550588e-05, "loss": 0.6189, "num_tokens": 359566881.0, "step": 467 }, { "epoch": 0.17147568013190437, "grad_norm": 0.4422174361294387, "learning_rate": 3.999835245437736e-05, "loss": 0.6241, "num_tokens": 360278819.0, "step": 468 }, { "epoch": 0.17184208115782723, "grad_norm": 0.31381342747717544, "learning_rate": 3.999829413958699e-05, "loss": 0.6394, "num_tokens": 360995851.0, "step": 469 }, { "epoch": 0.17220848218375012, "grad_norm": 0.46099594683756895, "learning_rate": 3.9998234810690984e-05, "loss": 0.6618, "num_tokens": 361733027.0, "step": 470 }, { "epoch": 0.17257488320967299, "grad_norm": 0.37322470400704716, "learning_rate": 3.9998174467692686e-05, "loss": 0.6813, "num_tokens": 362434219.0, "step": 471 }, { "epoch": 0.17294128423559585, "grad_norm": 0.44447071290855483, "learning_rate": 3.999811311059549e-05, "loss": 0.6028, "num_tokens": 363198236.0, "step": 472 }, { "epoch": 0.17330768526151874, "grad_norm": 0.3445429768690978, "learning_rate": 3.999805073940286e-05, "loss": 0.6226, "num_tokens": 363975281.0, "step": 473 }, { "epoch": 0.1736740862874416, "grad_norm": 0.4880336360304946, "learning_rate": 3.99979873541183e-05, "loss": 0.6488, "num_tokens": 364791385.0, "step": 474 }, { "epoch": 0.17404048731336447, "grad_norm": 0.4755620675518434, "learning_rate": 3.999792295474539e-05, "loss": 0.6573, "num_tokens": 365442780.0, "step": 475 }, { "epoch": 0.17440688833928736, "grad_norm": 0.3340663047036813, "learning_rate": 3.999785754128776e-05, "loss": 0.631, "num_tokens": 366352193.0, "step": 476 }, { "epoch": 0.17477328936521022, "grad_norm": 0.3566878510063959, "learning_rate": 3.999779111374909e-05, "loss": 0.6252, "num_tokens": 367064429.0, "step": 477 }, { "epoch": 0.17513969039113308, "grad_norm": 0.3499634758719671, "learning_rate": 3.999772367213312e-05, "loss": 0.6187, "num_tokens": 367861682.0, "step": 478 }, { "epoch": 0.17550609141705598, "grad_norm": 0.3078502049694453, "learning_rate": 3.9997655216443666e-05, "loss": 0.5743, "num_tokens": 368635839.0, "step": 479 }, { "epoch": 0.17587249244297884, "grad_norm": 0.4144822735860886, "learning_rate": 3.9997585746684565e-05, "loss": 0.6229, "num_tokens": 369362085.0, "step": 480 }, { "epoch": 0.1762388934689017, "grad_norm": 0.39034553554295226, "learning_rate": 3.999751526285975e-05, "loss": 0.6854, "num_tokens": 370179952.0, "step": 481 }, { "epoch": 0.1766052944948246, "grad_norm": 0.4301219524175015, "learning_rate": 3.999744376497317e-05, "loss": 0.6926, "num_tokens": 370831261.0, "step": 482 }, { "epoch": 0.17697169552074746, "grad_norm": 0.3036860287746482, "learning_rate": 3.999737125302887e-05, "loss": 0.6101, "num_tokens": 371568190.0, "step": 483 }, { "epoch": 0.17733809654667032, "grad_norm": 0.38888054344394213, "learning_rate": 3.999729772703093e-05, "loss": 0.6224, "num_tokens": 372279021.0, "step": 484 }, { "epoch": 0.1777044975725932, "grad_norm": 0.29119221175351445, "learning_rate": 3.99972231869835e-05, "loss": 0.6384, "num_tokens": 372994755.0, "step": 485 }, { "epoch": 0.17807089859851608, "grad_norm": 0.39164962292312844, "learning_rate": 3.999714763289078e-05, "loss": 0.6607, "num_tokens": 373710678.0, "step": 486 }, { "epoch": 0.17843729962443894, "grad_norm": 0.39883170377898375, "learning_rate": 3.999707106475701e-05, "loss": 0.6314, "num_tokens": 374525145.0, "step": 487 }, { "epoch": 0.17880370065036183, "grad_norm": 0.3775587864332074, "learning_rate": 3.9996993482586516e-05, "loss": 0.6327, "num_tokens": 375291979.0, "step": 488 }, { "epoch": 0.1791701016762847, "grad_norm": 0.38424466230588283, "learning_rate": 3.999691488638368e-05, "loss": 0.6828, "num_tokens": 376102506.0, "step": 489 }, { "epoch": 0.17953650270220756, "grad_norm": 0.3583096740125347, "learning_rate": 3.999683527615291e-05, "loss": 0.6194, "num_tokens": 376912291.0, "step": 490 }, { "epoch": 0.17990290372813045, "grad_norm": 0.34841036173787526, "learning_rate": 3.9996754651898705e-05, "loss": 0.5955, "num_tokens": 377805481.0, "step": 491 }, { "epoch": 0.1802693047540533, "grad_norm": 0.3908300674542737, "learning_rate": 3.9996673013625605e-05, "loss": 0.631, "num_tokens": 378583818.0, "step": 492 }, { "epoch": 0.18063570577997617, "grad_norm": 0.29823529964807927, "learning_rate": 3.999659036133821e-05, "loss": 0.6354, "num_tokens": 379395022.0, "step": 493 }, { "epoch": 0.18100210680589907, "grad_norm": 0.36947806681920126, "learning_rate": 3.999650669504117e-05, "loss": 0.6249, "num_tokens": 380131123.0, "step": 494 }, { "epoch": 0.18136850783182193, "grad_norm": 0.3014691617548993, "learning_rate": 3.999642201473921e-05, "loss": 0.6092, "num_tokens": 380881612.0, "step": 495 }, { "epoch": 0.1817349088577448, "grad_norm": 0.3376143297396972, "learning_rate": 3.99963363204371e-05, "loss": 0.6042, "num_tokens": 381608534.0, "step": 496 }, { "epoch": 0.18210130988366768, "grad_norm": 0.302978774014681, "learning_rate": 3.999624961213965e-05, "loss": 0.6866, "num_tokens": 382412127.0, "step": 497 }, { "epoch": 0.18246771090959055, "grad_norm": 0.3031329220112348, "learning_rate": 3.9996161889851765e-05, "loss": 0.6235, "num_tokens": 383135373.0, "step": 498 }, { "epoch": 0.1828341119355134, "grad_norm": 0.32542489478958864, "learning_rate": 3.999607315357839e-05, "loss": 0.6035, "num_tokens": 383827433.0, "step": 499 }, { "epoch": 0.1832005129614363, "grad_norm": 0.37906740216354434, "learning_rate": 3.999598340332451e-05, "loss": 0.6432, "num_tokens": 384614009.0, "step": 500 }, { "epoch": 0.18356691398735916, "grad_norm": 0.33479923344673146, "learning_rate": 3.9995892639095194e-05, "loss": 0.6163, "num_tokens": 385433638.0, "step": 501 }, { "epoch": 0.18393331501328203, "grad_norm": 0.39515699075447136, "learning_rate": 3.999580086089555e-05, "loss": 0.6356, "num_tokens": 386191789.0, "step": 502 }, { "epoch": 0.18429971603920492, "grad_norm": 0.33210655904041686, "learning_rate": 3.9995708068730756e-05, "loss": 0.6726, "num_tokens": 386956222.0, "step": 503 }, { "epoch": 0.18466611706512778, "grad_norm": 0.33405391459913075, "learning_rate": 3.999561426260603e-05, "loss": 0.6356, "num_tokens": 387645596.0, "step": 504 }, { "epoch": 0.18503251809105065, "grad_norm": 0.24689696742241876, "learning_rate": 3.999551944252666e-05, "loss": 0.5914, "num_tokens": 388377667.0, "step": 505 }, { "epoch": 0.18539891911697354, "grad_norm": 0.34553326470543316, "learning_rate": 3.9995423608498e-05, "loss": 0.6462, "num_tokens": 389126533.0, "step": 506 }, { "epoch": 0.1857653201428964, "grad_norm": 0.31036723290548496, "learning_rate": 3.9995326760525436e-05, "loss": 0.6412, "num_tokens": 389943908.0, "step": 507 }, { "epoch": 0.18613172116881926, "grad_norm": 0.35851958314152027, "learning_rate": 3.999522889861443e-05, "loss": 0.6303, "num_tokens": 390767912.0, "step": 508 }, { "epoch": 0.18649812219474216, "grad_norm": 0.283131324418142, "learning_rate": 3.99951300227705e-05, "loss": 0.5975, "num_tokens": 391439305.0, "step": 509 }, { "epoch": 0.18686452322066502, "grad_norm": 0.3205105000665072, "learning_rate": 3.9995030132999204e-05, "loss": 0.609, "num_tokens": 392124531.0, "step": 510 }, { "epoch": 0.18723092424658788, "grad_norm": 0.3148101628535038, "learning_rate": 3.999492922930618e-05, "loss": 0.6345, "num_tokens": 392892626.0, "step": 511 }, { "epoch": 0.18759732527251077, "grad_norm": 0.25658579269751947, "learning_rate": 3.999482731169712e-05, "loss": 0.6053, "num_tokens": 393653201.0, "step": 512 }, { "epoch": 0.18796372629843364, "grad_norm": 0.295029412738951, "learning_rate": 3.999472438017776e-05, "loss": 0.608, "num_tokens": 394371748.0, "step": 513 }, { "epoch": 0.1883301273243565, "grad_norm": 0.3508612762495205, "learning_rate": 3.999462043475389e-05, "loss": 0.6428, "num_tokens": 395102789.0, "step": 514 }, { "epoch": 0.1886965283502794, "grad_norm": 0.323581327802834, "learning_rate": 3.9994515475431386e-05, "loss": 0.6186, "num_tokens": 395984409.0, "step": 515 }, { "epoch": 0.18906292937620225, "grad_norm": 0.3106437456938868, "learning_rate": 3.9994409502216144e-05, "loss": 0.638, "num_tokens": 396719233.0, "step": 516 }, { "epoch": 0.18942933040212512, "grad_norm": 0.37071662463849075, "learning_rate": 3.999430251511414e-05, "loss": 0.7249, "num_tokens": 397368332.0, "step": 517 }, { "epoch": 0.189795731428048, "grad_norm": 0.28989903846298976, "learning_rate": 3.999419451413141e-05, "loss": 0.6306, "num_tokens": 398097193.0, "step": 518 }, { "epoch": 0.19016213245397087, "grad_norm": 0.31086467813243773, "learning_rate": 3.999408549927403e-05, "loss": 0.598, "num_tokens": 398816508.0, "step": 519 }, { "epoch": 0.19052853347989374, "grad_norm": 0.2937254936526918, "learning_rate": 3.999397547054815e-05, "loss": 0.6573, "num_tokens": 399533515.0, "step": 520 }, { "epoch": 0.19089493450581663, "grad_norm": 0.2772075109783512, "learning_rate": 3.999386442795996e-05, "loss": 0.6343, "num_tokens": 400242687.0, "step": 521 }, { "epoch": 0.1912613355317395, "grad_norm": 0.321796134718184, "learning_rate": 3.9993752371515724e-05, "loss": 0.6197, "num_tokens": 400935285.0, "step": 522 }, { "epoch": 0.19162773655766235, "grad_norm": 0.317173083132704, "learning_rate": 3.999363930122176e-05, "loss": 0.6388, "num_tokens": 401764462.0, "step": 523 }, { "epoch": 0.19199413758358524, "grad_norm": 0.31178509164184437, "learning_rate": 3.999352521708443e-05, "loss": 0.626, "num_tokens": 402681760.0, "step": 524 }, { "epoch": 0.1923605386095081, "grad_norm": 0.4542677313054528, "learning_rate": 3.9993410119110156e-05, "loss": 0.6438, "num_tokens": 403432542.0, "step": 525 }, { "epoch": 0.19272693963543097, "grad_norm": 0.4994078479752892, "learning_rate": 3.999329400730544e-05, "loss": 0.6607, "num_tokens": 404158328.0, "step": 526 }, { "epoch": 0.19309334066135386, "grad_norm": 0.32323004003864736, "learning_rate": 3.999317688167682e-05, "loss": 0.6368, "num_tokens": 405127349.0, "step": 527 }, { "epoch": 0.19345974168727673, "grad_norm": 0.41035512780199046, "learning_rate": 3.999305874223089e-05, "loss": 0.5942, "num_tokens": 405885728.0, "step": 528 }, { "epoch": 0.1938261427131996, "grad_norm": 0.3209471125125247, "learning_rate": 3.9992939588974305e-05, "loss": 0.6262, "num_tokens": 406614446.0, "step": 529 }, { "epoch": 0.19419254373912248, "grad_norm": 0.3341113554202331, "learning_rate": 3.999281942191378e-05, "loss": 0.5858, "num_tokens": 407363022.0, "step": 530 }, { "epoch": 0.19455894476504534, "grad_norm": 0.2742428448212525, "learning_rate": 3.999269824105609e-05, "loss": 0.6213, "num_tokens": 408007527.0, "step": 531 }, { "epoch": 0.1949253457909682, "grad_norm": 0.3518139866641482, "learning_rate": 3.999257604640806e-05, "loss": 0.6408, "num_tokens": 408849376.0, "step": 532 }, { "epoch": 0.1952917468168911, "grad_norm": 0.3093355436193648, "learning_rate": 3.999245283797658e-05, "loss": 0.6201, "num_tokens": 409593825.0, "step": 533 }, { "epoch": 0.19565814784281396, "grad_norm": 0.3921381197410355, "learning_rate": 3.999232861576858e-05, "loss": 0.6278, "num_tokens": 410380228.0, "step": 534 }, { "epoch": 0.19602454886873683, "grad_norm": 0.30837225363837506, "learning_rate": 3.999220337979107e-05, "loss": 0.6292, "num_tokens": 411113671.0, "step": 535 }, { "epoch": 0.19639094989465972, "grad_norm": 0.36646208011422493, "learning_rate": 3.999207713005111e-05, "loss": 0.6464, "num_tokens": 411901173.0, "step": 536 }, { "epoch": 0.19675735092058258, "grad_norm": 0.3684121044799952, "learning_rate": 3.9991949866555796e-05, "loss": 0.605, "num_tokens": 412740011.0, "step": 537 }, { "epoch": 0.19712375194650544, "grad_norm": 0.34148261722588175, "learning_rate": 3.999182158931232e-05, "loss": 0.595, "num_tokens": 413445261.0, "step": 538 }, { "epoch": 0.19749015297242833, "grad_norm": 0.3689425925109516, "learning_rate": 3.9991692298327894e-05, "loss": 0.6578, "num_tokens": 414213061.0, "step": 539 }, { "epoch": 0.1978565539983512, "grad_norm": 0.33415156773816623, "learning_rate": 3.999156199360981e-05, "loss": 0.6723, "num_tokens": 414983719.0, "step": 540 }, { "epoch": 0.19822295502427406, "grad_norm": 0.3525349852090264, "learning_rate": 3.99914306751654e-05, "loss": 0.6629, "num_tokens": 415709510.0, "step": 541 }, { "epoch": 0.19858935605019695, "grad_norm": 0.24713839333553958, "learning_rate": 3.999129834300208e-05, "loss": 0.5822, "num_tokens": 416559596.0, "step": 542 }, { "epoch": 0.19895575707611982, "grad_norm": 0.27480851667621636, "learning_rate": 3.9991164997127286e-05, "loss": 0.6043, "num_tokens": 417279181.0, "step": 543 }, { "epoch": 0.19932215810204268, "grad_norm": 0.26613652699549245, "learning_rate": 3.999103063754855e-05, "loss": 0.6565, "num_tokens": 418000200.0, "step": 544 }, { "epoch": 0.19968855912796557, "grad_norm": 0.31883927642559196, "learning_rate": 3.999089526427344e-05, "loss": 0.6565, "num_tokens": 418683558.0, "step": 545 }, { "epoch": 0.20005496015388843, "grad_norm": 0.3231307461404415, "learning_rate": 3.999075887730957e-05, "loss": 0.602, "num_tokens": 419492942.0, "step": 546 }, { "epoch": 0.2004213611798113, "grad_norm": 0.262936747479066, "learning_rate": 3.999062147666464e-05, "loss": 0.6354, "num_tokens": 420314474.0, "step": 547 }, { "epoch": 0.2007877622057342, "grad_norm": 0.3246804212875779, "learning_rate": 3.999048306234638e-05, "loss": 0.5874, "num_tokens": 420990254.0, "step": 548 }, { "epoch": 0.20115416323165705, "grad_norm": 0.34144567115686075, "learning_rate": 3.99903436343626e-05, "loss": 0.6189, "num_tokens": 421627813.0, "step": 549 }, { "epoch": 0.20152056425757991, "grad_norm": 0.32734916448893997, "learning_rate": 3.9990203192721134e-05, "loss": 0.611, "num_tokens": 422316249.0, "step": 550 }, { "epoch": 0.2018869652835028, "grad_norm": 0.34626914205622944, "learning_rate": 3.999006173742993e-05, "loss": 0.6357, "num_tokens": 423072403.0, "step": 551 }, { "epoch": 0.20225336630942567, "grad_norm": 0.2749366759967881, "learning_rate": 3.998991926849693e-05, "loss": 0.6452, "num_tokens": 423740177.0, "step": 552 }, { "epoch": 0.20261976733534853, "grad_norm": 0.25755984074545757, "learning_rate": 3.998977578593017e-05, "loss": 0.586, "num_tokens": 424449626.0, "step": 553 }, { "epoch": 0.20298616836127142, "grad_norm": 0.3279562912465793, "learning_rate": 3.9989631289737735e-05, "loss": 0.5893, "num_tokens": 425240813.0, "step": 554 }, { "epoch": 0.2033525693871943, "grad_norm": 0.31864919138985465, "learning_rate": 3.998948577992777e-05, "loss": 0.6376, "num_tokens": 426061363.0, "step": 555 }, { "epoch": 0.20371897041311715, "grad_norm": 0.28773558766227675, "learning_rate": 3.998933925650847e-05, "loss": 0.6396, "num_tokens": 426704690.0, "step": 556 }, { "epoch": 0.20408537143904004, "grad_norm": 0.36640301691820004, "learning_rate": 3.998919171948809e-05, "loss": 0.5899, "num_tokens": 427475150.0, "step": 557 }, { "epoch": 0.2044517724649629, "grad_norm": 0.28706480297568593, "learning_rate": 3.998904316887494e-05, "loss": 0.631, "num_tokens": 428268950.0, "step": 558 }, { "epoch": 0.20481817349088577, "grad_norm": 0.34707973816215093, "learning_rate": 3.9988893604677394e-05, "loss": 0.6231, "num_tokens": 429058334.0, "step": 559 }, { "epoch": 0.20518457451680866, "grad_norm": 0.3078017510725554, "learning_rate": 3.9988743026903884e-05, "loss": 0.5961, "num_tokens": 429713796.0, "step": 560 }, { "epoch": 0.20555097554273152, "grad_norm": 0.3542957546487302, "learning_rate": 3.998859143556289e-05, "loss": 0.6534, "num_tokens": 430573339.0, "step": 561 }, { "epoch": 0.2059173765686544, "grad_norm": 0.3750864844234409, "learning_rate": 3.998843883066295e-05, "loss": 0.6514, "num_tokens": 431370141.0, "step": 562 }, { "epoch": 0.20628377759457728, "grad_norm": 0.32796874594845016, "learning_rate": 3.9988285212212663e-05, "loss": 0.5932, "num_tokens": 432189221.0, "step": 563 }, { "epoch": 0.20665017862050014, "grad_norm": 0.36221176772465075, "learning_rate": 3.998813058022069e-05, "loss": 0.6586, "num_tokens": 432954326.0, "step": 564 }, { "epoch": 0.207016579646423, "grad_norm": 0.33903728769728403, "learning_rate": 3.998797493469574e-05, "loss": 0.5896, "num_tokens": 433774465.0, "step": 565 }, { "epoch": 0.2073829806723459, "grad_norm": 0.3200868249028306, "learning_rate": 3.998781827564658e-05, "loss": 0.6477, "num_tokens": 434653952.0, "step": 566 }, { "epoch": 0.20774938169826876, "grad_norm": 0.3095126377728889, "learning_rate": 3.9987660603082045e-05, "loss": 0.612, "num_tokens": 435308716.0, "step": 567 }, { "epoch": 0.20811578272419162, "grad_norm": 0.3154932763311356, "learning_rate": 3.998750191701101e-05, "loss": 0.6609, "num_tokens": 436068254.0, "step": 568 }, { "epoch": 0.2084821837501145, "grad_norm": 0.32304875598548394, "learning_rate": 3.9987342217442415e-05, "loss": 0.6748, "num_tokens": 436813329.0, "step": 569 }, { "epoch": 0.20884858477603738, "grad_norm": 0.2983939296895527, "learning_rate": 3.998718150438527e-05, "loss": 0.6536, "num_tokens": 437505152.0, "step": 570 }, { "epoch": 0.20921498580196024, "grad_norm": 0.33044872166253303, "learning_rate": 3.9987019777848625e-05, "loss": 0.6184, "num_tokens": 438354747.0, "step": 571 }, { "epoch": 0.20958138682788313, "grad_norm": 0.30965144364633757, "learning_rate": 3.998685703784159e-05, "loss": 0.6243, "num_tokens": 439082634.0, "step": 572 }, { "epoch": 0.209947787853806, "grad_norm": 0.31210143338455065, "learning_rate": 3.998669328437333e-05, "loss": 0.6528, "num_tokens": 439775591.0, "step": 573 }, { "epoch": 0.21031418887972886, "grad_norm": 0.3550153444387646, "learning_rate": 3.998652851745308e-05, "loss": 0.6438, "num_tokens": 440563197.0, "step": 574 }, { "epoch": 0.21068058990565175, "grad_norm": 0.3336573614474963, "learning_rate": 3.998636273709013e-05, "loss": 0.6138, "num_tokens": 441355258.0, "step": 575 }, { "epoch": 0.2110469909315746, "grad_norm": 0.2906196136894586, "learning_rate": 3.9986195943293796e-05, "loss": 0.6331, "num_tokens": 442184913.0, "step": 576 }, { "epoch": 0.21141339195749748, "grad_norm": 0.32387755495237164, "learning_rate": 3.99860281360735e-05, "loss": 0.6402, "num_tokens": 442897922.0, "step": 577 }, { "epoch": 0.21177979298342037, "grad_norm": 0.3241518535861208, "learning_rate": 3.998585931543869e-05, "loss": 0.5676, "num_tokens": 443687848.0, "step": 578 }, { "epoch": 0.21214619400934323, "grad_norm": 0.328787479864381, "learning_rate": 3.998568948139887e-05, "loss": 0.6392, "num_tokens": 444487669.0, "step": 579 }, { "epoch": 0.2125125950352661, "grad_norm": 0.39983147856485135, "learning_rate": 3.998551863396362e-05, "loss": 0.6428, "num_tokens": 445345403.0, "step": 580 }, { "epoch": 0.21287899606118896, "grad_norm": 0.418581212758264, "learning_rate": 3.998534677314257e-05, "loss": 0.621, "num_tokens": 446201239.0, "step": 581 }, { "epoch": 0.21324539708711185, "grad_norm": 0.28735913911784094, "learning_rate": 3.9985173898945385e-05, "loss": 0.6281, "num_tokens": 447090451.0, "step": 582 }, { "epoch": 0.2136117981130347, "grad_norm": 0.3251576453985077, "learning_rate": 3.9985000011381816e-05, "loss": 0.5794, "num_tokens": 447817239.0, "step": 583 }, { "epoch": 0.21397819913895758, "grad_norm": 0.305583784251564, "learning_rate": 3.9984825110461674e-05, "loss": 0.6294, "num_tokens": 448720489.0, "step": 584 }, { "epoch": 0.21434460016488047, "grad_norm": 0.28377533192166915, "learning_rate": 3.998464919619478e-05, "loss": 0.6089, "num_tokens": 449546181.0, "step": 585 }, { "epoch": 0.21471100119080333, "grad_norm": 0.3448887379795739, "learning_rate": 3.998447226859108e-05, "loss": 0.6742, "num_tokens": 450274993.0, "step": 586 }, { "epoch": 0.2150774022167262, "grad_norm": 0.3518442153727486, "learning_rate": 3.9984294327660526e-05, "loss": 0.59, "num_tokens": 451056322.0, "step": 587 }, { "epoch": 0.21544380324264908, "grad_norm": 0.32389114735413266, "learning_rate": 3.998411537341314e-05, "loss": 0.6386, "num_tokens": 451765919.0, "step": 588 }, { "epoch": 0.21581020426857195, "grad_norm": 0.3744853774702795, "learning_rate": 3.9983935405859026e-05, "loss": 0.6194, "num_tokens": 452470713.0, "step": 589 }, { "epoch": 0.2161766052944948, "grad_norm": 0.30524138514773247, "learning_rate": 3.9983754425008304e-05, "loss": 0.639, "num_tokens": 453207277.0, "step": 590 }, { "epoch": 0.2165430063204177, "grad_norm": 0.4009860747097202, "learning_rate": 3.998357243087118e-05, "loss": 0.6411, "num_tokens": 453959213.0, "step": 591 }, { "epoch": 0.21690940734634057, "grad_norm": 0.3143168687077043, "learning_rate": 3.9983389423457904e-05, "loss": 0.6227, "num_tokens": 454716872.0, "step": 592 }, { "epoch": 0.21727580837226343, "grad_norm": 0.3937125839975145, "learning_rate": 3.998320540277879e-05, "loss": 0.6257, "num_tokens": 455533259.0, "step": 593 }, { "epoch": 0.21764220939818632, "grad_norm": 0.3628420992729961, "learning_rate": 3.99830203688442e-05, "loss": 0.6224, "num_tokens": 456306725.0, "step": 594 }, { "epoch": 0.21800861042410918, "grad_norm": 0.3023392927684728, "learning_rate": 3.9982834321664576e-05, "loss": 0.6164, "num_tokens": 457126409.0, "step": 595 }, { "epoch": 0.21837501145003205, "grad_norm": 0.3530216607910194, "learning_rate": 3.9982647261250386e-05, "loss": 0.6412, "num_tokens": 457805139.0, "step": 596 }, { "epoch": 0.21874141247595494, "grad_norm": 0.3877849905590004, "learning_rate": 3.9982459187612173e-05, "loss": 0.6475, "num_tokens": 458594461.0, "step": 597 }, { "epoch": 0.2191078135018778, "grad_norm": 0.2611727411765241, "learning_rate": 3.998227010076054e-05, "loss": 0.6022, "num_tokens": 459394329.0, "step": 598 }, { "epoch": 0.21947421452780067, "grad_norm": 0.45011289487782175, "learning_rate": 3.9982080000706124e-05, "loss": 0.6249, "num_tokens": 460238841.0, "step": 599 }, { "epoch": 0.21984061555372356, "grad_norm": 0.36479672940869284, "learning_rate": 3.998188888745966e-05, "loss": 0.6275, "num_tokens": 461065012.0, "step": 600 }, { "epoch": 0.22020701657964642, "grad_norm": 0.42403274541574937, "learning_rate": 3.99816967610319e-05, "loss": 0.6439, "num_tokens": 461849419.0, "step": 601 }, { "epoch": 0.22057341760556928, "grad_norm": 0.3934550462544417, "learning_rate": 3.998150362143368e-05, "loss": 0.6178, "num_tokens": 462615972.0, "step": 602 }, { "epoch": 0.22093981863149217, "grad_norm": 0.3185338111776199, "learning_rate": 3.9981309468675873e-05, "loss": 0.6153, "num_tokens": 463411990.0, "step": 603 }, { "epoch": 0.22130621965741504, "grad_norm": 0.37624720886665364, "learning_rate": 3.998111430276943e-05, "loss": 0.6136, "num_tokens": 464216913.0, "step": 604 }, { "epoch": 0.2216726206833379, "grad_norm": 0.2740309478000569, "learning_rate": 3.998091812372532e-05, "loss": 0.5945, "num_tokens": 464858505.0, "step": 605 }, { "epoch": 0.2220390217092608, "grad_norm": 0.2924405881218445, "learning_rate": 3.998072093155463e-05, "loss": 0.6203, "num_tokens": 465751116.0, "step": 606 }, { "epoch": 0.22240542273518366, "grad_norm": 0.3170795144487193, "learning_rate": 3.9980522726268446e-05, "loss": 0.6132, "num_tokens": 466626553.0, "step": 607 }, { "epoch": 0.22277182376110652, "grad_norm": 0.2622395915760971, "learning_rate": 3.998032350787796e-05, "loss": 0.5972, "num_tokens": 467456443.0, "step": 608 }, { "epoch": 0.2231382247870294, "grad_norm": 0.2715557350442809, "learning_rate": 3.9980123276394376e-05, "loss": 0.6188, "num_tokens": 468223395.0, "step": 609 }, { "epoch": 0.22350462581295227, "grad_norm": 0.29173162550406645, "learning_rate": 3.997992203182898e-05, "loss": 0.6592, "num_tokens": 469016806.0, "step": 610 }, { "epoch": 0.22387102683887514, "grad_norm": 0.27084544935959043, "learning_rate": 3.997971977419312e-05, "loss": 0.6765, "num_tokens": 469768805.0, "step": 611 }, { "epoch": 0.22423742786479803, "grad_norm": 0.35904999502114043, "learning_rate": 3.997951650349818e-05, "loss": 0.6382, "num_tokens": 470488392.0, "step": 612 }, { "epoch": 0.2246038288907209, "grad_norm": 0.29860665679358045, "learning_rate": 3.997931221975563e-05, "loss": 0.6218, "num_tokens": 471176163.0, "step": 613 }, { "epoch": 0.22497022991664375, "grad_norm": 0.3592592405248205, "learning_rate": 3.9979106922976954e-05, "loss": 0.6096, "num_tokens": 471996254.0, "step": 614 }, { "epoch": 0.22533663094256665, "grad_norm": 0.33891824484269184, "learning_rate": 3.997890061317375e-05, "loss": 0.6133, "num_tokens": 472802020.0, "step": 615 }, { "epoch": 0.2257030319684895, "grad_norm": 0.2890763876343217, "learning_rate": 3.997869329035762e-05, "loss": 0.616, "num_tokens": 473611114.0, "step": 616 }, { "epoch": 0.22606943299441237, "grad_norm": 0.2549700051770504, "learning_rate": 3.997848495454025e-05, "loss": 0.5961, "num_tokens": 474430700.0, "step": 617 }, { "epoch": 0.22643583402033526, "grad_norm": 0.31696431102402534, "learning_rate": 3.9978275605733394e-05, "loss": 0.6288, "num_tokens": 475260965.0, "step": 618 }, { "epoch": 0.22680223504625813, "grad_norm": 0.2265983836522982, "learning_rate": 3.997806524394883e-05, "loss": 0.5833, "num_tokens": 476027151.0, "step": 619 }, { "epoch": 0.227168636072181, "grad_norm": 0.24470082053451359, "learning_rate": 3.997785386919841e-05, "loss": 0.6488, "num_tokens": 476735033.0, "step": 620 }, { "epoch": 0.22753503709810388, "grad_norm": 0.24565775587772443, "learning_rate": 3.9977641481494056e-05, "loss": 0.597, "num_tokens": 477438858.0, "step": 621 }, { "epoch": 0.22790143812402675, "grad_norm": 0.2511957904144006, "learning_rate": 3.997742808084773e-05, "loss": 0.6247, "num_tokens": 478121665.0, "step": 622 }, { "epoch": 0.2282678391499496, "grad_norm": 0.2868993904546432, "learning_rate": 3.997721366727145e-05, "loss": 0.5915, "num_tokens": 478935216.0, "step": 623 }, { "epoch": 0.2286342401758725, "grad_norm": 0.27688503427381966, "learning_rate": 3.99769982407773e-05, "loss": 0.6315, "num_tokens": 479622376.0, "step": 624 }, { "epoch": 0.22900064120179536, "grad_norm": 0.4355029979469706, "learning_rate": 3.997678180137742e-05, "loss": 0.6123, "num_tokens": 480310199.0, "step": 625 }, { "epoch": 0.22936704222771823, "grad_norm": 0.3828412926454975, "learning_rate": 3.997656434908401e-05, "loss": 0.6142, "num_tokens": 480981090.0, "step": 626 }, { "epoch": 0.22973344325364112, "grad_norm": 0.3204508201207879, "learning_rate": 3.997634588390931e-05, "loss": 0.5913, "num_tokens": 481806899.0, "step": 627 }, { "epoch": 0.23009984427956398, "grad_norm": 0.34939759203708104, "learning_rate": 3.9976126405865645e-05, "loss": 0.5737, "num_tokens": 482607472.0, "step": 628 }, { "epoch": 0.23046624530548684, "grad_norm": 0.34258997286827125, "learning_rate": 3.9975905914965374e-05, "loss": 0.5916, "num_tokens": 483515836.0, "step": 629 }, { "epoch": 0.23083264633140974, "grad_norm": 0.36048628030231233, "learning_rate": 3.997568441122092e-05, "loss": 0.605, "num_tokens": 484270241.0, "step": 630 }, { "epoch": 0.2311990473573326, "grad_norm": 0.24280938247032316, "learning_rate": 3.997546189464475e-05, "loss": 0.6177, "num_tokens": 485126330.0, "step": 631 }, { "epoch": 0.23156544838325546, "grad_norm": 0.33473256620623354, "learning_rate": 3.997523836524943e-05, "loss": 0.5678, "num_tokens": 485756427.0, "step": 632 }, { "epoch": 0.23193184940917835, "grad_norm": 0.3044528183671008, "learning_rate": 3.997501382304753e-05, "loss": 0.5937, "num_tokens": 486467058.0, "step": 633 }, { "epoch": 0.23229825043510122, "grad_norm": 0.32073304080060305, "learning_rate": 3.997478826805171e-05, "loss": 0.6407, "num_tokens": 487275913.0, "step": 634 }, { "epoch": 0.23266465146102408, "grad_norm": 0.26476156610796536, "learning_rate": 3.997456170027468e-05, "loss": 0.6057, "num_tokens": 488126174.0, "step": 635 }, { "epoch": 0.23303105248694697, "grad_norm": 0.33586263501019975, "learning_rate": 3.997433411972921e-05, "loss": 0.6159, "num_tokens": 488859666.0, "step": 636 }, { "epoch": 0.23339745351286983, "grad_norm": 0.2608847235235534, "learning_rate": 3.997410552642812e-05, "loss": 0.6005, "num_tokens": 489728196.0, "step": 637 }, { "epoch": 0.2337638545387927, "grad_norm": 0.30183610694323176, "learning_rate": 3.9973875920384285e-05, "loss": 0.6062, "num_tokens": 490421525.0, "step": 638 }, { "epoch": 0.2341302555647156, "grad_norm": 0.24769098756298266, "learning_rate": 3.997364530161064e-05, "loss": 0.638, "num_tokens": 491164641.0, "step": 639 }, { "epoch": 0.23449665659063845, "grad_norm": 0.3605492312346688, "learning_rate": 3.99734136701202e-05, "loss": 0.6084, "num_tokens": 491953334.0, "step": 640 }, { "epoch": 0.23486305761656132, "grad_norm": 0.3254582396470647, "learning_rate": 3.9973181025925985e-05, "loss": 0.6273, "num_tokens": 492804279.0, "step": 641 }, { "epoch": 0.2352294586424842, "grad_norm": 0.383523588826606, "learning_rate": 3.9972947369041135e-05, "loss": 0.6286, "num_tokens": 493617917.0, "step": 642 }, { "epoch": 0.23559585966840707, "grad_norm": 0.38701265753498276, "learning_rate": 3.997271269947878e-05, "loss": 0.6275, "num_tokens": 494270818.0, "step": 643 }, { "epoch": 0.23596226069432993, "grad_norm": 0.25186928284923665, "learning_rate": 3.997247701725218e-05, "loss": 0.6322, "num_tokens": 495104319.0, "step": 644 }, { "epoch": 0.23632866172025283, "grad_norm": 0.4429659379416552, "learning_rate": 3.9972240322374586e-05, "loss": 0.6307, "num_tokens": 495856699.0, "step": 645 }, { "epoch": 0.2366950627461757, "grad_norm": 0.3471878880861808, "learning_rate": 3.997200261485934e-05, "loss": 0.6342, "num_tokens": 496517044.0, "step": 646 }, { "epoch": 0.23706146377209855, "grad_norm": 0.3280244253329822, "learning_rate": 3.997176389471985e-05, "loss": 0.6325, "num_tokens": 497250081.0, "step": 647 }, { "epoch": 0.23742786479802144, "grad_norm": 0.3923626855790351, "learning_rate": 3.997152416196955e-05, "loss": 0.6145, "num_tokens": 498206143.0, "step": 648 }, { "epoch": 0.2377942658239443, "grad_norm": 0.2697630622302858, "learning_rate": 3.997128341662196e-05, "loss": 0.588, "num_tokens": 498976527.0, "step": 649 }, { "epoch": 0.23816066684986717, "grad_norm": 0.3405800045120495, "learning_rate": 3.997104165869064e-05, "loss": 0.6163, "num_tokens": 499778565.0, "step": 650 }, { "epoch": 0.23852706787579006, "grad_norm": 0.3188522604681833, "learning_rate": 3.997079888818921e-05, "loss": 0.6023, "num_tokens": 500483207.0, "step": 651 }, { "epoch": 0.23889346890171292, "grad_norm": 0.2792180258423211, "learning_rate": 3.997055510513134e-05, "loss": 0.5757, "num_tokens": 501290522.0, "step": 652 }, { "epoch": 0.2392598699276358, "grad_norm": 0.30396692764543615, "learning_rate": 3.997031030953079e-05, "loss": 0.6142, "num_tokens": 501946882.0, "step": 653 }, { "epoch": 0.23962627095355868, "grad_norm": 0.28259255232766156, "learning_rate": 3.997006450140133e-05, "loss": 0.6082, "num_tokens": 502679459.0, "step": 654 }, { "epoch": 0.23999267197948154, "grad_norm": 0.2797381087139953, "learning_rate": 3.996981768075681e-05, "loss": 0.6157, "num_tokens": 503575302.0, "step": 655 }, { "epoch": 0.2403590730054044, "grad_norm": 0.28042135537119056, "learning_rate": 3.996956984761116e-05, "loss": 0.5656, "num_tokens": 504383568.0, "step": 656 }, { "epoch": 0.2407254740313273, "grad_norm": 0.25700861650129603, "learning_rate": 3.9969321001978324e-05, "loss": 0.6219, "num_tokens": 505200004.0, "step": 657 }, { "epoch": 0.24109187505725016, "grad_norm": 0.30074266209786366, "learning_rate": 3.996907114387233e-05, "loss": 0.5814, "num_tokens": 506008377.0, "step": 658 }, { "epoch": 0.24145827608317302, "grad_norm": 0.2589372605743391, "learning_rate": 3.9968820273307246e-05, "loss": 0.6248, "num_tokens": 506838119.0, "step": 659 }, { "epoch": 0.24182467710909591, "grad_norm": 0.2704760661126264, "learning_rate": 3.996856839029723e-05, "loss": 0.6149, "num_tokens": 507657551.0, "step": 660 }, { "epoch": 0.24219107813501878, "grad_norm": 0.24821505831902818, "learning_rate": 3.996831549485645e-05, "loss": 0.6067, "num_tokens": 508411257.0, "step": 661 }, { "epoch": 0.24255747916094164, "grad_norm": 0.27925366629901194, "learning_rate": 3.9968061586999166e-05, "loss": 0.6145, "num_tokens": 509076891.0, "step": 662 }, { "epoch": 0.24292388018686453, "grad_norm": 0.24340208584472317, "learning_rate": 3.996780666673969e-05, "loss": 0.6715, "num_tokens": 509875329.0, "step": 663 }, { "epoch": 0.2432902812127874, "grad_norm": 0.2562570311654375, "learning_rate": 3.996755073409237e-05, "loss": 0.612, "num_tokens": 510631275.0, "step": 664 }, { "epoch": 0.24365668223871026, "grad_norm": 0.2645847934716869, "learning_rate": 3.996729378907164e-05, "loss": 0.6322, "num_tokens": 511337762.0, "step": 665 }, { "epoch": 0.24402308326463315, "grad_norm": 0.24966246193892871, "learning_rate": 3.996703583169197e-05, "loss": 0.6137, "num_tokens": 512103157.0, "step": 666 }, { "epoch": 0.244389484290556, "grad_norm": 0.26860173860633285, "learning_rate": 3.99667768619679e-05, "loss": 0.5917, "num_tokens": 512849644.0, "step": 667 }, { "epoch": 0.24475588531647888, "grad_norm": 0.268898141641138, "learning_rate": 3.996651687991402e-05, "loss": 0.6123, "num_tokens": 513730040.0, "step": 668 }, { "epoch": 0.24512228634240177, "grad_norm": 0.25078717625804137, "learning_rate": 3.9966255885544975e-05, "loss": 0.6345, "num_tokens": 514435120.0, "step": 669 }, { "epoch": 0.24548868736832463, "grad_norm": 0.2540492680470773, "learning_rate": 3.996599387887547e-05, "loss": 0.6271, "num_tokens": 515316107.0, "step": 670 }, { "epoch": 0.2458550883942475, "grad_norm": 0.25653226250747607, "learning_rate": 3.9965730859920274e-05, "loss": 0.634, "num_tokens": 516070447.0, "step": 671 }, { "epoch": 0.2462214894201704, "grad_norm": 0.2501932547606396, "learning_rate": 3.9965466828694205e-05, "loss": 0.6298, "num_tokens": 516823742.0, "step": 672 }, { "epoch": 0.24658789044609325, "grad_norm": 0.270731191214002, "learning_rate": 3.996520178521213e-05, "loss": 0.6023, "num_tokens": 517693893.0, "step": 673 }, { "epoch": 0.2469542914720161, "grad_norm": 0.2531032010903169, "learning_rate": 3.9964935729488996e-05, "loss": 0.5734, "num_tokens": 518447791.0, "step": 674 }, { "epoch": 0.247320692497939, "grad_norm": 0.2524055758604383, "learning_rate": 3.996466866153979e-05, "loss": 0.5855, "num_tokens": 519233940.0, "step": 675 }, { "epoch": 0.24768709352386187, "grad_norm": 0.2611112025216699, "learning_rate": 3.996440058137956e-05, "loss": 0.6189, "num_tokens": 519898539.0, "step": 676 }, { "epoch": 0.24805349454978473, "grad_norm": 0.27614848653353186, "learning_rate": 3.996413148902341e-05, "loss": 0.6379, "num_tokens": 520615014.0, "step": 677 }, { "epoch": 0.24841989557570762, "grad_norm": 0.2613684241651798, "learning_rate": 3.9963861384486495e-05, "loss": 0.5811, "num_tokens": 521375000.0, "step": 678 }, { "epoch": 0.24878629660163049, "grad_norm": 0.29814073068487695, "learning_rate": 3.996359026778404e-05, "loss": 0.6154, "num_tokens": 522051283.0, "step": 679 }, { "epoch": 0.24915269762755335, "grad_norm": 0.33486332051849155, "learning_rate": 3.9963318138931327e-05, "loss": 0.6095, "num_tokens": 522698034.0, "step": 680 }, { "epoch": 0.24951909865347624, "grad_norm": 0.3454091348804636, "learning_rate": 3.996304499794368e-05, "loss": 0.6186, "num_tokens": 523394456.0, "step": 681 }, { "epoch": 0.2498854996793991, "grad_norm": 0.288222540757643, "learning_rate": 3.99627708448365e-05, "loss": 0.6137, "num_tokens": 524240056.0, "step": 682 }, { "epoch": 0.250251900705322, "grad_norm": 0.2977470669440426, "learning_rate": 3.996249567962521e-05, "loss": 0.6036, "num_tokens": 525063073.0, "step": 683 }, { "epoch": 0.25061830173124483, "grad_norm": 0.2793675824834761, "learning_rate": 3.9962219502325334e-05, "loss": 0.6095, "num_tokens": 525816515.0, "step": 684 }, { "epoch": 0.2509847027571677, "grad_norm": 0.3632508464092818, "learning_rate": 3.996194231295244e-05, "loss": 0.6105, "num_tokens": 526607131.0, "step": 685 }, { "epoch": 0.2513511037830906, "grad_norm": 0.3402189106817329, "learning_rate": 3.996166411152213e-05, "loss": 0.6187, "num_tokens": 527368228.0, "step": 686 }, { "epoch": 0.25171750480901345, "grad_norm": 0.33729211349323474, "learning_rate": 3.9961384898050077e-05, "loss": 0.6412, "num_tokens": 528128442.0, "step": 687 }, { "epoch": 0.25208390583493634, "grad_norm": 0.3417690007614225, "learning_rate": 3.996110467255203e-05, "loss": 0.6684, "num_tokens": 528774359.0, "step": 688 }, { "epoch": 0.25245030686085923, "grad_norm": 0.27310052145879665, "learning_rate": 3.996082343504376e-05, "loss": 0.6049, "num_tokens": 529438617.0, "step": 689 }, { "epoch": 0.25281670788678207, "grad_norm": 0.32321886797169996, "learning_rate": 3.9960541185541134e-05, "loss": 0.6324, "num_tokens": 530252575.0, "step": 690 }, { "epoch": 0.25318310891270496, "grad_norm": 0.32232528571280533, "learning_rate": 3.996025792406003e-05, "loss": 0.596, "num_tokens": 531029821.0, "step": 691 }, { "epoch": 0.25354950993862785, "grad_norm": 0.32733630342311815, "learning_rate": 3.9959973650616425e-05, "loss": 0.6411, "num_tokens": 531757224.0, "step": 692 }, { "epoch": 0.2539159109645507, "grad_norm": 0.38692871901237663, "learning_rate": 3.995968836522634e-05, "loss": 0.6342, "num_tokens": 532594204.0, "step": 693 }, { "epoch": 0.2542823119904736, "grad_norm": 0.26940007351284323, "learning_rate": 3.995940206790583e-05, "loss": 0.6057, "num_tokens": 533342365.0, "step": 694 }, { "epoch": 0.25464871301639647, "grad_norm": 0.2818712596539372, "learning_rate": 3.9959114758671046e-05, "loss": 0.6037, "num_tokens": 534138972.0, "step": 695 }, { "epoch": 0.2550151140423193, "grad_norm": 0.3243127675146593, "learning_rate": 3.9958826437538174e-05, "loss": 0.6242, "num_tokens": 534902923.0, "step": 696 }, { "epoch": 0.2553815150682422, "grad_norm": 0.28374966501494103, "learning_rate": 3.995853710452344e-05, "loss": 0.6574, "num_tokens": 535526893.0, "step": 697 }, { "epoch": 0.2557479160941651, "grad_norm": 0.3089909248586471, "learning_rate": 3.995824675964317e-05, "loss": 0.6059, "num_tokens": 536373877.0, "step": 698 }, { "epoch": 0.2561143171200879, "grad_norm": 0.2451007931292034, "learning_rate": 3.9957955402913715e-05, "loss": 0.5911, "num_tokens": 537204703.0, "step": 699 }, { "epoch": 0.2564807181460108, "grad_norm": 0.23071432183745894, "learning_rate": 3.995766303435148e-05, "loss": 0.6267, "num_tokens": 537953865.0, "step": 700 }, { "epoch": 0.2568471191719337, "grad_norm": 0.24032122761043742, "learning_rate": 3.9957369653972956e-05, "loss": 0.5722, "num_tokens": 538740705.0, "step": 701 }, { "epoch": 0.25721352019785654, "grad_norm": 0.2845334740239729, "learning_rate": 3.995707526179466e-05, "loss": 0.5826, "num_tokens": 539471702.0, "step": 702 }, { "epoch": 0.25757992122377943, "grad_norm": 0.23409751511829963, "learning_rate": 3.995677985783319e-05, "loss": 0.5877, "num_tokens": 540283333.0, "step": 703 }, { "epoch": 0.2579463222497023, "grad_norm": 0.24151503194713808, "learning_rate": 3.9956483442105184e-05, "loss": 0.6344, "num_tokens": 540978646.0, "step": 704 }, { "epoch": 0.25831272327562516, "grad_norm": 0.25630716130510917, "learning_rate": 3.995618601462735e-05, "loss": 0.5964, "num_tokens": 541790850.0, "step": 705 }, { "epoch": 0.25867912430154805, "grad_norm": 0.24671834971204906, "learning_rate": 3.995588757541643e-05, "loss": 0.6265, "num_tokens": 542542006.0, "step": 706 }, { "epoch": 0.25904552532747094, "grad_norm": 0.2634252413485729, "learning_rate": 3.995558812448926e-05, "loss": 0.5911, "num_tokens": 543266632.0, "step": 707 }, { "epoch": 0.2594119263533938, "grad_norm": 0.2656061634798714, "learning_rate": 3.995528766186269e-05, "loss": 0.6126, "num_tokens": 544017317.0, "step": 708 }, { "epoch": 0.25977832737931666, "grad_norm": 0.24762152251241598, "learning_rate": 3.9954986187553675e-05, "loss": 0.6464, "num_tokens": 544665173.0, "step": 709 }, { "epoch": 0.26014472840523956, "grad_norm": 0.30941325324947033, "learning_rate": 3.995468370157919e-05, "loss": 0.6045, "num_tokens": 545455615.0, "step": 710 }, { "epoch": 0.2605111294311624, "grad_norm": 0.25367666161818037, "learning_rate": 3.995438020395627e-05, "loss": 0.6063, "num_tokens": 546322311.0, "step": 711 }, { "epoch": 0.2608775304570853, "grad_norm": 0.32116180935212985, "learning_rate": 3.995407569470202e-05, "loss": 0.6062, "num_tokens": 547107488.0, "step": 712 }, { "epoch": 0.2612439314830082, "grad_norm": 0.27896128644765794, "learning_rate": 3.995377017383361e-05, "loss": 0.604, "num_tokens": 547934715.0, "step": 713 }, { "epoch": 0.261610332508931, "grad_norm": 0.3928744269166012, "learning_rate": 3.995346364136824e-05, "loss": 0.6104, "num_tokens": 548727188.0, "step": 714 }, { "epoch": 0.2619767335348539, "grad_norm": 0.3468145306729865, "learning_rate": 3.9953156097323186e-05, "loss": 0.63, "num_tokens": 549426483.0, "step": 715 }, { "epoch": 0.2623431345607768, "grad_norm": 0.3249315577425661, "learning_rate": 3.9952847541715784e-05, "loss": 0.6557, "num_tokens": 550338557.0, "step": 716 }, { "epoch": 0.2627095355866996, "grad_norm": 0.37040269904441075, "learning_rate": 3.99525379745634e-05, "loss": 0.5958, "num_tokens": 551000076.0, "step": 717 }, { "epoch": 0.2630759366126225, "grad_norm": 0.2518618272733027, "learning_rate": 3.9952227395883495e-05, "loss": 0.6455, "num_tokens": 551685016.0, "step": 718 }, { "epoch": 0.2634423376385454, "grad_norm": 0.32813809339568123, "learning_rate": 3.995191580569356e-05, "loss": 0.5967, "num_tokens": 552575591.0, "step": 719 }, { "epoch": 0.26380873866446825, "grad_norm": 0.27001084647102475, "learning_rate": 3.995160320401116e-05, "loss": 0.608, "num_tokens": 553299042.0, "step": 720 }, { "epoch": 0.26417513969039114, "grad_norm": 0.23469225460199095, "learning_rate": 3.995128959085389e-05, "loss": 0.6045, "num_tokens": 554014031.0, "step": 721 }, { "epoch": 0.264541540716314, "grad_norm": 0.26442527305870894, "learning_rate": 3.995097496623944e-05, "loss": 0.6154, "num_tokens": 554701731.0, "step": 722 }, { "epoch": 0.26490794174223686, "grad_norm": 0.2600157020096633, "learning_rate": 3.995065933018552e-05, "loss": 0.6195, "num_tokens": 555468541.0, "step": 723 }, { "epoch": 0.26527434276815975, "grad_norm": 0.2529728508364249, "learning_rate": 3.9950342682709935e-05, "loss": 0.5954, "num_tokens": 556199664.0, "step": 724 }, { "epoch": 0.26564074379408265, "grad_norm": 0.27295138100643024, "learning_rate": 3.99500250238305e-05, "loss": 0.6096, "num_tokens": 557121219.0, "step": 725 }, { "epoch": 0.2660071448200055, "grad_norm": 0.24217275972226532, "learning_rate": 3.994970635356515e-05, "loss": 0.6195, "num_tokens": 557862484.0, "step": 726 }, { "epoch": 0.2663735458459284, "grad_norm": 0.30895913047127316, "learning_rate": 3.9949386671931805e-05, "loss": 0.6419, "num_tokens": 558598644.0, "step": 727 }, { "epoch": 0.26673994687185126, "grad_norm": 0.23912879001984375, "learning_rate": 3.9949065978948494e-05, "loss": 0.608, "num_tokens": 559374157.0, "step": 728 }, { "epoch": 0.2671063478977741, "grad_norm": 0.2907151620254269, "learning_rate": 3.994874427463328e-05, "loss": 0.6247, "num_tokens": 560131281.0, "step": 729 }, { "epoch": 0.267472748923697, "grad_norm": 0.2287475059190942, "learning_rate": 3.99484215590043e-05, "loss": 0.5998, "num_tokens": 560930129.0, "step": 730 }, { "epoch": 0.2678391499496199, "grad_norm": 0.3206837100169847, "learning_rate": 3.994809783207972e-05, "loss": 0.6033, "num_tokens": 561626638.0, "step": 731 }, { "epoch": 0.2682055509755427, "grad_norm": 0.24274411647063873, "learning_rate": 3.9947773093877803e-05, "loss": 0.5849, "num_tokens": 562322907.0, "step": 732 }, { "epoch": 0.2685719520014656, "grad_norm": 0.3412189523063377, "learning_rate": 3.994744734441682e-05, "loss": 0.59, "num_tokens": 563148426.0, "step": 733 }, { "epoch": 0.2689383530273885, "grad_norm": 0.3059844057541992, "learning_rate": 3.994712058371515e-05, "loss": 0.5855, "num_tokens": 563821427.0, "step": 734 }, { "epoch": 0.26930475405331133, "grad_norm": 0.32480603247991424, "learning_rate": 3.9946792811791186e-05, "loss": 0.6262, "num_tokens": 564438016.0, "step": 735 }, { "epoch": 0.2696711550792342, "grad_norm": 0.3322824910405804, "learning_rate": 3.99464640286634e-05, "loss": 0.613, "num_tokens": 565289407.0, "step": 736 }, { "epoch": 0.2700375561051571, "grad_norm": 0.2601681965163768, "learning_rate": 3.994613423435033e-05, "loss": 0.571, "num_tokens": 565995010.0, "step": 737 }, { "epoch": 0.27040395713107995, "grad_norm": 0.2807292159828199, "learning_rate": 3.9945803428870536e-05, "loss": 0.6022, "num_tokens": 566819099.0, "step": 738 }, { "epoch": 0.27077035815700284, "grad_norm": 0.2493463564168413, "learning_rate": 3.994547161224268e-05, "loss": 0.6041, "num_tokens": 567549985.0, "step": 739 }, { "epoch": 0.27113675918292574, "grad_norm": 0.31213796641550556, "learning_rate": 3.994513878448544e-05, "loss": 0.5884, "num_tokens": 568418176.0, "step": 740 }, { "epoch": 0.27150316020884857, "grad_norm": 0.3083016544210282, "learning_rate": 3.994480494561758e-05, "loss": 0.5892, "num_tokens": 569295093.0, "step": 741 }, { "epoch": 0.27186956123477146, "grad_norm": 0.31103657083898956, "learning_rate": 3.9944470095657906e-05, "loss": 0.5898, "num_tokens": 570024102.0, "step": 742 }, { "epoch": 0.27223596226069435, "grad_norm": 0.2210814018116027, "learning_rate": 3.994413423462529e-05, "loss": 0.5846, "num_tokens": 570829596.0, "step": 743 }, { "epoch": 0.2726023632866172, "grad_norm": 0.2548133576151883, "learning_rate": 3.994379736253864e-05, "loss": 0.6171, "num_tokens": 571634444.0, "step": 744 }, { "epoch": 0.2729687643125401, "grad_norm": 0.23472313862041178, "learning_rate": 3.9943459479416964e-05, "loss": 0.5989, "num_tokens": 572459975.0, "step": 745 }, { "epoch": 0.27333516533846297, "grad_norm": 0.24414461644791596, "learning_rate": 3.994312058527927e-05, "loss": 0.6145, "num_tokens": 573241996.0, "step": 746 }, { "epoch": 0.2737015663643858, "grad_norm": 0.26226764883856707, "learning_rate": 3.994278068014467e-05, "loss": 0.6043, "num_tokens": 574006560.0, "step": 747 }, { "epoch": 0.2740679673903087, "grad_norm": 0.26260641727293904, "learning_rate": 3.9942439764032316e-05, "loss": 0.6038, "num_tokens": 574750725.0, "step": 748 }, { "epoch": 0.2744343684162316, "grad_norm": 0.2833997985591882, "learning_rate": 3.994209783696141e-05, "loss": 0.597, "num_tokens": 575508927.0, "step": 749 }, { "epoch": 0.2748007694421544, "grad_norm": 0.23403140181343735, "learning_rate": 3.994175489895123e-05, "loss": 0.5866, "num_tokens": 576222291.0, "step": 750 }, { "epoch": 0.2751671704680773, "grad_norm": 0.2742465488338332, "learning_rate": 3.994141095002107e-05, "loss": 0.584, "num_tokens": 576869855.0, "step": 751 }, { "epoch": 0.2755335714940002, "grad_norm": 0.2884839234605789, "learning_rate": 3.9941065990190345e-05, "loss": 0.6221, "num_tokens": 577738480.0, "step": 752 }, { "epoch": 0.27589997251992304, "grad_norm": 0.24340687817616163, "learning_rate": 3.9940720019478475e-05, "loss": 0.5767, "num_tokens": 578477004.0, "step": 753 }, { "epoch": 0.27626637354584593, "grad_norm": 0.2751832884795527, "learning_rate": 3.9940373037904956e-05, "loss": 0.6245, "num_tokens": 579229678.0, "step": 754 }, { "epoch": 0.2766327745717688, "grad_norm": 0.3066885378014123, "learning_rate": 3.994002504548933e-05, "loss": 0.581, "num_tokens": 580000842.0, "step": 755 }, { "epoch": 0.27699917559769166, "grad_norm": 0.2754397230324491, "learning_rate": 3.993967604225122e-05, "loss": 0.649, "num_tokens": 580781162.0, "step": 756 }, { "epoch": 0.27736557662361455, "grad_norm": 0.23954987080911655, "learning_rate": 3.993932602821027e-05, "loss": 0.6016, "num_tokens": 581829635.0, "step": 757 }, { "epoch": 0.27773197764953744, "grad_norm": 0.24459870488274557, "learning_rate": 3.9938975003386224e-05, "loss": 0.6138, "num_tokens": 582698199.0, "step": 758 }, { "epoch": 0.2780983786754603, "grad_norm": 0.2501779302382527, "learning_rate": 3.9938622967798843e-05, "loss": 0.636, "num_tokens": 583532656.0, "step": 759 }, { "epoch": 0.27846477970138317, "grad_norm": 0.221740207928515, "learning_rate": 3.993826992146797e-05, "loss": 0.5926, "num_tokens": 584326028.0, "step": 760 }, { "epoch": 0.27883118072730606, "grad_norm": 0.27045280049695136, "learning_rate": 3.9937915864413505e-05, "loss": 0.6315, "num_tokens": 585053806.0, "step": 761 }, { "epoch": 0.2791975817532289, "grad_norm": 0.24546612346001642, "learning_rate": 3.993756079665538e-05, "loss": 0.6142, "num_tokens": 585841769.0, "step": 762 }, { "epoch": 0.2795639827791518, "grad_norm": 0.23833337124917822, "learning_rate": 3.9937204718213614e-05, "loss": 0.6083, "num_tokens": 586586460.0, "step": 763 }, { "epoch": 0.2799303838050747, "grad_norm": 0.23392218375932844, "learning_rate": 3.9936847629108265e-05, "loss": 0.6143, "num_tokens": 587387327.0, "step": 764 }, { "epoch": 0.2802967848309975, "grad_norm": 0.2219026540046904, "learning_rate": 3.993648952935945e-05, "loss": 0.5696, "num_tokens": 588141282.0, "step": 765 }, { "epoch": 0.2806631858569204, "grad_norm": 0.24822406531354813, "learning_rate": 3.993613041898735e-05, "loss": 0.6026, "num_tokens": 588905636.0, "step": 766 }, { "epoch": 0.2810295868828433, "grad_norm": 0.2221503792167683, "learning_rate": 3.99357702980122e-05, "loss": 0.5961, "num_tokens": 589669237.0, "step": 767 }, { "epoch": 0.28139598790876613, "grad_norm": 0.22584939562533057, "learning_rate": 3.993540916645429e-05, "loss": 0.5984, "num_tokens": 590558819.0, "step": 768 }, { "epoch": 0.281762388934689, "grad_norm": 0.241852386046701, "learning_rate": 3.993504702433397e-05, "loss": 0.6223, "num_tokens": 591305633.0, "step": 769 }, { "epoch": 0.2821287899606119, "grad_norm": 0.20259484398439054, "learning_rate": 3.993468387167164e-05, "loss": 0.5902, "num_tokens": 592046496.0, "step": 770 }, { "epoch": 0.28249519098653475, "grad_norm": 0.23171106093002547, "learning_rate": 3.993431970848776e-05, "loss": 0.6157, "num_tokens": 592863463.0, "step": 771 }, { "epoch": 0.28286159201245764, "grad_norm": 0.23270900694220048, "learning_rate": 3.9933954534802856e-05, "loss": 0.6187, "num_tokens": 593508082.0, "step": 772 }, { "epoch": 0.28322799303838053, "grad_norm": 0.26965450627481896, "learning_rate": 3.9933588350637495e-05, "loss": 0.6178, "num_tokens": 594398197.0, "step": 773 }, { "epoch": 0.28359439406430337, "grad_norm": 0.2521402751378749, "learning_rate": 3.993322115601232e-05, "loss": 0.6296, "num_tokens": 595192065.0, "step": 774 }, { "epoch": 0.28396079509022626, "grad_norm": 0.24453033053894477, "learning_rate": 3.9932852950948014e-05, "loss": 0.5816, "num_tokens": 595989564.0, "step": 775 }, { "epoch": 0.28432719611614915, "grad_norm": 0.2589162999077081, "learning_rate": 3.993248373546532e-05, "loss": 0.6507, "num_tokens": 596674996.0, "step": 776 }, { "epoch": 0.284693597142072, "grad_norm": 0.2562680872556944, "learning_rate": 3.993211350958505e-05, "loss": 0.6684, "num_tokens": 597365609.0, "step": 777 }, { "epoch": 0.2850599981679949, "grad_norm": 0.22460986197763824, "learning_rate": 3.993174227332805e-05, "loss": 0.5759, "num_tokens": 598179922.0, "step": 778 }, { "epoch": 0.28542639919391777, "grad_norm": 0.2363042681289311, "learning_rate": 3.9931370026715256e-05, "loss": 0.6116, "num_tokens": 598961818.0, "step": 779 }, { "epoch": 0.2857928002198406, "grad_norm": 0.2506215521022622, "learning_rate": 3.9930996769767635e-05, "loss": 0.6139, "num_tokens": 599743851.0, "step": 780 }, { "epoch": 0.2861592012457635, "grad_norm": 0.2341305071595156, "learning_rate": 3.9930622502506206e-05, "loss": 0.6517, "num_tokens": 600605632.0, "step": 781 }, { "epoch": 0.2865256022716864, "grad_norm": 0.27361114331182257, "learning_rate": 3.993024722495207e-05, "loss": 0.6199, "num_tokens": 601341681.0, "step": 782 }, { "epoch": 0.2868920032976092, "grad_norm": 0.22787439195214265, "learning_rate": 3.992987093712638e-05, "loss": 0.5638, "num_tokens": 602127299.0, "step": 783 }, { "epoch": 0.2872584043235321, "grad_norm": 0.2448412134738796, "learning_rate": 3.9929493639050314e-05, "loss": 0.6042, "num_tokens": 602818309.0, "step": 784 }, { "epoch": 0.287624805349455, "grad_norm": 0.24318990387769296, "learning_rate": 3.992911533074514e-05, "loss": 0.5635, "num_tokens": 603673194.0, "step": 785 }, { "epoch": 0.28799120637537784, "grad_norm": 0.25143111969361204, "learning_rate": 3.992873601223219e-05, "loss": 0.6404, "num_tokens": 604402851.0, "step": 786 }, { "epoch": 0.28835760740130073, "grad_norm": 0.25974092336108784, "learning_rate": 3.9928355683532806e-05, "loss": 0.6203, "num_tokens": 605148434.0, "step": 787 }, { "epoch": 0.2887240084272236, "grad_norm": 0.2475132534108261, "learning_rate": 3.992797434466845e-05, "loss": 0.6353, "num_tokens": 605870432.0, "step": 788 }, { "epoch": 0.28909040945314646, "grad_norm": 0.24091235986732973, "learning_rate": 3.992759199566058e-05, "loss": 0.6159, "num_tokens": 606683495.0, "step": 789 }, { "epoch": 0.28945681047906935, "grad_norm": 0.25538522350336385, "learning_rate": 3.992720863653076e-05, "loss": 0.6105, "num_tokens": 607556758.0, "step": 790 }, { "epoch": 0.28982321150499224, "grad_norm": 0.23241159083477206, "learning_rate": 3.9926824267300584e-05, "loss": 0.5875, "num_tokens": 608445339.0, "step": 791 }, { "epoch": 0.2901896125309151, "grad_norm": 0.23454340986432293, "learning_rate": 3.99264388879917e-05, "loss": 0.5994, "num_tokens": 609138894.0, "step": 792 }, { "epoch": 0.29055601355683797, "grad_norm": 0.23656150447866428, "learning_rate": 3.992605249862584e-05, "loss": 0.5858, "num_tokens": 609982036.0, "step": 793 }, { "epoch": 0.29092241458276086, "grad_norm": 0.2504514851392762, "learning_rate": 3.992566509922475e-05, "loss": 0.6334, "num_tokens": 610839805.0, "step": 794 }, { "epoch": 0.2912888156086837, "grad_norm": 0.25789885619365666, "learning_rate": 3.992527668981028e-05, "loss": 0.6209, "num_tokens": 611577751.0, "step": 795 }, { "epoch": 0.2916552166346066, "grad_norm": 0.2634939881508197, "learning_rate": 3.992488727040431e-05, "loss": 0.5833, "num_tokens": 612352483.0, "step": 796 }, { "epoch": 0.2920216176605295, "grad_norm": 0.25821546238350573, "learning_rate": 3.9924496841028774e-05, "loss": 0.628, "num_tokens": 613061770.0, "step": 797 }, { "epoch": 0.2923880186864523, "grad_norm": 0.2537179905921837, "learning_rate": 3.992410540170568e-05, "loss": 0.6115, "num_tokens": 613749585.0, "step": 798 }, { "epoch": 0.2927544197123752, "grad_norm": 0.2405101635406298, "learning_rate": 3.992371295245707e-05, "loss": 0.6414, "num_tokens": 614481444.0, "step": 799 }, { "epoch": 0.2931208207382981, "grad_norm": 0.28453462613369906, "learning_rate": 3.992331949330507e-05, "loss": 0.5732, "num_tokens": 615183085.0, "step": 800 }, { "epoch": 0.29348722176422093, "grad_norm": 0.20812754494436192, "learning_rate": 3.992292502427185e-05, "loss": 0.5974, "num_tokens": 616000021.0, "step": 801 }, { "epoch": 0.2938536227901438, "grad_norm": 0.34731910830387563, "learning_rate": 3.992252954537962e-05, "loss": 0.6219, "num_tokens": 616744999.0, "step": 802 }, { "epoch": 0.2942200238160667, "grad_norm": 0.26144441404378294, "learning_rate": 3.992213305665068e-05, "loss": 0.5841, "num_tokens": 617436663.0, "step": 803 }, { "epoch": 0.29458642484198955, "grad_norm": 0.3410366500245713, "learning_rate": 3.992173555810736e-05, "loss": 0.5851, "num_tokens": 618316860.0, "step": 804 }, { "epoch": 0.29495282586791244, "grad_norm": 0.36326574465792966, "learning_rate": 3.9921337049772064e-05, "loss": 0.6921, "num_tokens": 619020082.0, "step": 805 }, { "epoch": 0.29531922689383533, "grad_norm": 0.25373220451845657, "learning_rate": 3.992093753166724e-05, "loss": 0.6016, "num_tokens": 619697111.0, "step": 806 }, { "epoch": 0.29568562791975816, "grad_norm": 0.2634659061922754, "learning_rate": 3.99205370038154e-05, "loss": 0.629, "num_tokens": 620459244.0, "step": 807 }, { "epoch": 0.29605202894568106, "grad_norm": 0.25263508916437605, "learning_rate": 3.992013546623912e-05, "loss": 0.5788, "num_tokens": 621292654.0, "step": 808 }, { "epoch": 0.29641842997160395, "grad_norm": 0.22125801024180528, "learning_rate": 3.991973291896101e-05, "loss": 0.6129, "num_tokens": 622118617.0, "step": 809 }, { "epoch": 0.2967848309975268, "grad_norm": 0.23028945554424593, "learning_rate": 3.991932936200376e-05, "loss": 0.6132, "num_tokens": 623006876.0, "step": 810 }, { "epoch": 0.2971512320234497, "grad_norm": 0.24899164156629114, "learning_rate": 3.991892479539011e-05, "loss": 0.6126, "num_tokens": 623714655.0, "step": 811 }, { "epoch": 0.29751763304937257, "grad_norm": 0.24784911005310234, "learning_rate": 3.9918519219142846e-05, "loss": 0.6029, "num_tokens": 624491232.0, "step": 812 }, { "epoch": 0.2978840340752954, "grad_norm": 0.2787575682519307, "learning_rate": 3.991811263328482e-05, "loss": 0.6113, "num_tokens": 625304820.0, "step": 813 }, { "epoch": 0.2982504351012183, "grad_norm": 0.264425482725935, "learning_rate": 3.991770503783895e-05, "loss": 0.5838, "num_tokens": 626101256.0, "step": 814 }, { "epoch": 0.2986168361271412, "grad_norm": 0.24926043120857053, "learning_rate": 3.9917296432828206e-05, "loss": 0.58, "num_tokens": 626878196.0, "step": 815 }, { "epoch": 0.298983237153064, "grad_norm": 0.267079242899069, "learning_rate": 3.99168868182756e-05, "loss": 0.6128, "num_tokens": 627795960.0, "step": 816 }, { "epoch": 0.2993496381789869, "grad_norm": 0.24519798407940627, "learning_rate": 3.991647619420421e-05, "loss": 0.5908, "num_tokens": 628560195.0, "step": 817 }, { "epoch": 0.2997160392049098, "grad_norm": 0.2470492253450686, "learning_rate": 3.991606456063718e-05, "loss": 0.5974, "num_tokens": 629433046.0, "step": 818 }, { "epoch": 0.30008244023083264, "grad_norm": 0.3082788689426672, "learning_rate": 3.9915651917597704e-05, "loss": 0.6003, "num_tokens": 630311869.0, "step": 819 }, { "epoch": 0.30044884125675553, "grad_norm": 0.2309719106974586, "learning_rate": 3.9915238265109025e-05, "loss": 0.5886, "num_tokens": 631115296.0, "step": 820 }, { "epoch": 0.3008152422826784, "grad_norm": 0.2601596395327584, "learning_rate": 3.991482360319445e-05, "loss": 0.6014, "num_tokens": 631874697.0, "step": 821 }, { "epoch": 0.30118164330860125, "grad_norm": 0.23885235337704477, "learning_rate": 3.991440793187735e-05, "loss": 0.6207, "num_tokens": 632657444.0, "step": 822 }, { "epoch": 0.30154804433452415, "grad_norm": 0.2660952043421024, "learning_rate": 3.991399125118114e-05, "loss": 0.6259, "num_tokens": 633343644.0, "step": 823 }, { "epoch": 0.30191444536044704, "grad_norm": 0.23527480124602432, "learning_rate": 3.99135735611293e-05, "loss": 0.6283, "num_tokens": 634126676.0, "step": 824 }, { "epoch": 0.3022808463863699, "grad_norm": 0.24742756206080022, "learning_rate": 3.9913154861745364e-05, "loss": 0.583, "num_tokens": 634975556.0, "step": 825 }, { "epoch": 0.30264724741229276, "grad_norm": 0.24778892135860525, "learning_rate": 3.991273515305292e-05, "loss": 0.5862, "num_tokens": 635699721.0, "step": 826 }, { "epoch": 0.30301364843821565, "grad_norm": 0.25878767732451846, "learning_rate": 3.9912314435075626e-05, "loss": 0.6058, "num_tokens": 636428688.0, "step": 827 }, { "epoch": 0.3033800494641385, "grad_norm": 0.2301718470297621, "learning_rate": 3.991189270783718e-05, "loss": 0.6126, "num_tokens": 637183482.0, "step": 828 }, { "epoch": 0.3037464504900614, "grad_norm": 0.23686692122447206, "learning_rate": 3.991146997136135e-05, "loss": 0.5799, "num_tokens": 638023674.0, "step": 829 }, { "epoch": 0.3041128515159842, "grad_norm": 0.25817254769211095, "learning_rate": 3.991104622567194e-05, "loss": 0.6396, "num_tokens": 638741415.0, "step": 830 }, { "epoch": 0.3044792525419071, "grad_norm": 0.2833303419967583, "learning_rate": 3.991062147079285e-05, "loss": 0.6117, "num_tokens": 639490124.0, "step": 831 }, { "epoch": 0.30484565356783, "grad_norm": 0.22828306229783146, "learning_rate": 3.991019570674799e-05, "loss": 0.584, "num_tokens": 640357185.0, "step": 832 }, { "epoch": 0.30521205459375284, "grad_norm": 0.2697756116521704, "learning_rate": 3.9909768933561355e-05, "loss": 0.6244, "num_tokens": 641031248.0, "step": 833 }, { "epoch": 0.3055784556196757, "grad_norm": 0.24654777540807424, "learning_rate": 3.9909341151257e-05, "loss": 0.5913, "num_tokens": 641828914.0, "step": 834 }, { "epoch": 0.3059448566455986, "grad_norm": 0.2730125235952297, "learning_rate": 3.990891235985902e-05, "loss": 0.6206, "num_tokens": 642691022.0, "step": 835 }, { "epoch": 0.30631125767152145, "grad_norm": 0.2816434885186958, "learning_rate": 3.990848255939157e-05, "loss": 0.5883, "num_tokens": 643511555.0, "step": 836 }, { "epoch": 0.30667765869744434, "grad_norm": 0.21749555997905717, "learning_rate": 3.990805174987888e-05, "loss": 0.5812, "num_tokens": 644200128.0, "step": 837 }, { "epoch": 0.30704405972336724, "grad_norm": 0.2521152081001264, "learning_rate": 3.990761993134523e-05, "loss": 0.5873, "num_tokens": 644976290.0, "step": 838 }, { "epoch": 0.30741046074929007, "grad_norm": 0.27627119881886164, "learning_rate": 3.9907187103814934e-05, "loss": 0.607, "num_tokens": 645701413.0, "step": 839 }, { "epoch": 0.30777686177521296, "grad_norm": 0.22413204139898227, "learning_rate": 3.990675326731238e-05, "loss": 0.642, "num_tokens": 646413602.0, "step": 840 }, { "epoch": 0.30814326280113585, "grad_norm": 0.2364188290278586, "learning_rate": 3.990631842186201e-05, "loss": 0.6082, "num_tokens": 647227954.0, "step": 841 }, { "epoch": 0.3085096638270587, "grad_norm": 0.22727298332175866, "learning_rate": 3.990588256748834e-05, "loss": 0.5812, "num_tokens": 647923042.0, "step": 842 }, { "epoch": 0.3088760648529816, "grad_norm": 0.22744297600975447, "learning_rate": 3.990544570421592e-05, "loss": 0.5947, "num_tokens": 648643284.0, "step": 843 }, { "epoch": 0.30924246587890447, "grad_norm": 0.22954831205085877, "learning_rate": 3.990500783206936e-05, "loss": 0.5837, "num_tokens": 649439781.0, "step": 844 }, { "epoch": 0.3096088669048273, "grad_norm": 0.2531659846077986, "learning_rate": 3.9904568951073337e-05, "loss": 0.608, "num_tokens": 650209719.0, "step": 845 }, { "epoch": 0.3099752679307502, "grad_norm": 0.24537462620318978, "learning_rate": 3.990412906125259e-05, "loss": 0.6096, "num_tokens": 651038392.0, "step": 846 }, { "epoch": 0.3103416689566731, "grad_norm": 0.22413501687046064, "learning_rate": 3.990368816263188e-05, "loss": 0.6071, "num_tokens": 651954398.0, "step": 847 }, { "epoch": 0.3107080699825959, "grad_norm": 0.20331590359221466, "learning_rate": 3.9903246255236066e-05, "loss": 0.5988, "num_tokens": 652678320.0, "step": 848 }, { "epoch": 0.3110744710085188, "grad_norm": 0.25865607327755796, "learning_rate": 3.990280333909005e-05, "loss": 0.5758, "num_tokens": 653434857.0, "step": 849 }, { "epoch": 0.3114408720344417, "grad_norm": 0.21127209415454407, "learning_rate": 3.9902359414218776e-05, "loss": 0.5857, "num_tokens": 654200785.0, "step": 850 }, { "epoch": 0.31180727306036454, "grad_norm": 0.2911309454101145, "learning_rate": 3.9901914480647264e-05, "loss": 0.5791, "num_tokens": 654948734.0, "step": 851 }, { "epoch": 0.31217367408628743, "grad_norm": 0.284041167247904, "learning_rate": 3.9901468538400586e-05, "loss": 0.5546, "num_tokens": 655797818.0, "step": 852 }, { "epoch": 0.3125400751122103, "grad_norm": 0.2543855189693308, "learning_rate": 3.9901021587503855e-05, "loss": 0.6112, "num_tokens": 656595781.0, "step": 853 }, { "epoch": 0.31290647613813316, "grad_norm": 0.3010099088021421, "learning_rate": 3.990057362798227e-05, "loss": 0.6056, "num_tokens": 657272294.0, "step": 854 }, { "epoch": 0.31327287716405605, "grad_norm": 0.2383683017512704, "learning_rate": 3.990012465986106e-05, "loss": 0.6217, "num_tokens": 657967493.0, "step": 855 }, { "epoch": 0.31363927818997894, "grad_norm": 0.2779892402747015, "learning_rate": 3.9899674683165536e-05, "loss": 0.604, "num_tokens": 658708025.0, "step": 856 }, { "epoch": 0.3140056792159018, "grad_norm": 0.2767798375546966, "learning_rate": 3.989922369792103e-05, "loss": 0.654, "num_tokens": 659372687.0, "step": 857 }, { "epoch": 0.31437208024182467, "grad_norm": 0.29265013057518574, "learning_rate": 3.989877170415297e-05, "loss": 0.6033, "num_tokens": 660141524.0, "step": 858 }, { "epoch": 0.31473848126774756, "grad_norm": 0.2907355984960831, "learning_rate": 3.9898318701886824e-05, "loss": 0.6119, "num_tokens": 660965058.0, "step": 859 }, { "epoch": 0.3151048822936704, "grad_norm": 0.27025745784538435, "learning_rate": 3.9897864691148105e-05, "loss": 0.6399, "num_tokens": 661748591.0, "step": 860 }, { "epoch": 0.3154712833195933, "grad_norm": 0.20347200594145443, "learning_rate": 3.98974096719624e-05, "loss": 0.5558, "num_tokens": 662529114.0, "step": 861 }, { "epoch": 0.3158376843455162, "grad_norm": 0.2431655140448032, "learning_rate": 3.989695364435534e-05, "loss": 0.6089, "num_tokens": 663245959.0, "step": 862 }, { "epoch": 0.316204085371439, "grad_norm": 0.2211587897780392, "learning_rate": 3.989649660835263e-05, "loss": 0.5827, "num_tokens": 664039596.0, "step": 863 }, { "epoch": 0.3165704863973619, "grad_norm": 0.21058791426350523, "learning_rate": 3.989603856398003e-05, "loss": 0.591, "num_tokens": 664890505.0, "step": 864 }, { "epoch": 0.3169368874232848, "grad_norm": 0.23211971023519098, "learning_rate": 3.989557951126332e-05, "loss": 0.6264, "num_tokens": 665601682.0, "step": 865 }, { "epoch": 0.31730328844920763, "grad_norm": 0.2258409594536177, "learning_rate": 3.9895119450228385e-05, "loss": 0.6307, "num_tokens": 666394601.0, "step": 866 }, { "epoch": 0.3176696894751305, "grad_norm": 0.27051891401741895, "learning_rate": 3.989465838090114e-05, "loss": 0.6118, "num_tokens": 667082049.0, "step": 867 }, { "epoch": 0.3180360905010534, "grad_norm": 0.24589930934711968, "learning_rate": 3.9894196303307574e-05, "loss": 0.5969, "num_tokens": 667812013.0, "step": 868 }, { "epoch": 0.31840249152697625, "grad_norm": 0.23275855280027488, "learning_rate": 3.989373321747372e-05, "loss": 0.5903, "num_tokens": 668575367.0, "step": 869 }, { "epoch": 0.31876889255289914, "grad_norm": 0.28190321336439483, "learning_rate": 3.989326912342566e-05, "loss": 0.6018, "num_tokens": 669352736.0, "step": 870 }, { "epoch": 0.31913529357882203, "grad_norm": 0.22448155151696325, "learning_rate": 3.989280402118954e-05, "loss": 0.5645, "num_tokens": 670141112.0, "step": 871 }, { "epoch": 0.31950169460474487, "grad_norm": 0.23018433299453628, "learning_rate": 3.989233791079158e-05, "loss": 0.5932, "num_tokens": 670853622.0, "step": 872 }, { "epoch": 0.31986809563066776, "grad_norm": 0.2478104903196724, "learning_rate": 3.989187079225804e-05, "loss": 0.5974, "num_tokens": 671622570.0, "step": 873 }, { "epoch": 0.32023449665659065, "grad_norm": 0.2354350090810957, "learning_rate": 3.9891402665615235e-05, "loss": 0.6475, "num_tokens": 672402328.0, "step": 874 }, { "epoch": 0.3206008976825135, "grad_norm": 0.27506674343784565, "learning_rate": 3.989093353088954e-05, "loss": 0.5955, "num_tokens": 673272934.0, "step": 875 }, { "epoch": 0.3209672987084364, "grad_norm": 0.2839031894039053, "learning_rate": 3.989046338810739e-05, "loss": 0.6314, "num_tokens": 673988084.0, "step": 876 }, { "epoch": 0.32133369973435927, "grad_norm": 0.1975230406255795, "learning_rate": 3.988999223729528e-05, "loss": 0.5771, "num_tokens": 674810188.0, "step": 877 }, { "epoch": 0.3217001007602821, "grad_norm": 0.24775829459653848, "learning_rate": 3.988952007847976e-05, "loss": 0.5784, "num_tokens": 675643713.0, "step": 878 }, { "epoch": 0.322066501786205, "grad_norm": 0.25057317502697296, "learning_rate": 3.988904691168742e-05, "loss": 0.6186, "num_tokens": 676290265.0, "step": 879 }, { "epoch": 0.3224329028121279, "grad_norm": 0.24846920408273135, "learning_rate": 3.988857273694493e-05, "loss": 0.6441, "num_tokens": 677076563.0, "step": 880 }, { "epoch": 0.3227993038380507, "grad_norm": 0.26062051460724217, "learning_rate": 3.9888097554279e-05, "loss": 0.6198, "num_tokens": 677919124.0, "step": 881 }, { "epoch": 0.3231657048639736, "grad_norm": 0.27504701015821154, "learning_rate": 3.988762136371641e-05, "loss": 0.6446, "num_tokens": 678651647.0, "step": 882 }, { "epoch": 0.3235321058898965, "grad_norm": 0.2869838681584641, "learning_rate": 3.988714416528399e-05, "loss": 0.607, "num_tokens": 679434419.0, "step": 883 }, { "epoch": 0.32389850691581934, "grad_norm": 0.2348510241980997, "learning_rate": 3.988666595900863e-05, "loss": 0.6123, "num_tokens": 680199861.0, "step": 884 }, { "epoch": 0.32426490794174223, "grad_norm": 0.21389688405970397, "learning_rate": 3.988618674491727e-05, "loss": 0.6025, "num_tokens": 680890273.0, "step": 885 }, { "epoch": 0.3246313089676651, "grad_norm": 0.22540040348017149, "learning_rate": 3.988570652303691e-05, "loss": 0.5898, "num_tokens": 681757662.0, "step": 886 }, { "epoch": 0.32499770999358796, "grad_norm": 0.21328821962631309, "learning_rate": 3.9885225293394605e-05, "loss": 0.5827, "num_tokens": 682620639.0, "step": 887 }, { "epoch": 0.32536411101951085, "grad_norm": 0.24972903373747984, "learning_rate": 3.9884743056017486e-05, "loss": 0.6066, "num_tokens": 683326006.0, "step": 888 }, { "epoch": 0.32573051204543374, "grad_norm": 0.28053591381806736, "learning_rate": 3.98842598109327e-05, "loss": 0.6469, "num_tokens": 683928932.0, "step": 889 }, { "epoch": 0.3260969130713566, "grad_norm": 0.22471882144443006, "learning_rate": 3.98837755581675e-05, "loss": 0.623, "num_tokens": 684691131.0, "step": 890 }, { "epoch": 0.32646331409727947, "grad_norm": 0.23664604732981354, "learning_rate": 3.988329029774915e-05, "loss": 0.6052, "num_tokens": 685465002.0, "step": 891 }, { "epoch": 0.32682971512320236, "grad_norm": 0.27170303048488753, "learning_rate": 3.9882804029705006e-05, "loss": 0.6169, "num_tokens": 686188079.0, "step": 892 }, { "epoch": 0.3271961161491252, "grad_norm": 0.25248309177682954, "learning_rate": 3.9882316754062455e-05, "loss": 0.5954, "num_tokens": 687041865.0, "step": 893 }, { "epoch": 0.3275625171750481, "grad_norm": 0.3060981808215392, "learning_rate": 3.988182847084897e-05, "loss": 0.6072, "num_tokens": 687789632.0, "step": 894 }, { "epoch": 0.327928918200971, "grad_norm": 0.32850210341708896, "learning_rate": 3.9881339180092046e-05, "loss": 0.6117, "num_tokens": 688568041.0, "step": 895 }, { "epoch": 0.3282953192268938, "grad_norm": 0.3131424255787757, "learning_rate": 3.988084888181925e-05, "loss": 0.6196, "num_tokens": 689291972.0, "step": 896 }, { "epoch": 0.3286617202528167, "grad_norm": 0.2771361601481613, "learning_rate": 3.988035757605823e-05, "loss": 0.5964, "num_tokens": 689983392.0, "step": 897 }, { "epoch": 0.3290281212787396, "grad_norm": 0.38191269008417217, "learning_rate": 3.987986526283665e-05, "loss": 0.601, "num_tokens": 690749791.0, "step": 898 }, { "epoch": 0.32939452230466243, "grad_norm": 0.2617877786464947, "learning_rate": 3.987937194218224e-05, "loss": 0.6077, "num_tokens": 691505824.0, "step": 899 }, { "epoch": 0.3297609233305853, "grad_norm": 0.28838095779874057, "learning_rate": 3.9878877614122826e-05, "loss": 0.5778, "num_tokens": 692253738.0, "step": 900 }, { "epoch": 0.3301273243565082, "grad_norm": 0.29788742692820863, "learning_rate": 3.987838227868624e-05, "loss": 0.6045, "num_tokens": 693034493.0, "step": 901 }, { "epoch": 0.33049372538243105, "grad_norm": 0.26266641721002554, "learning_rate": 3.987788593590039e-05, "loss": 0.5739, "num_tokens": 693863754.0, "step": 902 }, { "epoch": 0.33086012640835394, "grad_norm": 0.25492164301136505, "learning_rate": 3.987738858579324e-05, "loss": 0.6482, "num_tokens": 694530992.0, "step": 903 }, { "epoch": 0.33122652743427683, "grad_norm": 0.23461527872910076, "learning_rate": 3.9876890228392836e-05, "loss": 0.6105, "num_tokens": 695360397.0, "step": 904 }, { "epoch": 0.33159292846019967, "grad_norm": 0.2531729480371314, "learning_rate": 3.987639086372723e-05, "loss": 0.6161, "num_tokens": 696087747.0, "step": 905 }, { "epoch": 0.33195932948612256, "grad_norm": 0.2695596788984482, "learning_rate": 3.987589049182458e-05, "loss": 0.593, "num_tokens": 696793024.0, "step": 906 }, { "epoch": 0.33232573051204545, "grad_norm": 0.2745870563301569, "learning_rate": 3.9875389112713066e-05, "loss": 0.6236, "num_tokens": 697481074.0, "step": 907 }, { "epoch": 0.3326921315379683, "grad_norm": 0.2264758619870033, "learning_rate": 3.987488672642094e-05, "loss": 0.5861, "num_tokens": 698258695.0, "step": 908 }, { "epoch": 0.3330585325638912, "grad_norm": 0.2549808913309669, "learning_rate": 3.987438333297651e-05, "loss": 0.6004, "num_tokens": 699106342.0, "step": 909 }, { "epoch": 0.33342493358981407, "grad_norm": 0.2803712935280834, "learning_rate": 3.987387893240815e-05, "loss": 0.5782, "num_tokens": 699907478.0, "step": 910 }, { "epoch": 0.3337913346157369, "grad_norm": 0.2650941936788769, "learning_rate": 3.987337352474425e-05, "loss": 0.6177, "num_tokens": 700620782.0, "step": 911 }, { "epoch": 0.3341577356416598, "grad_norm": 0.29541027474405757, "learning_rate": 3.987286711001332e-05, "loss": 0.5978, "num_tokens": 701359661.0, "step": 912 }, { "epoch": 0.3345241366675827, "grad_norm": 0.24801666802292696, "learning_rate": 3.9872359688243884e-05, "loss": 0.5797, "num_tokens": 702118869.0, "step": 913 }, { "epoch": 0.3348905376935055, "grad_norm": 0.20066203222248294, "learning_rate": 3.987185125946452e-05, "loss": 0.562, "num_tokens": 702811864.0, "step": 914 }, { "epoch": 0.3352569387194284, "grad_norm": 0.28110314957377674, "learning_rate": 3.987134182370389e-05, "loss": 0.5852, "num_tokens": 703600469.0, "step": 915 }, { "epoch": 0.3356233397453513, "grad_norm": 0.21736192795441345, "learning_rate": 3.987083138099069e-05, "loss": 0.6139, "num_tokens": 704343717.0, "step": 916 }, { "epoch": 0.33598974077127414, "grad_norm": 0.268195695480008, "learning_rate": 3.987031993135368e-05, "loss": 0.6008, "num_tokens": 705102016.0, "step": 917 }, { "epoch": 0.33635614179719703, "grad_norm": 0.2880790660266078, "learning_rate": 3.986980747482169e-05, "loss": 0.5924, "num_tokens": 705802475.0, "step": 918 }, { "epoch": 0.3367225428231199, "grad_norm": 0.23446355597833338, "learning_rate": 3.986929401142357e-05, "loss": 0.6038, "num_tokens": 706584205.0, "step": 919 }, { "epoch": 0.33708894384904275, "grad_norm": 0.22232831250168675, "learning_rate": 3.986877954118828e-05, "loss": 0.5797, "num_tokens": 707463669.0, "step": 920 }, { "epoch": 0.33745534487496565, "grad_norm": 0.2450054711295083, "learning_rate": 3.986826406414479e-05, "loss": 0.5719, "num_tokens": 708310211.0, "step": 921 }, { "epoch": 0.33782174590088854, "grad_norm": 0.20740817801210723, "learning_rate": 3.986774758032214e-05, "loss": 0.5897, "num_tokens": 709116262.0, "step": 922 }, { "epoch": 0.3381881469268114, "grad_norm": 0.29858608699290096, "learning_rate": 3.986723008974944e-05, "loss": 0.5919, "num_tokens": 709830493.0, "step": 923 }, { "epoch": 0.33855454795273426, "grad_norm": 0.230183422542756, "learning_rate": 3.986671159245585e-05, "loss": 0.5859, "num_tokens": 710541312.0, "step": 924 }, { "epoch": 0.33892094897865715, "grad_norm": 0.3063338356644502, "learning_rate": 3.9866192088470585e-05, "loss": 0.6462, "num_tokens": 711362924.0, "step": 925 }, { "epoch": 0.33928735000458, "grad_norm": 0.26930178940283844, "learning_rate": 3.98656715778229e-05, "loss": 0.612, "num_tokens": 712120987.0, "step": 926 }, { "epoch": 0.3396537510305029, "grad_norm": 0.3455687779153102, "learning_rate": 3.986515006054214e-05, "loss": 0.6735, "num_tokens": 712756304.0, "step": 927 }, { "epoch": 0.3400201520564258, "grad_norm": 0.2602696980754593, "learning_rate": 3.9864627536657685e-05, "loss": 0.6265, "num_tokens": 713446098.0, "step": 928 }, { "epoch": 0.3403865530823486, "grad_norm": 0.25767144090275373, "learning_rate": 3.9864104006198974e-05, "loss": 0.6147, "num_tokens": 714284353.0, "step": 929 }, { "epoch": 0.3407529541082715, "grad_norm": 0.23894875897039522, "learning_rate": 3.986357946919551e-05, "loss": 0.5866, "num_tokens": 715176844.0, "step": 930 }, { "epoch": 0.3411193551341944, "grad_norm": 0.23395425316895213, "learning_rate": 3.9863053925676845e-05, "loss": 0.6118, "num_tokens": 715903812.0, "step": 931 }, { "epoch": 0.3414857561601172, "grad_norm": 0.30593427442322785, "learning_rate": 3.9862527375672584e-05, "loss": 0.6206, "num_tokens": 716563285.0, "step": 932 }, { "epoch": 0.3418521571860401, "grad_norm": 0.2426012025325804, "learning_rate": 3.9861999819212404e-05, "loss": 0.604, "num_tokens": 717361814.0, "step": 933 }, { "epoch": 0.342218558211963, "grad_norm": 0.23219799405574104, "learning_rate": 3.986147125632603e-05, "loss": 0.5765, "num_tokens": 718111243.0, "step": 934 }, { "epoch": 0.34258495923788584, "grad_norm": 0.2237711670759951, "learning_rate": 3.9860941687043245e-05, "loss": 0.599, "num_tokens": 718893356.0, "step": 935 }, { "epoch": 0.34295136026380874, "grad_norm": 0.2303176116147489, "learning_rate": 3.986041111139387e-05, "loss": 0.585, "num_tokens": 719696521.0, "step": 936 }, { "epoch": 0.3433177612897316, "grad_norm": 0.23173964013856, "learning_rate": 3.985987952940782e-05, "loss": 0.6203, "num_tokens": 720480552.0, "step": 937 }, { "epoch": 0.34368416231565446, "grad_norm": 0.279234869261717, "learning_rate": 3.9859346941115044e-05, "loss": 0.6144, "num_tokens": 721259022.0, "step": 938 }, { "epoch": 0.34405056334157735, "grad_norm": 0.26841152123770595, "learning_rate": 3.985881334654554e-05, "loss": 0.6249, "num_tokens": 722021385.0, "step": 939 }, { "epoch": 0.34441696436750024, "grad_norm": 0.2550253187454842, "learning_rate": 3.985827874572939e-05, "loss": 0.5965, "num_tokens": 722727697.0, "step": 940 }, { "epoch": 0.3447833653934231, "grad_norm": 0.22165224277722798, "learning_rate": 3.985774313869669e-05, "loss": 0.636, "num_tokens": 723460600.0, "step": 941 }, { "epoch": 0.34514976641934597, "grad_norm": 0.2625160037713498, "learning_rate": 3.985720652547764e-05, "loss": 0.6036, "num_tokens": 724235048.0, "step": 942 }, { "epoch": 0.34551616744526886, "grad_norm": 0.29516125575642016, "learning_rate": 3.985666890610248e-05, "loss": 0.6191, "num_tokens": 724897592.0, "step": 943 }, { "epoch": 0.3458825684711917, "grad_norm": 0.26020811897981605, "learning_rate": 3.985613028060148e-05, "loss": 0.6261, "num_tokens": 725682485.0, "step": 944 }, { "epoch": 0.3462489694971146, "grad_norm": 0.35470661490867866, "learning_rate": 3.9855590649005004e-05, "loss": 0.6547, "num_tokens": 726325360.0, "step": 945 }, { "epoch": 0.3466153705230375, "grad_norm": 0.2898624567875572, "learning_rate": 3.9855050011343445e-05, "loss": 0.631, "num_tokens": 727072577.0, "step": 946 }, { "epoch": 0.3469817715489603, "grad_norm": 0.3256805441561629, "learning_rate": 3.985450836764728e-05, "loss": 0.6165, "num_tokens": 727832316.0, "step": 947 }, { "epoch": 0.3473481725748832, "grad_norm": 0.2942189993082234, "learning_rate": 3.985396571794703e-05, "loss": 0.6023, "num_tokens": 728525535.0, "step": 948 }, { "epoch": 0.3477145736008061, "grad_norm": 0.28939493491097856, "learning_rate": 3.985342206227324e-05, "loss": 0.5681, "num_tokens": 729265548.0, "step": 949 }, { "epoch": 0.34808097462672893, "grad_norm": 0.33090603754873493, "learning_rate": 3.985287740065658e-05, "loss": 0.601, "num_tokens": 729977157.0, "step": 950 }, { "epoch": 0.3484473756526518, "grad_norm": 0.26502488400532087, "learning_rate": 3.985233173312771e-05, "loss": 0.6087, "num_tokens": 730745845.0, "step": 951 }, { "epoch": 0.3488137766785747, "grad_norm": 0.2614265982814813, "learning_rate": 3.98517850597174e-05, "loss": 0.5777, "num_tokens": 731502376.0, "step": 952 }, { "epoch": 0.34918017770449755, "grad_norm": 0.23601361500913426, "learning_rate": 3.9851237380456426e-05, "loss": 0.6084, "num_tokens": 732334840.0, "step": 953 }, { "epoch": 0.34954657873042044, "grad_norm": 0.21249995237634003, "learning_rate": 3.985068869537567e-05, "loss": 0.5931, "num_tokens": 733080256.0, "step": 954 }, { "epoch": 0.34991297975634333, "grad_norm": 0.2416453131837621, "learning_rate": 3.9850139004506025e-05, "loss": 0.5897, "num_tokens": 733942623.0, "step": 955 }, { "epoch": 0.35027938078226617, "grad_norm": 0.22406445071014683, "learning_rate": 3.984958830787849e-05, "loss": 0.6363, "num_tokens": 734718782.0, "step": 956 }, { "epoch": 0.35064578180818906, "grad_norm": 0.20873920440823426, "learning_rate": 3.984903660552408e-05, "loss": 0.5568, "num_tokens": 735489521.0, "step": 957 }, { "epoch": 0.35101218283411195, "grad_norm": 0.24970322478566778, "learning_rate": 3.9848483897473864e-05, "loss": 0.5655, "num_tokens": 736283357.0, "step": 958 }, { "epoch": 0.3513785838600348, "grad_norm": 0.26970611448704135, "learning_rate": 3.984793018375901e-05, "loss": 0.5958, "num_tokens": 737042566.0, "step": 959 }, { "epoch": 0.3517449848859577, "grad_norm": 0.22871861059162504, "learning_rate": 3.98473754644107e-05, "loss": 0.6011, "num_tokens": 737862095.0, "step": 960 }, { "epoch": 0.35211138591188057, "grad_norm": 0.21506826042550423, "learning_rate": 3.98468197394602e-05, "loss": 0.5893, "num_tokens": 738662354.0, "step": 961 }, { "epoch": 0.3524777869378034, "grad_norm": 0.23143328329726634, "learning_rate": 3.9846263008938824e-05, "loss": 0.6309, "num_tokens": 739405397.0, "step": 962 }, { "epoch": 0.3528441879637263, "grad_norm": 0.22181965848399188, "learning_rate": 3.984570527287792e-05, "loss": 0.587, "num_tokens": 740156288.0, "step": 963 }, { "epoch": 0.3532105889896492, "grad_norm": 0.233720947217216, "learning_rate": 3.984514653130894e-05, "loss": 0.6526, "num_tokens": 740770343.0, "step": 964 }, { "epoch": 0.353576990015572, "grad_norm": 0.2820955247334181, "learning_rate": 3.984458678426335e-05, "loss": 0.6158, "num_tokens": 741488186.0, "step": 965 }, { "epoch": 0.3539433910414949, "grad_norm": 0.2544349503602878, "learning_rate": 3.98440260317727e-05, "loss": 0.6795, "num_tokens": 742238394.0, "step": 966 }, { "epoch": 0.3543097920674178, "grad_norm": 0.2596222949818763, "learning_rate": 3.984346427386857e-05, "loss": 0.5945, "num_tokens": 742981131.0, "step": 967 }, { "epoch": 0.35467619309334064, "grad_norm": 0.2642014445050887, "learning_rate": 3.984290151058262e-05, "loss": 0.6036, "num_tokens": 743813765.0, "step": 968 }, { "epoch": 0.35504259411926353, "grad_norm": 0.2666741318806301, "learning_rate": 3.984233774194656e-05, "loss": 0.621, "num_tokens": 744645481.0, "step": 969 }, { "epoch": 0.3554089951451864, "grad_norm": 0.27819084785367976, "learning_rate": 3.984177296799215e-05, "loss": 0.5532, "num_tokens": 745444625.0, "step": 970 }, { "epoch": 0.35577539617110926, "grad_norm": 0.280339472958724, "learning_rate": 3.984120718875122e-05, "loss": 0.5614, "num_tokens": 746220410.0, "step": 971 }, { "epoch": 0.35614179719703215, "grad_norm": 0.2838621433471933, "learning_rate": 3.984064040425565e-05, "loss": 0.6245, "num_tokens": 747049105.0, "step": 972 }, { "epoch": 0.35650819822295504, "grad_norm": 0.26861050171163015, "learning_rate": 3.984007261453736e-05, "loss": 0.6414, "num_tokens": 747743381.0, "step": 973 }, { "epoch": 0.3568745992488779, "grad_norm": 0.36857882295863487, "learning_rate": 3.9839503819628353e-05, "loss": 0.5921, "num_tokens": 748409109.0, "step": 974 }, { "epoch": 0.35724100027480077, "grad_norm": 0.2396730098586616, "learning_rate": 3.983893401956068e-05, "loss": 0.5906, "num_tokens": 749096351.0, "step": 975 }, { "epoch": 0.35760740130072366, "grad_norm": 0.22293686181777297, "learning_rate": 3.983836321436645e-05, "loss": 0.5661, "num_tokens": 749920697.0, "step": 976 }, { "epoch": 0.3579738023266465, "grad_norm": 0.22364096669554318, "learning_rate": 3.983779140407781e-05, "loss": 0.5739, "num_tokens": 750599306.0, "step": 977 }, { "epoch": 0.3583402033525694, "grad_norm": 0.24537834802871966, "learning_rate": 3.983721858872698e-05, "loss": 0.5933, "num_tokens": 751407706.0, "step": 978 }, { "epoch": 0.3587066043784923, "grad_norm": 0.22854713103594898, "learning_rate": 3.983664476834624e-05, "loss": 0.5728, "num_tokens": 752202945.0, "step": 979 }, { "epoch": 0.3590730054044151, "grad_norm": 0.2267136537092691, "learning_rate": 3.983606994296793e-05, "loss": 0.5745, "num_tokens": 752907214.0, "step": 980 }, { "epoch": 0.359439406430338, "grad_norm": 0.22744607958409913, "learning_rate": 3.9835494112624427e-05, "loss": 0.5905, "num_tokens": 753772108.0, "step": 981 }, { "epoch": 0.3598058074562609, "grad_norm": 0.2184624391165856, "learning_rate": 3.983491727734817e-05, "loss": 0.6127, "num_tokens": 754478085.0, "step": 982 }, { "epoch": 0.36017220848218373, "grad_norm": 0.24414741740119403, "learning_rate": 3.983433943717168e-05, "loss": 0.5646, "num_tokens": 755338071.0, "step": 983 }, { "epoch": 0.3605386095081066, "grad_norm": 0.2390917127046887, "learning_rate": 3.9833760592127505e-05, "loss": 0.6039, "num_tokens": 755981756.0, "step": 984 }, { "epoch": 0.3609050105340295, "grad_norm": 0.39393464282320007, "learning_rate": 3.983318074224825e-05, "loss": 0.5828, "num_tokens": 756861392.0, "step": 985 }, { "epoch": 0.36127141155995235, "grad_norm": 0.306450516262369, "learning_rate": 3.983259988756661e-05, "loss": 0.6319, "num_tokens": 757570895.0, "step": 986 }, { "epoch": 0.36163781258587524, "grad_norm": 0.24274267038084843, "learning_rate": 3.983201802811529e-05, "loss": 0.619, "num_tokens": 758319846.0, "step": 987 }, { "epoch": 0.36200421361179813, "grad_norm": 0.36717476975864055, "learning_rate": 3.983143516392709e-05, "loss": 0.6286, "num_tokens": 759052245.0, "step": 988 }, { "epoch": 0.36237061463772097, "grad_norm": 0.310306137296496, "learning_rate": 3.9830851295034834e-05, "loss": 0.5885, "num_tokens": 759781720.0, "step": 989 }, { "epoch": 0.36273701566364386, "grad_norm": 0.24390203295438362, "learning_rate": 3.9830266421471435e-05, "loss": 0.6103, "num_tokens": 760453610.0, "step": 990 }, { "epoch": 0.36310341668956675, "grad_norm": 0.2872067894112897, "learning_rate": 3.982968054326985e-05, "loss": 0.5967, "num_tokens": 761180422.0, "step": 991 }, { "epoch": 0.3634698177154896, "grad_norm": 0.22276929121075345, "learning_rate": 3.9829093660463074e-05, "loss": 0.5787, "num_tokens": 761983754.0, "step": 992 }, { "epoch": 0.3638362187414125, "grad_norm": 0.2628206423863506, "learning_rate": 3.9828505773084186e-05, "loss": 0.6069, "num_tokens": 762741529.0, "step": 993 }, { "epoch": 0.36420261976733537, "grad_norm": 0.2533479861584086, "learning_rate": 3.982791688116632e-05, "loss": 0.6297, "num_tokens": 763468153.0, "step": 994 }, { "epoch": 0.3645690207932582, "grad_norm": 0.250878985425608, "learning_rate": 3.9827326984742624e-05, "loss": 0.5774, "num_tokens": 764207873.0, "step": 995 }, { "epoch": 0.3649354218191811, "grad_norm": 0.23069292418767498, "learning_rate": 3.982673608384637e-05, "loss": 0.5971, "num_tokens": 764946961.0, "step": 996 }, { "epoch": 0.365301822845104, "grad_norm": 0.21158347789855894, "learning_rate": 3.9826144178510833e-05, "loss": 0.6035, "num_tokens": 765773620.0, "step": 997 }, { "epoch": 0.3656682238710268, "grad_norm": 0.20740665385697626, "learning_rate": 3.9825551268769375e-05, "loss": 0.5822, "num_tokens": 766561171.0, "step": 998 }, { "epoch": 0.3660346248969497, "grad_norm": 0.2080412319048278, "learning_rate": 3.9824957354655393e-05, "loss": 0.6122, "num_tokens": 767425101.0, "step": 999 }, { "epoch": 0.3664010259228726, "grad_norm": 0.26587230898355224, "learning_rate": 3.982436243620235e-05, "loss": 0.6133, "num_tokens": 768278674.0, "step": 1000 }, { "epoch": 0.36676742694879544, "grad_norm": 0.22432267432928876, "learning_rate": 3.9823766513443776e-05, "loss": 0.5566, "num_tokens": 768990305.0, "step": 1001 }, { "epoch": 0.36713382797471833, "grad_norm": 0.21932554267674306, "learning_rate": 3.9823169586413246e-05, "loss": 0.5943, "num_tokens": 769788039.0, "step": 1002 }, { "epoch": 0.3675002290006412, "grad_norm": 0.24486172467744916, "learning_rate": 3.9822571655144383e-05, "loss": 0.5553, "num_tokens": 770598867.0, "step": 1003 }, { "epoch": 0.36786663002656406, "grad_norm": 0.21602494332330102, "learning_rate": 3.98219727196709e-05, "loss": 0.5516, "num_tokens": 771467388.0, "step": 1004 }, { "epoch": 0.36823303105248695, "grad_norm": 0.25478363826330597, "learning_rate": 3.982137278002651e-05, "loss": 0.6101, "num_tokens": 772305390.0, "step": 1005 }, { "epoch": 0.36859943207840984, "grad_norm": 0.24233229670559814, "learning_rate": 3.9820771836245045e-05, "loss": 0.6019, "num_tokens": 773073596.0, "step": 1006 }, { "epoch": 0.3689658331043327, "grad_norm": 0.23258271527741964, "learning_rate": 3.982016988836035e-05, "loss": 0.573, "num_tokens": 773904940.0, "step": 1007 }, { "epoch": 0.36933223413025557, "grad_norm": 0.21842893252625986, "learning_rate": 3.9819566936406356e-05, "loss": 0.5819, "num_tokens": 774630406.0, "step": 1008 }, { "epoch": 0.36969863515617846, "grad_norm": 0.24276119472313515, "learning_rate": 3.981896298041702e-05, "loss": 0.6225, "num_tokens": 775386988.0, "step": 1009 }, { "epoch": 0.3700650361821013, "grad_norm": 0.22200830172535513, "learning_rate": 3.981835802042638e-05, "loss": 0.5804, "num_tokens": 776191555.0, "step": 1010 }, { "epoch": 0.3704314372080242, "grad_norm": 0.22284669110435207, "learning_rate": 3.981775205646852e-05, "loss": 0.6068, "num_tokens": 776865194.0, "step": 1011 }, { "epoch": 0.3707978382339471, "grad_norm": 0.25543600596638294, "learning_rate": 3.981714508857759e-05, "loss": 0.5971, "num_tokens": 777551865.0, "step": 1012 }, { "epoch": 0.3711642392598699, "grad_norm": 0.2352154115532733, "learning_rate": 3.981653711678777e-05, "loss": 0.6201, "num_tokens": 778283509.0, "step": 1013 }, { "epoch": 0.3715306402857928, "grad_norm": 0.2908367739092727, "learning_rate": 3.981592814113334e-05, "loss": 0.6107, "num_tokens": 778902087.0, "step": 1014 }, { "epoch": 0.3718970413117157, "grad_norm": 0.2912698432824806, "learning_rate": 3.9815318161648595e-05, "loss": 0.6234, "num_tokens": 779549294.0, "step": 1015 }, { "epoch": 0.37226344233763853, "grad_norm": 0.29285179413904544, "learning_rate": 3.981470717836791e-05, "loss": 0.5791, "num_tokens": 780268922.0, "step": 1016 }, { "epoch": 0.3726298433635614, "grad_norm": 0.23711855479041152, "learning_rate": 3.981409519132571e-05, "loss": 0.5577, "num_tokens": 780992981.0, "step": 1017 }, { "epoch": 0.3729962443894843, "grad_norm": 0.3012578233501245, "learning_rate": 3.981348220055648e-05, "loss": 0.6171, "num_tokens": 781742534.0, "step": 1018 }, { "epoch": 0.37336264541540715, "grad_norm": 0.24445757748542032, "learning_rate": 3.981286820609475e-05, "loss": 0.606, "num_tokens": 782411125.0, "step": 1019 }, { "epoch": 0.37372904644133004, "grad_norm": 0.253091134091982, "learning_rate": 3.9812253207975127e-05, "loss": 0.6024, "num_tokens": 783155690.0, "step": 1020 }, { "epoch": 0.37409544746725293, "grad_norm": 0.2655165722762519, "learning_rate": 3.981163720623225e-05, "loss": 0.6218, "num_tokens": 784021418.0, "step": 1021 }, { "epoch": 0.37446184849317576, "grad_norm": 0.22719922869535933, "learning_rate": 3.981102020090084e-05, "loss": 0.604, "num_tokens": 784692137.0, "step": 1022 }, { "epoch": 0.37482824951909866, "grad_norm": 0.2540904997208608, "learning_rate": 3.981040219201566e-05, "loss": 0.6396, "num_tokens": 785456713.0, "step": 1023 }, { "epoch": 0.37519465054502155, "grad_norm": 0.20913770467591652, "learning_rate": 3.980978317961153e-05, "loss": 0.5789, "num_tokens": 786088078.0, "step": 1024 }, { "epoch": 0.3755610515709444, "grad_norm": 0.24754404346076278, "learning_rate": 3.9809163163723314e-05, "loss": 0.6368, "num_tokens": 786760275.0, "step": 1025 }, { "epoch": 0.3759274525968673, "grad_norm": 0.2086948260119318, "learning_rate": 3.9808542144385966e-05, "loss": 0.5748, "num_tokens": 787505330.0, "step": 1026 }, { "epoch": 0.37629385362279016, "grad_norm": 0.19460461192443387, "learning_rate": 3.980792012163447e-05, "loss": 0.5785, "num_tokens": 788193122.0, "step": 1027 }, { "epoch": 0.376660254648713, "grad_norm": 0.2533997065120635, "learning_rate": 3.980729709550387e-05, "loss": 0.615, "num_tokens": 788943987.0, "step": 1028 }, { "epoch": 0.3770266556746359, "grad_norm": 0.19147684225494693, "learning_rate": 3.980667306602927e-05, "loss": 0.5982, "num_tokens": 789908644.0, "step": 1029 }, { "epoch": 0.3773930567005588, "grad_norm": 0.25389856244806724, "learning_rate": 3.980604803324585e-05, "loss": 0.6133, "num_tokens": 790658564.0, "step": 1030 }, { "epoch": 0.3777594577264816, "grad_norm": 0.21699345096090022, "learning_rate": 3.9805421997188796e-05, "loss": 0.6394, "num_tokens": 791313561.0, "step": 1031 }, { "epoch": 0.3781258587524045, "grad_norm": 0.23135386705455147, "learning_rate": 3.98047949578934e-05, "loss": 0.6171, "num_tokens": 792079034.0, "step": 1032 }, { "epoch": 0.3784922597783274, "grad_norm": 0.23933438362842996, "learning_rate": 3.9804166915394985e-05, "loss": 0.6033, "num_tokens": 792873906.0, "step": 1033 }, { "epoch": 0.37885866080425024, "grad_norm": 0.23543002214009134, "learning_rate": 3.9803537869728945e-05, "loss": 0.6405, "num_tokens": 793585351.0, "step": 1034 }, { "epoch": 0.3792250618301731, "grad_norm": 0.20595222703475863, "learning_rate": 3.980290782093072e-05, "loss": 0.6076, "num_tokens": 794381672.0, "step": 1035 }, { "epoch": 0.379591462856096, "grad_norm": 0.20344197377109616, "learning_rate": 3.980227676903582e-05, "loss": 0.5912, "num_tokens": 795156315.0, "step": 1036 }, { "epoch": 0.37995786388201885, "grad_norm": 0.2192220257922623, "learning_rate": 3.980164471407978e-05, "loss": 0.6254, "num_tokens": 795868900.0, "step": 1037 }, { "epoch": 0.38032426490794174, "grad_norm": 0.20457224633367468, "learning_rate": 3.980101165609823e-05, "loss": 0.5573, "num_tokens": 796677311.0, "step": 1038 }, { "epoch": 0.38069066593386464, "grad_norm": 0.23735516093912243, "learning_rate": 3.980037759512683e-05, "loss": 0.6213, "num_tokens": 797426622.0, "step": 1039 }, { "epoch": 0.38105706695978747, "grad_norm": 0.2284854236535039, "learning_rate": 3.97997425312013e-05, "loss": 0.6537, "num_tokens": 798122575.0, "step": 1040 }, { "epoch": 0.38142346798571036, "grad_norm": 0.21157801921433428, "learning_rate": 3.979910646435744e-05, "loss": 0.5795, "num_tokens": 798965491.0, "step": 1041 }, { "epoch": 0.38178986901163325, "grad_norm": 0.18988916666102226, "learning_rate": 3.979846939463108e-05, "loss": 0.5667, "num_tokens": 799747100.0, "step": 1042 }, { "epoch": 0.3821562700375561, "grad_norm": 0.2157009712232108, "learning_rate": 3.979783132205812e-05, "loss": 0.612, "num_tokens": 800404932.0, "step": 1043 }, { "epoch": 0.382522671063479, "grad_norm": 0.2123128854104945, "learning_rate": 3.9797192246674504e-05, "loss": 0.6002, "num_tokens": 801082905.0, "step": 1044 }, { "epoch": 0.38288907208940187, "grad_norm": 0.2429475699619134, "learning_rate": 3.979655216851624e-05, "loss": 0.6241, "num_tokens": 801750359.0, "step": 1045 }, { "epoch": 0.3832554731153247, "grad_norm": 0.2674627053140716, "learning_rate": 3.9795911087619405e-05, "loss": 0.617, "num_tokens": 802507380.0, "step": 1046 }, { "epoch": 0.3836218741412476, "grad_norm": 0.23997442980195952, "learning_rate": 3.979526900402011e-05, "loss": 0.5794, "num_tokens": 803375648.0, "step": 1047 }, { "epoch": 0.3839882751671705, "grad_norm": 0.24523628436781494, "learning_rate": 3.9794625917754535e-05, "loss": 0.6378, "num_tokens": 804068410.0, "step": 1048 }, { "epoch": 0.3843546761930933, "grad_norm": 0.28044935611042565, "learning_rate": 3.979398182885892e-05, "loss": 0.6268, "num_tokens": 804727480.0, "step": 1049 }, { "epoch": 0.3847210772190162, "grad_norm": 0.24551680853081911, "learning_rate": 3.979333673736954e-05, "loss": 0.6125, "num_tokens": 805496539.0, "step": 1050 }, { "epoch": 0.3850874782449391, "grad_norm": 0.27404870220963534, "learning_rate": 3.979269064332275e-05, "loss": 0.6077, "num_tokens": 806381715.0, "step": 1051 }, { "epoch": 0.38545387927086194, "grad_norm": 0.26729265473000063, "learning_rate": 3.9792043546754975e-05, "loss": 0.5619, "num_tokens": 807174552.0, "step": 1052 }, { "epoch": 0.38582028029678483, "grad_norm": 0.2562124458286023, "learning_rate": 3.9791395447702645e-05, "loss": 0.601, "num_tokens": 807939885.0, "step": 1053 }, { "epoch": 0.3861866813227077, "grad_norm": 0.2394316518294701, "learning_rate": 3.979074634620229e-05, "loss": 0.5787, "num_tokens": 808703929.0, "step": 1054 }, { "epoch": 0.38655308234863056, "grad_norm": 0.22724238639381916, "learning_rate": 3.979009624229049e-05, "loss": 0.6121, "num_tokens": 809488390.0, "step": 1055 }, { "epoch": 0.38691948337455345, "grad_norm": 0.26252838597732014, "learning_rate": 3.978944513600386e-05, "loss": 0.5882, "num_tokens": 810325316.0, "step": 1056 }, { "epoch": 0.38728588440047634, "grad_norm": 0.21675217397709368, "learning_rate": 3.9788793027379095e-05, "loss": 0.5865, "num_tokens": 811047218.0, "step": 1057 }, { "epoch": 0.3876522854263992, "grad_norm": 0.24428631152719188, "learning_rate": 3.9788139916452935e-05, "loss": 0.5926, "num_tokens": 811844317.0, "step": 1058 }, { "epoch": 0.38801868645232207, "grad_norm": 0.3224791079300084, "learning_rate": 3.9787485803262185e-05, "loss": 0.6275, "num_tokens": 812541482.0, "step": 1059 }, { "epoch": 0.38838508747824496, "grad_norm": 0.2373958779235935, "learning_rate": 3.9786830687843695e-05, "loss": 0.5846, "num_tokens": 813274456.0, "step": 1060 }, { "epoch": 0.3887514885041678, "grad_norm": 0.2764435121317422, "learning_rate": 3.9786174570234375e-05, "loss": 0.5746, "num_tokens": 814139574.0, "step": 1061 }, { "epoch": 0.3891178895300907, "grad_norm": 0.22198533852444424, "learning_rate": 3.9785517450471204e-05, "loss": 0.5832, "num_tokens": 814855818.0, "step": 1062 }, { "epoch": 0.3894842905560136, "grad_norm": 0.26259438028459636, "learning_rate": 3.978485932859119e-05, "loss": 0.6135, "num_tokens": 815543911.0, "step": 1063 }, { "epoch": 0.3898506915819364, "grad_norm": 0.3083917863388458, "learning_rate": 3.9784200204631435e-05, "loss": 0.6115, "num_tokens": 816368146.0, "step": 1064 }, { "epoch": 0.3902170926078593, "grad_norm": 0.24818430326271904, "learning_rate": 3.9783540078629065e-05, "loss": 0.623, "num_tokens": 817079640.0, "step": 1065 }, { "epoch": 0.3905834936337822, "grad_norm": 0.27138755737989595, "learning_rate": 3.978287895062127e-05, "loss": 0.6105, "num_tokens": 818037685.0, "step": 1066 }, { "epoch": 0.39094989465970503, "grad_norm": 0.26808720864519314, "learning_rate": 3.9782216820645307e-05, "loss": 0.5672, "num_tokens": 818787474.0, "step": 1067 }, { "epoch": 0.3913162956856279, "grad_norm": 0.24116268001076477, "learning_rate": 3.978155368873849e-05, "loss": 0.6032, "num_tokens": 819495599.0, "step": 1068 }, { "epoch": 0.3916826967115508, "grad_norm": 0.24175005608998076, "learning_rate": 3.9780889554938176e-05, "loss": 0.5967, "num_tokens": 820244794.0, "step": 1069 }, { "epoch": 0.39204909773747365, "grad_norm": 0.2539114734653551, "learning_rate": 3.9780224419281785e-05, "loss": 0.616, "num_tokens": 820953777.0, "step": 1070 }, { "epoch": 0.39241549876339654, "grad_norm": 0.2174983198702841, "learning_rate": 3.97795582818068e-05, "loss": 0.6055, "num_tokens": 821779477.0, "step": 1071 }, { "epoch": 0.39278189978931943, "grad_norm": 0.23013493456389517, "learning_rate": 3.977889114255074e-05, "loss": 0.5804, "num_tokens": 822540284.0, "step": 1072 }, { "epoch": 0.39314830081524227, "grad_norm": 0.22925735652990206, "learning_rate": 3.97782230015512e-05, "loss": 0.6097, "num_tokens": 823287744.0, "step": 1073 }, { "epoch": 0.39351470184116516, "grad_norm": 0.2594310014552239, "learning_rate": 3.977755385884584e-05, "loss": 0.5956, "num_tokens": 824042052.0, "step": 1074 }, { "epoch": 0.39388110286708805, "grad_norm": 0.2559160637389216, "learning_rate": 3.9776883714472354e-05, "loss": 0.6157, "num_tokens": 824789449.0, "step": 1075 }, { "epoch": 0.3942475038930109, "grad_norm": 0.266195194314495, "learning_rate": 3.977621256846849e-05, "loss": 0.6056, "num_tokens": 825434405.0, "step": 1076 }, { "epoch": 0.3946139049189338, "grad_norm": 0.22755782393033855, "learning_rate": 3.977554042087208e-05, "loss": 0.5651, "num_tokens": 826049409.0, "step": 1077 }, { "epoch": 0.39498030594485667, "grad_norm": 0.22556314192705185, "learning_rate": 3.977486727172098e-05, "loss": 0.5864, "num_tokens": 826812807.0, "step": 1078 }, { "epoch": 0.3953467069707795, "grad_norm": 0.20870293282621363, "learning_rate": 3.9774193121053135e-05, "loss": 0.5914, "num_tokens": 827601871.0, "step": 1079 }, { "epoch": 0.3957131079967024, "grad_norm": 0.19641233813090678, "learning_rate": 3.9773517968906515e-05, "loss": 0.6012, "num_tokens": 828456452.0, "step": 1080 }, { "epoch": 0.3960795090226253, "grad_norm": 0.2314308186577651, "learning_rate": 3.977284181531917e-05, "loss": 0.5949, "num_tokens": 829247630.0, "step": 1081 }, { "epoch": 0.3964459100485481, "grad_norm": 0.20831895689497448, "learning_rate": 3.97721646603292e-05, "loss": 0.6174, "num_tokens": 829877102.0, "step": 1082 }, { "epoch": 0.396812311074471, "grad_norm": 0.25435513329682813, "learning_rate": 3.977148650397474e-05, "loss": 0.5794, "num_tokens": 830709153.0, "step": 1083 }, { "epoch": 0.3971787121003939, "grad_norm": 0.2415062838696764, "learning_rate": 3.977080734629403e-05, "loss": 0.5367, "num_tokens": 831557907.0, "step": 1084 }, { "epoch": 0.39754511312631674, "grad_norm": 0.21943151893852025, "learning_rate": 3.9770127187325315e-05, "loss": 0.6027, "num_tokens": 832220457.0, "step": 1085 }, { "epoch": 0.39791151415223963, "grad_norm": 0.2265567459447606, "learning_rate": 3.976944602710693e-05, "loss": 0.6196, "num_tokens": 832962618.0, "step": 1086 }, { "epoch": 0.3982779151781625, "grad_norm": 0.18870327096673414, "learning_rate": 3.9768763865677243e-05, "loss": 0.5797, "num_tokens": 833721958.0, "step": 1087 }, { "epoch": 0.39864431620408536, "grad_norm": 0.24158379631778196, "learning_rate": 3.97680807030747e-05, "loss": 0.5661, "num_tokens": 834534706.0, "step": 1088 }, { "epoch": 0.39901071723000825, "grad_norm": 0.19692908910261114, "learning_rate": 3.9767396539337784e-05, "loss": 0.6047, "num_tokens": 835299922.0, "step": 1089 }, { "epoch": 0.39937711825593114, "grad_norm": 0.23459059252183415, "learning_rate": 3.976671137450506e-05, "loss": 0.567, "num_tokens": 836067533.0, "step": 1090 }, { "epoch": 0.399743519281854, "grad_norm": 0.2374387435598931, "learning_rate": 3.9766025208615115e-05, "loss": 0.6105, "num_tokens": 836715379.0, "step": 1091 }, { "epoch": 0.40010992030777687, "grad_norm": 0.22084303895413998, "learning_rate": 3.9765338041706616e-05, "loss": 0.5928, "num_tokens": 837376213.0, "step": 1092 }, { "epoch": 0.40047632133369976, "grad_norm": 0.2236580696034308, "learning_rate": 3.976464987381829e-05, "loss": 0.6104, "num_tokens": 838192741.0, "step": 1093 }, { "epoch": 0.4008427223596226, "grad_norm": 0.21466907793609222, "learning_rate": 3.97639607049889e-05, "loss": 0.6574, "num_tokens": 838895861.0, "step": 1094 }, { "epoch": 0.4012091233855455, "grad_norm": 0.2699110378948118, "learning_rate": 3.976327053525729e-05, "loss": 0.6024, "num_tokens": 839667325.0, "step": 1095 }, { "epoch": 0.4015755244114684, "grad_norm": 0.21814342913695703, "learning_rate": 3.9762579364662334e-05, "loss": 0.5591, "num_tokens": 840379143.0, "step": 1096 }, { "epoch": 0.4019419254373912, "grad_norm": 0.23432634844749228, "learning_rate": 3.976188719324298e-05, "loss": 0.6136, "num_tokens": 841140582.0, "step": 1097 }, { "epoch": 0.4023083264633141, "grad_norm": 0.2130970484741689, "learning_rate": 3.976119402103823e-05, "loss": 0.5679, "num_tokens": 841833790.0, "step": 1098 }, { "epoch": 0.402674727489237, "grad_norm": 0.22446105364289357, "learning_rate": 3.976049984808714e-05, "loss": 0.5916, "num_tokens": 842505162.0, "step": 1099 }, { "epoch": 0.40304112851515983, "grad_norm": 0.24881615295372703, "learning_rate": 3.9759804674428825e-05, "loss": 0.6269, "num_tokens": 843240159.0, "step": 1100 }, { "epoch": 0.4034075295410827, "grad_norm": 0.22575579357501158, "learning_rate": 3.9759108500102454e-05, "loss": 0.5775, "num_tokens": 843958829.0, "step": 1101 }, { "epoch": 0.4037739305670056, "grad_norm": 0.20521057429655915, "learning_rate": 3.975841132514725e-05, "loss": 0.5733, "num_tokens": 844751672.0, "step": 1102 }, { "epoch": 0.40414033159292845, "grad_norm": 0.23378047118751868, "learning_rate": 3.975771314960249e-05, "loss": 0.5839, "num_tokens": 845526782.0, "step": 1103 }, { "epoch": 0.40450673261885134, "grad_norm": 0.23364401780335867, "learning_rate": 3.9757013973507524e-05, "loss": 0.6093, "num_tokens": 846320157.0, "step": 1104 }, { "epoch": 0.40487313364477423, "grad_norm": 0.2510439854377022, "learning_rate": 3.975631379690173e-05, "loss": 0.6364, "num_tokens": 847094319.0, "step": 1105 }, { "epoch": 0.40523953467069707, "grad_norm": 0.2136666254476039, "learning_rate": 3.9755612619824574e-05, "loss": 0.5469, "num_tokens": 847875423.0, "step": 1106 }, { "epoch": 0.40560593569661996, "grad_norm": 0.28252611040744063, "learning_rate": 3.9754910442315565e-05, "loss": 0.578, "num_tokens": 848522762.0, "step": 1107 }, { "epoch": 0.40597233672254285, "grad_norm": 0.23087197679820565, "learning_rate": 3.975420726441426e-05, "loss": 0.5915, "num_tokens": 849388514.0, "step": 1108 }, { "epoch": 0.4063387377484657, "grad_norm": 0.20695697826229306, "learning_rate": 3.9753503086160286e-05, "loss": 0.6124, "num_tokens": 850146659.0, "step": 1109 }, { "epoch": 0.4067051387743886, "grad_norm": 0.21666095564167492, "learning_rate": 3.9752797907593304e-05, "loss": 0.5619, "num_tokens": 850879102.0, "step": 1110 }, { "epoch": 0.40707153980031147, "grad_norm": 0.2375661818014018, "learning_rate": 3.975209172875306e-05, "loss": 0.5837, "num_tokens": 851601346.0, "step": 1111 }, { "epoch": 0.4074379408262343, "grad_norm": 0.22594962288117845, "learning_rate": 3.975138454967933e-05, "loss": 0.6176, "num_tokens": 852268798.0, "step": 1112 }, { "epoch": 0.4078043418521572, "grad_norm": 0.24709287609176, "learning_rate": 3.975067637041199e-05, "loss": 0.5923, "num_tokens": 853023229.0, "step": 1113 }, { "epoch": 0.4081707428780801, "grad_norm": 0.2394550690245026, "learning_rate": 3.9749967190990914e-05, "loss": 0.5764, "num_tokens": 853749311.0, "step": 1114 }, { "epoch": 0.4085371439040029, "grad_norm": 0.22579527078564873, "learning_rate": 3.974925701145606e-05, "loss": 0.5935, "num_tokens": 854588216.0, "step": 1115 }, { "epoch": 0.4089035449299258, "grad_norm": 0.24796962779957776, "learning_rate": 3.974854583184745e-05, "loss": 0.5646, "num_tokens": 855355985.0, "step": 1116 }, { "epoch": 0.4092699459558487, "grad_norm": 0.23732933582776808, "learning_rate": 3.9747833652205174e-05, "loss": 0.6172, "num_tokens": 856061916.0, "step": 1117 }, { "epoch": 0.40963634698177154, "grad_norm": 0.21651983677327433, "learning_rate": 3.974712047256933e-05, "loss": 0.5961, "num_tokens": 856861820.0, "step": 1118 }, { "epoch": 0.41000274800769443, "grad_norm": 0.2483503429301452, "learning_rate": 3.974640629298011e-05, "loss": 0.5955, "num_tokens": 857650116.0, "step": 1119 }, { "epoch": 0.4103691490336173, "grad_norm": 0.2971457485628807, "learning_rate": 3.9745691113477766e-05, "loss": 0.6743, "num_tokens": 858336627.0, "step": 1120 }, { "epoch": 0.41073555005954016, "grad_norm": 0.2675312131335014, "learning_rate": 3.974497493410258e-05, "loss": 0.6098, "num_tokens": 859058614.0, "step": 1121 }, { "epoch": 0.41110195108546305, "grad_norm": 0.21750632525387767, "learning_rate": 3.974425775489491e-05, "loss": 0.5642, "num_tokens": 859838835.0, "step": 1122 }, { "epoch": 0.41146835211138594, "grad_norm": 0.2622238238599173, "learning_rate": 3.974353957589517e-05, "loss": 0.5746, "num_tokens": 860678344.0, "step": 1123 }, { "epoch": 0.4118347531373088, "grad_norm": 0.25740826045222676, "learning_rate": 3.974282039714382e-05, "loss": 0.5689, "num_tokens": 861530346.0, "step": 1124 }, { "epoch": 0.41220115416323166, "grad_norm": 0.2526987425510158, "learning_rate": 3.974210021868138e-05, "loss": 0.6039, "num_tokens": 862264612.0, "step": 1125 }, { "epoch": 0.41256755518915456, "grad_norm": 0.23091962072918376, "learning_rate": 3.974137904054844e-05, "loss": 0.5823, "num_tokens": 862982151.0, "step": 1126 }, { "epoch": 0.4129339562150774, "grad_norm": 0.2559604763041704, "learning_rate": 3.974065686278562e-05, "loss": 0.5957, "num_tokens": 863733623.0, "step": 1127 }, { "epoch": 0.4133003572410003, "grad_norm": 0.22964286072652185, "learning_rate": 3.973993368543362e-05, "loss": 0.6133, "num_tokens": 864461412.0, "step": 1128 }, { "epoch": 0.4136667582669232, "grad_norm": 0.2629559841067815, "learning_rate": 3.9739209508533176e-05, "loss": 0.5911, "num_tokens": 865217183.0, "step": 1129 }, { "epoch": 0.414033159292846, "grad_norm": 0.2912578516466609, "learning_rate": 3.973848433212511e-05, "loss": 0.6653, "num_tokens": 865915350.0, "step": 1130 }, { "epoch": 0.4143995603187689, "grad_norm": 0.22662830272714546, "learning_rate": 3.973775815625026e-05, "loss": 0.5519, "num_tokens": 866722592.0, "step": 1131 }, { "epoch": 0.4147659613446918, "grad_norm": 0.2749447936426221, "learning_rate": 3.973703098094956e-05, "loss": 0.6455, "num_tokens": 867524711.0, "step": 1132 }, { "epoch": 0.4151323623706146, "grad_norm": 0.25312140216219936, "learning_rate": 3.9736302806263964e-05, "loss": 0.5831, "num_tokens": 868223600.0, "step": 1133 }, { "epoch": 0.4154987633965375, "grad_norm": 0.22385897718444586, "learning_rate": 3.9735573632234525e-05, "loss": 0.5579, "num_tokens": 868967420.0, "step": 1134 }, { "epoch": 0.4158651644224604, "grad_norm": 0.20251190434669106, "learning_rate": 3.973484345890231e-05, "loss": 0.5672, "num_tokens": 869803312.0, "step": 1135 }, { "epoch": 0.41623156544838324, "grad_norm": 0.22962854257769924, "learning_rate": 3.973411228630847e-05, "loss": 0.6161, "num_tokens": 870652592.0, "step": 1136 }, { "epoch": 0.41659796647430614, "grad_norm": 0.2088291661833921, "learning_rate": 3.973338011449419e-05, "loss": 0.5584, "num_tokens": 871342971.0, "step": 1137 }, { "epoch": 0.416964367500229, "grad_norm": 0.25978562744338596, "learning_rate": 3.9732646943500736e-05, "loss": 0.642, "num_tokens": 872124514.0, "step": 1138 }, { "epoch": 0.41733076852615186, "grad_norm": 0.21259842313353192, "learning_rate": 3.973191277336941e-05, "loss": 0.6184, "num_tokens": 872800072.0, "step": 1139 }, { "epoch": 0.41769716955207475, "grad_norm": 0.257302767831891, "learning_rate": 3.973117760414158e-05, "loss": 0.5865, "num_tokens": 873668054.0, "step": 1140 }, { "epoch": 0.41806357057799765, "grad_norm": 0.2545898404262352, "learning_rate": 3.973044143585868e-05, "loss": 0.5822, "num_tokens": 874355404.0, "step": 1141 }, { "epoch": 0.4184299716039205, "grad_norm": 0.22601262849642353, "learning_rate": 3.9729704268562175e-05, "loss": 0.5407, "num_tokens": 875105002.0, "step": 1142 }, { "epoch": 0.41879637262984337, "grad_norm": 0.21424358060102555, "learning_rate": 3.9728966102293607e-05, "loss": 0.5826, "num_tokens": 875792334.0, "step": 1143 }, { "epoch": 0.41916277365576626, "grad_norm": 0.2152651890793271, "learning_rate": 3.972822693709456e-05, "loss": 0.608, "num_tokens": 876645005.0, "step": 1144 }, { "epoch": 0.4195291746816891, "grad_norm": 0.291070162732847, "learning_rate": 3.9727486773006696e-05, "loss": 0.565, "num_tokens": 877507538.0, "step": 1145 }, { "epoch": 0.419895575707612, "grad_norm": 0.20582530114530664, "learning_rate": 3.972674561007171e-05, "loss": 0.5611, "num_tokens": 878322528.0, "step": 1146 }, { "epoch": 0.4202619767335349, "grad_norm": 0.20714794227354594, "learning_rate": 3.972600344833137e-05, "loss": 0.6063, "num_tokens": 879111523.0, "step": 1147 }, { "epoch": 0.4206283777594577, "grad_norm": 0.31019880805071465, "learning_rate": 3.972526028782747e-05, "loss": 0.6112, "num_tokens": 879806251.0, "step": 1148 }, { "epoch": 0.4209947787853806, "grad_norm": 0.24846890324298956, "learning_rate": 3.972451612860191e-05, "loss": 0.5698, "num_tokens": 880550112.0, "step": 1149 }, { "epoch": 0.4213611798113035, "grad_norm": 0.2300671794039106, "learning_rate": 3.972377097069661e-05, "loss": 0.6001, "num_tokens": 881363708.0, "step": 1150 }, { "epoch": 0.42172758083722633, "grad_norm": 0.24872726620473912, "learning_rate": 3.9723024814153545e-05, "loss": 0.547, "num_tokens": 882234427.0, "step": 1151 }, { "epoch": 0.4220939818631492, "grad_norm": 0.19483120939278792, "learning_rate": 3.9722277659014775e-05, "loss": 0.5624, "num_tokens": 882927525.0, "step": 1152 }, { "epoch": 0.4224603828890721, "grad_norm": 0.24255237877228714, "learning_rate": 3.972152950532238e-05, "loss": 0.5882, "num_tokens": 883660116.0, "step": 1153 }, { "epoch": 0.42282678391499495, "grad_norm": 0.22152912228847496, "learning_rate": 3.972078035311854e-05, "loss": 0.5773, "num_tokens": 884416308.0, "step": 1154 }, { "epoch": 0.42319318494091784, "grad_norm": 0.19945403232370448, "learning_rate": 3.9720030202445436e-05, "loss": 0.5749, "num_tokens": 885138651.0, "step": 1155 }, { "epoch": 0.42355958596684073, "grad_norm": 0.24080789273132325, "learning_rate": 3.971927905334535e-05, "loss": 0.5705, "num_tokens": 885844685.0, "step": 1156 }, { "epoch": 0.42392598699276357, "grad_norm": 0.24401901647262406, "learning_rate": 3.9718526905860606e-05, "loss": 0.6086, "num_tokens": 886612071.0, "step": 1157 }, { "epoch": 0.42429238801868646, "grad_norm": 0.2262886836128181, "learning_rate": 3.971777376003358e-05, "loss": 0.5987, "num_tokens": 887391816.0, "step": 1158 }, { "epoch": 0.4246587890446093, "grad_norm": 0.2466778854757874, "learning_rate": 3.971701961590671e-05, "loss": 0.6061, "num_tokens": 888075135.0, "step": 1159 }, { "epoch": 0.4250251900705322, "grad_norm": 0.22474385574770775, "learning_rate": 3.9716264473522486e-05, "loss": 0.5517, "num_tokens": 888780853.0, "step": 1160 }, { "epoch": 0.4253915910964551, "grad_norm": 0.20994092598014746, "learning_rate": 3.971550833292346e-05, "loss": 0.5686, "num_tokens": 889460096.0, "step": 1161 }, { "epoch": 0.4257579921223779, "grad_norm": 0.2236345885290093, "learning_rate": 3.971475119415223e-05, "loss": 0.5624, "num_tokens": 890299780.0, "step": 1162 }, { "epoch": 0.4261243931483008, "grad_norm": 0.21177105453721118, "learning_rate": 3.971399305725145e-05, "loss": 0.6138, "num_tokens": 891104988.0, "step": 1163 }, { "epoch": 0.4264907941742237, "grad_norm": 0.2412861151932828, "learning_rate": 3.971323392226386e-05, "loss": 0.5573, "num_tokens": 891885785.0, "step": 1164 }, { "epoch": 0.42685719520014653, "grad_norm": 0.24401471510930095, "learning_rate": 3.9712473789232214e-05, "loss": 0.5551, "num_tokens": 892682092.0, "step": 1165 }, { "epoch": 0.4272235962260694, "grad_norm": 0.25467606207091903, "learning_rate": 3.9711712658199346e-05, "loss": 0.6023, "num_tokens": 893295101.0, "step": 1166 }, { "epoch": 0.4275899972519923, "grad_norm": 0.24340610315818811, "learning_rate": 3.971095052920814e-05, "loss": 0.6014, "num_tokens": 893978698.0, "step": 1167 }, { "epoch": 0.42795639827791515, "grad_norm": 0.24841044912642574, "learning_rate": 3.971018740230155e-05, "loss": 0.5764, "num_tokens": 894778278.0, "step": 1168 }, { "epoch": 0.42832279930383804, "grad_norm": 0.2435836675250707, "learning_rate": 3.970942327752255e-05, "loss": 0.6078, "num_tokens": 895530454.0, "step": 1169 }, { "epoch": 0.42868920032976093, "grad_norm": 0.2592961256515257, "learning_rate": 3.970865815491422e-05, "loss": 0.6154, "num_tokens": 896209180.0, "step": 1170 }, { "epoch": 0.42905560135568377, "grad_norm": 0.23984758699113135, "learning_rate": 3.970789203451966e-05, "loss": 0.5689, "num_tokens": 896960756.0, "step": 1171 }, { "epoch": 0.42942200238160666, "grad_norm": 0.21854277617571963, "learning_rate": 3.9707124916382025e-05, "loss": 0.5868, "num_tokens": 897746744.0, "step": 1172 }, { "epoch": 0.42978840340752955, "grad_norm": 0.23282052551400134, "learning_rate": 3.970635680054456e-05, "loss": 0.5995, "num_tokens": 898467438.0, "step": 1173 }, { "epoch": 0.4301548044334524, "grad_norm": 0.21326822033208853, "learning_rate": 3.970558768705053e-05, "loss": 0.6231, "num_tokens": 899215632.0, "step": 1174 }, { "epoch": 0.4305212054593753, "grad_norm": 0.2148039725117222, "learning_rate": 3.9704817575943264e-05, "loss": 0.6169, "num_tokens": 900040206.0, "step": 1175 }, { "epoch": 0.43088760648529817, "grad_norm": 0.21309480149419258, "learning_rate": 3.9704046467266176e-05, "loss": 0.5595, "num_tokens": 900897556.0, "step": 1176 }, { "epoch": 0.431254007511221, "grad_norm": 0.20791266355424748, "learning_rate": 3.9703274361062693e-05, "loss": 0.6126, "num_tokens": 901777450.0, "step": 1177 }, { "epoch": 0.4316204085371439, "grad_norm": 0.24128526307783146, "learning_rate": 3.9702501257376325e-05, "loss": 0.603, "num_tokens": 902590292.0, "step": 1178 }, { "epoch": 0.4319868095630668, "grad_norm": 0.19508329330174198, "learning_rate": 3.9701727156250637e-05, "loss": 0.603, "num_tokens": 903345449.0, "step": 1179 }, { "epoch": 0.4323532105889896, "grad_norm": 0.23481080531084458, "learning_rate": 3.9700952057729246e-05, "loss": 0.5892, "num_tokens": 904063138.0, "step": 1180 }, { "epoch": 0.4327196116149125, "grad_norm": 0.20372808263016312, "learning_rate": 3.970017596185582e-05, "loss": 0.5941, "num_tokens": 904861967.0, "step": 1181 }, { "epoch": 0.4330860126408354, "grad_norm": 0.197316033226755, "learning_rate": 3.969939886867408e-05, "loss": 0.6028, "num_tokens": 905571961.0, "step": 1182 }, { "epoch": 0.43345241366675824, "grad_norm": 0.2693939419192058, "learning_rate": 3.9698620778227825e-05, "loss": 0.6116, "num_tokens": 906284899.0, "step": 1183 }, { "epoch": 0.43381881469268113, "grad_norm": 0.2090524195448201, "learning_rate": 3.9697841690560886e-05, "loss": 0.5795, "num_tokens": 907099143.0, "step": 1184 }, { "epoch": 0.434185215718604, "grad_norm": 0.24360922464019302, "learning_rate": 3.969706160571717e-05, "loss": 0.5617, "num_tokens": 907791688.0, "step": 1185 }, { "epoch": 0.43455161674452686, "grad_norm": 0.26890538388450586, "learning_rate": 3.9696280523740624e-05, "loss": 0.6001, "num_tokens": 908532892.0, "step": 1186 }, { "epoch": 0.43491801777044975, "grad_norm": 0.22338260220968834, "learning_rate": 3.9695498444675256e-05, "loss": 0.5982, "num_tokens": 909394028.0, "step": 1187 }, { "epoch": 0.43528441879637264, "grad_norm": 0.21816633093184393, "learning_rate": 3.969471536856514e-05, "loss": 0.586, "num_tokens": 910171348.0, "step": 1188 }, { "epoch": 0.4356508198222955, "grad_norm": 0.19863798985364498, "learning_rate": 3.969393129545439e-05, "loss": 0.5959, "num_tokens": 910945405.0, "step": 1189 }, { "epoch": 0.43601722084821837, "grad_norm": 0.22184808163600497, "learning_rate": 3.969314622538719e-05, "loss": 0.5932, "num_tokens": 911719909.0, "step": 1190 }, { "epoch": 0.43638362187414126, "grad_norm": 0.19579718793720735, "learning_rate": 3.969236015840777e-05, "loss": 0.581, "num_tokens": 912550905.0, "step": 1191 }, { "epoch": 0.4367500229000641, "grad_norm": 0.20850591340061525, "learning_rate": 3.969157309456042e-05, "loss": 0.5689, "num_tokens": 913207441.0, "step": 1192 }, { "epoch": 0.437116423925987, "grad_norm": 0.19875733954696614, "learning_rate": 3.9690785033889485e-05, "loss": 0.5561, "num_tokens": 914037570.0, "step": 1193 }, { "epoch": 0.4374828249519099, "grad_norm": 0.19078493385784231, "learning_rate": 3.968999597643938e-05, "loss": 0.5449, "num_tokens": 914705989.0, "step": 1194 }, { "epoch": 0.4378492259778327, "grad_norm": 0.2100813664386862, "learning_rate": 3.9689205922254555e-05, "loss": 0.599, "num_tokens": 915382203.0, "step": 1195 }, { "epoch": 0.4382156270037556, "grad_norm": 0.19809484604694533, "learning_rate": 3.968841487137953e-05, "loss": 0.5882, "num_tokens": 916129737.0, "step": 1196 }, { "epoch": 0.4385820280296785, "grad_norm": 0.20048871399250498, "learning_rate": 3.968762282385886e-05, "loss": 0.5508, "num_tokens": 916944361.0, "step": 1197 }, { "epoch": 0.43894842905560133, "grad_norm": 0.18562940339707826, "learning_rate": 3.9686829779737196e-05, "loss": 0.5332, "num_tokens": 917671381.0, "step": 1198 }, { "epoch": 0.4393148300815242, "grad_norm": 0.20156068678071015, "learning_rate": 3.9686035739059206e-05, "loss": 0.5973, "num_tokens": 918504106.0, "step": 1199 }, { "epoch": 0.4396812311074471, "grad_norm": 0.21987013323094987, "learning_rate": 3.9685240701869634e-05, "loss": 0.5949, "num_tokens": 919249149.0, "step": 1200 }, { "epoch": 0.44004763213336995, "grad_norm": 0.197379114102327, "learning_rate": 3.968444466821328e-05, "loss": 0.5861, "num_tokens": 920094158.0, "step": 1201 }, { "epoch": 0.44041403315929284, "grad_norm": 0.19630923216968843, "learning_rate": 3.9683647638134994e-05, "loss": 0.5699, "num_tokens": 920871395.0, "step": 1202 }, { "epoch": 0.44078043418521573, "grad_norm": 0.2401815634867311, "learning_rate": 3.968284961167968e-05, "loss": 0.6255, "num_tokens": 921593749.0, "step": 1203 }, { "epoch": 0.44114683521113857, "grad_norm": 0.2208912634038696, "learning_rate": 3.9682050588892305e-05, "loss": 0.5978, "num_tokens": 922418157.0, "step": 1204 }, { "epoch": 0.44151323623706146, "grad_norm": 0.2138552222979519, "learning_rate": 3.9681250569817894e-05, "loss": 0.5902, "num_tokens": 923143823.0, "step": 1205 }, { "epoch": 0.44187963726298435, "grad_norm": 0.24774612223903525, "learning_rate": 3.9680449554501514e-05, "loss": 0.5792, "num_tokens": 923927610.0, "step": 1206 }, { "epoch": 0.4422460382889072, "grad_norm": 0.20956464862715252, "learning_rate": 3.967964754298831e-05, "loss": 0.6369, "num_tokens": 924618823.0, "step": 1207 }, { "epoch": 0.4426124393148301, "grad_norm": 0.1931350593295709, "learning_rate": 3.9678844535323465e-05, "loss": 0.5757, "num_tokens": 925417202.0, "step": 1208 }, { "epoch": 0.44297884034075297, "grad_norm": 0.2176801002948919, "learning_rate": 3.967804053155222e-05, "loss": 0.5542, "num_tokens": 926129395.0, "step": 1209 }, { "epoch": 0.4433452413666758, "grad_norm": 0.20557736063929824, "learning_rate": 3.9677235531719873e-05, "loss": 0.5918, "num_tokens": 926909829.0, "step": 1210 }, { "epoch": 0.4437116423925987, "grad_norm": 0.21487192657156123, "learning_rate": 3.96764295358718e-05, "loss": 0.6183, "num_tokens": 927641234.0, "step": 1211 }, { "epoch": 0.4440780434185216, "grad_norm": 0.19810671624206094, "learning_rate": 3.96756225440534e-05, "loss": 0.5858, "num_tokens": 928489015.0, "step": 1212 }, { "epoch": 0.4444444444444444, "grad_norm": 0.203345855972784, "learning_rate": 3.9674814556310145e-05, "loss": 0.5997, "num_tokens": 929323715.0, "step": 1213 }, { "epoch": 0.4448108454703673, "grad_norm": 0.2104437419941944, "learning_rate": 3.967400557268756e-05, "loss": 0.61, "num_tokens": 930069947.0, "step": 1214 }, { "epoch": 0.4451772464962902, "grad_norm": 0.2128491297030547, "learning_rate": 3.967319559323123e-05, "loss": 0.5658, "num_tokens": 930885130.0, "step": 1215 }, { "epoch": 0.44554364752221304, "grad_norm": 0.18872336851484117, "learning_rate": 3.967238461798679e-05, "loss": 0.5853, "num_tokens": 931675510.0, "step": 1216 }, { "epoch": 0.44591004854813593, "grad_norm": 0.2342370163216296, "learning_rate": 3.9671572646999935e-05, "loss": 0.5784, "num_tokens": 932532862.0, "step": 1217 }, { "epoch": 0.4462764495740588, "grad_norm": 0.20357479799508707, "learning_rate": 3.967075968031641e-05, "loss": 0.5868, "num_tokens": 933252968.0, "step": 1218 }, { "epoch": 0.44664285059998166, "grad_norm": 0.20363620158021495, "learning_rate": 3.966994571798203e-05, "loss": 0.583, "num_tokens": 933957241.0, "step": 1219 }, { "epoch": 0.44700925162590455, "grad_norm": 0.19589532862755316, "learning_rate": 3.9669130760042655e-05, "loss": 0.6114, "num_tokens": 934724673.0, "step": 1220 }, { "epoch": 0.44737565265182744, "grad_norm": 0.20044520378879704, "learning_rate": 3.96683148065442e-05, "loss": 0.5712, "num_tokens": 935502040.0, "step": 1221 }, { "epoch": 0.4477420536777503, "grad_norm": 0.1854759994233389, "learning_rate": 3.966749785753264e-05, "loss": 0.594, "num_tokens": 936254785.0, "step": 1222 }, { "epoch": 0.44810845470367316, "grad_norm": 0.19158369124793198, "learning_rate": 3.9666679913054016e-05, "loss": 0.5627, "num_tokens": 936941818.0, "step": 1223 }, { "epoch": 0.44847485572959606, "grad_norm": 0.20332744256890103, "learning_rate": 3.96658609731544e-05, "loss": 0.5954, "num_tokens": 937829749.0, "step": 1224 }, { "epoch": 0.4488412567555189, "grad_norm": 0.19347891453995802, "learning_rate": 3.966504103787994e-05, "loss": 0.5544, "num_tokens": 938615358.0, "step": 1225 }, { "epoch": 0.4492076577814418, "grad_norm": 0.21586050625842043, "learning_rate": 3.966422010727684e-05, "loss": 0.6192, "num_tokens": 939500936.0, "step": 1226 }, { "epoch": 0.4495740588073647, "grad_norm": 0.1998965640507073, "learning_rate": 3.966339818139134e-05, "loss": 0.5785, "num_tokens": 940384371.0, "step": 1227 }, { "epoch": 0.4499404598332875, "grad_norm": 0.2612341899665889, "learning_rate": 3.966257526026977e-05, "loss": 0.5613, "num_tokens": 941042467.0, "step": 1228 }, { "epoch": 0.4503068608592104, "grad_norm": 0.2608948198592661, "learning_rate": 3.966175134395849e-05, "loss": 0.5617, "num_tokens": 941785218.0, "step": 1229 }, { "epoch": 0.4506732618851333, "grad_norm": 0.24505573591822497, "learning_rate": 3.9660926432503916e-05, "loss": 0.5572, "num_tokens": 942625377.0, "step": 1230 }, { "epoch": 0.4510396629110561, "grad_norm": 0.2523184164819657, "learning_rate": 3.9660100525952535e-05, "loss": 0.5948, "num_tokens": 943420764.0, "step": 1231 }, { "epoch": 0.451406063936979, "grad_norm": 0.20233294945156866, "learning_rate": 3.965927362435088e-05, "loss": 0.5984, "num_tokens": 944147020.0, "step": 1232 }, { "epoch": 0.4517724649629019, "grad_norm": 0.27024508254489066, "learning_rate": 3.9658445727745544e-05, "loss": 0.6046, "num_tokens": 944804366.0, "step": 1233 }, { "epoch": 0.45213886598882475, "grad_norm": 0.2336514614284094, "learning_rate": 3.9657616836183175e-05, "loss": 0.6096, "num_tokens": 945531986.0, "step": 1234 }, { "epoch": 0.45250526701474764, "grad_norm": 0.18766649463706245, "learning_rate": 3.9656786949710475e-05, "loss": 0.5727, "num_tokens": 946266148.0, "step": 1235 }, { "epoch": 0.4528716680406705, "grad_norm": 0.24647896503468492, "learning_rate": 3.9655956068374194e-05, "loss": 0.6447, "num_tokens": 946916590.0, "step": 1236 }, { "epoch": 0.45323806906659336, "grad_norm": 0.20774427178942745, "learning_rate": 3.965512419222117e-05, "loss": 0.6196, "num_tokens": 947570658.0, "step": 1237 }, { "epoch": 0.45360447009251625, "grad_norm": 0.222540049435277, "learning_rate": 3.965429132129826e-05, "loss": 0.6482, "num_tokens": 948355111.0, "step": 1238 }, { "epoch": 0.45397087111843915, "grad_norm": 0.206666400311846, "learning_rate": 3.965345745565239e-05, "loss": 0.5581, "num_tokens": 949105814.0, "step": 1239 }, { "epoch": 0.454337272144362, "grad_norm": 0.25132735517491284, "learning_rate": 3.965262259533054e-05, "loss": 0.5845, "num_tokens": 949791508.0, "step": 1240 }, { "epoch": 0.45470367317028487, "grad_norm": 0.25260665070091065, "learning_rate": 3.965178674037977e-05, "loss": 0.5783, "num_tokens": 950550450.0, "step": 1241 }, { "epoch": 0.45507007419620776, "grad_norm": 0.2596734559025109, "learning_rate": 3.965094989084715e-05, "loss": 0.5879, "num_tokens": 951318450.0, "step": 1242 }, { "epoch": 0.4554364752221306, "grad_norm": 0.22101929993967048, "learning_rate": 3.965011204677985e-05, "loss": 0.5882, "num_tokens": 952010326.0, "step": 1243 }, { "epoch": 0.4558028762480535, "grad_norm": 0.23663821143067462, "learning_rate": 3.964927320822508e-05, "loss": 0.5936, "num_tokens": 952856692.0, "step": 1244 }, { "epoch": 0.4561692772739764, "grad_norm": 0.2328755239659435, "learning_rate": 3.964843337523009e-05, "loss": 0.5897, "num_tokens": 953635076.0, "step": 1245 }, { "epoch": 0.4565356782998992, "grad_norm": 0.22865426108554493, "learning_rate": 3.96475925478422e-05, "loss": 0.5814, "num_tokens": 954399333.0, "step": 1246 }, { "epoch": 0.4569020793258221, "grad_norm": 0.30518559977550125, "learning_rate": 3.964675072610881e-05, "loss": 0.5776, "num_tokens": 955236876.0, "step": 1247 }, { "epoch": 0.457268480351745, "grad_norm": 0.20357741493783252, "learning_rate": 3.9645907910077317e-05, "loss": 0.5526, "num_tokens": 956064486.0, "step": 1248 }, { "epoch": 0.45763488137766783, "grad_norm": 0.1994205143683827, "learning_rate": 3.964506409979524e-05, "loss": 0.5914, "num_tokens": 956902617.0, "step": 1249 }, { "epoch": 0.4580012824035907, "grad_norm": 0.1830973578332392, "learning_rate": 3.96442192953101e-05, "loss": 0.5605, "num_tokens": 957692964.0, "step": 1250 }, { "epoch": 0.4583676834295136, "grad_norm": 0.20712799546763652, "learning_rate": 3.964337349666951e-05, "loss": 0.567, "num_tokens": 958521390.0, "step": 1251 }, { "epoch": 0.45873408445543645, "grad_norm": 0.21340498716936032, "learning_rate": 3.964252670392113e-05, "loss": 0.5604, "num_tokens": 959391402.0, "step": 1252 }, { "epoch": 0.45910048548135934, "grad_norm": 0.20805808462582318, "learning_rate": 3.9641678917112655e-05, "loss": 0.6046, "num_tokens": 960122990.0, "step": 1253 }, { "epoch": 0.45946688650728224, "grad_norm": 0.2581684356321505, "learning_rate": 3.964083013629187e-05, "loss": 0.5651, "num_tokens": 960944223.0, "step": 1254 }, { "epoch": 0.45983328753320507, "grad_norm": 0.21250100620802126, "learning_rate": 3.963998036150659e-05, "loss": 0.5749, "num_tokens": 961713657.0, "step": 1255 }, { "epoch": 0.46019968855912796, "grad_norm": 0.20940869824981762, "learning_rate": 3.96391295928047e-05, "loss": 0.5771, "num_tokens": 962535067.0, "step": 1256 }, { "epoch": 0.46056608958505085, "grad_norm": 0.20539949482163614, "learning_rate": 3.9638277830234136e-05, "loss": 0.5513, "num_tokens": 963272945.0, "step": 1257 }, { "epoch": 0.4609324906109737, "grad_norm": 0.189684475249381, "learning_rate": 3.9637425073842885e-05, "loss": 0.5917, "num_tokens": 963917299.0, "step": 1258 }, { "epoch": 0.4612988916368966, "grad_norm": 0.2518734541824927, "learning_rate": 3.9636571323679004e-05, "loss": 0.6078, "num_tokens": 964654599.0, "step": 1259 }, { "epoch": 0.46166529266281947, "grad_norm": 0.2299400735863963, "learning_rate": 3.9635716579790584e-05, "loss": 0.5997, "num_tokens": 965409366.0, "step": 1260 }, { "epoch": 0.4620316936887423, "grad_norm": 0.20787781330357555, "learning_rate": 3.96348608422258e-05, "loss": 0.5393, "num_tokens": 966269867.0, "step": 1261 }, { "epoch": 0.4623980947146652, "grad_norm": 0.24566335000525816, "learning_rate": 3.963400411103286e-05, "loss": 0.591, "num_tokens": 967043847.0, "step": 1262 }, { "epoch": 0.4627644957405881, "grad_norm": 0.21292070547564218, "learning_rate": 3.9633146386260025e-05, "loss": 0.6007, "num_tokens": 967876477.0, "step": 1263 }, { "epoch": 0.4631308967665109, "grad_norm": 0.1938014811309294, "learning_rate": 3.963228766795565e-05, "loss": 0.5651, "num_tokens": 968600540.0, "step": 1264 }, { "epoch": 0.4634972977924338, "grad_norm": 0.20431849938141644, "learning_rate": 3.963142795616809e-05, "loss": 0.5743, "num_tokens": 969264783.0, "step": 1265 }, { "epoch": 0.4638636988183567, "grad_norm": 0.20111983983227633, "learning_rate": 3.963056725094581e-05, "loss": 0.547, "num_tokens": 969986807.0, "step": 1266 }, { "epoch": 0.46423009984427954, "grad_norm": 0.19084665610468768, "learning_rate": 3.962970555233729e-05, "loss": 0.5716, "num_tokens": 970687294.0, "step": 1267 }, { "epoch": 0.46459650087020243, "grad_norm": 0.21084719616230324, "learning_rate": 3.962884286039109e-05, "loss": 0.5905, "num_tokens": 971446340.0, "step": 1268 }, { "epoch": 0.4649629018961253, "grad_norm": 0.22028189305004914, "learning_rate": 3.962797917515581e-05, "loss": 0.5804, "num_tokens": 972221509.0, "step": 1269 }, { "epoch": 0.46532930292204816, "grad_norm": 0.2413319077493928, "learning_rate": 3.962711449668013e-05, "loss": 0.5544, "num_tokens": 973037100.0, "step": 1270 }, { "epoch": 0.46569570394797105, "grad_norm": 0.2120242838483683, "learning_rate": 3.9626248825012755e-05, "loss": 0.571, "num_tokens": 973672522.0, "step": 1271 }, { "epoch": 0.46606210497389394, "grad_norm": 0.2316058259248407, "learning_rate": 3.962538216020246e-05, "loss": 0.6144, "num_tokens": 974388048.0, "step": 1272 }, { "epoch": 0.4664285059998168, "grad_norm": 0.20395714468518222, "learning_rate": 3.9624514502298085e-05, "loss": 0.6031, "num_tokens": 975074993.0, "step": 1273 }, { "epoch": 0.46679490702573967, "grad_norm": 0.2094972111060609, "learning_rate": 3.962364585134851e-05, "loss": 0.5465, "num_tokens": 975724531.0, "step": 1274 }, { "epoch": 0.46716130805166256, "grad_norm": 0.210831134306174, "learning_rate": 3.9622776207402696e-05, "loss": 0.567, "num_tokens": 976499170.0, "step": 1275 }, { "epoch": 0.4675277090775854, "grad_norm": 0.2362492049553823, "learning_rate": 3.962190557050962e-05, "loss": 0.6084, "num_tokens": 977214363.0, "step": 1276 }, { "epoch": 0.4678941101035083, "grad_norm": 0.2110075137851438, "learning_rate": 3.9621033940718345e-05, "loss": 0.6175, "num_tokens": 977890448.0, "step": 1277 }, { "epoch": 0.4682605111294312, "grad_norm": 0.2834638340840166, "learning_rate": 3.962016131807799e-05, "loss": 0.6096, "num_tokens": 978626403.0, "step": 1278 }, { "epoch": 0.468626912155354, "grad_norm": 0.20580590341731603, "learning_rate": 3.9619287702637716e-05, "loss": 0.5854, "num_tokens": 979343858.0, "step": 1279 }, { "epoch": 0.4689933131812769, "grad_norm": 0.28452929508520564, "learning_rate": 3.961841309444676e-05, "loss": 0.5701, "num_tokens": 980037032.0, "step": 1280 }, { "epoch": 0.4693597142071998, "grad_norm": 0.26003555822322855, "learning_rate": 3.9617537493554375e-05, "loss": 0.5749, "num_tokens": 980857652.0, "step": 1281 }, { "epoch": 0.46972611523312263, "grad_norm": 0.21518550335361283, "learning_rate": 3.961666090000991e-05, "loss": 0.5987, "num_tokens": 981679522.0, "step": 1282 }, { "epoch": 0.4700925162590455, "grad_norm": 0.28763743313131224, "learning_rate": 3.961578331386276e-05, "loss": 0.5702, "num_tokens": 982379298.0, "step": 1283 }, { "epoch": 0.4704589172849684, "grad_norm": 0.1843319912878515, "learning_rate": 3.961490473516238e-05, "loss": 0.5945, "num_tokens": 983208221.0, "step": 1284 }, { "epoch": 0.47082531831089125, "grad_norm": 0.24986639448593895, "learning_rate": 3.961402516395825e-05, "loss": 0.5947, "num_tokens": 983902253.0, "step": 1285 }, { "epoch": 0.47119171933681414, "grad_norm": 0.21987640054949537, "learning_rate": 3.961314460029995e-05, "loss": 0.598, "num_tokens": 984659853.0, "step": 1286 }, { "epoch": 0.47155812036273703, "grad_norm": 0.23294356725220072, "learning_rate": 3.961226304423708e-05, "loss": 0.5783, "num_tokens": 985299586.0, "step": 1287 }, { "epoch": 0.47192452138865987, "grad_norm": 0.2177989858268083, "learning_rate": 3.9611380495819325e-05, "loss": 0.6243, "num_tokens": 986093703.0, "step": 1288 }, { "epoch": 0.47229092241458276, "grad_norm": 0.2099442770751587, "learning_rate": 3.96104969550964e-05, "loss": 0.6241, "num_tokens": 986860623.0, "step": 1289 }, { "epoch": 0.47265732344050565, "grad_norm": 0.1971681082095028, "learning_rate": 3.960961242211809e-05, "loss": 0.5777, "num_tokens": 987636888.0, "step": 1290 }, { "epoch": 0.4730237244664285, "grad_norm": 0.2247045236572672, "learning_rate": 3.960872689693424e-05, "loss": 0.5895, "num_tokens": 988408326.0, "step": 1291 }, { "epoch": 0.4733901254923514, "grad_norm": 0.20017222978429025, "learning_rate": 3.960784037959473e-05, "loss": 0.5952, "num_tokens": 989263975.0, "step": 1292 }, { "epoch": 0.47375652651827427, "grad_norm": 0.21536642674792197, "learning_rate": 3.960695287014953e-05, "loss": 0.5946, "num_tokens": 990053802.0, "step": 1293 }, { "epoch": 0.4741229275441971, "grad_norm": 0.2051558538292573, "learning_rate": 3.9606064368648635e-05, "loss": 0.586, "num_tokens": 990817074.0, "step": 1294 }, { "epoch": 0.47448932857012, "grad_norm": 0.21905926094735437, "learning_rate": 3.960517487514211e-05, "loss": 0.5996, "num_tokens": 991519160.0, "step": 1295 }, { "epoch": 0.4748557295960429, "grad_norm": 0.20779821625589817, "learning_rate": 3.960428438968007e-05, "loss": 0.5858, "num_tokens": 992266557.0, "step": 1296 }, { "epoch": 0.4752221306219657, "grad_norm": 0.20390072749891444, "learning_rate": 3.960339291231269e-05, "loss": 0.5897, "num_tokens": 992896892.0, "step": 1297 }, { "epoch": 0.4755885316478886, "grad_norm": 0.2442904232245069, "learning_rate": 3.9602500443090205e-05, "loss": 0.5884, "num_tokens": 993608375.0, "step": 1298 }, { "epoch": 0.4759549326738115, "grad_norm": 0.20221019933951498, "learning_rate": 3.9601606982062894e-05, "loss": 0.5654, "num_tokens": 994508951.0, "step": 1299 }, { "epoch": 0.47632133369973434, "grad_norm": 0.19557594574108408, "learning_rate": 3.9600712529281095e-05, "loss": 0.5846, "num_tokens": 995210696.0, "step": 1300 }, { "epoch": 0.47668773472565723, "grad_norm": 0.2121340944071062, "learning_rate": 3.959981708479522e-05, "loss": 0.5879, "num_tokens": 996033572.0, "step": 1301 }, { "epoch": 0.4770541357515801, "grad_norm": 0.20028904792770263, "learning_rate": 3.9598920648655716e-05, "loss": 0.5637, "num_tokens": 996861096.0, "step": 1302 }, { "epoch": 0.47742053677750296, "grad_norm": 0.20692808228796514, "learning_rate": 3.959802322091309e-05, "loss": 0.588, "num_tokens": 997597934.0, "step": 1303 }, { "epoch": 0.47778693780342585, "grad_norm": 0.2303965079016076, "learning_rate": 3.95971248016179e-05, "loss": 0.6202, "num_tokens": 998307983.0, "step": 1304 }, { "epoch": 0.47815333882934874, "grad_norm": 0.21775725209385585, "learning_rate": 3.959622539082078e-05, "loss": 0.5282, "num_tokens": 999132890.0, "step": 1305 }, { "epoch": 0.4785197398552716, "grad_norm": 0.20662656306069663, "learning_rate": 3.95953249885724e-05, "loss": 0.5625, "num_tokens": 999921238.0, "step": 1306 }, { "epoch": 0.47888614088119447, "grad_norm": 0.21922123487469702, "learning_rate": 3.9594423594923495e-05, "loss": 0.5895, "num_tokens": 1000645723.0, "step": 1307 }, { "epoch": 0.47925254190711736, "grad_norm": 0.2153152439051097, "learning_rate": 3.9593521209924855e-05, "loss": 0.5624, "num_tokens": 1001333990.0, "step": 1308 }, { "epoch": 0.4796189429330402, "grad_norm": 0.19626236969063948, "learning_rate": 3.959261783362733e-05, "loss": 0.6151, "num_tokens": 1002072120.0, "step": 1309 }, { "epoch": 0.4799853439589631, "grad_norm": 0.2332630188617175, "learning_rate": 3.9591713466081795e-05, "loss": 0.6011, "num_tokens": 1002705711.0, "step": 1310 }, { "epoch": 0.480351744984886, "grad_norm": 0.224467267294686, "learning_rate": 3.959080810733923e-05, "loss": 0.5661, "num_tokens": 1003529201.0, "step": 1311 }, { "epoch": 0.4807181460108088, "grad_norm": 0.21953683500233973, "learning_rate": 3.958990175745065e-05, "loss": 0.5704, "num_tokens": 1004289988.0, "step": 1312 }, { "epoch": 0.4810845470367317, "grad_norm": 0.1892184813674755, "learning_rate": 3.9588994416467106e-05, "loss": 0.5663, "num_tokens": 1005111050.0, "step": 1313 }, { "epoch": 0.4814509480626546, "grad_norm": 0.23078125108676706, "learning_rate": 3.9588086084439735e-05, "loss": 0.5793, "num_tokens": 1006000822.0, "step": 1314 }, { "epoch": 0.48181734908857743, "grad_norm": 0.18376090545782667, "learning_rate": 3.9587176761419696e-05, "loss": 0.5955, "num_tokens": 1006794752.0, "step": 1315 }, { "epoch": 0.4821837501145003, "grad_norm": 0.21274547307755873, "learning_rate": 3.958626644745826e-05, "loss": 0.5652, "num_tokens": 1007672848.0, "step": 1316 }, { "epoch": 0.4825501511404232, "grad_norm": 0.20491361938696073, "learning_rate": 3.958535514260668e-05, "loss": 0.5459, "num_tokens": 1008374617.0, "step": 1317 }, { "epoch": 0.48291655216634605, "grad_norm": 0.21152070637941878, "learning_rate": 3.9584442846916336e-05, "loss": 0.5909, "num_tokens": 1009135241.0, "step": 1318 }, { "epoch": 0.48328295319226894, "grad_norm": 0.2027874744253964, "learning_rate": 3.958352956043861e-05, "loss": 0.5901, "num_tokens": 1009813211.0, "step": 1319 }, { "epoch": 0.48364935421819183, "grad_norm": 0.21759206666773467, "learning_rate": 3.958261528322497e-05, "loss": 0.5808, "num_tokens": 1010609467.0, "step": 1320 }, { "epoch": 0.48401575524411466, "grad_norm": 0.21930288686110253, "learning_rate": 3.958170001532692e-05, "loss": 0.5696, "num_tokens": 1011421653.0, "step": 1321 }, { "epoch": 0.48438215627003756, "grad_norm": 0.20065515203908077, "learning_rate": 3.958078375679604e-05, "loss": 0.5412, "num_tokens": 1012270151.0, "step": 1322 }, { "epoch": 0.48474855729596045, "grad_norm": 0.22533871047452045, "learning_rate": 3.957986650768395e-05, "loss": 0.5732, "num_tokens": 1013027867.0, "step": 1323 }, { "epoch": 0.4851149583218833, "grad_norm": 0.23173262945645212, "learning_rate": 3.957894826804234e-05, "loss": 0.5947, "num_tokens": 1013730729.0, "step": 1324 }, { "epoch": 0.4854813593478062, "grad_norm": 0.2604582347392973, "learning_rate": 3.9578029037922945e-05, "loss": 0.5915, "num_tokens": 1014527404.0, "step": 1325 }, { "epoch": 0.48584776037372907, "grad_norm": 0.18974269404192093, "learning_rate": 3.957710881737756e-05, "loss": 0.5638, "num_tokens": 1015398853.0, "step": 1326 }, { "epoch": 0.4862141613996519, "grad_norm": 0.20143922947919882, "learning_rate": 3.957618760645801e-05, "loss": 0.6151, "num_tokens": 1016241730.0, "step": 1327 }, { "epoch": 0.4865805624255748, "grad_norm": 0.24793825618543072, "learning_rate": 3.9575265405216243e-05, "loss": 0.6113, "num_tokens": 1017125535.0, "step": 1328 }, { "epoch": 0.4869469634514977, "grad_norm": 0.23585682146991588, "learning_rate": 3.9574342213704197e-05, "loss": 0.5783, "num_tokens": 1017919493.0, "step": 1329 }, { "epoch": 0.4873133644774205, "grad_norm": 0.2037106518238918, "learning_rate": 3.957341803197388e-05, "loss": 0.5963, "num_tokens": 1018645253.0, "step": 1330 }, { "epoch": 0.4876797655033434, "grad_norm": 0.26432751622656975, "learning_rate": 3.9572492860077376e-05, "loss": 0.6036, "num_tokens": 1019345695.0, "step": 1331 }, { "epoch": 0.4880461665292663, "grad_norm": 0.23119413545599274, "learning_rate": 3.957156669806682e-05, "loss": 0.572, "num_tokens": 1020154872.0, "step": 1332 }, { "epoch": 0.48841256755518914, "grad_norm": 0.20371847070608237, "learning_rate": 3.9570639545994385e-05, "loss": 0.5687, "num_tokens": 1020921254.0, "step": 1333 }, { "epoch": 0.488778968581112, "grad_norm": 0.2419892436984405, "learning_rate": 3.9569711403912315e-05, "loss": 0.6198, "num_tokens": 1021669806.0, "step": 1334 }, { "epoch": 0.4891453696070349, "grad_norm": 0.20693397654141893, "learning_rate": 3.956878227187291e-05, "loss": 0.6032, "num_tokens": 1022373946.0, "step": 1335 }, { "epoch": 0.48951177063295775, "grad_norm": 0.20706730110294272, "learning_rate": 3.9567852149928504e-05, "loss": 0.6072, "num_tokens": 1023129644.0, "step": 1336 }, { "epoch": 0.48987817165888065, "grad_norm": 0.20555962294866537, "learning_rate": 3.956692103813153e-05, "loss": 0.625, "num_tokens": 1023869136.0, "step": 1337 }, { "epoch": 0.49024457268480354, "grad_norm": 0.21231175927341148, "learning_rate": 3.9565988936534434e-05, "loss": 0.5964, "num_tokens": 1024556798.0, "step": 1338 }, { "epoch": 0.4906109737107264, "grad_norm": 0.20600960720041256, "learning_rate": 3.9565055845189724e-05, "loss": 0.5916, "num_tokens": 1025369288.0, "step": 1339 }, { "epoch": 0.49097737473664926, "grad_norm": 0.214723196394803, "learning_rate": 3.956412176415001e-05, "loss": 0.578, "num_tokens": 1026140583.0, "step": 1340 }, { "epoch": 0.49134377576257215, "grad_norm": 0.2170945197869511, "learning_rate": 3.9563186693467896e-05, "loss": 0.5895, "num_tokens": 1026855965.0, "step": 1341 }, { "epoch": 0.491710176788495, "grad_norm": 0.21051932583469563, "learning_rate": 3.956225063319607e-05, "loss": 0.5718, "num_tokens": 1027681491.0, "step": 1342 }, { "epoch": 0.4920765778144179, "grad_norm": 0.2294173695919449, "learning_rate": 3.956131358338728e-05, "loss": 0.5951, "num_tokens": 1028452062.0, "step": 1343 }, { "epoch": 0.4924429788403408, "grad_norm": 0.19775135169743294, "learning_rate": 3.956037554409432e-05, "loss": 0.6147, "num_tokens": 1029166697.0, "step": 1344 }, { "epoch": 0.4928093798662636, "grad_norm": 0.236647191545824, "learning_rate": 3.955943651537004e-05, "loss": 0.5872, "num_tokens": 1029937973.0, "step": 1345 }, { "epoch": 0.4931757808921865, "grad_norm": 0.21056865252847687, "learning_rate": 3.955849649726737e-05, "loss": 0.5725, "num_tokens": 1030712963.0, "step": 1346 }, { "epoch": 0.4935421819181094, "grad_norm": 0.17854994210345593, "learning_rate": 3.955755548983924e-05, "loss": 0.587, "num_tokens": 1031527571.0, "step": 1347 }, { "epoch": 0.4939085829440322, "grad_norm": 0.19720016442957627, "learning_rate": 3.95566134931387e-05, "loss": 0.5994, "num_tokens": 1032351350.0, "step": 1348 }, { "epoch": 0.4942749839699551, "grad_norm": 0.18372352430506783, "learning_rate": 3.9555670507218815e-05, "loss": 0.5546, "num_tokens": 1033109858.0, "step": 1349 }, { "epoch": 0.494641384995878, "grad_norm": 0.17660877696857094, "learning_rate": 3.9554726532132715e-05, "loss": 0.5746, "num_tokens": 1034031168.0, "step": 1350 }, { "epoch": 0.49500778602180084, "grad_norm": 0.19012326118869816, "learning_rate": 3.9553781567933594e-05, "loss": 0.5954, "num_tokens": 1034698300.0, "step": 1351 }, { "epoch": 0.49537418704772374, "grad_norm": 0.17551610594299863, "learning_rate": 3.9552835614674694e-05, "loss": 0.5682, "num_tokens": 1035479056.0, "step": 1352 }, { "epoch": 0.4957405880736466, "grad_norm": 0.20322749490692008, "learning_rate": 3.955188867240931e-05, "loss": 0.5908, "num_tokens": 1036344226.0, "step": 1353 }, { "epoch": 0.49610698909956946, "grad_norm": 0.1980107828571538, "learning_rate": 3.95509407411908e-05, "loss": 0.6215, "num_tokens": 1037035580.0, "step": 1354 }, { "epoch": 0.49647339012549235, "grad_norm": 0.2359842416129749, "learning_rate": 3.9549991821072574e-05, "loss": 0.6273, "num_tokens": 1037871020.0, "step": 1355 }, { "epoch": 0.49683979115141524, "grad_norm": 0.18364374480763457, "learning_rate": 3.95490419121081e-05, "loss": 0.5486, "num_tokens": 1038643040.0, "step": 1356 }, { "epoch": 0.4972061921773381, "grad_norm": 0.19075787830640606, "learning_rate": 3.95480910143509e-05, "loss": 0.5616, "num_tokens": 1039441753.0, "step": 1357 }, { "epoch": 0.49757259320326097, "grad_norm": 0.20708552509736186, "learning_rate": 3.954713912785455e-05, "loss": 0.5941, "num_tokens": 1040289315.0, "step": 1358 }, { "epoch": 0.49793899422918386, "grad_norm": 0.20594011847092347, "learning_rate": 3.954618625267268e-05, "loss": 0.5816, "num_tokens": 1041000949.0, "step": 1359 }, { "epoch": 0.4983053952551067, "grad_norm": 0.18839359053871005, "learning_rate": 3.954523238885899e-05, "loss": 0.5737, "num_tokens": 1041700309.0, "step": 1360 }, { "epoch": 0.4986717962810296, "grad_norm": 0.2008997845450464, "learning_rate": 3.954427753646721e-05, "loss": 0.6053, "num_tokens": 1042439527.0, "step": 1361 }, { "epoch": 0.4990381973069525, "grad_norm": 0.21076165119496976, "learning_rate": 3.954332169555116e-05, "loss": 0.5789, "num_tokens": 1043228218.0, "step": 1362 }, { "epoch": 0.4994045983328753, "grad_norm": 0.2132046167832493, "learning_rate": 3.954236486616468e-05, "loss": 0.5959, "num_tokens": 1043956035.0, "step": 1363 }, { "epoch": 0.4997709993587982, "grad_norm": 0.20056545728411782, "learning_rate": 3.95414070483617e-05, "loss": 0.5723, "num_tokens": 1044737589.0, "step": 1364 }, { "epoch": 0.5001374003847211, "grad_norm": 0.22246393515629384, "learning_rate": 3.9540448242196166e-05, "loss": 0.5792, "num_tokens": 1045567352.0, "step": 1365 }, { "epoch": 0.500503801410644, "grad_norm": 0.2285488284847756, "learning_rate": 3.953948844772211e-05, "loss": 0.5479, "num_tokens": 1046383979.0, "step": 1366 }, { "epoch": 0.5008702024365668, "grad_norm": 0.2198165388671474, "learning_rate": 3.9538527664993615e-05, "loss": 0.6133, "num_tokens": 1047225639.0, "step": 1367 }, { "epoch": 0.5012366034624897, "grad_norm": 0.21362399471046517, "learning_rate": 3.9537565894064814e-05, "loss": 0.5761, "num_tokens": 1047870206.0, "step": 1368 }, { "epoch": 0.5016030044884126, "grad_norm": 0.1968892158184414, "learning_rate": 3.95366031349899e-05, "loss": 0.5887, "num_tokens": 1048645282.0, "step": 1369 }, { "epoch": 0.5019694055143354, "grad_norm": 0.2333389657853368, "learning_rate": 3.953563938782311e-05, "loss": 0.5872, "num_tokens": 1049381860.0, "step": 1370 }, { "epoch": 0.5023358065402583, "grad_norm": 0.20163439293432692, "learning_rate": 3.9534674652618755e-05, "loss": 0.5636, "num_tokens": 1050112396.0, "step": 1371 }, { "epoch": 0.5027022075661812, "grad_norm": 0.21177754895805914, "learning_rate": 3.9533708929431193e-05, "loss": 0.58, "num_tokens": 1050833179.0, "step": 1372 }, { "epoch": 0.503068608592104, "grad_norm": 0.21911426233573073, "learning_rate": 3.953274221831483e-05, "loss": 0.6089, "num_tokens": 1051503637.0, "step": 1373 }, { "epoch": 0.5034350096180269, "grad_norm": 0.20326039681806776, "learning_rate": 3.953177451932414e-05, "loss": 0.5395, "num_tokens": 1052178189.0, "step": 1374 }, { "epoch": 0.5038014106439498, "grad_norm": 0.26531728851234904, "learning_rate": 3.953080583251365e-05, "loss": 0.5976, "num_tokens": 1052907611.0, "step": 1375 }, { "epoch": 0.5041678116698727, "grad_norm": 0.19875236294331475, "learning_rate": 3.9529836157937933e-05, "loss": 0.6131, "num_tokens": 1053588162.0, "step": 1376 }, { "epoch": 0.5045342126957956, "grad_norm": 0.23386567053859428, "learning_rate": 3.952886549565163e-05, "loss": 0.6043, "num_tokens": 1054267288.0, "step": 1377 }, { "epoch": 0.5049006137217185, "grad_norm": 0.2218731975959665, "learning_rate": 3.9527893845709434e-05, "loss": 0.5628, "num_tokens": 1055092559.0, "step": 1378 }, { "epoch": 0.5052670147476412, "grad_norm": 0.20295327405650668, "learning_rate": 3.952692120816608e-05, "loss": 0.6116, "num_tokens": 1055847528.0, "step": 1379 }, { "epoch": 0.5056334157735641, "grad_norm": 0.23571538689450036, "learning_rate": 3.952594758307639e-05, "loss": 0.591, "num_tokens": 1056722514.0, "step": 1380 }, { "epoch": 0.505999816799487, "grad_norm": 0.19193855131031937, "learning_rate": 3.9524972970495204e-05, "loss": 0.5586, "num_tokens": 1057429382.0, "step": 1381 }, { "epoch": 0.5063662178254099, "grad_norm": 0.19723445930056663, "learning_rate": 3.9523997370477454e-05, "loss": 0.5472, "num_tokens": 1058082658.0, "step": 1382 }, { "epoch": 0.5067326188513328, "grad_norm": 0.196742599291462, "learning_rate": 3.952302078307809e-05, "loss": 0.5859, "num_tokens": 1058845536.0, "step": 1383 }, { "epoch": 0.5070990198772557, "grad_norm": 0.21231614487594355, "learning_rate": 3.952204320835215e-05, "loss": 0.6006, "num_tokens": 1059595330.0, "step": 1384 }, { "epoch": 0.5074654209031785, "grad_norm": 0.20530071208191025, "learning_rate": 3.952106464635472e-05, "loss": 0.576, "num_tokens": 1060368290.0, "step": 1385 }, { "epoch": 0.5078318219291014, "grad_norm": 0.22077837541021653, "learning_rate": 3.952008509714093e-05, "loss": 0.57, "num_tokens": 1061179500.0, "step": 1386 }, { "epoch": 0.5081982229550243, "grad_norm": 0.21634935451394907, "learning_rate": 3.9519104560765955e-05, "loss": 0.5688, "num_tokens": 1061984648.0, "step": 1387 }, { "epoch": 0.5085646239809472, "grad_norm": 0.1885506631004462, "learning_rate": 3.9518123037285076e-05, "loss": 0.6019, "num_tokens": 1062746220.0, "step": 1388 }, { "epoch": 0.50893102500687, "grad_norm": 0.20176831263216893, "learning_rate": 3.951714052675357e-05, "loss": 0.5727, "num_tokens": 1063408817.0, "step": 1389 }, { "epoch": 0.5092974260327929, "grad_norm": 0.21438592999839726, "learning_rate": 3.951615702922682e-05, "loss": 0.6203, "num_tokens": 1064195648.0, "step": 1390 }, { "epoch": 0.5096638270587157, "grad_norm": 0.19320418783166593, "learning_rate": 3.9515172544760215e-05, "loss": 0.5678, "num_tokens": 1064960531.0, "step": 1391 }, { "epoch": 0.5100302280846386, "grad_norm": 0.20426668389551803, "learning_rate": 3.9514187073409235e-05, "loss": 0.5469, "num_tokens": 1065685287.0, "step": 1392 }, { "epoch": 0.5103966291105615, "grad_norm": 0.2306721472659558, "learning_rate": 3.951320061522942e-05, "loss": 0.6164, "num_tokens": 1066343678.0, "step": 1393 }, { "epoch": 0.5107630301364844, "grad_norm": 0.2115014228073527, "learning_rate": 3.951221317027633e-05, "loss": 0.5717, "num_tokens": 1067055691.0, "step": 1394 }, { "epoch": 0.5111294311624073, "grad_norm": 0.22745824952240395, "learning_rate": 3.951122473860562e-05, "loss": 0.5681, "num_tokens": 1067733751.0, "step": 1395 }, { "epoch": 0.5114958321883302, "grad_norm": 0.25415502784665994, "learning_rate": 3.9510235320272965e-05, "loss": 0.5978, "num_tokens": 1068402243.0, "step": 1396 }, { "epoch": 0.511862233214253, "grad_norm": 0.21823129631298183, "learning_rate": 3.9509244915334125e-05, "loss": 0.5765, "num_tokens": 1069147725.0, "step": 1397 }, { "epoch": 0.5122286342401758, "grad_norm": 0.24854348289416245, "learning_rate": 3.950825352384491e-05, "loss": 0.6149, "num_tokens": 1069822882.0, "step": 1398 }, { "epoch": 0.5125950352660987, "grad_norm": 0.2079298998254928, "learning_rate": 3.950726114586117e-05, "loss": 0.5699, "num_tokens": 1070719175.0, "step": 1399 }, { "epoch": 0.5129614362920216, "grad_norm": 0.19123718025004668, "learning_rate": 3.9506267781438805e-05, "loss": 0.6154, "num_tokens": 1071493122.0, "step": 1400 }, { "epoch": 0.5133278373179445, "grad_norm": 0.2332719702998166, "learning_rate": 3.9505273430633816e-05, "loss": 0.5931, "num_tokens": 1072307640.0, "step": 1401 }, { "epoch": 0.5136942383438674, "grad_norm": 0.1904744310742374, "learning_rate": 3.9504278093502206e-05, "loss": 0.5901, "num_tokens": 1073113323.0, "step": 1402 }, { "epoch": 0.5140606393697902, "grad_norm": 0.2413826030557772, "learning_rate": 3.9503281770100075e-05, "loss": 0.6052, "num_tokens": 1073834065.0, "step": 1403 }, { "epoch": 0.5144270403957131, "grad_norm": 0.21981337417028413, "learning_rate": 3.950228446048355e-05, "loss": 0.6035, "num_tokens": 1074536145.0, "step": 1404 }, { "epoch": 0.514793441421636, "grad_norm": 0.20919671982270727, "learning_rate": 3.9501286164708825e-05, "loss": 0.5623, "num_tokens": 1075371827.0, "step": 1405 }, { "epoch": 0.5151598424475589, "grad_norm": 0.24295161422616038, "learning_rate": 3.950028688283215e-05, "loss": 0.6601, "num_tokens": 1076124929.0, "step": 1406 }, { "epoch": 0.5155262434734817, "grad_norm": 0.21709694644397023, "learning_rate": 3.949928661490982e-05, "loss": 0.5858, "num_tokens": 1076834876.0, "step": 1407 }, { "epoch": 0.5158926444994046, "grad_norm": 0.2276829864987823, "learning_rate": 3.949828536099821e-05, "loss": 0.5627, "num_tokens": 1077651335.0, "step": 1408 }, { "epoch": 0.5162590455253274, "grad_norm": 0.2110081227113961, "learning_rate": 3.949728312115373e-05, "loss": 0.5992, "num_tokens": 1078489647.0, "step": 1409 }, { "epoch": 0.5166254465512503, "grad_norm": 0.21468714765785504, "learning_rate": 3.949627989543284e-05, "loss": 0.5751, "num_tokens": 1079289334.0, "step": 1410 }, { "epoch": 0.5169918475771732, "grad_norm": 0.20228827872674224, "learning_rate": 3.9495275683892084e-05, "loss": 0.5425, "num_tokens": 1080130282.0, "step": 1411 }, { "epoch": 0.5173582486030961, "grad_norm": 0.20717617118281942, "learning_rate": 3.949427048658803e-05, "loss": 0.5706, "num_tokens": 1080882896.0, "step": 1412 }, { "epoch": 0.517724649629019, "grad_norm": 0.20806871541189084, "learning_rate": 3.949326430357732e-05, "loss": 0.6179, "num_tokens": 1081644022.0, "step": 1413 }, { "epoch": 0.5180910506549419, "grad_norm": 0.24389164450155607, "learning_rate": 3.949225713491665e-05, "loss": 0.5989, "num_tokens": 1082379662.0, "step": 1414 }, { "epoch": 0.5184574516808647, "grad_norm": 0.1884734113783523, "learning_rate": 3.949124898066276e-05, "loss": 0.5814, "num_tokens": 1083121519.0, "step": 1415 }, { "epoch": 0.5188238527067875, "grad_norm": 0.2246061387089414, "learning_rate": 3.949023984087246e-05, "loss": 0.5657, "num_tokens": 1083837652.0, "step": 1416 }, { "epoch": 0.5191902537327104, "grad_norm": 0.2244918322779442, "learning_rate": 3.948922971560261e-05, "loss": 0.5654, "num_tokens": 1084666698.0, "step": 1417 }, { "epoch": 0.5195566547586333, "grad_norm": 0.21185355413390758, "learning_rate": 3.948821860491012e-05, "loss": 0.5451, "num_tokens": 1085422473.0, "step": 1418 }, { "epoch": 0.5199230557845562, "grad_norm": 0.1942189930490401, "learning_rate": 3.9487206508851973e-05, "loss": 0.5715, "num_tokens": 1086208790.0, "step": 1419 }, { "epoch": 0.5202894568104791, "grad_norm": 0.1977192327141816, "learning_rate": 3.9486193427485185e-05, "loss": 0.5952, "num_tokens": 1086953280.0, "step": 1420 }, { "epoch": 0.5206558578364019, "grad_norm": 0.21085372975229844, "learning_rate": 3.9485179360866836e-05, "loss": 0.572, "num_tokens": 1087724149.0, "step": 1421 }, { "epoch": 0.5210222588623248, "grad_norm": 0.1983898342992096, "learning_rate": 3.9484164309054064e-05, "loss": 0.5518, "num_tokens": 1088517897.0, "step": 1422 }, { "epoch": 0.5213886598882477, "grad_norm": 0.189801902580859, "learning_rate": 3.948314827210407e-05, "loss": 0.5549, "num_tokens": 1089305396.0, "step": 1423 }, { "epoch": 0.5217550609141706, "grad_norm": 0.2037941872987267, "learning_rate": 3.9482131250074095e-05, "loss": 0.5558, "num_tokens": 1090109224.0, "step": 1424 }, { "epoch": 0.5221214619400935, "grad_norm": 0.20458762325825533, "learning_rate": 3.948111324302144e-05, "loss": 0.5794, "num_tokens": 1090819356.0, "step": 1425 }, { "epoch": 0.5224878629660163, "grad_norm": 0.22156585716551794, "learning_rate": 3.9480094251003474e-05, "loss": 0.59, "num_tokens": 1091566156.0, "step": 1426 }, { "epoch": 0.5228542639919391, "grad_norm": 0.22130546789071623, "learning_rate": 3.9479074274077603e-05, "loss": 0.5534, "num_tokens": 1092348040.0, "step": 1427 }, { "epoch": 0.523220665017862, "grad_norm": 0.19391176659998832, "learning_rate": 3.9478053312301294e-05, "loss": 0.5722, "num_tokens": 1093201959.0, "step": 1428 }, { "epoch": 0.5235870660437849, "grad_norm": 0.1861918418967183, "learning_rate": 3.947703136573208e-05, "loss": 0.5748, "num_tokens": 1093983646.0, "step": 1429 }, { "epoch": 0.5239534670697078, "grad_norm": 0.2127258656724231, "learning_rate": 3.947600843442755e-05, "loss": 0.6003, "num_tokens": 1094820594.0, "step": 1430 }, { "epoch": 0.5243198680956307, "grad_norm": 0.17737376266576982, "learning_rate": 3.947498451844532e-05, "loss": 0.5715, "num_tokens": 1095565084.0, "step": 1431 }, { "epoch": 0.5246862691215536, "grad_norm": 0.1918838497774844, "learning_rate": 3.9473959617843094e-05, "loss": 0.6046, "num_tokens": 1096343238.0, "step": 1432 }, { "epoch": 0.5250526701474764, "grad_norm": 0.2201072330841672, "learning_rate": 3.947293373267862e-05, "loss": 0.5834, "num_tokens": 1097192216.0, "step": 1433 }, { "epoch": 0.5254190711733993, "grad_norm": 0.20451919754549022, "learning_rate": 3.9471906863009703e-05, "loss": 0.6013, "num_tokens": 1097986681.0, "step": 1434 }, { "epoch": 0.5257854721993221, "grad_norm": 0.19935615340163954, "learning_rate": 3.94708790088942e-05, "loss": 0.5757, "num_tokens": 1098665542.0, "step": 1435 }, { "epoch": 0.526151873225245, "grad_norm": 0.19210402024117015, "learning_rate": 3.946985017039002e-05, "loss": 0.5885, "num_tokens": 1099441948.0, "step": 1436 }, { "epoch": 0.5265182742511679, "grad_norm": 0.21428433060493246, "learning_rate": 3.9468820347555134e-05, "loss": 0.5337, "num_tokens": 1100205552.0, "step": 1437 }, { "epoch": 0.5268846752770908, "grad_norm": 0.187658102254825, "learning_rate": 3.946778954044757e-05, "loss": 0.555, "num_tokens": 1101030623.0, "step": 1438 }, { "epoch": 0.5272510763030136, "grad_norm": 0.21264990535985934, "learning_rate": 3.946675774912541e-05, "loss": 0.5435, "num_tokens": 1101847162.0, "step": 1439 }, { "epoch": 0.5276174773289365, "grad_norm": 0.19820636528556046, "learning_rate": 3.946572497364678e-05, "loss": 0.5952, "num_tokens": 1102645928.0, "step": 1440 }, { "epoch": 0.5279838783548594, "grad_norm": 0.22756610167990307, "learning_rate": 3.946469121406988e-05, "loss": 0.6104, "num_tokens": 1103389564.0, "step": 1441 }, { "epoch": 0.5283502793807823, "grad_norm": 0.204754379541376, "learning_rate": 3.9463656470452954e-05, "loss": 0.5909, "num_tokens": 1104169214.0, "step": 1442 }, { "epoch": 0.5287166804067052, "grad_norm": 0.18901947444326433, "learning_rate": 3.9462620742854304e-05, "loss": 0.5804, "num_tokens": 1104904006.0, "step": 1443 }, { "epoch": 0.529083081432628, "grad_norm": 0.22908968298454865, "learning_rate": 3.9461584031332296e-05, "loss": 0.5397, "num_tokens": 1105680191.0, "step": 1444 }, { "epoch": 0.5294494824585508, "grad_norm": 0.19040143566573808, "learning_rate": 3.946054633594533e-05, "loss": 0.5841, "num_tokens": 1106545044.0, "step": 1445 }, { "epoch": 0.5298158834844737, "grad_norm": 0.20784382782973615, "learning_rate": 3.945950765675187e-05, "loss": 0.5987, "num_tokens": 1107327425.0, "step": 1446 }, { "epoch": 0.5301822845103966, "grad_norm": 0.19416353036462752, "learning_rate": 3.945846799381047e-05, "loss": 0.5628, "num_tokens": 1108134781.0, "step": 1447 }, { "epoch": 0.5305486855363195, "grad_norm": 0.20438557975761396, "learning_rate": 3.9457427347179676e-05, "loss": 0.5887, "num_tokens": 1108994073.0, "step": 1448 }, { "epoch": 0.5309150865622424, "grad_norm": 0.19987273409760445, "learning_rate": 3.945638571691814e-05, "loss": 0.5925, "num_tokens": 1109793596.0, "step": 1449 }, { "epoch": 0.5312814875881653, "grad_norm": 0.23047401350765673, "learning_rate": 3.9455343103084546e-05, "loss": 0.5879, "num_tokens": 1110538693.0, "step": 1450 }, { "epoch": 0.5316478886140881, "grad_norm": 0.21752259186070838, "learning_rate": 3.945429950573764e-05, "loss": 0.5791, "num_tokens": 1111376361.0, "step": 1451 }, { "epoch": 0.532014289640011, "grad_norm": 0.19423312183095576, "learning_rate": 3.9453254924936226e-05, "loss": 0.6145, "num_tokens": 1112091435.0, "step": 1452 }, { "epoch": 0.5323806906659339, "grad_norm": 0.2343095880890326, "learning_rate": 3.945220936073916e-05, "loss": 0.6001, "num_tokens": 1112841478.0, "step": 1453 }, { "epoch": 0.5327470916918567, "grad_norm": 0.20502140777027666, "learning_rate": 3.9451162813205354e-05, "loss": 0.5681, "num_tokens": 1113596669.0, "step": 1454 }, { "epoch": 0.5331134927177796, "grad_norm": 0.20023062201274802, "learning_rate": 3.945011528239377e-05, "loss": 0.5928, "num_tokens": 1114309407.0, "step": 1455 }, { "epoch": 0.5334798937437025, "grad_norm": 0.23224757165360307, "learning_rate": 3.9449066768363436e-05, "loss": 0.5657, "num_tokens": 1115128955.0, "step": 1456 }, { "epoch": 0.5338462947696253, "grad_norm": 0.2037321374968706, "learning_rate": 3.9448017271173436e-05, "loss": 0.5772, "num_tokens": 1115952045.0, "step": 1457 }, { "epoch": 0.5342126957955482, "grad_norm": 0.20555447673882837, "learning_rate": 3.944696679088289e-05, "loss": 0.58, "num_tokens": 1116712588.0, "step": 1458 }, { "epoch": 0.5345790968214711, "grad_norm": 0.2344650892127083, "learning_rate": 3.944591532755099e-05, "loss": 0.5875, "num_tokens": 1117345048.0, "step": 1459 }, { "epoch": 0.534945497847394, "grad_norm": 0.20182809946771982, "learning_rate": 3.9444862881236987e-05, "loss": 0.5497, "num_tokens": 1118117287.0, "step": 1460 }, { "epoch": 0.5353118988733169, "grad_norm": 0.18255307681936822, "learning_rate": 3.944380945200018e-05, "loss": 0.5576, "num_tokens": 1119079490.0, "step": 1461 }, { "epoch": 0.5356782998992398, "grad_norm": 0.19675222374635576, "learning_rate": 3.944275503989992e-05, "loss": 0.5552, "num_tokens": 1119945358.0, "step": 1462 }, { "epoch": 0.5360447009251625, "grad_norm": 0.18465520972459912, "learning_rate": 3.9441699644995616e-05, "loss": 0.5748, "num_tokens": 1120688674.0, "step": 1463 }, { "epoch": 0.5364111019510854, "grad_norm": 0.20501400385459514, "learning_rate": 3.944064326734673e-05, "loss": 0.5702, "num_tokens": 1121379489.0, "step": 1464 }, { "epoch": 0.5367775029770083, "grad_norm": 0.20292966959181374, "learning_rate": 3.94395859070128e-05, "loss": 0.5529, "num_tokens": 1122310379.0, "step": 1465 }, { "epoch": 0.5371439040029312, "grad_norm": 0.19600410854607944, "learning_rate": 3.943852756405338e-05, "loss": 0.6225, "num_tokens": 1123052088.0, "step": 1466 }, { "epoch": 0.5375103050288541, "grad_norm": 0.2162472412730876, "learning_rate": 3.9437468238528126e-05, "loss": 0.5708, "num_tokens": 1123875563.0, "step": 1467 }, { "epoch": 0.537876706054777, "grad_norm": 0.20259247636995853, "learning_rate": 3.943640793049671e-05, "loss": 0.5769, "num_tokens": 1124689485.0, "step": 1468 }, { "epoch": 0.5382431070806998, "grad_norm": 0.20924597148359986, "learning_rate": 3.9435346640018865e-05, "loss": 0.607, "num_tokens": 1125478767.0, "step": 1469 }, { "epoch": 0.5386095081066227, "grad_norm": 0.18669644813268946, "learning_rate": 3.943428436715441e-05, "loss": 0.5442, "num_tokens": 1126178728.0, "step": 1470 }, { "epoch": 0.5389759091325456, "grad_norm": 0.22132831771802394, "learning_rate": 3.943322111196319e-05, "loss": 0.61, "num_tokens": 1126842077.0, "step": 1471 }, { "epoch": 0.5393423101584685, "grad_norm": 0.1882870589409508, "learning_rate": 3.943215687450512e-05, "loss": 0.5695, "num_tokens": 1127735718.0, "step": 1472 }, { "epoch": 0.5397087111843913, "grad_norm": 0.21987264668839804, "learning_rate": 3.943109165484015e-05, "loss": 0.5956, "num_tokens": 1128461715.0, "step": 1473 }, { "epoch": 0.5400751122103142, "grad_norm": 0.2247032040565789, "learning_rate": 3.94300254530283e-05, "loss": 0.5918, "num_tokens": 1129199075.0, "step": 1474 }, { "epoch": 0.540441513236237, "grad_norm": 0.20122062733320908, "learning_rate": 3.942895826912966e-05, "loss": 0.5613, "num_tokens": 1129992570.0, "step": 1475 }, { "epoch": 0.5408079142621599, "grad_norm": 0.24748574706216858, "learning_rate": 3.9427890103204346e-05, "loss": 0.5696, "num_tokens": 1130791253.0, "step": 1476 }, { "epoch": 0.5411743152880828, "grad_norm": 0.22261017622551188, "learning_rate": 3.942682095531255e-05, "loss": 0.5646, "num_tokens": 1131505111.0, "step": 1477 }, { "epoch": 0.5415407163140057, "grad_norm": 0.21551786713570714, "learning_rate": 3.942575082551452e-05, "loss": 0.5908, "num_tokens": 1132229976.0, "step": 1478 }, { "epoch": 0.5419071173399286, "grad_norm": 0.24812930168563224, "learning_rate": 3.942467971387053e-05, "loss": 0.6243, "num_tokens": 1133048824.0, "step": 1479 }, { "epoch": 0.5422735183658515, "grad_norm": 0.18674509546310902, "learning_rate": 3.942360762044095e-05, "loss": 0.5409, "num_tokens": 1133768942.0, "step": 1480 }, { "epoch": 0.5426399193917743, "grad_norm": 0.23861194395515195, "learning_rate": 3.942253454528618e-05, "loss": 0.5827, "num_tokens": 1134468477.0, "step": 1481 }, { "epoch": 0.5430063204176971, "grad_norm": 0.23328459598753984, "learning_rate": 3.942146048846668e-05, "loss": 0.5517, "num_tokens": 1135193448.0, "step": 1482 }, { "epoch": 0.54337272144362, "grad_norm": 0.2310989360590956, "learning_rate": 3.9420385450042976e-05, "loss": 0.5445, "num_tokens": 1136028307.0, "step": 1483 }, { "epoch": 0.5437391224695429, "grad_norm": 0.197667401574314, "learning_rate": 3.941930943007563e-05, "loss": 0.599, "num_tokens": 1136713923.0, "step": 1484 }, { "epoch": 0.5441055234954658, "grad_norm": 0.22082421347732287, "learning_rate": 3.941823242862527e-05, "loss": 0.6009, "num_tokens": 1137424693.0, "step": 1485 }, { "epoch": 0.5444719245213887, "grad_norm": 0.2323201306622524, "learning_rate": 3.941715444575259e-05, "loss": 0.5856, "num_tokens": 1138255131.0, "step": 1486 }, { "epoch": 0.5448383255473115, "grad_norm": 0.18517161077293973, "learning_rate": 3.9416075481518324e-05, "loss": 0.5885, "num_tokens": 1139015012.0, "step": 1487 }, { "epoch": 0.5452047265732344, "grad_norm": 0.19888155588432904, "learning_rate": 3.941499553598326e-05, "loss": 0.5442, "num_tokens": 1139753804.0, "step": 1488 }, { "epoch": 0.5455711275991573, "grad_norm": 0.2174983906262053, "learning_rate": 3.941391460920825e-05, "loss": 0.638, "num_tokens": 1140524472.0, "step": 1489 }, { "epoch": 0.5459375286250802, "grad_norm": 0.2294316971783784, "learning_rate": 3.94128327012542e-05, "loss": 0.6077, "num_tokens": 1141174287.0, "step": 1490 }, { "epoch": 0.546303929651003, "grad_norm": 0.19503357066826993, "learning_rate": 3.941174981218207e-05, "loss": 0.5574, "num_tokens": 1141905867.0, "step": 1491 }, { "epoch": 0.5466703306769259, "grad_norm": 0.19475621351682132, "learning_rate": 3.9410665942052876e-05, "loss": 0.6003, "num_tokens": 1142621040.0, "step": 1492 }, { "epoch": 0.5470367317028487, "grad_norm": 0.24680169048349354, "learning_rate": 3.9409581090927683e-05, "loss": 0.5938, "num_tokens": 1143253442.0, "step": 1493 }, { "epoch": 0.5474031327287716, "grad_norm": 0.20824476103672784, "learning_rate": 3.9408495258867614e-05, "loss": 0.5849, "num_tokens": 1143995889.0, "step": 1494 }, { "epoch": 0.5477695337546945, "grad_norm": 0.19586208650544754, "learning_rate": 3.940740844593386e-05, "loss": 0.5718, "num_tokens": 1144800998.0, "step": 1495 }, { "epoch": 0.5481359347806174, "grad_norm": 0.19622268868528067, "learning_rate": 3.940632065218765e-05, "loss": 0.5592, "num_tokens": 1145642818.0, "step": 1496 }, { "epoch": 0.5485023358065403, "grad_norm": 0.18926323391323963, "learning_rate": 3.9405231877690287e-05, "loss": 0.587, "num_tokens": 1146483608.0, "step": 1497 }, { "epoch": 0.5488687368324632, "grad_norm": 0.19149855219574183, "learning_rate": 3.9404142122503095e-05, "loss": 0.5993, "num_tokens": 1147244477.0, "step": 1498 }, { "epoch": 0.549235137858386, "grad_norm": 0.19834520053381394, "learning_rate": 3.9403051386687496e-05, "loss": 0.5789, "num_tokens": 1147960005.0, "step": 1499 }, { "epoch": 0.5496015388843088, "grad_norm": 0.17624277004221975, "learning_rate": 3.940195967030493e-05, "loss": 0.5441, "num_tokens": 1148791861.0, "step": 1500 }, { "epoch": 0.5499679399102317, "grad_norm": 0.17580310946700076, "learning_rate": 3.940086697341693e-05, "loss": 0.5802, "num_tokens": 1149581302.0, "step": 1501 }, { "epoch": 0.5503343409361546, "grad_norm": 0.1775725706356487, "learning_rate": 3.9399773296085044e-05, "loss": 0.5614, "num_tokens": 1150419005.0, "step": 1502 }, { "epoch": 0.5507007419620775, "grad_norm": 0.16983546085369167, "learning_rate": 3.939867863837091e-05, "loss": 0.5924, "num_tokens": 1151199820.0, "step": 1503 }, { "epoch": 0.5510671429880004, "grad_norm": 0.19819727926518876, "learning_rate": 3.9397583000336195e-05, "loss": 0.586, "num_tokens": 1152005859.0, "step": 1504 }, { "epoch": 0.5514335440139232, "grad_norm": 0.19657900739607917, "learning_rate": 3.939648638204263e-05, "loss": 0.5804, "num_tokens": 1152726125.0, "step": 1505 }, { "epoch": 0.5517999450398461, "grad_norm": 0.20559863237649237, "learning_rate": 3.939538878355202e-05, "loss": 0.5764, "num_tokens": 1153474688.0, "step": 1506 }, { "epoch": 0.552166346065769, "grad_norm": 0.23143756133430163, "learning_rate": 3.93942902049262e-05, "loss": 0.5749, "num_tokens": 1154302493.0, "step": 1507 }, { "epoch": 0.5525327470916919, "grad_norm": 0.19098940992304017, "learning_rate": 3.939319064622706e-05, "loss": 0.5565, "num_tokens": 1155083770.0, "step": 1508 }, { "epoch": 0.5528991481176148, "grad_norm": 0.1958616823984615, "learning_rate": 3.939209010751656e-05, "loss": 0.6083, "num_tokens": 1155828642.0, "step": 1509 }, { "epoch": 0.5532655491435376, "grad_norm": 0.18367381630283452, "learning_rate": 3.939098858885672e-05, "loss": 0.6085, "num_tokens": 1156561252.0, "step": 1510 }, { "epoch": 0.5536319501694604, "grad_norm": 0.20042876085984748, "learning_rate": 3.9389886090309584e-05, "loss": 0.5618, "num_tokens": 1157408296.0, "step": 1511 }, { "epoch": 0.5539983511953833, "grad_norm": 0.19637791322051243, "learning_rate": 3.938878261193729e-05, "loss": 0.5592, "num_tokens": 1158199361.0, "step": 1512 }, { "epoch": 0.5543647522213062, "grad_norm": 0.18435463183426057, "learning_rate": 3.9387678153802e-05, "loss": 0.5663, "num_tokens": 1158977497.0, "step": 1513 }, { "epoch": 0.5547311532472291, "grad_norm": 0.19998394597439959, "learning_rate": 3.938657271596596e-05, "loss": 0.578, "num_tokens": 1159693530.0, "step": 1514 }, { "epoch": 0.555097554273152, "grad_norm": 0.1975382463751049, "learning_rate": 3.9385466298491434e-05, "loss": 0.5851, "num_tokens": 1160533761.0, "step": 1515 }, { "epoch": 0.5554639552990749, "grad_norm": 0.21274416188199952, "learning_rate": 3.938435890144078e-05, "loss": 0.5671, "num_tokens": 1161250134.0, "step": 1516 }, { "epoch": 0.5558303563249977, "grad_norm": 0.19255966378587713, "learning_rate": 3.938325052487638e-05, "loss": 0.5622, "num_tokens": 1162044281.0, "step": 1517 }, { "epoch": 0.5561967573509206, "grad_norm": 0.21258010833732155, "learning_rate": 3.938214116886071e-05, "loss": 0.5969, "num_tokens": 1162874969.0, "step": 1518 }, { "epoch": 0.5565631583768434, "grad_norm": 0.21900155759734832, "learning_rate": 3.9381030833456246e-05, "loss": 0.5822, "num_tokens": 1163675871.0, "step": 1519 }, { "epoch": 0.5569295594027663, "grad_norm": 0.20405817886843858, "learning_rate": 3.937991951872556e-05, "loss": 0.5468, "num_tokens": 1164420427.0, "step": 1520 }, { "epoch": 0.5572959604286892, "grad_norm": 0.20465537216637972, "learning_rate": 3.937880722473128e-05, "loss": 0.5479, "num_tokens": 1165129809.0, "step": 1521 }, { "epoch": 0.5576623614546121, "grad_norm": 0.1958000150203754, "learning_rate": 3.9377693951536066e-05, "loss": 0.5735, "num_tokens": 1165921386.0, "step": 1522 }, { "epoch": 0.5580287624805349, "grad_norm": 0.19242437720472955, "learning_rate": 3.937657969920265e-05, "loss": 0.5742, "num_tokens": 1166651231.0, "step": 1523 }, { "epoch": 0.5583951635064578, "grad_norm": 0.21892352609153276, "learning_rate": 3.937546446779381e-05, "loss": 0.6024, "num_tokens": 1167410992.0, "step": 1524 }, { "epoch": 0.5587615645323807, "grad_norm": 0.20054791793982502, "learning_rate": 3.937434825737238e-05, "loss": 0.5841, "num_tokens": 1168288017.0, "step": 1525 }, { "epoch": 0.5591279655583036, "grad_norm": 0.1898378734469963, "learning_rate": 3.9373231068001264e-05, "loss": 0.572, "num_tokens": 1169104378.0, "step": 1526 }, { "epoch": 0.5594943665842265, "grad_norm": 0.18893324624957222, "learning_rate": 3.93721128997434e-05, "loss": 0.5858, "num_tokens": 1169955902.0, "step": 1527 }, { "epoch": 0.5598607676101494, "grad_norm": 0.20057031872924866, "learning_rate": 3.9370993752661795e-05, "loss": 0.5908, "num_tokens": 1170732096.0, "step": 1528 }, { "epoch": 0.5602271686360721, "grad_norm": 0.1825885809114868, "learning_rate": 3.9369873626819496e-05, "loss": 0.6012, "num_tokens": 1171495667.0, "step": 1529 }, { "epoch": 0.560593569661995, "grad_norm": 0.2186438658188663, "learning_rate": 3.936875252227963e-05, "loss": 0.5575, "num_tokens": 1172301898.0, "step": 1530 }, { "epoch": 0.5609599706879179, "grad_norm": 0.20471845101971148, "learning_rate": 3.9367630439105364e-05, "loss": 0.5415, "num_tokens": 1173047150.0, "step": 1531 }, { "epoch": 0.5613263717138408, "grad_norm": 0.17482342101540668, "learning_rate": 3.9366507377359916e-05, "loss": 0.5463, "num_tokens": 1173813727.0, "step": 1532 }, { "epoch": 0.5616927727397637, "grad_norm": 0.18486162835062964, "learning_rate": 3.9365383337106564e-05, "loss": 0.5901, "num_tokens": 1174574465.0, "step": 1533 }, { "epoch": 0.5620591737656866, "grad_norm": 0.22134767554139434, "learning_rate": 3.936425831840865e-05, "loss": 0.6065, "num_tokens": 1175352243.0, "step": 1534 }, { "epoch": 0.5624255747916094, "grad_norm": 0.196385250765079, "learning_rate": 3.936313232132954e-05, "loss": 0.6, "num_tokens": 1176245519.0, "step": 1535 }, { "epoch": 0.5627919758175323, "grad_norm": 0.1956131937233612, "learning_rate": 3.936200534593271e-05, "loss": 0.5804, "num_tokens": 1176966515.0, "step": 1536 }, { "epoch": 0.5631583768434552, "grad_norm": 0.18298709503807994, "learning_rate": 3.9360877392281634e-05, "loss": 0.5356, "num_tokens": 1177762914.0, "step": 1537 }, { "epoch": 0.563524777869378, "grad_norm": 0.20303708932990405, "learning_rate": 3.9359748460439887e-05, "loss": 0.5929, "num_tokens": 1178575163.0, "step": 1538 }, { "epoch": 0.5638911788953009, "grad_norm": 0.18462755716015225, "learning_rate": 3.9358618550471055e-05, "loss": 0.5908, "num_tokens": 1179316793.0, "step": 1539 }, { "epoch": 0.5642575799212238, "grad_norm": 0.22446682029213916, "learning_rate": 3.935748766243881e-05, "loss": 0.5687, "num_tokens": 1180065112.0, "step": 1540 }, { "epoch": 0.5646239809471466, "grad_norm": 0.1973651111316988, "learning_rate": 3.9356355796406884e-05, "loss": 0.6145, "num_tokens": 1180888251.0, "step": 1541 }, { "epoch": 0.5649903819730695, "grad_norm": 0.24423389657227632, "learning_rate": 3.9355222952439034e-05, "loss": 0.5527, "num_tokens": 1181643095.0, "step": 1542 }, { "epoch": 0.5653567829989924, "grad_norm": 0.22701642944669748, "learning_rate": 3.93540891305991e-05, "loss": 0.5415, "num_tokens": 1182384035.0, "step": 1543 }, { "epoch": 0.5657231840249153, "grad_norm": 0.21781335463610077, "learning_rate": 3.9352954330950967e-05, "loss": 0.5576, "num_tokens": 1183199054.0, "step": 1544 }, { "epoch": 0.5660895850508382, "grad_norm": 0.22957429799869444, "learning_rate": 3.935181855355857e-05, "loss": 0.5468, "num_tokens": 1184036797.0, "step": 1545 }, { "epoch": 0.5664559860767611, "grad_norm": 0.21868251845649953, "learning_rate": 3.935068179848591e-05, "loss": 0.5971, "num_tokens": 1184766884.0, "step": 1546 }, { "epoch": 0.5668223871026838, "grad_norm": 0.2203526223467234, "learning_rate": 3.934954406579703e-05, "loss": 0.5702, "num_tokens": 1185538697.0, "step": 1547 }, { "epoch": 0.5671887881286067, "grad_norm": 0.21597119920071906, "learning_rate": 3.934840535555603e-05, "loss": 0.5825, "num_tokens": 1186359054.0, "step": 1548 }, { "epoch": 0.5675551891545296, "grad_norm": 0.18574980453237028, "learning_rate": 3.934726566782709e-05, "loss": 0.5573, "num_tokens": 1187247291.0, "step": 1549 }, { "epoch": 0.5679215901804525, "grad_norm": 0.2704335970168503, "learning_rate": 3.934612500267441e-05, "loss": 0.6057, "num_tokens": 1188039477.0, "step": 1550 }, { "epoch": 0.5682879912063754, "grad_norm": 0.2702817983366742, "learning_rate": 3.934498336016226e-05, "loss": 0.6154, "num_tokens": 1188694774.0, "step": 1551 }, { "epoch": 0.5686543922322983, "grad_norm": 0.22561597907075734, "learning_rate": 3.934384074035497e-05, "loss": 0.5632, "num_tokens": 1189350739.0, "step": 1552 }, { "epoch": 0.5690207932582211, "grad_norm": 0.2139418502067602, "learning_rate": 3.934269714331692e-05, "loss": 0.5692, "num_tokens": 1190054328.0, "step": 1553 }, { "epoch": 0.569387194284144, "grad_norm": 0.22215473428472948, "learning_rate": 3.934155256911255e-05, "loss": 0.6167, "num_tokens": 1190908430.0, "step": 1554 }, { "epoch": 0.5697535953100669, "grad_norm": 0.2042484429946978, "learning_rate": 3.934040701780634e-05, "loss": 0.5585, "num_tokens": 1191732405.0, "step": 1555 }, { "epoch": 0.5701199963359898, "grad_norm": 0.17732481061839284, "learning_rate": 3.9339260489462835e-05, "loss": 0.5593, "num_tokens": 1192650074.0, "step": 1556 }, { "epoch": 0.5704863973619126, "grad_norm": 0.2106024910383575, "learning_rate": 3.933811298414665e-05, "loss": 0.571, "num_tokens": 1193460395.0, "step": 1557 }, { "epoch": 0.5708527983878355, "grad_norm": 0.20277885317687003, "learning_rate": 3.9336964501922425e-05, "loss": 0.5898, "num_tokens": 1194211861.0, "step": 1558 }, { "epoch": 0.5712191994137583, "grad_norm": 0.1834891783089377, "learning_rate": 3.9335815042854885e-05, "loss": 0.5698, "num_tokens": 1195081507.0, "step": 1559 }, { "epoch": 0.5715856004396812, "grad_norm": 0.20460552863228854, "learning_rate": 3.9334664607008783e-05, "loss": 0.5946, "num_tokens": 1195854825.0, "step": 1560 }, { "epoch": 0.5719520014656041, "grad_norm": 0.2014874238337436, "learning_rate": 3.933351319444895e-05, "loss": 0.5658, "num_tokens": 1196753294.0, "step": 1561 }, { "epoch": 0.572318402491527, "grad_norm": 0.19865972779140514, "learning_rate": 3.9332360805240254e-05, "loss": 0.613, "num_tokens": 1197461103.0, "step": 1562 }, { "epoch": 0.5726848035174499, "grad_norm": 26.207038248323638, "learning_rate": 3.9331207439447625e-05, "loss": 0.582, "num_tokens": 1198223180.0, "step": 1563 }, { "epoch": 0.5730512045433728, "grad_norm": 0.25515319423537014, "learning_rate": 3.933005309713606e-05, "loss": 0.5748, "num_tokens": 1199071996.0, "step": 1564 }, { "epoch": 0.5734176055692956, "grad_norm": 0.2018658419288561, "learning_rate": 3.932889777837059e-05, "loss": 0.5741, "num_tokens": 1199910055.0, "step": 1565 }, { "epoch": 0.5737840065952184, "grad_norm": 0.1968682363189719, "learning_rate": 3.9327741483216316e-05, "loss": 0.5603, "num_tokens": 1200604593.0, "step": 1566 }, { "epoch": 0.5741504076211413, "grad_norm": 0.24589883245991193, "learning_rate": 3.9326584211738376e-05, "loss": 0.5539, "num_tokens": 1201390306.0, "step": 1567 }, { "epoch": 0.5745168086470642, "grad_norm": 0.20379786450416001, "learning_rate": 3.932542596400199e-05, "loss": 0.5685, "num_tokens": 1202132090.0, "step": 1568 }, { "epoch": 0.5748832096729871, "grad_norm": 0.2297806711943231, "learning_rate": 3.9324266740072424e-05, "loss": 0.5711, "num_tokens": 1202804650.0, "step": 1569 }, { "epoch": 0.57524961069891, "grad_norm": 0.21309745364419752, "learning_rate": 3.9323106540014986e-05, "loss": 0.5353, "num_tokens": 1203560331.0, "step": 1570 }, { "epoch": 0.5756160117248328, "grad_norm": 0.1844778558198471, "learning_rate": 3.932194536389504e-05, "loss": 0.5934, "num_tokens": 1204351107.0, "step": 1571 }, { "epoch": 0.5759824127507557, "grad_norm": 0.23547269608562826, "learning_rate": 3.9320783211778015e-05, "loss": 0.5876, "num_tokens": 1205109698.0, "step": 1572 }, { "epoch": 0.5763488137766786, "grad_norm": 0.2190096549319704, "learning_rate": 3.93196200837294e-05, "loss": 0.5851, "num_tokens": 1205930138.0, "step": 1573 }, { "epoch": 0.5767152148026015, "grad_norm": 0.2550805995456864, "learning_rate": 3.931845597981473e-05, "loss": 0.6273, "num_tokens": 1206586687.0, "step": 1574 }, { "epoch": 0.5770816158285244, "grad_norm": 0.20717417065005977, "learning_rate": 3.9317290900099585e-05, "loss": 0.5661, "num_tokens": 1207347905.0, "step": 1575 }, { "epoch": 0.5774480168544472, "grad_norm": 0.2767374117405942, "learning_rate": 3.9316124844649614e-05, "loss": 0.5473, "num_tokens": 1208095756.0, "step": 1576 }, { "epoch": 0.57781441788037, "grad_norm": 0.2614587715032716, "learning_rate": 3.9314957813530525e-05, "loss": 0.6068, "num_tokens": 1208867270.0, "step": 1577 }, { "epoch": 0.5781808189062929, "grad_norm": 0.18985014128336475, "learning_rate": 3.931378980680807e-05, "loss": 0.5676, "num_tokens": 1209617709.0, "step": 1578 }, { "epoch": 0.5785472199322158, "grad_norm": 0.2832269424274385, "learning_rate": 3.9312620824548066e-05, "loss": 0.6092, "num_tokens": 1210363693.0, "step": 1579 }, { "epoch": 0.5789136209581387, "grad_norm": 0.20174337827626582, "learning_rate": 3.9311450866816365e-05, "loss": 0.5835, "num_tokens": 1211037575.0, "step": 1580 }, { "epoch": 0.5792800219840616, "grad_norm": 0.21960051364118047, "learning_rate": 3.9310279933678907e-05, "loss": 0.5656, "num_tokens": 1211774993.0, "step": 1581 }, { "epoch": 0.5796464230099845, "grad_norm": 0.20632602967277897, "learning_rate": 3.930910802520165e-05, "loss": 0.5712, "num_tokens": 1212580592.0, "step": 1582 }, { "epoch": 0.5800128240359073, "grad_norm": 0.2386790224695697, "learning_rate": 3.9307935141450625e-05, "loss": 0.5658, "num_tokens": 1213231654.0, "step": 1583 }, { "epoch": 0.5803792250618302, "grad_norm": 0.1923326766112352, "learning_rate": 3.9306761282491927e-05, "loss": 0.5194, "num_tokens": 1214139283.0, "step": 1584 }, { "epoch": 0.580745626087753, "grad_norm": 0.21167657232576367, "learning_rate": 3.9305586448391704e-05, "loss": 0.5729, "num_tokens": 1215014797.0, "step": 1585 }, { "epoch": 0.5811120271136759, "grad_norm": 0.20019792456015484, "learning_rate": 3.930441063921613e-05, "loss": 0.5723, "num_tokens": 1215807758.0, "step": 1586 }, { "epoch": 0.5814784281395988, "grad_norm": 0.2154034597485213, "learning_rate": 3.930323385503147e-05, "loss": 0.6044, "num_tokens": 1216473440.0, "step": 1587 }, { "epoch": 0.5818448291655217, "grad_norm": 0.19091352992111132, "learning_rate": 3.930205609590402e-05, "loss": 0.5869, "num_tokens": 1217152838.0, "step": 1588 }, { "epoch": 0.5822112301914445, "grad_norm": 0.19850732882079625, "learning_rate": 3.930087736190015e-05, "loss": 0.5698, "num_tokens": 1217797811.0, "step": 1589 }, { "epoch": 0.5825776312173674, "grad_norm": 0.2087480879225209, "learning_rate": 3.9299697653086266e-05, "loss": 0.575, "num_tokens": 1218506018.0, "step": 1590 }, { "epoch": 0.5829440322432903, "grad_norm": 0.20814700489165164, "learning_rate": 3.929851696952885e-05, "loss": 0.5552, "num_tokens": 1219387856.0, "step": 1591 }, { "epoch": 0.5833104332692132, "grad_norm": 0.18256931696026296, "learning_rate": 3.929733531129442e-05, "loss": 0.5833, "num_tokens": 1220182093.0, "step": 1592 }, { "epoch": 0.5836768342951361, "grad_norm": 0.21409042366656017, "learning_rate": 3.929615267844955e-05, "loss": 0.5743, "num_tokens": 1220929007.0, "step": 1593 }, { "epoch": 0.584043235321059, "grad_norm": 0.20771796488888433, "learning_rate": 3.929496907106089e-05, "loss": 0.5856, "num_tokens": 1221597005.0, "step": 1594 }, { "epoch": 0.5844096363469817, "grad_norm": 0.20824911667209503, "learning_rate": 3.929378448919511e-05, "loss": 0.5872, "num_tokens": 1222303794.0, "step": 1595 }, { "epoch": 0.5847760373729046, "grad_norm": 0.18985180092705378, "learning_rate": 3.929259893291898e-05, "loss": 0.5675, "num_tokens": 1223045652.0, "step": 1596 }, { "epoch": 0.5851424383988275, "grad_norm": 0.2261984108371892, "learning_rate": 3.9291412402299275e-05, "loss": 0.5903, "num_tokens": 1223711828.0, "step": 1597 }, { "epoch": 0.5855088394247504, "grad_norm": 0.19081280316244353, "learning_rate": 3.9290224897402864e-05, "loss": 0.609, "num_tokens": 1224439088.0, "step": 1598 }, { "epoch": 0.5858752404506733, "grad_norm": 0.20114018514125173, "learning_rate": 3.928903641829665e-05, "loss": 0.5966, "num_tokens": 1225211375.0, "step": 1599 }, { "epoch": 0.5862416414765962, "grad_norm": 0.2483523526053072, "learning_rate": 3.92878469650476e-05, "loss": 0.5611, "num_tokens": 1226059912.0, "step": 1600 }, { "epoch": 0.586608042502519, "grad_norm": 0.18440322656486546, "learning_rate": 3.928665653772273e-05, "loss": 0.5881, "num_tokens": 1226932969.0, "step": 1601 }, { "epoch": 0.5869744435284419, "grad_norm": 0.24010029276328348, "learning_rate": 3.928546513638913e-05, "loss": 0.5598, "num_tokens": 1227609821.0, "step": 1602 }, { "epoch": 0.5873408445543647, "grad_norm": 0.29404673966664796, "learning_rate": 3.92842727611139e-05, "loss": 0.5965, "num_tokens": 1228297732.0, "step": 1603 }, { "epoch": 0.5877072455802876, "grad_norm": 0.201492550794875, "learning_rate": 3.9283079411964245e-05, "loss": 0.5992, "num_tokens": 1229087110.0, "step": 1604 }, { "epoch": 0.5880736466062105, "grad_norm": 0.2088662525885291, "learning_rate": 3.9281885089007394e-05, "loss": 0.5918, "num_tokens": 1229830913.0, "step": 1605 }, { "epoch": 0.5884400476321334, "grad_norm": 0.24352539773439072, "learning_rate": 3.9280689792310656e-05, "loss": 0.5742, "num_tokens": 1230626617.0, "step": 1606 }, { "epoch": 0.5888064486580562, "grad_norm": 0.20619468301216004, "learning_rate": 3.9279493521941366e-05, "loss": 0.6087, "num_tokens": 1231329722.0, "step": 1607 }, { "epoch": 0.5891728496839791, "grad_norm": 0.26056946655907837, "learning_rate": 3.927829627796692e-05, "loss": 0.5955, "num_tokens": 1232055934.0, "step": 1608 }, { "epoch": 0.589539250709902, "grad_norm": 0.21835311082110617, "learning_rate": 3.92770980604548e-05, "loss": 0.5531, "num_tokens": 1232894280.0, "step": 1609 }, { "epoch": 0.5899056517358249, "grad_norm": 0.18180368955756998, "learning_rate": 3.927589886947249e-05, "loss": 0.5584, "num_tokens": 1233575161.0, "step": 1610 }, { "epoch": 0.5902720527617478, "grad_norm": 0.2098459972530998, "learning_rate": 3.927469870508758e-05, "loss": 0.5487, "num_tokens": 1234421327.0, "step": 1611 }, { "epoch": 0.5906384537876707, "grad_norm": 0.19236840515024892, "learning_rate": 3.927349756736768e-05, "loss": 0.5621, "num_tokens": 1235260240.0, "step": 1612 }, { "epoch": 0.5910048548135934, "grad_norm": 0.18739837861730443, "learning_rate": 3.9272295456380484e-05, "loss": 0.525, "num_tokens": 1236073311.0, "step": 1613 }, { "epoch": 0.5913712558395163, "grad_norm": 0.25906905006907244, "learning_rate": 3.9271092372193706e-05, "loss": 0.598, "num_tokens": 1236822605.0, "step": 1614 }, { "epoch": 0.5917376568654392, "grad_norm": 0.17201783913516794, "learning_rate": 3.926988831487514e-05, "loss": 0.547, "num_tokens": 1237755296.0, "step": 1615 }, { "epoch": 0.5921040578913621, "grad_norm": 0.21753598561036325, "learning_rate": 3.926868328449263e-05, "loss": 0.5893, "num_tokens": 1238559083.0, "step": 1616 }, { "epoch": 0.592470458917285, "grad_norm": 0.20188684316587974, "learning_rate": 3.9267477281114074e-05, "loss": 0.5849, "num_tokens": 1239209875.0, "step": 1617 }, { "epoch": 0.5928368599432079, "grad_norm": 0.21917910167065585, "learning_rate": 3.926627030480742e-05, "loss": 0.5312, "num_tokens": 1240047473.0, "step": 1618 }, { "epoch": 0.5932032609691307, "grad_norm": 0.20691831947821296, "learning_rate": 3.9265062355640675e-05, "loss": 0.5763, "num_tokens": 1240912072.0, "step": 1619 }, { "epoch": 0.5935696619950536, "grad_norm": 0.18918834411255303, "learning_rate": 3.9263853433681906e-05, "loss": 0.5679, "num_tokens": 1241633446.0, "step": 1620 }, { "epoch": 0.5939360630209765, "grad_norm": 0.2225130255209963, "learning_rate": 3.9262643538999214e-05, "loss": 0.5847, "num_tokens": 1242478729.0, "step": 1621 }, { "epoch": 0.5943024640468993, "grad_norm": 0.18804789150445542, "learning_rate": 3.9261432671660783e-05, "loss": 0.5668, "num_tokens": 1243320131.0, "step": 1622 }, { "epoch": 0.5946688650728222, "grad_norm": 0.1862369253809114, "learning_rate": 3.926022083173484e-05, "loss": 0.5896, "num_tokens": 1244170283.0, "step": 1623 }, { "epoch": 0.5950352660987451, "grad_norm": 0.19243392010197122, "learning_rate": 3.9259008019289664e-05, "loss": 0.5629, "num_tokens": 1244861450.0, "step": 1624 }, { "epoch": 0.5954016671246679, "grad_norm": 0.20402417637939682, "learning_rate": 3.9257794234393585e-05, "loss": 0.5598, "num_tokens": 1245597671.0, "step": 1625 }, { "epoch": 0.5957680681505908, "grad_norm": 0.17134650858966355, "learning_rate": 3.925657947711499e-05, "loss": 0.5685, "num_tokens": 1246441739.0, "step": 1626 }, { "epoch": 0.5961344691765137, "grad_norm": 0.1993958489742118, "learning_rate": 3.925536374752234e-05, "loss": 0.601, "num_tokens": 1247102391.0, "step": 1627 }, { "epoch": 0.5965008702024366, "grad_norm": 0.2000914954332134, "learning_rate": 3.925414704568412e-05, "loss": 0.5737, "num_tokens": 1247836569.0, "step": 1628 }, { "epoch": 0.5968672712283595, "grad_norm": 0.191017649643665, "learning_rate": 3.925292937166889e-05, "loss": 0.5662, "num_tokens": 1248687565.0, "step": 1629 }, { "epoch": 0.5972336722542824, "grad_norm": 0.16802762279281433, "learning_rate": 3.925171072554526e-05, "loss": 0.549, "num_tokens": 1249502477.0, "step": 1630 }, { "epoch": 0.5976000732802051, "grad_norm": 0.18609088052862213, "learning_rate": 3.92504911073819e-05, "loss": 0.5761, "num_tokens": 1250302967.0, "step": 1631 }, { "epoch": 0.597966474306128, "grad_norm": 0.21840832494588233, "learning_rate": 3.92492705172475e-05, "loss": 0.5828, "num_tokens": 1251096603.0, "step": 1632 }, { "epoch": 0.5983328753320509, "grad_norm": 0.21779896307544638, "learning_rate": 3.924804895521087e-05, "loss": 0.571, "num_tokens": 1251944492.0, "step": 1633 }, { "epoch": 0.5986992763579738, "grad_norm": 0.19955057978415672, "learning_rate": 3.924682642134082e-05, "loss": 0.5788, "num_tokens": 1252726371.0, "step": 1634 }, { "epoch": 0.5990656773838967, "grad_norm": 0.20253422653537498, "learning_rate": 3.924560291570624e-05, "loss": 0.5898, "num_tokens": 1253391505.0, "step": 1635 }, { "epoch": 0.5994320784098196, "grad_norm": 0.21237006945291675, "learning_rate": 3.924437843837607e-05, "loss": 0.6198, "num_tokens": 1253974262.0, "step": 1636 }, { "epoch": 0.5997984794357424, "grad_norm": 0.1988249007482814, "learning_rate": 3.924315298941929e-05, "loss": 0.5788, "num_tokens": 1254766538.0, "step": 1637 }, { "epoch": 0.6001648804616653, "grad_norm": 0.17465735504614377, "learning_rate": 3.924192656890495e-05, "loss": 0.573, "num_tokens": 1255533304.0, "step": 1638 }, { "epoch": 0.6005312814875882, "grad_norm": 0.18293564289712458, "learning_rate": 3.9240699176902156e-05, "loss": 0.593, "num_tokens": 1256333834.0, "step": 1639 }, { "epoch": 0.6008976825135111, "grad_norm": 0.18939637667389955, "learning_rate": 3.9239470813480075e-05, "loss": 0.5344, "num_tokens": 1257108177.0, "step": 1640 }, { "epoch": 0.601264083539434, "grad_norm": 0.21036755896358625, "learning_rate": 3.923824147870789e-05, "loss": 0.6199, "num_tokens": 1257802056.0, "step": 1641 }, { "epoch": 0.6016304845653568, "grad_norm": 0.1825795002218518, "learning_rate": 3.923701117265491e-05, "loss": 0.5756, "num_tokens": 1258534238.0, "step": 1642 }, { "epoch": 0.6019968855912796, "grad_norm": 0.19242838112514238, "learning_rate": 3.9235779895390415e-05, "loss": 0.5346, "num_tokens": 1259258525.0, "step": 1643 }, { "epoch": 0.6023632866172025, "grad_norm": 0.23099189184038726, "learning_rate": 3.92345476469838e-05, "loss": 0.5931, "num_tokens": 1260049503.0, "step": 1644 }, { "epoch": 0.6027296876431254, "grad_norm": 0.18105616644692413, "learning_rate": 3.92333144275045e-05, "loss": 0.5722, "num_tokens": 1260757125.0, "step": 1645 }, { "epoch": 0.6030960886690483, "grad_norm": 0.20743369018565416, "learning_rate": 3.9232080237021996e-05, "loss": 0.5677, "num_tokens": 1261505385.0, "step": 1646 }, { "epoch": 0.6034624896949712, "grad_norm": 0.20799381178623785, "learning_rate": 3.923084507560582e-05, "loss": 0.5943, "num_tokens": 1262165478.0, "step": 1647 }, { "epoch": 0.6038288907208941, "grad_norm": 0.21604753326261095, "learning_rate": 3.9229608943325574e-05, "loss": 0.6534, "num_tokens": 1262828214.0, "step": 1648 }, { "epoch": 0.6041952917468169, "grad_norm": 0.18503894216701341, "learning_rate": 3.922837184025091e-05, "loss": 0.5707, "num_tokens": 1263597815.0, "step": 1649 }, { "epoch": 0.6045616927727397, "grad_norm": 0.22362090717042912, "learning_rate": 3.922713376645151e-05, "loss": 0.6199, "num_tokens": 1264283387.0, "step": 1650 }, { "epoch": 0.6049280937986626, "grad_norm": 0.18999762857444272, "learning_rate": 3.9225894721997166e-05, "loss": 0.5807, "num_tokens": 1265110866.0, "step": 1651 }, { "epoch": 0.6052944948245855, "grad_norm": 0.20220628430397336, "learning_rate": 3.922465470695768e-05, "loss": 0.5858, "num_tokens": 1265785287.0, "step": 1652 }, { "epoch": 0.6056608958505084, "grad_norm": 0.2062024547970512, "learning_rate": 3.92234137214029e-05, "loss": 0.5906, "num_tokens": 1266603155.0, "step": 1653 }, { "epoch": 0.6060272968764313, "grad_norm": 0.19323935680055707, "learning_rate": 3.9222171765402775e-05, "loss": 0.5631, "num_tokens": 1267342769.0, "step": 1654 }, { "epoch": 0.6063936979023541, "grad_norm": 0.19139630964517568, "learning_rate": 3.922092883902727e-05, "loss": 0.5845, "num_tokens": 1268035891.0, "step": 1655 }, { "epoch": 0.606760098928277, "grad_norm": 0.21984008939715463, "learning_rate": 3.921968494234642e-05, "loss": 0.589, "num_tokens": 1268722711.0, "step": 1656 }, { "epoch": 0.6071264999541999, "grad_norm": 0.19852209486576922, "learning_rate": 3.92184400754303e-05, "loss": 0.5997, "num_tokens": 1269385994.0, "step": 1657 }, { "epoch": 0.6074929009801228, "grad_norm": 0.1764520678187213, "learning_rate": 3.921719423834907e-05, "loss": 0.5629, "num_tokens": 1270302554.0, "step": 1658 }, { "epoch": 0.6078593020060457, "grad_norm": 0.1843715038656653, "learning_rate": 3.921594743117292e-05, "loss": 0.5579, "num_tokens": 1271046820.0, "step": 1659 }, { "epoch": 0.6082257030319684, "grad_norm": 0.1871082445062843, "learning_rate": 3.92146996539721e-05, "loss": 0.5715, "num_tokens": 1271814694.0, "step": 1660 }, { "epoch": 0.6085921040578913, "grad_norm": 0.2103760109977015, "learning_rate": 3.921345090681691e-05, "loss": 0.5767, "num_tokens": 1272616238.0, "step": 1661 }, { "epoch": 0.6089585050838142, "grad_norm": 0.1841981819869034, "learning_rate": 3.921220118977772e-05, "loss": 0.5311, "num_tokens": 1273350161.0, "step": 1662 }, { "epoch": 0.6093249061097371, "grad_norm": 0.1907623766857686, "learning_rate": 3.9210950502924936e-05, "loss": 0.5647, "num_tokens": 1274149490.0, "step": 1663 }, { "epoch": 0.60969130713566, "grad_norm": 0.22059459111792942, "learning_rate": 3.920969884632903e-05, "loss": 0.5409, "num_tokens": 1274966324.0, "step": 1664 }, { "epoch": 0.6100577081615829, "grad_norm": 0.20024746938126317, "learning_rate": 3.920844622006053e-05, "loss": 0.5715, "num_tokens": 1275645988.0, "step": 1665 }, { "epoch": 0.6104241091875057, "grad_norm": 0.20441591910662132, "learning_rate": 3.920719262419001e-05, "loss": 0.5721, "num_tokens": 1276428419.0, "step": 1666 }, { "epoch": 0.6107905102134286, "grad_norm": 0.1804368152423234, "learning_rate": 3.92059380587881e-05, "loss": 0.5556, "num_tokens": 1277172934.0, "step": 1667 }, { "epoch": 0.6111569112393515, "grad_norm": 0.22011099804812473, "learning_rate": 3.92046825239255e-05, "loss": 0.5709, "num_tokens": 1277950610.0, "step": 1668 }, { "epoch": 0.6115233122652743, "grad_norm": 0.1822386380880639, "learning_rate": 3.920342601967293e-05, "loss": 0.5893, "num_tokens": 1278627361.0, "step": 1669 }, { "epoch": 0.6118897132911972, "grad_norm": 0.17561117622089426, "learning_rate": 3.9202168546101224e-05, "loss": 0.5442, "num_tokens": 1279409641.0, "step": 1670 }, { "epoch": 0.6122561143171201, "grad_norm": 0.19332025748849951, "learning_rate": 3.92009101032812e-05, "loss": 0.5573, "num_tokens": 1280092673.0, "step": 1671 }, { "epoch": 0.6126225153430429, "grad_norm": 0.17526004746693408, "learning_rate": 3.9199650691283775e-05, "loss": 0.5726, "num_tokens": 1280887784.0, "step": 1672 }, { "epoch": 0.6129889163689658, "grad_norm": 0.19190083769516317, "learning_rate": 3.9198390310179914e-05, "loss": 0.5548, "num_tokens": 1281693626.0, "step": 1673 }, { "epoch": 0.6133553173948887, "grad_norm": 0.21259968674571228, "learning_rate": 3.9197128960040635e-05, "loss": 0.6071, "num_tokens": 1282381192.0, "step": 1674 }, { "epoch": 0.6137217184208116, "grad_norm": 0.21417570240750178, "learning_rate": 3.9195866640937e-05, "loss": 0.5768, "num_tokens": 1283003761.0, "step": 1675 }, { "epoch": 0.6140881194467345, "grad_norm": 0.19398467117246027, "learning_rate": 3.919460335294013e-05, "loss": 0.566, "num_tokens": 1283698502.0, "step": 1676 }, { "epoch": 0.6144545204726574, "grad_norm": 0.1912733596605905, "learning_rate": 3.919333909612122e-05, "loss": 0.6062, "num_tokens": 1284436927.0, "step": 1677 }, { "epoch": 0.6148209214985801, "grad_norm": 0.20291066579331207, "learning_rate": 3.919207387055149e-05, "loss": 0.5825, "num_tokens": 1285279378.0, "step": 1678 }, { "epoch": 0.615187322524503, "grad_norm": 0.18985146929183966, "learning_rate": 3.919080767630223e-05, "loss": 0.5925, "num_tokens": 1286022282.0, "step": 1679 }, { "epoch": 0.6155537235504259, "grad_norm": 0.2036178952288372, "learning_rate": 3.91895405134448e-05, "loss": 0.626, "num_tokens": 1286739560.0, "step": 1680 }, { "epoch": 0.6159201245763488, "grad_norm": 0.18439317040163153, "learning_rate": 3.9188272382050574e-05, "loss": 0.5608, "num_tokens": 1287530052.0, "step": 1681 }, { "epoch": 0.6162865256022717, "grad_norm": 0.19004822654759193, "learning_rate": 3.918700328219102e-05, "loss": 0.5843, "num_tokens": 1288359685.0, "step": 1682 }, { "epoch": 0.6166529266281946, "grad_norm": 0.19584778026569347, "learning_rate": 3.918573321393764e-05, "loss": 0.5531, "num_tokens": 1289042412.0, "step": 1683 }, { "epoch": 0.6170193276541174, "grad_norm": 0.19854004162467442, "learning_rate": 3.9184462177362e-05, "loss": 0.5795, "num_tokens": 1289811303.0, "step": 1684 }, { "epoch": 0.6173857286800403, "grad_norm": 0.19498164504282697, "learning_rate": 3.9183190172535706e-05, "loss": 0.5725, "num_tokens": 1290691638.0, "step": 1685 }, { "epoch": 0.6177521297059632, "grad_norm": 0.1852600728288811, "learning_rate": 3.9181917199530433e-05, "loss": 0.5731, "num_tokens": 1291363663.0, "step": 1686 }, { "epoch": 0.618118530731886, "grad_norm": 0.2079819021800227, "learning_rate": 3.91806432584179e-05, "loss": 0.5803, "num_tokens": 1292095765.0, "step": 1687 }, { "epoch": 0.6184849317578089, "grad_norm": 0.21912757881257616, "learning_rate": 3.917936834926991e-05, "loss": 0.5984, "num_tokens": 1292914909.0, "step": 1688 }, { "epoch": 0.6188513327837318, "grad_norm": 0.1821865440358157, "learning_rate": 3.9178092472158264e-05, "loss": 0.5646, "num_tokens": 1293702554.0, "step": 1689 }, { "epoch": 0.6192177338096546, "grad_norm": 0.2262138778345763, "learning_rate": 3.917681562715488e-05, "loss": 0.5913, "num_tokens": 1294423100.0, "step": 1690 }, { "epoch": 0.6195841348355775, "grad_norm": 0.2377340239046139, "learning_rate": 3.917553781433168e-05, "loss": 0.5982, "num_tokens": 1295199016.0, "step": 1691 }, { "epoch": 0.6199505358615004, "grad_norm": 0.1917212898541359, "learning_rate": 3.9174259033760677e-05, "loss": 0.5686, "num_tokens": 1295886455.0, "step": 1692 }, { "epoch": 0.6203169368874233, "grad_norm": 0.19610189070558112, "learning_rate": 3.917297928551391e-05, "loss": 0.5705, "num_tokens": 1296639042.0, "step": 1693 }, { "epoch": 0.6206833379133462, "grad_norm": 0.1992745340067736, "learning_rate": 3.91716985696635e-05, "loss": 0.583, "num_tokens": 1297396329.0, "step": 1694 }, { "epoch": 0.6210497389392691, "grad_norm": 0.18857003090952382, "learning_rate": 3.917041688628159e-05, "loss": 0.5614, "num_tokens": 1298195471.0, "step": 1695 }, { "epoch": 0.6214161399651918, "grad_norm": 0.1906161459336974, "learning_rate": 3.916913423544042e-05, "loss": 0.5658, "num_tokens": 1298966504.0, "step": 1696 }, { "epoch": 0.6217825409911147, "grad_norm": 0.20983088306292932, "learning_rate": 3.916785061721224e-05, "loss": 0.5651, "num_tokens": 1299627084.0, "step": 1697 }, { "epoch": 0.6221489420170376, "grad_norm": 0.2317990121386148, "learning_rate": 3.916656603166938e-05, "loss": 0.5442, "num_tokens": 1300305329.0, "step": 1698 }, { "epoch": 0.6225153430429605, "grad_norm": 0.2289674675909729, "learning_rate": 3.916528047888423e-05, "loss": 0.5838, "num_tokens": 1300936724.0, "step": 1699 }, { "epoch": 0.6228817440688834, "grad_norm": 0.22132149466082382, "learning_rate": 3.9163993958929205e-05, "loss": 0.5841, "num_tokens": 1301637555.0, "step": 1700 }, { "epoch": 0.6232481450948063, "grad_norm": 0.20684952361960823, "learning_rate": 3.916270647187681e-05, "loss": 0.5605, "num_tokens": 1302433949.0, "step": 1701 }, { "epoch": 0.6236145461207291, "grad_norm": 0.2785902951458266, "learning_rate": 3.916141801779958e-05, "loss": 0.6048, "num_tokens": 1303202588.0, "step": 1702 }, { "epoch": 0.623980947146652, "grad_norm": 0.17966311574977048, "learning_rate": 3.916012859677011e-05, "loss": 0.5968, "num_tokens": 1304000120.0, "step": 1703 }, { "epoch": 0.6243473481725749, "grad_norm": 0.259153320333381, "learning_rate": 3.9158838208861065e-05, "loss": 0.5836, "num_tokens": 1304669462.0, "step": 1704 }, { "epoch": 0.6247137491984978, "grad_norm": 0.2235311660637294, "learning_rate": 3.915754685414514e-05, "loss": 0.5654, "num_tokens": 1305472755.0, "step": 1705 }, { "epoch": 0.6250801502244206, "grad_norm": 0.20998143223386664, "learning_rate": 3.915625453269509e-05, "loss": 0.5666, "num_tokens": 1306148393.0, "step": 1706 }, { "epoch": 0.6254465512503435, "grad_norm": 0.22946448014520107, "learning_rate": 3.9154961244583745e-05, "loss": 0.5786, "num_tokens": 1306987241.0, "step": 1707 }, { "epoch": 0.6258129522762663, "grad_norm": 0.20359223628379372, "learning_rate": 3.915366698988397e-05, "loss": 0.5703, "num_tokens": 1307729351.0, "step": 1708 }, { "epoch": 0.6261793533021892, "grad_norm": 0.2011802531786746, "learning_rate": 3.915237176866867e-05, "loss": 0.619, "num_tokens": 1308400897.0, "step": 1709 }, { "epoch": 0.6265457543281121, "grad_norm": 0.22988740519373901, "learning_rate": 3.915107558101086e-05, "loss": 0.5681, "num_tokens": 1309167341.0, "step": 1710 }, { "epoch": 0.626912155354035, "grad_norm": 0.1850607720230215, "learning_rate": 3.914977842698355e-05, "loss": 0.5598, "num_tokens": 1309925151.0, "step": 1711 }, { "epoch": 0.6272785563799579, "grad_norm": 0.18480096623218126, "learning_rate": 3.914848030665983e-05, "loss": 0.5546, "num_tokens": 1310715218.0, "step": 1712 }, { "epoch": 0.6276449574058808, "grad_norm": 0.189885035450999, "learning_rate": 3.914718122011284e-05, "loss": 0.5569, "num_tokens": 1311398862.0, "step": 1713 }, { "epoch": 0.6280113584318036, "grad_norm": 0.21070856686080444, "learning_rate": 3.914588116741577e-05, "loss": 0.5948, "num_tokens": 1312145838.0, "step": 1714 }, { "epoch": 0.6283777594577264, "grad_norm": 0.20665736666000512, "learning_rate": 3.91445801486419e-05, "loss": 0.603, "num_tokens": 1312906028.0, "step": 1715 }, { "epoch": 0.6287441604836493, "grad_norm": 0.21140542963106437, "learning_rate": 3.91432781638645e-05, "loss": 0.552, "num_tokens": 1313679435.0, "step": 1716 }, { "epoch": 0.6291105615095722, "grad_norm": 0.2269406360926454, "learning_rate": 3.9141975213156956e-05, "loss": 0.6031, "num_tokens": 1314450116.0, "step": 1717 }, { "epoch": 0.6294769625354951, "grad_norm": 0.2454433947299076, "learning_rate": 3.9140671296592676e-05, "loss": 0.6129, "num_tokens": 1315159725.0, "step": 1718 }, { "epoch": 0.629843363561418, "grad_norm": 0.17461355803149223, "learning_rate": 3.9139366414245116e-05, "loss": 0.5427, "num_tokens": 1315906655.0, "step": 1719 }, { "epoch": 0.6302097645873408, "grad_norm": 0.21736510289079874, "learning_rate": 3.913806056618781e-05, "loss": 0.5555, "num_tokens": 1316709099.0, "step": 1720 }, { "epoch": 0.6305761656132637, "grad_norm": 0.1836475932599979, "learning_rate": 3.913675375249433e-05, "loss": 0.5431, "num_tokens": 1317528835.0, "step": 1721 }, { "epoch": 0.6309425666391866, "grad_norm": 0.16296508313398414, "learning_rate": 3.913544597323831e-05, "loss": 0.5616, "num_tokens": 1318332308.0, "step": 1722 }, { "epoch": 0.6313089676651095, "grad_norm": 0.2530290597371856, "learning_rate": 3.913413722849344e-05, "loss": 0.6108, "num_tokens": 1318945717.0, "step": 1723 }, { "epoch": 0.6316753686910324, "grad_norm": 0.2005508441131563, "learning_rate": 3.9132827518333457e-05, "loss": 0.5803, "num_tokens": 1319793433.0, "step": 1724 }, { "epoch": 0.6320417697169552, "grad_norm": 0.19346108753065708, "learning_rate": 3.913151684283216e-05, "loss": 0.6025, "num_tokens": 1320471277.0, "step": 1725 }, { "epoch": 0.632408170742878, "grad_norm": 0.20804427612907112, "learning_rate": 3.913020520206339e-05, "loss": 0.5565, "num_tokens": 1321303903.0, "step": 1726 }, { "epoch": 0.6327745717688009, "grad_norm": 0.18540760030703626, "learning_rate": 3.912889259610106e-05, "loss": 0.5605, "num_tokens": 1322046840.0, "step": 1727 }, { "epoch": 0.6331409727947238, "grad_norm": 0.1791763969234062, "learning_rate": 3.9127579025019114e-05, "loss": 0.563, "num_tokens": 1322819535.0, "step": 1728 }, { "epoch": 0.6335073738206467, "grad_norm": 0.17142523740913965, "learning_rate": 3.912626448889158e-05, "loss": 0.5476, "num_tokens": 1323689764.0, "step": 1729 }, { "epoch": 0.6338737748465696, "grad_norm": 0.19077325603429757, "learning_rate": 3.912494898779252e-05, "loss": 0.5515, "num_tokens": 1324498770.0, "step": 1730 }, { "epoch": 0.6342401758724925, "grad_norm": 0.19493220009276108, "learning_rate": 3.912363252179605e-05, "loss": 0.5356, "num_tokens": 1325194378.0, "step": 1731 }, { "epoch": 0.6346065768984153, "grad_norm": 0.19053041327103928, "learning_rate": 3.9122315090976364e-05, "loss": 0.5778, "num_tokens": 1325986375.0, "step": 1732 }, { "epoch": 0.6349729779243382, "grad_norm": 0.20515783611290492, "learning_rate": 3.912099669540767e-05, "loss": 0.5621, "num_tokens": 1326798114.0, "step": 1733 }, { "epoch": 0.635339378950261, "grad_norm": 0.19637354456937486, "learning_rate": 3.9119677335164264e-05, "loss": 0.5367, "num_tokens": 1327590728.0, "step": 1734 }, { "epoch": 0.6357057799761839, "grad_norm": 0.18412915436126914, "learning_rate": 3.911835701032048e-05, "loss": 0.5231, "num_tokens": 1328230015.0, "step": 1735 }, { "epoch": 0.6360721810021068, "grad_norm": 0.1844768097455852, "learning_rate": 3.911703572095071e-05, "loss": 0.563, "num_tokens": 1329013974.0, "step": 1736 }, { "epoch": 0.6364385820280297, "grad_norm": 0.18624964218524423, "learning_rate": 3.91157134671294e-05, "loss": 0.5714, "num_tokens": 1329771516.0, "step": 1737 }, { "epoch": 0.6368049830539525, "grad_norm": 0.21653918295591285, "learning_rate": 3.911439024893106e-05, "loss": 0.5583, "num_tokens": 1330488203.0, "step": 1738 }, { "epoch": 0.6371713840798754, "grad_norm": 0.2326368236107283, "learning_rate": 3.911306606643025e-05, "loss": 0.5517, "num_tokens": 1331245108.0, "step": 1739 }, { "epoch": 0.6375377851057983, "grad_norm": 0.19098433823317387, "learning_rate": 3.911174091970156e-05, "loss": 0.578, "num_tokens": 1332030790.0, "step": 1740 }, { "epoch": 0.6379041861317212, "grad_norm": 0.22906740846977877, "learning_rate": 3.9110414808819675e-05, "loss": 0.5899, "num_tokens": 1332737071.0, "step": 1741 }, { "epoch": 0.6382705871576441, "grad_norm": 0.23515158405203265, "learning_rate": 3.91090877338593e-05, "loss": 0.5567, "num_tokens": 1333502772.0, "step": 1742 }, { "epoch": 0.638636988183567, "grad_norm": 0.20940421235352805, "learning_rate": 3.910775969489522e-05, "loss": 0.5924, "num_tokens": 1334235786.0, "step": 1743 }, { "epoch": 0.6390033892094897, "grad_norm": 0.20252546915447045, "learning_rate": 3.910643069200226e-05, "loss": 0.593, "num_tokens": 1335003942.0, "step": 1744 }, { "epoch": 0.6393697902354126, "grad_norm": 0.18578350336311913, "learning_rate": 3.910510072525529e-05, "loss": 0.5765, "num_tokens": 1335668409.0, "step": 1745 }, { "epoch": 0.6397361912613355, "grad_norm": 0.18494663952442025, "learning_rate": 3.910376979472926e-05, "loss": 0.5655, "num_tokens": 1336489168.0, "step": 1746 }, { "epoch": 0.6401025922872584, "grad_norm": 0.20141216163893516, "learning_rate": 3.910243790049916e-05, "loss": 0.5358, "num_tokens": 1337188085.0, "step": 1747 }, { "epoch": 0.6404689933131813, "grad_norm": 0.1843672657212591, "learning_rate": 3.9101105042640036e-05, "loss": 0.5639, "num_tokens": 1337993868.0, "step": 1748 }, { "epoch": 0.6408353943391042, "grad_norm": 0.21224257608500702, "learning_rate": 3.909977122122697e-05, "loss": 0.6122, "num_tokens": 1338724854.0, "step": 1749 }, { "epoch": 0.641201795365027, "grad_norm": 0.19864674908927937, "learning_rate": 3.909843643633514e-05, "loss": 0.6106, "num_tokens": 1339551960.0, "step": 1750 }, { "epoch": 0.6415681963909499, "grad_norm": 0.19578351756027615, "learning_rate": 3.909710068803973e-05, "loss": 0.5506, "num_tokens": 1340352155.0, "step": 1751 }, { "epoch": 0.6419345974168728, "grad_norm": 0.19119011201723404, "learning_rate": 3.909576397641603e-05, "loss": 0.5866, "num_tokens": 1341072253.0, "step": 1752 }, { "epoch": 0.6423009984427956, "grad_norm": 0.18326606654953684, "learning_rate": 3.9094426301539325e-05, "loss": 0.578, "num_tokens": 1341883769.0, "step": 1753 }, { "epoch": 0.6426673994687185, "grad_norm": 0.19649109232963727, "learning_rate": 3.909308766348501e-05, "loss": 0.5722, "num_tokens": 1342619573.0, "step": 1754 }, { "epoch": 0.6430338004946414, "grad_norm": 0.21228386498982874, "learning_rate": 3.90917480623285e-05, "loss": 0.5874, "num_tokens": 1343396788.0, "step": 1755 }, { "epoch": 0.6434002015205642, "grad_norm": 0.1826253323720724, "learning_rate": 3.909040749814528e-05, "loss": 0.5487, "num_tokens": 1344137342.0, "step": 1756 }, { "epoch": 0.6437666025464871, "grad_norm": 0.1875941443556241, "learning_rate": 3.908906597101088e-05, "loss": 0.5896, "num_tokens": 1344956290.0, "step": 1757 }, { "epoch": 0.64413300357241, "grad_norm": 0.18464576596773496, "learning_rate": 3.908772348100088e-05, "loss": 0.5562, "num_tokens": 1345781728.0, "step": 1758 }, { "epoch": 0.6444994045983329, "grad_norm": 0.22435077124625244, "learning_rate": 3.908638002819093e-05, "loss": 0.5648, "num_tokens": 1346518331.0, "step": 1759 }, { "epoch": 0.6448658056242558, "grad_norm": 0.19358992022339652, "learning_rate": 3.908503561265673e-05, "loss": 0.5381, "num_tokens": 1347261768.0, "step": 1760 }, { "epoch": 0.6452322066501787, "grad_norm": 0.20148819785227337, "learning_rate": 3.908369023447402e-05, "loss": 0.5949, "num_tokens": 1348043526.0, "step": 1761 }, { "epoch": 0.6455986076761014, "grad_norm": 0.21600055194565349, "learning_rate": 3.9082343893718606e-05, "loss": 0.6243, "num_tokens": 1348896969.0, "step": 1762 }, { "epoch": 0.6459650087020243, "grad_norm": 0.19622205652641525, "learning_rate": 3.908099659046635e-05, "loss": 0.5947, "num_tokens": 1349675725.0, "step": 1763 }, { "epoch": 0.6463314097279472, "grad_norm": 0.18543375335793857, "learning_rate": 3.907964832479317e-05, "loss": 0.5342, "num_tokens": 1350531020.0, "step": 1764 }, { "epoch": 0.6466978107538701, "grad_norm": 0.19952755455059903, "learning_rate": 3.9078299096775044e-05, "loss": 0.6206, "num_tokens": 1351241396.0, "step": 1765 }, { "epoch": 0.647064211779793, "grad_norm": 0.22491351593554837, "learning_rate": 3.907694890648796e-05, "loss": 0.5846, "num_tokens": 1352000487.0, "step": 1766 }, { "epoch": 0.6474306128057159, "grad_norm": 0.2131201121232163, "learning_rate": 3.907559775400802e-05, "loss": 0.5622, "num_tokens": 1352873133.0, "step": 1767 }, { "epoch": 0.6477970138316387, "grad_norm": 0.1597846806840401, "learning_rate": 3.907424563941136e-05, "loss": 0.5067, "num_tokens": 1353533591.0, "step": 1768 }, { "epoch": 0.6481634148575616, "grad_norm": 0.22654573629993158, "learning_rate": 3.907289256277414e-05, "loss": 0.582, "num_tokens": 1354313010.0, "step": 1769 }, { "epoch": 0.6485298158834845, "grad_norm": 0.19835460942409874, "learning_rate": 3.907153852417261e-05, "loss": 0.5928, "num_tokens": 1355027123.0, "step": 1770 }, { "epoch": 0.6488962169094074, "grad_norm": 0.20064415388231147, "learning_rate": 3.907018352368306e-05, "loss": 0.5926, "num_tokens": 1355681746.0, "step": 1771 }, { "epoch": 0.6492626179353302, "grad_norm": 0.20024200749921875, "learning_rate": 3.906882756138184e-05, "loss": 0.5866, "num_tokens": 1356447982.0, "step": 1772 }, { "epoch": 0.6496290189612531, "grad_norm": 0.22018245803314604, "learning_rate": 3.9067470637345357e-05, "loss": 0.5384, "num_tokens": 1357264461.0, "step": 1773 }, { "epoch": 0.6499954199871759, "grad_norm": 0.1905809239401328, "learning_rate": 3.906611275165006e-05, "loss": 0.5642, "num_tokens": 1358035793.0, "step": 1774 }, { "epoch": 0.6503618210130988, "grad_norm": 0.2042411720164292, "learning_rate": 3.9064753904372454e-05, "loss": 0.5245, "num_tokens": 1358738550.0, "step": 1775 }, { "epoch": 0.6507282220390217, "grad_norm": 0.17200750030857606, "learning_rate": 3.9063394095589104e-05, "loss": 0.5847, "num_tokens": 1359550746.0, "step": 1776 }, { "epoch": 0.6510946230649446, "grad_norm": 0.19132911932112542, "learning_rate": 3.906203332537664e-05, "loss": 0.569, "num_tokens": 1360299501.0, "step": 1777 }, { "epoch": 0.6514610240908675, "grad_norm": 0.17941292218000623, "learning_rate": 3.906067159381171e-05, "loss": 0.5518, "num_tokens": 1361113806.0, "step": 1778 }, { "epoch": 0.6518274251167904, "grad_norm": 0.1554213804852457, "learning_rate": 3.905930890097107e-05, "loss": 0.523, "num_tokens": 1361927720.0, "step": 1779 }, { "epoch": 0.6521938261427132, "grad_norm": 0.19544356959683018, "learning_rate": 3.9057945246931484e-05, "loss": 0.5699, "num_tokens": 1362662642.0, "step": 1780 }, { "epoch": 0.652560227168636, "grad_norm": 0.16908582057918978, "learning_rate": 3.905658063176978e-05, "loss": 0.5339, "num_tokens": 1363382640.0, "step": 1781 }, { "epoch": 0.6529266281945589, "grad_norm": 0.1883281543628979, "learning_rate": 3.905521505556286e-05, "loss": 0.5485, "num_tokens": 1364167099.0, "step": 1782 }, { "epoch": 0.6532930292204818, "grad_norm": 0.20508525487998105, "learning_rate": 3.905384851838766e-05, "loss": 0.5739, "num_tokens": 1364944581.0, "step": 1783 }, { "epoch": 0.6536594302464047, "grad_norm": 0.1912050913513613, "learning_rate": 3.905248102032118e-05, "loss": 0.589, "num_tokens": 1365670194.0, "step": 1784 }, { "epoch": 0.6540258312723276, "grad_norm": 0.19227062381538013, "learning_rate": 3.9051112561440464e-05, "loss": 0.5904, "num_tokens": 1366485480.0, "step": 1785 }, { "epoch": 0.6543922322982504, "grad_norm": 0.188382398273767, "learning_rate": 3.904974314182263e-05, "loss": 0.5695, "num_tokens": 1367181832.0, "step": 1786 }, { "epoch": 0.6547586333241733, "grad_norm": 0.16453945733467293, "learning_rate": 3.904837276154481e-05, "loss": 0.5578, "num_tokens": 1367928890.0, "step": 1787 }, { "epoch": 0.6551250343500962, "grad_norm": 0.19289401861469757, "learning_rate": 3.904700142068426e-05, "loss": 0.583, "num_tokens": 1368724759.0, "step": 1788 }, { "epoch": 0.6554914353760191, "grad_norm": 0.1725693072250141, "learning_rate": 3.9045629119318215e-05, "loss": 0.5712, "num_tokens": 1369492903.0, "step": 1789 }, { "epoch": 0.655857836401942, "grad_norm": 0.17217197447810054, "learning_rate": 3.9044255857524004e-05, "loss": 0.5687, "num_tokens": 1370273403.0, "step": 1790 }, { "epoch": 0.6562242374278648, "grad_norm": 0.1782028881674167, "learning_rate": 3.904288163537902e-05, "loss": 0.5625, "num_tokens": 1371153604.0, "step": 1791 }, { "epoch": 0.6565906384537876, "grad_norm": 0.19929115780471723, "learning_rate": 3.9041506452960666e-05, "loss": 0.5958, "num_tokens": 1371784084.0, "step": 1792 }, { "epoch": 0.6569570394797105, "grad_norm": 0.18631051224337655, "learning_rate": 3.9040130310346436e-05, "loss": 0.5425, "num_tokens": 1372522398.0, "step": 1793 }, { "epoch": 0.6573234405056334, "grad_norm": 0.19901357952342544, "learning_rate": 3.9038753207613875e-05, "loss": 0.591, "num_tokens": 1373281058.0, "step": 1794 }, { "epoch": 0.6576898415315563, "grad_norm": 0.19338602411415337, "learning_rate": 3.9037375144840565e-05, "loss": 0.5744, "num_tokens": 1374007979.0, "step": 1795 }, { "epoch": 0.6580562425574792, "grad_norm": 0.20906774044906146, "learning_rate": 3.9035996122104156e-05, "loss": 0.5793, "num_tokens": 1374726507.0, "step": 1796 }, { "epoch": 0.6584226435834021, "grad_norm": 0.1862745068124667, "learning_rate": 3.903461613948236e-05, "loss": 0.534, "num_tokens": 1375550250.0, "step": 1797 }, { "epoch": 0.6587890446093249, "grad_norm": 0.2147092461443508, "learning_rate": 3.903323519705291e-05, "loss": 0.5521, "num_tokens": 1376283920.0, "step": 1798 }, { "epoch": 0.6591554456352477, "grad_norm": 0.19762502652393804, "learning_rate": 3.903185329489364e-05, "loss": 0.5753, "num_tokens": 1377089249.0, "step": 1799 }, { "epoch": 0.6595218466611706, "grad_norm": 0.19025898393286256, "learning_rate": 3.903047043308239e-05, "loss": 0.5939, "num_tokens": 1377807352.0, "step": 1800 }, { "epoch": 0.6598882476870935, "grad_norm": 0.16428502964910507, "learning_rate": 3.902908661169709e-05, "loss": 0.5332, "num_tokens": 1378478214.0, "step": 1801 }, { "epoch": 0.6602546487130164, "grad_norm": 0.18050416819473308, "learning_rate": 3.9027701830815704e-05, "loss": 0.5454, "num_tokens": 1379337367.0, "step": 1802 }, { "epoch": 0.6606210497389393, "grad_norm": 0.17324657793112727, "learning_rate": 3.902631609051626e-05, "loss": 0.5686, "num_tokens": 1380125925.0, "step": 1803 }, { "epoch": 0.6609874507648621, "grad_norm": 0.17279143802886507, "learning_rate": 3.902492939087684e-05, "loss": 0.552, "num_tokens": 1380954409.0, "step": 1804 }, { "epoch": 0.661353851790785, "grad_norm": 0.17694852831544175, "learning_rate": 3.9023541731975565e-05, "loss": 0.5368, "num_tokens": 1381801483.0, "step": 1805 }, { "epoch": 0.6617202528167079, "grad_norm": 0.19538121447399395, "learning_rate": 3.9022153113890626e-05, "loss": 0.5478, "num_tokens": 1382550353.0, "step": 1806 }, { "epoch": 0.6620866538426308, "grad_norm": 0.20746740584353113, "learning_rate": 3.9020763536700285e-05, "loss": 0.56, "num_tokens": 1383315221.0, "step": 1807 }, { "epoch": 0.6624530548685537, "grad_norm": 0.22173978862748994, "learning_rate": 3.901937300048281e-05, "loss": 0.5383, "num_tokens": 1384007231.0, "step": 1808 }, { "epoch": 0.6628194558944766, "grad_norm": 0.22978559033701826, "learning_rate": 3.901798150531656e-05, "loss": 0.5645, "num_tokens": 1384833356.0, "step": 1809 }, { "epoch": 0.6631858569203993, "grad_norm": 0.18069551842286466, "learning_rate": 3.9016589051279935e-05, "loss": 0.5549, "num_tokens": 1385656793.0, "step": 1810 }, { "epoch": 0.6635522579463222, "grad_norm": 0.2159077327254878, "learning_rate": 3.90151956384514e-05, "loss": 0.5864, "num_tokens": 1386434807.0, "step": 1811 }, { "epoch": 0.6639186589722451, "grad_norm": 0.1834132141048005, "learning_rate": 3.9013801266909465e-05, "loss": 0.5661, "num_tokens": 1387252120.0, "step": 1812 }, { "epoch": 0.664285059998168, "grad_norm": 0.1993775175522667, "learning_rate": 3.901240593673269e-05, "loss": 0.5576, "num_tokens": 1388057653.0, "step": 1813 }, { "epoch": 0.6646514610240909, "grad_norm": 0.18447987094345755, "learning_rate": 3.9011009647999694e-05, "loss": 0.5623, "num_tokens": 1388819067.0, "step": 1814 }, { "epoch": 0.6650178620500138, "grad_norm": 0.22008200704802472, "learning_rate": 3.9009612400789156e-05, "loss": 0.5442, "num_tokens": 1389622189.0, "step": 1815 }, { "epoch": 0.6653842630759366, "grad_norm": 0.17992604052072242, "learning_rate": 3.900821419517979e-05, "loss": 0.5628, "num_tokens": 1390367978.0, "step": 1816 }, { "epoch": 0.6657506641018595, "grad_norm": 0.21449887884019786, "learning_rate": 3.9006815031250395e-05, "loss": 0.5941, "num_tokens": 1391245222.0, "step": 1817 }, { "epoch": 0.6661170651277823, "grad_norm": 0.21517514660865356, "learning_rate": 3.900541490907981e-05, "loss": 0.5721, "num_tokens": 1392068302.0, "step": 1818 }, { "epoch": 0.6664834661537052, "grad_norm": 0.1737816796871704, "learning_rate": 3.900401382874689e-05, "loss": 0.5626, "num_tokens": 1392831849.0, "step": 1819 }, { "epoch": 0.6668498671796281, "grad_norm": 0.22490456512507317, "learning_rate": 3.900261179033062e-05, "loss": 0.5417, "num_tokens": 1393638273.0, "step": 1820 }, { "epoch": 0.667216268205551, "grad_norm": 0.19996019561995412, "learning_rate": 3.9001208793909965e-05, "loss": 0.5952, "num_tokens": 1394275005.0, "step": 1821 }, { "epoch": 0.6675826692314738, "grad_norm": 0.1941199706612649, "learning_rate": 3.8999804839564e-05, "loss": 0.6183, "num_tokens": 1395032511.0, "step": 1822 }, { "epoch": 0.6679490702573967, "grad_norm": 0.22609771143449992, "learning_rate": 3.8998399927371806e-05, "loss": 0.5924, "num_tokens": 1395766768.0, "step": 1823 }, { "epoch": 0.6683154712833196, "grad_norm": 0.1890824370987653, "learning_rate": 3.899699405741257e-05, "loss": 0.5345, "num_tokens": 1396472407.0, "step": 1824 }, { "epoch": 0.6686818723092425, "grad_norm": 0.18903673393804735, "learning_rate": 3.899558722976548e-05, "loss": 0.545, "num_tokens": 1397255200.0, "step": 1825 }, { "epoch": 0.6690482733351654, "grad_norm": 0.21007868123499643, "learning_rate": 3.899417944450982e-05, "loss": 0.588, "num_tokens": 1398017088.0, "step": 1826 }, { "epoch": 0.6694146743610883, "grad_norm": 0.1851330893935023, "learning_rate": 3.8992770701724894e-05, "loss": 0.5449, "num_tokens": 1398962860.0, "step": 1827 }, { "epoch": 0.669781075387011, "grad_norm": 0.21080059512916152, "learning_rate": 3.89913610014901e-05, "loss": 0.5455, "num_tokens": 1399634434.0, "step": 1828 }, { "epoch": 0.6701474764129339, "grad_norm": 0.20145621750074305, "learning_rate": 3.898995034388485e-05, "loss": 0.5527, "num_tokens": 1400406337.0, "step": 1829 }, { "epoch": 0.6705138774388568, "grad_norm": 0.18000030841837233, "learning_rate": 3.898853872898863e-05, "loss": 0.5727, "num_tokens": 1401213009.0, "step": 1830 }, { "epoch": 0.6708802784647797, "grad_norm": 0.18399437389951798, "learning_rate": 3.898712615688098e-05, "loss": 0.6015, "num_tokens": 1401941768.0, "step": 1831 }, { "epoch": 0.6712466794907026, "grad_norm": 0.18885016482351333, "learning_rate": 3.898571262764149e-05, "loss": 0.5872, "num_tokens": 1402752547.0, "step": 1832 }, { "epoch": 0.6716130805166255, "grad_norm": 0.18520050653462553, "learning_rate": 3.89842981413498e-05, "loss": 0.5474, "num_tokens": 1403491187.0, "step": 1833 }, { "epoch": 0.6719794815425483, "grad_norm": 0.1745913899225813, "learning_rate": 3.898288269808562e-05, "loss": 0.5752, "num_tokens": 1404294628.0, "step": 1834 }, { "epoch": 0.6723458825684712, "grad_norm": 0.16875124618188717, "learning_rate": 3.898146629792869e-05, "loss": 0.5593, "num_tokens": 1405105015.0, "step": 1835 }, { "epoch": 0.6727122835943941, "grad_norm": 0.1684119223696653, "learning_rate": 3.898004894095882e-05, "loss": 0.5355, "num_tokens": 1405879276.0, "step": 1836 }, { "epoch": 0.673078684620317, "grad_norm": 0.17432837112552915, "learning_rate": 3.897863062725587e-05, "loss": 0.5961, "num_tokens": 1406674579.0, "step": 1837 }, { "epoch": 0.6734450856462398, "grad_norm": 0.23013454472936706, "learning_rate": 3.897721135689975e-05, "loss": 0.5653, "num_tokens": 1407520890.0, "step": 1838 }, { "epoch": 0.6738114866721627, "grad_norm": 0.1783754082009078, "learning_rate": 3.8975791129970446e-05, "loss": 0.5527, "num_tokens": 1408229848.0, "step": 1839 }, { "epoch": 0.6741778876980855, "grad_norm": 0.21866493519994143, "learning_rate": 3.897436994654796e-05, "loss": 0.5958, "num_tokens": 1408938476.0, "step": 1840 }, { "epoch": 0.6745442887240084, "grad_norm": 0.2475197932357802, "learning_rate": 3.897294780671238e-05, "loss": 0.5919, "num_tokens": 1409692420.0, "step": 1841 }, { "epoch": 0.6749106897499313, "grad_norm": 0.17074677098054072, "learning_rate": 3.897152471054383e-05, "loss": 0.5498, "num_tokens": 1410554845.0, "step": 1842 }, { "epoch": 0.6752770907758542, "grad_norm": 0.2243053395133786, "learning_rate": 3.89701006581225e-05, "loss": 0.5471, "num_tokens": 1411286599.0, "step": 1843 }, { "epoch": 0.6756434918017771, "grad_norm": 0.22471907989348516, "learning_rate": 3.896867564952862e-05, "loss": 0.5714, "num_tokens": 1412090495.0, "step": 1844 }, { "epoch": 0.6760098928277, "grad_norm": 0.19304162563152746, "learning_rate": 3.896724968484248e-05, "loss": 0.5668, "num_tokens": 1412993701.0, "step": 1845 }, { "epoch": 0.6763762938536227, "grad_norm": 0.19934326705238808, "learning_rate": 3.896582276414443e-05, "loss": 0.5582, "num_tokens": 1413697384.0, "step": 1846 }, { "epoch": 0.6767426948795456, "grad_norm": 0.23125454760210015, "learning_rate": 3.8964394887514874e-05, "loss": 0.5913, "num_tokens": 1414445143.0, "step": 1847 }, { "epoch": 0.6771090959054685, "grad_norm": 0.21186515963939204, "learning_rate": 3.896296605503426e-05, "loss": 0.5614, "num_tokens": 1415241107.0, "step": 1848 }, { "epoch": 0.6774754969313914, "grad_norm": 0.18878301593848906, "learning_rate": 3.896153626678309e-05, "loss": 0.6086, "num_tokens": 1415967364.0, "step": 1849 }, { "epoch": 0.6778418979573143, "grad_norm": 0.24266523410728394, "learning_rate": 3.896010552284193e-05, "loss": 0.5789, "num_tokens": 1416721666.0, "step": 1850 }, { "epoch": 0.6782082989832372, "grad_norm": 0.23220479155103027, "learning_rate": 3.8958673823291396e-05, "loss": 0.5642, "num_tokens": 1417488096.0, "step": 1851 }, { "epoch": 0.67857470000916, "grad_norm": 0.18886972106503314, "learning_rate": 3.895724116821215e-05, "loss": 0.6043, "num_tokens": 1418317718.0, "step": 1852 }, { "epoch": 0.6789411010350829, "grad_norm": 0.24714761371863253, "learning_rate": 3.895580755768493e-05, "loss": 0.5726, "num_tokens": 1419053064.0, "step": 1853 }, { "epoch": 0.6793075020610058, "grad_norm": 0.2132318418733052, "learning_rate": 3.895437299179049e-05, "loss": 0.5641, "num_tokens": 1419807772.0, "step": 1854 }, { "epoch": 0.6796739030869287, "grad_norm": 0.21423944575828224, "learning_rate": 3.8952937470609675e-05, "loss": 0.5887, "num_tokens": 1420596633.0, "step": 1855 }, { "epoch": 0.6800403041128515, "grad_norm": 0.23851159258948562, "learning_rate": 3.895150099422337e-05, "loss": 0.5503, "num_tokens": 1421250531.0, "step": 1856 }, { "epoch": 0.6804067051387744, "grad_norm": 0.17347691132161827, "learning_rate": 3.89500635627125e-05, "loss": 0.583, "num_tokens": 1421948939.0, "step": 1857 }, { "epoch": 0.6807731061646972, "grad_norm": 0.17910602910880005, "learning_rate": 3.894862517615807e-05, "loss": 0.5635, "num_tokens": 1422762539.0, "step": 1858 }, { "epoch": 0.6811395071906201, "grad_norm": 0.18451693475463377, "learning_rate": 3.894718583464111e-05, "loss": 0.5711, "num_tokens": 1423531351.0, "step": 1859 }, { "epoch": 0.681505908216543, "grad_norm": 0.1779672450819537, "learning_rate": 3.894574553824274e-05, "loss": 0.5749, "num_tokens": 1424303304.0, "step": 1860 }, { "epoch": 0.6818723092424659, "grad_norm": 0.16751754554854636, "learning_rate": 3.894430428704409e-05, "loss": 0.5784, "num_tokens": 1425092214.0, "step": 1861 }, { "epoch": 0.6822387102683888, "grad_norm": 0.16477234627651605, "learning_rate": 3.894286208112638e-05, "loss": 0.5733, "num_tokens": 1425860853.0, "step": 1862 }, { "epoch": 0.6826051112943117, "grad_norm": 0.1723616400570538, "learning_rate": 3.894141892057087e-05, "loss": 0.5919, "num_tokens": 1426564870.0, "step": 1863 }, { "epoch": 0.6829715123202345, "grad_norm": 0.17613900248146291, "learning_rate": 3.8939974805458875e-05, "loss": 0.5495, "num_tokens": 1427276301.0, "step": 1864 }, { "epoch": 0.6833379133461573, "grad_norm": 0.1992684186855218, "learning_rate": 3.893852973587176e-05, "loss": 0.5801, "num_tokens": 1427926097.0, "step": 1865 }, { "epoch": 0.6837043143720802, "grad_norm": 0.1926315170399935, "learning_rate": 3.8937083711890945e-05, "loss": 0.5943, "num_tokens": 1428668443.0, "step": 1866 }, { "epoch": 0.6840707153980031, "grad_norm": 0.18967319591754092, "learning_rate": 3.893563673359791e-05, "loss": 0.5745, "num_tokens": 1429519439.0, "step": 1867 }, { "epoch": 0.684437116423926, "grad_norm": 0.18933539968854526, "learning_rate": 3.893418880107417e-05, "loss": 0.5707, "num_tokens": 1430238823.0, "step": 1868 }, { "epoch": 0.6848035174498489, "grad_norm": 0.16738042610158124, "learning_rate": 3.8932739914401334e-05, "loss": 0.5342, "num_tokens": 1431141836.0, "step": 1869 }, { "epoch": 0.6851699184757717, "grad_norm": 0.1900646398531285, "learning_rate": 3.8931290073661025e-05, "loss": 0.5523, "num_tokens": 1431930499.0, "step": 1870 }, { "epoch": 0.6855363195016946, "grad_norm": 0.21057885422356695, "learning_rate": 3.8929839278934924e-05, "loss": 0.5378, "num_tokens": 1432718906.0, "step": 1871 }, { "epoch": 0.6859027205276175, "grad_norm": 0.16165408198957318, "learning_rate": 3.892838753030479e-05, "loss": 0.5444, "num_tokens": 1433468590.0, "step": 1872 }, { "epoch": 0.6862691215535404, "grad_norm": 0.19077480311809167, "learning_rate": 3.8926934827852416e-05, "loss": 0.523, "num_tokens": 1434290093.0, "step": 1873 }, { "epoch": 0.6866355225794633, "grad_norm": 0.1856920975748769, "learning_rate": 3.8925481171659656e-05, "loss": 0.5425, "num_tokens": 1435075035.0, "step": 1874 }, { "epoch": 0.6870019236053861, "grad_norm": 0.19116228212689784, "learning_rate": 3.8924026561808405e-05, "loss": 0.5311, "num_tokens": 1435844238.0, "step": 1875 }, { "epoch": 0.6873683246313089, "grad_norm": 0.19788404528556755, "learning_rate": 3.892257099838064e-05, "loss": 0.5562, "num_tokens": 1436600266.0, "step": 1876 }, { "epoch": 0.6877347256572318, "grad_norm": 0.17634868413454025, "learning_rate": 3.892111448145836e-05, "loss": 0.558, "num_tokens": 1437429190.0, "step": 1877 }, { "epoch": 0.6881011266831547, "grad_norm": 0.20565966448568906, "learning_rate": 3.891965701112364e-05, "loss": 0.5513, "num_tokens": 1438203164.0, "step": 1878 }, { "epoch": 0.6884675277090776, "grad_norm": 0.16779874333040842, "learning_rate": 3.891819858745859e-05, "loss": 0.5497, "num_tokens": 1439040306.0, "step": 1879 }, { "epoch": 0.6888339287350005, "grad_norm": 0.20113139945898686, "learning_rate": 3.89167392105454e-05, "loss": 0.5408, "num_tokens": 1439596444.0, "step": 1880 }, { "epoch": 0.6892003297609234, "grad_norm": 0.18308524830563866, "learning_rate": 3.891527888046628e-05, "loss": 0.564, "num_tokens": 1440461255.0, "step": 1881 }, { "epoch": 0.6895667307868462, "grad_norm": 0.17409635279393298, "learning_rate": 3.8913817597303524e-05, "loss": 0.5533, "num_tokens": 1441214906.0, "step": 1882 }, { "epoch": 0.689933131812769, "grad_norm": 0.2000655003830997, "learning_rate": 3.891235536113947e-05, "loss": 0.574, "num_tokens": 1441991535.0, "step": 1883 }, { "epoch": 0.6902995328386919, "grad_norm": 0.19887808981967875, "learning_rate": 3.89108921720565e-05, "loss": 0.5514, "num_tokens": 1442844443.0, "step": 1884 }, { "epoch": 0.6906659338646148, "grad_norm": 0.1902843916926668, "learning_rate": 3.890942803013705e-05, "loss": 0.557, "num_tokens": 1443725351.0, "step": 1885 }, { "epoch": 0.6910323348905377, "grad_norm": 0.20381646398555914, "learning_rate": 3.890796293546364e-05, "loss": 0.5807, "num_tokens": 1444489099.0, "step": 1886 }, { "epoch": 0.6913987359164606, "grad_norm": 0.19947281038880313, "learning_rate": 3.890649688811879e-05, "loss": 0.5821, "num_tokens": 1445183002.0, "step": 1887 }, { "epoch": 0.6917651369423834, "grad_norm": 0.19401561998732447, "learning_rate": 3.890502988818512e-05, "loss": 0.5847, "num_tokens": 1445923594.0, "step": 1888 }, { "epoch": 0.6921315379683063, "grad_norm": 0.17999236812135935, "learning_rate": 3.890356193574529e-05, "loss": 0.5313, "num_tokens": 1446588281.0, "step": 1889 }, { "epoch": 0.6924979389942292, "grad_norm": 0.18662788460259447, "learning_rate": 3.8902093030882004e-05, "loss": 0.5309, "num_tokens": 1447169725.0, "step": 1890 }, { "epoch": 0.6928643400201521, "grad_norm": 0.1828110067515294, "learning_rate": 3.890062317367803e-05, "loss": 0.5959, "num_tokens": 1447943281.0, "step": 1891 }, { "epoch": 0.693230741046075, "grad_norm": 0.19693478741921047, "learning_rate": 3.889915236421619e-05, "loss": 0.568, "num_tokens": 1448683526.0, "step": 1892 }, { "epoch": 0.6935971420719979, "grad_norm": 0.1816321655780156, "learning_rate": 3.8897680602579344e-05, "loss": 0.5733, "num_tokens": 1449471213.0, "step": 1893 }, { "epoch": 0.6939635430979206, "grad_norm": 0.22444135435163748, "learning_rate": 3.889620788885043e-05, "loss": 0.5517, "num_tokens": 1450234920.0, "step": 1894 }, { "epoch": 0.6943299441238435, "grad_norm": 0.16570252172114555, "learning_rate": 3.889473422311242e-05, "loss": 0.5516, "num_tokens": 1451037523.0, "step": 1895 }, { "epoch": 0.6946963451497664, "grad_norm": 0.18169526622503762, "learning_rate": 3.889325960544835e-05, "loss": 0.5266, "num_tokens": 1451970428.0, "step": 1896 }, { "epoch": 0.6950627461756893, "grad_norm": 0.18990774124995227, "learning_rate": 3.889178403594132e-05, "loss": 0.5587, "num_tokens": 1452635972.0, "step": 1897 }, { "epoch": 0.6954291472016122, "grad_norm": 0.17544479283652084, "learning_rate": 3.889030751467444e-05, "loss": 0.5771, "num_tokens": 1453390647.0, "step": 1898 }, { "epoch": 0.6957955482275351, "grad_norm": 0.19891586627904567, "learning_rate": 3.888883004173093e-05, "loss": 0.5578, "num_tokens": 1454281819.0, "step": 1899 }, { "epoch": 0.6961619492534579, "grad_norm": 0.18827999472423745, "learning_rate": 3.888735161719403e-05, "loss": 0.5954, "num_tokens": 1455098649.0, "step": 1900 }, { "epoch": 0.6965283502793808, "grad_norm": 0.17018638648444734, "learning_rate": 3.888587224114703e-05, "loss": 0.5722, "num_tokens": 1455972955.0, "step": 1901 }, { "epoch": 0.6968947513053037, "grad_norm": 0.16630033975870123, "learning_rate": 3.88843919136733e-05, "loss": 0.5479, "num_tokens": 1456863499.0, "step": 1902 }, { "epoch": 0.6972611523312265, "grad_norm": 0.17962781241561251, "learning_rate": 3.888291063485624e-05, "loss": 0.5645, "num_tokens": 1457720369.0, "step": 1903 }, { "epoch": 0.6976275533571494, "grad_norm": 0.17553121991992984, "learning_rate": 3.8881428404779326e-05, "loss": 0.5516, "num_tokens": 1458529314.0, "step": 1904 }, { "epoch": 0.6979939543830723, "grad_norm": 0.1609735317582478, "learning_rate": 3.887994522352605e-05, "loss": 0.5376, "num_tokens": 1459285202.0, "step": 1905 }, { "epoch": 0.6983603554089951, "grad_norm": 0.18429199834382137, "learning_rate": 3.887846109118e-05, "loss": 0.5674, "num_tokens": 1460009923.0, "step": 1906 }, { "epoch": 0.698726756434918, "grad_norm": 0.18229643552802155, "learning_rate": 3.8876976007824784e-05, "loss": 0.5748, "num_tokens": 1460736738.0, "step": 1907 }, { "epoch": 0.6990931574608409, "grad_norm": 0.1864267964173356, "learning_rate": 3.88754899735441e-05, "loss": 0.6023, "num_tokens": 1461512831.0, "step": 1908 }, { "epoch": 0.6994595584867638, "grad_norm": 0.20176270922704048, "learning_rate": 3.887400298842165e-05, "loss": 0.5501, "num_tokens": 1462262817.0, "step": 1909 }, { "epoch": 0.6998259595126867, "grad_norm": 0.17363955040588708, "learning_rate": 3.887251505254124e-05, "loss": 0.5529, "num_tokens": 1463093903.0, "step": 1910 }, { "epoch": 0.7001923605386096, "grad_norm": 0.19398313379089965, "learning_rate": 3.88710261659867e-05, "loss": 0.5831, "num_tokens": 1463995189.0, "step": 1911 }, { "epoch": 0.7005587615645323, "grad_norm": 0.18423169907831496, "learning_rate": 3.886953632884192e-05, "loss": 0.5886, "num_tokens": 1464705295.0, "step": 1912 }, { "epoch": 0.7009251625904552, "grad_norm": 0.18972866788633067, "learning_rate": 3.886804554119084e-05, "loss": 0.5493, "num_tokens": 1465466607.0, "step": 1913 }, { "epoch": 0.7012915636163781, "grad_norm": 0.1959514801498212, "learning_rate": 3.886655380311747e-05, "loss": 0.5367, "num_tokens": 1466213347.0, "step": 1914 }, { "epoch": 0.701657964642301, "grad_norm": 0.21416722446561712, "learning_rate": 3.886506111470584e-05, "loss": 0.5457, "num_tokens": 1466939930.0, "step": 1915 }, { "epoch": 0.7020243656682239, "grad_norm": 0.19919688406367383, "learning_rate": 3.886356747604007e-05, "loss": 0.6272, "num_tokens": 1467751570.0, "step": 1916 }, { "epoch": 0.7023907666941468, "grad_norm": 0.19519519836148994, "learning_rate": 3.8862072887204324e-05, "loss": 0.5588, "num_tokens": 1468483423.0, "step": 1917 }, { "epoch": 0.7027571677200696, "grad_norm": 0.24724208684661617, "learning_rate": 3.8860577348282806e-05, "loss": 0.5845, "num_tokens": 1469243407.0, "step": 1918 }, { "epoch": 0.7031235687459925, "grad_norm": 0.1646999346608543, "learning_rate": 3.885908085935978e-05, "loss": 0.5727, "num_tokens": 1470110224.0, "step": 1919 }, { "epoch": 0.7034899697719154, "grad_norm": 0.1936624395400008, "learning_rate": 3.885758342051956e-05, "loss": 0.5586, "num_tokens": 1470901038.0, "step": 1920 }, { "epoch": 0.7038563707978382, "grad_norm": 0.1938775173654768, "learning_rate": 3.8856085031846536e-05, "loss": 0.5912, "num_tokens": 1471745756.0, "step": 1921 }, { "epoch": 0.7042227718237611, "grad_norm": 0.16816051960229847, "learning_rate": 3.885458569342511e-05, "loss": 0.5431, "num_tokens": 1472436879.0, "step": 1922 }, { "epoch": 0.704589172849684, "grad_norm": 0.16949534256995802, "learning_rate": 3.8853085405339785e-05, "loss": 0.5383, "num_tokens": 1473210679.0, "step": 1923 }, { "epoch": 0.7049555738756068, "grad_norm": 0.1651223303020129, "learning_rate": 3.885158416767509e-05, "loss": 0.5539, "num_tokens": 1473880191.0, "step": 1924 }, { "epoch": 0.7053219749015297, "grad_norm": 0.19013925301247234, "learning_rate": 3.885008198051559e-05, "loss": 0.5757, "num_tokens": 1474719629.0, "step": 1925 }, { "epoch": 0.7056883759274526, "grad_norm": 0.19401922123494067, "learning_rate": 3.8848578843945945e-05, "loss": 0.5679, "num_tokens": 1475508097.0, "step": 1926 }, { "epoch": 0.7060547769533755, "grad_norm": 0.211474055075435, "learning_rate": 3.884707475805085e-05, "loss": 0.552, "num_tokens": 1476208324.0, "step": 1927 }, { "epoch": 0.7064211779792984, "grad_norm": 0.18158026966989324, "learning_rate": 3.8845569722915036e-05, "loss": 0.535, "num_tokens": 1477022431.0, "step": 1928 }, { "epoch": 0.7067875790052213, "grad_norm": 0.18823123631893401, "learning_rate": 3.8844063738623316e-05, "loss": 0.5575, "num_tokens": 1477712239.0, "step": 1929 }, { "epoch": 0.707153980031144, "grad_norm": 0.17901203467091287, "learning_rate": 3.884255680526055e-05, "loss": 0.5652, "num_tokens": 1478475927.0, "step": 1930 }, { "epoch": 0.7075203810570669, "grad_norm": 0.17531012696630202, "learning_rate": 3.884104892291163e-05, "loss": 0.5792, "num_tokens": 1479121399.0, "step": 1931 }, { "epoch": 0.7078867820829898, "grad_norm": 0.20216570027954459, "learning_rate": 3.8839540091661524e-05, "loss": 0.5605, "num_tokens": 1479869753.0, "step": 1932 }, { "epoch": 0.7082531831089127, "grad_norm": 0.2144316664547138, "learning_rate": 3.883803031159524e-05, "loss": 0.5465, "num_tokens": 1480540723.0, "step": 1933 }, { "epoch": 0.7086195841348356, "grad_norm": 0.2136163303861116, "learning_rate": 3.883651958279786e-05, "loss": 0.5579, "num_tokens": 1481187772.0, "step": 1934 }, { "epoch": 0.7089859851607585, "grad_norm": 0.23036321029836548, "learning_rate": 3.8835007905354495e-05, "loss": 0.5734, "num_tokens": 1481976793.0, "step": 1935 }, { "epoch": 0.7093523861866813, "grad_norm": 0.19481422597994835, "learning_rate": 3.883349527935032e-05, "loss": 0.5564, "num_tokens": 1482692936.0, "step": 1936 }, { "epoch": 0.7097187872126042, "grad_norm": 0.2173791132693577, "learning_rate": 3.8831981704870574e-05, "loss": 0.5497, "num_tokens": 1483422190.0, "step": 1937 }, { "epoch": 0.7100851882385271, "grad_norm": 0.18748363368115514, "learning_rate": 3.883046718200052e-05, "loss": 0.5838, "num_tokens": 1484153898.0, "step": 1938 }, { "epoch": 0.71045158926445, "grad_norm": 0.17517839150811584, "learning_rate": 3.882895171082551e-05, "loss": 0.5566, "num_tokens": 1485114309.0, "step": 1939 }, { "epoch": 0.7108179902903728, "grad_norm": 0.21298444808080721, "learning_rate": 3.8827435291430924e-05, "loss": 0.5602, "num_tokens": 1485860045.0, "step": 1940 }, { "epoch": 0.7111843913162957, "grad_norm": 0.18112260324455656, "learning_rate": 3.882591792390221e-05, "loss": 0.5861, "num_tokens": 1486649867.0, "step": 1941 }, { "epoch": 0.7115507923422185, "grad_norm": 0.17409598926231099, "learning_rate": 3.882439960832485e-05, "loss": 0.5581, "num_tokens": 1487452456.0, "step": 1942 }, { "epoch": 0.7119171933681414, "grad_norm": 0.2130589345147664, "learning_rate": 3.882288034478441e-05, "loss": 0.5757, "num_tokens": 1488225057.0, "step": 1943 }, { "epoch": 0.7122835943940643, "grad_norm": 0.20077593425939788, "learning_rate": 3.8821360133366474e-05, "loss": 0.5275, "num_tokens": 1488940070.0, "step": 1944 }, { "epoch": 0.7126499954199872, "grad_norm": 0.19485385446625958, "learning_rate": 3.881983897415671e-05, "loss": 0.5827, "num_tokens": 1489638630.0, "step": 1945 }, { "epoch": 0.7130163964459101, "grad_norm": 0.1650537386979163, "learning_rate": 3.8818316867240834e-05, "loss": 0.5619, "num_tokens": 1490471369.0, "step": 1946 }, { "epoch": 0.713382797471833, "grad_norm": 0.17392875444378642, "learning_rate": 3.881679381270459e-05, "loss": 0.5563, "num_tokens": 1491169484.0, "step": 1947 }, { "epoch": 0.7137491984977558, "grad_norm": 0.19374326484866838, "learning_rate": 3.881526981063381e-05, "loss": 0.5791, "num_tokens": 1491921812.0, "step": 1948 }, { "epoch": 0.7141155995236786, "grad_norm": 0.1717860046051066, "learning_rate": 3.8813744861114346e-05, "loss": 0.5415, "num_tokens": 1492692953.0, "step": 1949 }, { "epoch": 0.7144820005496015, "grad_norm": 0.18727065208023952, "learning_rate": 3.8812218964232136e-05, "loss": 0.5287, "num_tokens": 1493497376.0, "step": 1950 }, { "epoch": 0.7148484015755244, "grad_norm": 0.16986883285928597, "learning_rate": 3.881069212007315e-05, "loss": 0.554, "num_tokens": 1494217714.0, "step": 1951 }, { "epoch": 0.7152148026014473, "grad_norm": 0.19110501177626926, "learning_rate": 3.880916432872342e-05, "loss": 0.5765, "num_tokens": 1494914743.0, "step": 1952 }, { "epoch": 0.7155812036273702, "grad_norm": 0.1719279721878569, "learning_rate": 3.8807635590269024e-05, "loss": 0.5482, "num_tokens": 1495678829.0, "step": 1953 }, { "epoch": 0.715947604653293, "grad_norm": 0.15935399883689752, "learning_rate": 3.88061059047961e-05, "loss": 0.551, "num_tokens": 1496538656.0, "step": 1954 }, { "epoch": 0.7163140056792159, "grad_norm": 0.1640172618260075, "learning_rate": 3.8804575272390843e-05, "loss": 0.5346, "num_tokens": 1497389052.0, "step": 1955 }, { "epoch": 0.7166804067051388, "grad_norm": 0.15974040848363724, "learning_rate": 3.880304369313949e-05, "loss": 0.5497, "num_tokens": 1498187796.0, "step": 1956 }, { "epoch": 0.7170468077310617, "grad_norm": 0.17909229916653974, "learning_rate": 3.8801511167128336e-05, "loss": 0.5561, "num_tokens": 1498892044.0, "step": 1957 }, { "epoch": 0.7174132087569846, "grad_norm": 0.16826598464475492, "learning_rate": 3.8799977694443734e-05, "loss": 0.5105, "num_tokens": 1499645088.0, "step": 1958 }, { "epoch": 0.7177796097829074, "grad_norm": 0.17535442844664173, "learning_rate": 3.879844327517208e-05, "loss": 0.5515, "num_tokens": 1500439482.0, "step": 1959 }, { "epoch": 0.7181460108088302, "grad_norm": 0.16108522376135007, "learning_rate": 3.879690790939984e-05, "loss": 0.5731, "num_tokens": 1501224758.0, "step": 1960 }, { "epoch": 0.7185124118347531, "grad_norm": 0.1654101521341293, "learning_rate": 3.879537159721352e-05, "loss": 0.5519, "num_tokens": 1502182562.0, "step": 1961 }, { "epoch": 0.718878812860676, "grad_norm": 0.19435005810784536, "learning_rate": 3.879383433869968e-05, "loss": 0.5613, "num_tokens": 1502909494.0, "step": 1962 }, { "epoch": 0.7192452138865989, "grad_norm": 0.14450011853099273, "learning_rate": 3.879229613394494e-05, "loss": 0.5139, "num_tokens": 1503761362.0, "step": 1963 }, { "epoch": 0.7196116149125218, "grad_norm": 0.17309242124369487, "learning_rate": 3.879075698303597e-05, "loss": 0.5417, "num_tokens": 1504511957.0, "step": 1964 }, { "epoch": 0.7199780159384447, "grad_norm": 0.17017413083505006, "learning_rate": 3.878921688605948e-05, "loss": 0.5584, "num_tokens": 1505235999.0, "step": 1965 }, { "epoch": 0.7203444169643675, "grad_norm": 0.1771143347484396, "learning_rate": 3.878767584310226e-05, "loss": 0.5708, "num_tokens": 1506081301.0, "step": 1966 }, { "epoch": 0.7207108179902904, "grad_norm": 0.20317880148835257, "learning_rate": 3.878613385425113e-05, "loss": 0.5538, "num_tokens": 1506845019.0, "step": 1967 }, { "epoch": 0.7210772190162132, "grad_norm": 0.16157929092156487, "learning_rate": 3.878459091959299e-05, "loss": 0.5592, "num_tokens": 1507716001.0, "step": 1968 }, { "epoch": 0.7214436200421361, "grad_norm": 0.1713197367903475, "learning_rate": 3.8783047039214766e-05, "loss": 0.5607, "num_tokens": 1508407948.0, "step": 1969 }, { "epoch": 0.721810021068059, "grad_norm": 0.18066575192502557, "learning_rate": 3.878150221320344e-05, "loss": 0.6107, "num_tokens": 1509075100.0, "step": 1970 }, { "epoch": 0.7221764220939819, "grad_norm": 0.1797487316879385, "learning_rate": 3.877995644164605e-05, "loss": 0.5598, "num_tokens": 1509772534.0, "step": 1971 }, { "epoch": 0.7225428231199047, "grad_norm": 0.1675031846327759, "learning_rate": 3.877840972462971e-05, "loss": 0.5625, "num_tokens": 1510462994.0, "step": 1972 }, { "epoch": 0.7229092241458276, "grad_norm": 0.17447610754854895, "learning_rate": 3.877686206224156e-05, "loss": 0.5693, "num_tokens": 1511337060.0, "step": 1973 }, { "epoch": 0.7232756251717505, "grad_norm": 0.17171114452731528, "learning_rate": 3.8775313454568813e-05, "loss": 0.5496, "num_tokens": 1512152293.0, "step": 1974 }, { "epoch": 0.7236420261976734, "grad_norm": 0.175200539383469, "learning_rate": 3.87737639016987e-05, "loss": 0.5815, "num_tokens": 1512992209.0, "step": 1975 }, { "epoch": 0.7240084272235963, "grad_norm": 0.17994051107018738, "learning_rate": 3.877221340371855e-05, "loss": 0.6019, "num_tokens": 1513797454.0, "step": 1976 }, { "epoch": 0.7243748282495192, "grad_norm": 0.19750125185655815, "learning_rate": 3.877066196071572e-05, "loss": 0.5537, "num_tokens": 1514505568.0, "step": 1977 }, { "epoch": 0.7247412292754419, "grad_norm": 0.17433743326117573, "learning_rate": 3.8769109572777624e-05, "loss": 0.5278, "num_tokens": 1515308028.0, "step": 1978 }, { "epoch": 0.7251076303013648, "grad_norm": 0.16072267362636328, "learning_rate": 3.8767556239991726e-05, "loss": 0.5248, "num_tokens": 1516129568.0, "step": 1979 }, { "epoch": 0.7254740313272877, "grad_norm": 0.24088413040759168, "learning_rate": 3.876600196244557e-05, "loss": 0.5316, "num_tokens": 1516730399.0, "step": 1980 }, { "epoch": 0.7258404323532106, "grad_norm": 0.22583973086681963, "learning_rate": 3.87644467402267e-05, "loss": 0.5787, "num_tokens": 1517441530.0, "step": 1981 }, { "epoch": 0.7262068333791335, "grad_norm": 0.19019126585162138, "learning_rate": 3.876289057342276e-05, "loss": 0.5398, "num_tokens": 1518244724.0, "step": 1982 }, { "epoch": 0.7265732344050564, "grad_norm": 0.24431160186052908, "learning_rate": 3.876133346212144e-05, "loss": 0.5513, "num_tokens": 1518839389.0, "step": 1983 }, { "epoch": 0.7269396354309792, "grad_norm": 0.18325532883329712, "learning_rate": 3.8759775406410446e-05, "loss": 0.5632, "num_tokens": 1519672250.0, "step": 1984 }, { "epoch": 0.7273060364569021, "grad_norm": 0.18884769315208394, "learning_rate": 3.8758216406377604e-05, "loss": 0.5642, "num_tokens": 1520341681.0, "step": 1985 }, { "epoch": 0.727672437482825, "grad_norm": 0.20531826949811494, "learning_rate": 3.8756656462110724e-05, "loss": 0.5998, "num_tokens": 1521070311.0, "step": 1986 }, { "epoch": 0.7280388385087478, "grad_norm": 0.19247767005343316, "learning_rate": 3.8755095573697716e-05, "loss": 0.5824, "num_tokens": 1521718461.0, "step": 1987 }, { "epoch": 0.7284052395346707, "grad_norm": 0.1952216838843003, "learning_rate": 3.875353374122652e-05, "loss": 0.5713, "num_tokens": 1522523874.0, "step": 1988 }, { "epoch": 0.7287716405605935, "grad_norm": 0.20064385452124545, "learning_rate": 3.8751970964785146e-05, "loss": 0.5173, "num_tokens": 1523350759.0, "step": 1989 }, { "epoch": 0.7291380415865164, "grad_norm": 0.18422440972131887, "learning_rate": 3.875040724446165e-05, "loss": 0.5692, "num_tokens": 1524168248.0, "step": 1990 }, { "epoch": 0.7295044426124393, "grad_norm": 0.18956602599689512, "learning_rate": 3.874884258034412e-05, "loss": 0.5635, "num_tokens": 1525006865.0, "step": 1991 }, { "epoch": 0.7298708436383622, "grad_norm": 0.20932159963466096, "learning_rate": 3.874727697252073e-05, "loss": 0.5648, "num_tokens": 1525750452.0, "step": 1992 }, { "epoch": 0.7302372446642851, "grad_norm": 0.17989512095696988, "learning_rate": 3.874571042107969e-05, "loss": 0.5477, "num_tokens": 1526515242.0, "step": 1993 }, { "epoch": 0.730603645690208, "grad_norm": 0.2079754976163544, "learning_rate": 3.8744142926109276e-05, "loss": 0.6011, "num_tokens": 1527287182.0, "step": 1994 }, { "epoch": 0.7309700467161308, "grad_norm": 0.19898177633429054, "learning_rate": 3.874257448769779e-05, "loss": 0.5656, "num_tokens": 1528010613.0, "step": 1995 }, { "epoch": 0.7313364477420536, "grad_norm": 0.1808442150980933, "learning_rate": 3.874100510593362e-05, "loss": 0.5513, "num_tokens": 1528736312.0, "step": 1996 }, { "epoch": 0.7317028487679765, "grad_norm": 0.1865324866869818, "learning_rate": 3.873943478090519e-05, "loss": 0.538, "num_tokens": 1529544266.0, "step": 1997 }, { "epoch": 0.7320692497938994, "grad_norm": 0.21169076496162206, "learning_rate": 3.8737863512700965e-05, "loss": 0.5956, "num_tokens": 1530319975.0, "step": 1998 }, { "epoch": 0.7324356508198223, "grad_norm": 0.20545970869072172, "learning_rate": 3.87362913014095e-05, "loss": 0.6018, "num_tokens": 1531029025.0, "step": 1999 }, { "epoch": 0.7328020518457452, "grad_norm": 0.19011588294496823, "learning_rate": 3.873471814711937e-05, "loss": 0.5527, "num_tokens": 1531753701.0, "step": 2000 }, { "epoch": 0.733168452871668, "grad_norm": 0.19457675235272875, "learning_rate": 3.87331440499192e-05, "loss": 0.5366, "num_tokens": 1532490662.0, "step": 2001 }, { "epoch": 0.7335348538975909, "grad_norm": 0.177753241438077, "learning_rate": 3.87315690098977e-05, "loss": 0.5265, "num_tokens": 1533223173.0, "step": 2002 }, { "epoch": 0.7339012549235138, "grad_norm": 0.18511325179858867, "learning_rate": 3.872999302714361e-05, "loss": 0.5318, "num_tokens": 1533967245.0, "step": 2003 }, { "epoch": 0.7342676559494367, "grad_norm": 0.19972774360941117, "learning_rate": 3.872841610174573e-05, "loss": 0.5573, "num_tokens": 1534778999.0, "step": 2004 }, { "epoch": 0.7346340569753596, "grad_norm": 0.15950772334479973, "learning_rate": 3.8726838233792905e-05, "loss": 0.5277, "num_tokens": 1535532943.0, "step": 2005 }, { "epoch": 0.7350004580012824, "grad_norm": 0.2076100216176803, "learning_rate": 3.8725259423374045e-05, "loss": 0.599, "num_tokens": 1536331799.0, "step": 2006 }, { "epoch": 0.7353668590272052, "grad_norm": 0.19646024541993487, "learning_rate": 3.872367967057811e-05, "loss": 0.5423, "num_tokens": 1537045606.0, "step": 2007 }, { "epoch": 0.7357332600531281, "grad_norm": 0.17435825177520203, "learning_rate": 3.8722098975494094e-05, "loss": 0.5326, "num_tokens": 1537869351.0, "step": 2008 }, { "epoch": 0.736099661079051, "grad_norm": 0.2356456294991629, "learning_rate": 3.872051733821107e-05, "loss": 0.5654, "num_tokens": 1538646116.0, "step": 2009 }, { "epoch": 0.7364660621049739, "grad_norm": 0.16824946324072673, "learning_rate": 3.871893475881817e-05, "loss": 0.571, "num_tokens": 1539428444.0, "step": 2010 }, { "epoch": 0.7368324631308968, "grad_norm": 0.1886403047232384, "learning_rate": 3.871735123740455e-05, "loss": 0.5453, "num_tokens": 1540212483.0, "step": 2011 }, { "epoch": 0.7371988641568197, "grad_norm": 0.15993897266996715, "learning_rate": 3.8715766774059426e-05, "loss": 0.5498, "num_tokens": 1540931662.0, "step": 2012 }, { "epoch": 0.7375652651827425, "grad_norm": 0.1836378394142489, "learning_rate": 3.871418136887208e-05, "loss": 0.5944, "num_tokens": 1541630873.0, "step": 2013 }, { "epoch": 0.7379316662086653, "grad_norm": 0.16228967389351237, "learning_rate": 3.8712595021931844e-05, "loss": 0.5547, "num_tokens": 1542490061.0, "step": 2014 }, { "epoch": 0.7382980672345882, "grad_norm": 0.182740787263353, "learning_rate": 3.87110077333281e-05, "loss": 0.5656, "num_tokens": 1543262997.0, "step": 2015 }, { "epoch": 0.7386644682605111, "grad_norm": 0.18410761915904325, "learning_rate": 3.870941950315028e-05, "loss": 0.5899, "num_tokens": 1543954213.0, "step": 2016 }, { "epoch": 0.739030869286434, "grad_norm": 0.16798792152170802, "learning_rate": 3.870783033148787e-05, "loss": 0.5416, "num_tokens": 1544662099.0, "step": 2017 }, { "epoch": 0.7393972703123569, "grad_norm": 0.17544947454517057, "learning_rate": 3.870624021843042e-05, "loss": 0.5639, "num_tokens": 1545490702.0, "step": 2018 }, { "epoch": 0.7397636713382797, "grad_norm": 0.17855093619126383, "learning_rate": 3.870464916406752e-05, "loss": 0.5795, "num_tokens": 1546197514.0, "step": 2019 }, { "epoch": 0.7401300723642026, "grad_norm": 0.1846947806906981, "learning_rate": 3.8703057168488814e-05, "loss": 0.5647, "num_tokens": 1546997664.0, "step": 2020 }, { "epoch": 0.7404964733901255, "grad_norm": 0.18872655855196074, "learning_rate": 3.8701464231784e-05, "loss": 0.591, "num_tokens": 1547718683.0, "step": 2021 }, { "epoch": 0.7408628744160484, "grad_norm": 0.19470774091317863, "learning_rate": 3.869987035404283e-05, "loss": 0.6161, "num_tokens": 1548553851.0, "step": 2022 }, { "epoch": 0.7412292754419713, "grad_norm": 0.1731487914918439, "learning_rate": 3.8698275535355126e-05, "loss": 0.5552, "num_tokens": 1549255030.0, "step": 2023 }, { "epoch": 0.7415956764678941, "grad_norm": 0.19718761520318348, "learning_rate": 3.869667977581073e-05, "loss": 0.6039, "num_tokens": 1550064421.0, "step": 2024 }, { "epoch": 0.7419620774938169, "grad_norm": 0.18565555107316298, "learning_rate": 3.8695083075499565e-05, "loss": 0.5562, "num_tokens": 1550745111.0, "step": 2025 }, { "epoch": 0.7423284785197398, "grad_norm": 0.17429669698076697, "learning_rate": 3.8693485434511595e-05, "loss": 0.5556, "num_tokens": 1551607410.0, "step": 2026 }, { "epoch": 0.7426948795456627, "grad_norm": 0.21991248519920548, "learning_rate": 3.869188685293683e-05, "loss": 0.5434, "num_tokens": 1552386615.0, "step": 2027 }, { "epoch": 0.7430612805715856, "grad_norm": 0.21616434455831796, "learning_rate": 3.869028733086535e-05, "loss": 0.5876, "num_tokens": 1553147553.0, "step": 2028 }, { "epoch": 0.7434276815975085, "grad_norm": 0.18744040864203493, "learning_rate": 3.868868686838728e-05, "loss": 0.5861, "num_tokens": 1553848755.0, "step": 2029 }, { "epoch": 0.7437940826234314, "grad_norm": 0.20359368030819905, "learning_rate": 3.868708546559279e-05, "loss": 0.6033, "num_tokens": 1554583283.0, "step": 2030 }, { "epoch": 0.7441604836493542, "grad_norm": 0.17831886014630344, "learning_rate": 3.868548312257211e-05, "loss": 0.5528, "num_tokens": 1555429202.0, "step": 2031 }, { "epoch": 0.7445268846752771, "grad_norm": 0.2067806177541501, "learning_rate": 3.868387983941554e-05, "loss": 0.5606, "num_tokens": 1556180910.0, "step": 2032 }, { "epoch": 0.7448932857012, "grad_norm": 0.17421423825513135, "learning_rate": 3.868227561621339e-05, "loss": 0.5603, "num_tokens": 1556946294.0, "step": 2033 }, { "epoch": 0.7452596867271228, "grad_norm": 0.20614184928420656, "learning_rate": 3.868067045305607e-05, "loss": 0.5576, "num_tokens": 1557584499.0, "step": 2034 }, { "epoch": 0.7456260877530457, "grad_norm": 0.19758584319203637, "learning_rate": 3.867906435003402e-05, "loss": 0.5605, "num_tokens": 1558432779.0, "step": 2035 }, { "epoch": 0.7459924887789686, "grad_norm": 0.1948838649776083, "learning_rate": 3.867745730723772e-05, "loss": 0.6006, "num_tokens": 1559078338.0, "step": 2036 }, { "epoch": 0.7463588898048914, "grad_norm": 0.2226508862164221, "learning_rate": 3.867584932475774e-05, "loss": 0.5645, "num_tokens": 1559892330.0, "step": 2037 }, { "epoch": 0.7467252908308143, "grad_norm": 0.17258747974489705, "learning_rate": 3.867424040268466e-05, "loss": 0.5341, "num_tokens": 1560714112.0, "step": 2038 }, { "epoch": 0.7470916918567372, "grad_norm": 0.17643617431210099, "learning_rate": 3.8672630541109145e-05, "loss": 0.5418, "num_tokens": 1561491518.0, "step": 2039 }, { "epoch": 0.7474580928826601, "grad_norm": 0.18242744889768917, "learning_rate": 3.86710197401219e-05, "loss": 0.5725, "num_tokens": 1562316784.0, "step": 2040 }, { "epoch": 0.747824493908583, "grad_norm": 0.19393475336024515, "learning_rate": 3.866940799981369e-05, "loss": 0.5865, "num_tokens": 1562936138.0, "step": 2041 }, { "epoch": 0.7481908949345059, "grad_norm": 0.18862653921212044, "learning_rate": 3.8667795320275316e-05, "loss": 0.5524, "num_tokens": 1563631051.0, "step": 2042 }, { "epoch": 0.7485572959604286, "grad_norm": 0.17867533535310284, "learning_rate": 3.8666181701597655e-05, "loss": 0.5863, "num_tokens": 1564285824.0, "step": 2043 }, { "epoch": 0.7489236969863515, "grad_norm": 0.17207134711645364, "learning_rate": 3.866456714387162e-05, "loss": 0.5736, "num_tokens": 1565060970.0, "step": 2044 }, { "epoch": 0.7492900980122744, "grad_norm": 0.1689294673033866, "learning_rate": 3.8662951647188186e-05, "loss": 0.5835, "num_tokens": 1565736254.0, "step": 2045 }, { "epoch": 0.7496564990381973, "grad_norm": 0.18338082583486323, "learning_rate": 3.866133521163837e-05, "loss": 0.548, "num_tokens": 1566434120.0, "step": 2046 }, { "epoch": 0.7500229000641202, "grad_norm": 0.1508767319868877, "learning_rate": 3.865971783731326e-05, "loss": 0.5345, "num_tokens": 1567267147.0, "step": 2047 }, { "epoch": 0.7503893010900431, "grad_norm": 0.20389057492007368, "learning_rate": 3.865809952430398e-05, "loss": 0.5347, "num_tokens": 1568049789.0, "step": 2048 }, { "epoch": 0.7507557021159659, "grad_norm": 0.2189759975548098, "learning_rate": 3.865648027270171e-05, "loss": 0.5692, "num_tokens": 1568840349.0, "step": 2049 }, { "epoch": 0.7511221031418888, "grad_norm": 0.16830752057069814, "learning_rate": 3.865486008259769e-05, "loss": 0.556, "num_tokens": 1569607106.0, "step": 2050 }, { "epoch": 0.7514885041678117, "grad_norm": 0.1949890924003802, "learning_rate": 3.865323895408321e-05, "loss": 0.5445, "num_tokens": 1570371777.0, "step": 2051 }, { "epoch": 0.7518549051937345, "grad_norm": 0.18130945127669226, "learning_rate": 3.8651616887249616e-05, "loss": 0.5883, "num_tokens": 1571148807.0, "step": 2052 }, { "epoch": 0.7522213062196574, "grad_norm": 0.17293802629666824, "learning_rate": 3.8649993882188296e-05, "loss": 0.5643, "num_tokens": 1571906463.0, "step": 2053 }, { "epoch": 0.7525877072455803, "grad_norm": 0.18622599399038578, "learning_rate": 3.864836993899069e-05, "loss": 0.5317, "num_tokens": 1572733199.0, "step": 2054 }, { "epoch": 0.7529541082715031, "grad_norm": 0.1884805414642711, "learning_rate": 3.8646745057748315e-05, "loss": 0.5948, "num_tokens": 1573465587.0, "step": 2055 }, { "epoch": 0.753320509297426, "grad_norm": 0.18402153290602882, "learning_rate": 3.8645119238552714e-05, "loss": 0.5472, "num_tokens": 1574146634.0, "step": 2056 }, { "epoch": 0.7536869103233489, "grad_norm": 0.18392372130750143, "learning_rate": 3.864349248149549e-05, "loss": 0.5597, "num_tokens": 1574797136.0, "step": 2057 }, { "epoch": 0.7540533113492718, "grad_norm": 0.1744624013418272, "learning_rate": 3.864186478666831e-05, "loss": 0.5414, "num_tokens": 1575673972.0, "step": 2058 }, { "epoch": 0.7544197123751947, "grad_norm": 0.19129430124223415, "learning_rate": 3.864023615416289e-05, "loss": 0.5815, "num_tokens": 1576509424.0, "step": 2059 }, { "epoch": 0.7547861134011176, "grad_norm": 0.18430547229430744, "learning_rate": 3.8638606584070975e-05, "loss": 0.5621, "num_tokens": 1577336496.0, "step": 2060 }, { "epoch": 0.7551525144270403, "grad_norm": 0.18227269648855363, "learning_rate": 3.86369760764844e-05, "loss": 0.5734, "num_tokens": 1578019344.0, "step": 2061 }, { "epoch": 0.7555189154529632, "grad_norm": 0.1720686627939183, "learning_rate": 3.863534463149503e-05, "loss": 0.5365, "num_tokens": 1578764954.0, "step": 2062 }, { "epoch": 0.7558853164788861, "grad_norm": 0.196703391520974, "learning_rate": 3.863371224919479e-05, "loss": 0.5754, "num_tokens": 1579443473.0, "step": 2063 }, { "epoch": 0.756251717504809, "grad_norm": 0.19439414405112146, "learning_rate": 3.8632078929675654e-05, "loss": 0.601, "num_tokens": 1580063735.0, "step": 2064 }, { "epoch": 0.7566181185307319, "grad_norm": 0.17221300874748058, "learning_rate": 3.863044467302964e-05, "loss": 0.5571, "num_tokens": 1580829646.0, "step": 2065 }, { "epoch": 0.7569845195566548, "grad_norm": 0.17064573153861112, "learning_rate": 3.862880947934886e-05, "loss": 0.5483, "num_tokens": 1581652749.0, "step": 2066 }, { "epoch": 0.7573509205825776, "grad_norm": 0.21541131448434064, "learning_rate": 3.8627173348725404e-05, "loss": 0.6136, "num_tokens": 1582382699.0, "step": 2067 }, { "epoch": 0.7577173216085005, "grad_norm": 0.1710286382838325, "learning_rate": 3.86255362812515e-05, "loss": 0.5591, "num_tokens": 1583194009.0, "step": 2068 }, { "epoch": 0.7580837226344234, "grad_norm": 0.17680471174368778, "learning_rate": 3.862389827701936e-05, "loss": 0.5417, "num_tokens": 1584012032.0, "step": 2069 }, { "epoch": 0.7584501236603463, "grad_norm": 0.21139501100678126, "learning_rate": 3.86222593361213e-05, "loss": 0.5774, "num_tokens": 1584836214.0, "step": 2070 }, { "epoch": 0.7588165246862691, "grad_norm": 0.16872022608530118, "learning_rate": 3.8620619458649645e-05, "loss": 0.5816, "num_tokens": 1585585105.0, "step": 2071 }, { "epoch": 0.759182925712192, "grad_norm": 0.1701089241287386, "learning_rate": 3.8618978644696804e-05, "loss": 0.573, "num_tokens": 1586346453.0, "step": 2072 }, { "epoch": 0.7595493267381148, "grad_norm": 0.18245290687051063, "learning_rate": 3.861733689435522e-05, "loss": 0.5891, "num_tokens": 1587033351.0, "step": 2073 }, { "epoch": 0.7599157277640377, "grad_norm": 0.1631799308124619, "learning_rate": 3.86156942077174e-05, "loss": 0.5465, "num_tokens": 1587928942.0, "step": 2074 }, { "epoch": 0.7602821287899606, "grad_norm": 0.18367060280583705, "learning_rate": 3.861405058487591e-05, "loss": 0.5514, "num_tokens": 1588677983.0, "step": 2075 }, { "epoch": 0.7606485298158835, "grad_norm": 0.1655806849608702, "learning_rate": 3.861240602592335e-05, "loss": 0.5754, "num_tokens": 1589402325.0, "step": 2076 }, { "epoch": 0.7610149308418064, "grad_norm": 0.1586151748519268, "learning_rate": 3.8610760530952375e-05, "loss": 0.5427, "num_tokens": 1590170851.0, "step": 2077 }, { "epoch": 0.7613813318677293, "grad_norm": 0.17489407291521222, "learning_rate": 3.8609114100055714e-05, "loss": 0.558, "num_tokens": 1590920797.0, "step": 2078 }, { "epoch": 0.761747732893652, "grad_norm": 0.16232582932675257, "learning_rate": 3.8607466733326126e-05, "loss": 0.5365, "num_tokens": 1591703411.0, "step": 2079 }, { "epoch": 0.7621141339195749, "grad_norm": 0.16151568305654354, "learning_rate": 3.860581843085644e-05, "loss": 0.5871, "num_tokens": 1592411663.0, "step": 2080 }, { "epoch": 0.7624805349454978, "grad_norm": 0.17929152205479928, "learning_rate": 3.8604169192739515e-05, "loss": 0.5611, "num_tokens": 1593307293.0, "step": 2081 }, { "epoch": 0.7628469359714207, "grad_norm": 0.16102992823809328, "learning_rate": 3.8602519019068284e-05, "loss": 0.5929, "num_tokens": 1594127742.0, "step": 2082 }, { "epoch": 0.7632133369973436, "grad_norm": 0.17886168626818705, "learning_rate": 3.860086790993573e-05, "loss": 0.5497, "num_tokens": 1594892218.0, "step": 2083 }, { "epoch": 0.7635797380232665, "grad_norm": 0.18068025106591387, "learning_rate": 3.8599215865434876e-05, "loss": 0.5605, "num_tokens": 1595736767.0, "step": 2084 }, { "epoch": 0.7639461390491893, "grad_norm": 0.17507329336220084, "learning_rate": 3.85975628856588e-05, "loss": 0.5687, "num_tokens": 1596644846.0, "step": 2085 }, { "epoch": 0.7643125400751122, "grad_norm": 0.1576174960288548, "learning_rate": 3.859590897070066e-05, "loss": 0.5366, "num_tokens": 1597551355.0, "step": 2086 }, { "epoch": 0.7646789411010351, "grad_norm": 0.19482637269241146, "learning_rate": 3.859425412065363e-05, "loss": 0.5767, "num_tokens": 1598356120.0, "step": 2087 }, { "epoch": 0.765045342126958, "grad_norm": 0.17024482407396727, "learning_rate": 3.859259833561094e-05, "loss": 0.5575, "num_tokens": 1599102026.0, "step": 2088 }, { "epoch": 0.7654117431528809, "grad_norm": 0.18969022815915743, "learning_rate": 3.8590941615665914e-05, "loss": 0.5968, "num_tokens": 1599859973.0, "step": 2089 }, { "epoch": 0.7657781441788037, "grad_norm": 0.19373644036354873, "learning_rate": 3.858928396091188e-05, "loss": 0.5477, "num_tokens": 1600767899.0, "step": 2090 }, { "epoch": 0.7661445452047265, "grad_norm": 0.16947312025107159, "learning_rate": 3.8587625371442235e-05, "loss": 0.5629, "num_tokens": 1601553747.0, "step": 2091 }, { "epoch": 0.7665109462306494, "grad_norm": 0.2230123505140691, "learning_rate": 3.858596584735044e-05, "loss": 0.5548, "num_tokens": 1602230885.0, "step": 2092 }, { "epoch": 0.7668773472565723, "grad_norm": 0.17173235726291283, "learning_rate": 3.8584305388730006e-05, "loss": 0.5341, "num_tokens": 1603080095.0, "step": 2093 }, { "epoch": 0.7672437482824952, "grad_norm": 0.21214568487558647, "learning_rate": 3.858264399567447e-05, "loss": 0.572, "num_tokens": 1603822330.0, "step": 2094 }, { "epoch": 0.7676101493084181, "grad_norm": 0.1731627448831024, "learning_rate": 3.858098166827746e-05, "loss": 0.5552, "num_tokens": 1604625200.0, "step": 2095 }, { "epoch": 0.767976550334341, "grad_norm": 0.20994022980115476, "learning_rate": 3.857931840663264e-05, "loss": 0.5891, "num_tokens": 1605261941.0, "step": 2096 }, { "epoch": 0.7683429513602638, "grad_norm": 0.18177278166183586, "learning_rate": 3.857765421083372e-05, "loss": 0.5396, "num_tokens": 1606035049.0, "step": 2097 }, { "epoch": 0.7687093523861867, "grad_norm": 0.17181378097938305, "learning_rate": 3.857598908097446e-05, "loss": 0.5769, "num_tokens": 1606790822.0, "step": 2098 }, { "epoch": 0.7690757534121095, "grad_norm": 0.20010288859602626, "learning_rate": 3.8574323017148694e-05, "loss": 0.5738, "num_tokens": 1607462546.0, "step": 2099 }, { "epoch": 0.7694421544380324, "grad_norm": 0.1640373171929773, "learning_rate": 3.857265601945029e-05, "loss": 0.5653, "num_tokens": 1608087951.0, "step": 2100 }, { "epoch": 0.7698085554639553, "grad_norm": 0.22748449163086817, "learning_rate": 3.857098808797317e-05, "loss": 0.5622, "num_tokens": 1608881948.0, "step": 2101 }, { "epoch": 0.7701749564898782, "grad_norm": 0.17241648634211915, "learning_rate": 3.856931922281133e-05, "loss": 0.5395, "num_tokens": 1609652015.0, "step": 2102 }, { "epoch": 0.770541357515801, "grad_norm": 0.1863086416697637, "learning_rate": 3.856764942405878e-05, "loss": 0.537, "num_tokens": 1610539626.0, "step": 2103 }, { "epoch": 0.7709077585417239, "grad_norm": 0.1684361084938555, "learning_rate": 3.856597869180961e-05, "loss": 0.5479, "num_tokens": 1611340833.0, "step": 2104 }, { "epoch": 0.7712741595676468, "grad_norm": 0.1645929809616121, "learning_rate": 3.856430702615797e-05, "loss": 0.5231, "num_tokens": 1612144134.0, "step": 2105 }, { "epoch": 0.7716405605935697, "grad_norm": 0.18240675993067598, "learning_rate": 3.856263442719803e-05, "loss": 0.569, "num_tokens": 1612799422.0, "step": 2106 }, { "epoch": 0.7720069616194926, "grad_norm": 0.1620428373841708, "learning_rate": 3.856096089502405e-05, "loss": 0.5201, "num_tokens": 1613522108.0, "step": 2107 }, { "epoch": 0.7723733626454155, "grad_norm": 0.17074643357723895, "learning_rate": 3.8559286429730304e-05, "loss": 0.5469, "num_tokens": 1614248740.0, "step": 2108 }, { "epoch": 0.7727397636713382, "grad_norm": 0.1775622784903721, "learning_rate": 3.8557611031411155e-05, "loss": 0.5277, "num_tokens": 1615118532.0, "step": 2109 }, { "epoch": 0.7731061646972611, "grad_norm": 0.15782036824047987, "learning_rate": 3.8555934700161006e-05, "loss": 0.5717, "num_tokens": 1615886389.0, "step": 2110 }, { "epoch": 0.773472565723184, "grad_norm": 0.18441814692994365, "learning_rate": 3.855425743607429e-05, "loss": 0.5553, "num_tokens": 1616623470.0, "step": 2111 }, { "epoch": 0.7738389667491069, "grad_norm": 0.21012571351291398, "learning_rate": 3.855257923924553e-05, "loss": 0.5995, "num_tokens": 1617340994.0, "step": 2112 }, { "epoch": 0.7742053677750298, "grad_norm": 0.16052594102164222, "learning_rate": 3.8550900109769274e-05, "loss": 0.5428, "num_tokens": 1618093236.0, "step": 2113 }, { "epoch": 0.7745717688009527, "grad_norm": 0.17174055818889924, "learning_rate": 3.8549220047740134e-05, "loss": 0.5635, "num_tokens": 1618928782.0, "step": 2114 }, { "epoch": 0.7749381698268755, "grad_norm": 0.1783597162963152, "learning_rate": 3.854753905325277e-05, "loss": 0.5434, "num_tokens": 1619601772.0, "step": 2115 }, { "epoch": 0.7753045708527984, "grad_norm": 0.18939063359439107, "learning_rate": 3.8545857126401895e-05, "loss": 0.5604, "num_tokens": 1620217691.0, "step": 2116 }, { "epoch": 0.7756709718787212, "grad_norm": 0.17348948045495957, "learning_rate": 3.854417426728229e-05, "loss": 0.5216, "num_tokens": 1620969566.0, "step": 2117 }, { "epoch": 0.7760373729046441, "grad_norm": 0.1765260757184881, "learning_rate": 3.854249047598876e-05, "loss": 0.5723, "num_tokens": 1621719853.0, "step": 2118 }, { "epoch": 0.776403773930567, "grad_norm": 0.21539803947634156, "learning_rate": 3.854080575261618e-05, "loss": 0.5786, "num_tokens": 1622546694.0, "step": 2119 }, { "epoch": 0.7767701749564899, "grad_norm": 0.1755518371610993, "learning_rate": 3.853912009725948e-05, "loss": 0.5776, "num_tokens": 1623363404.0, "step": 2120 }, { "epoch": 0.7771365759824127, "grad_norm": 0.22386439913060668, "learning_rate": 3.853743351001364e-05, "loss": 0.5877, "num_tokens": 1624039325.0, "step": 2121 }, { "epoch": 0.7775029770083356, "grad_norm": 0.23000686631561046, "learning_rate": 3.853574599097368e-05, "loss": 0.5591, "num_tokens": 1624627473.0, "step": 2122 }, { "epoch": 0.7778693780342585, "grad_norm": 0.16048606722719827, "learning_rate": 3.853405754023469e-05, "loss": 0.5366, "num_tokens": 1625340535.0, "step": 2123 }, { "epoch": 0.7782357790601814, "grad_norm": 0.19493967913954058, "learning_rate": 3.8532368157891796e-05, "loss": 0.5707, "num_tokens": 1626077444.0, "step": 2124 }, { "epoch": 0.7786021800861043, "grad_norm": 0.1892015045193796, "learning_rate": 3.85306778440402e-05, "loss": 0.5782, "num_tokens": 1626980927.0, "step": 2125 }, { "epoch": 0.7789685811120272, "grad_norm": 0.1627026411997614, "learning_rate": 3.852898659877513e-05, "loss": 0.5345, "num_tokens": 1627793129.0, "step": 2126 }, { "epoch": 0.7793349821379499, "grad_norm": 0.17734180614965442, "learning_rate": 3.852729442219188e-05, "loss": 0.5846, "num_tokens": 1628551552.0, "step": 2127 }, { "epoch": 0.7797013831638728, "grad_norm": 0.17280043622670035, "learning_rate": 3.8525601314385804e-05, "loss": 0.5615, "num_tokens": 1629315240.0, "step": 2128 }, { "epoch": 0.7800677841897957, "grad_norm": 0.16035207079793465, "learning_rate": 3.8523907275452294e-05, "loss": 0.5257, "num_tokens": 1630100979.0, "step": 2129 }, { "epoch": 0.7804341852157186, "grad_norm": 0.18051204302759907, "learning_rate": 3.852221230548679e-05, "loss": 0.5653, "num_tokens": 1630853991.0, "step": 2130 }, { "epoch": 0.7808005862416415, "grad_norm": 0.1860792187900819, "learning_rate": 3.852051640458481e-05, "loss": 0.5723, "num_tokens": 1631635297.0, "step": 2131 }, { "epoch": 0.7811669872675644, "grad_norm": 0.19236888663459475, "learning_rate": 3.8518819572841904e-05, "loss": 0.5424, "num_tokens": 1632507276.0, "step": 2132 }, { "epoch": 0.7815333882934872, "grad_norm": 0.17069650877510856, "learning_rate": 3.8517121810353664e-05, "loss": 0.5499, "num_tokens": 1633321539.0, "step": 2133 }, { "epoch": 0.7818997893194101, "grad_norm": 0.1626306064396581, "learning_rate": 3.851542311721578e-05, "loss": 0.549, "num_tokens": 1634053708.0, "step": 2134 }, { "epoch": 0.782266190345333, "grad_norm": 0.20880193730819457, "learning_rate": 3.851372349352393e-05, "loss": 0.6108, "num_tokens": 1634755000.0, "step": 2135 }, { "epoch": 0.7826325913712558, "grad_norm": 0.15477030412500928, "learning_rate": 3.8512022939373904e-05, "loss": 0.5542, "num_tokens": 1635568457.0, "step": 2136 }, { "epoch": 0.7829989923971787, "grad_norm": 0.1712336163930956, "learning_rate": 3.8510321454861504e-05, "loss": 0.5256, "num_tokens": 1636313805.0, "step": 2137 }, { "epoch": 0.7833653934231016, "grad_norm": 0.20800278890709012, "learning_rate": 3.850861904008261e-05, "loss": 0.5758, "num_tokens": 1637093975.0, "step": 2138 }, { "epoch": 0.7837317944490244, "grad_norm": 0.17945259624185622, "learning_rate": 3.850691569513314e-05, "loss": 0.5868, "num_tokens": 1637935828.0, "step": 2139 }, { "epoch": 0.7840981954749473, "grad_norm": 0.1848441828515672, "learning_rate": 3.850521142010907e-05, "loss": 0.5491, "num_tokens": 1638871755.0, "step": 2140 }, { "epoch": 0.7844645965008702, "grad_norm": 0.1441732781583359, "learning_rate": 3.850350621510642e-05, "loss": 0.5868, "num_tokens": 1639725069.0, "step": 2141 }, { "epoch": 0.7848309975267931, "grad_norm": 0.16254560123581238, "learning_rate": 3.8501800080221275e-05, "loss": 0.5549, "num_tokens": 1640554870.0, "step": 2142 }, { "epoch": 0.785197398552716, "grad_norm": 0.16226683237532458, "learning_rate": 3.8500093015549765e-05, "loss": 0.5291, "num_tokens": 1641260634.0, "step": 2143 }, { "epoch": 0.7855637995786389, "grad_norm": 0.19477016166322098, "learning_rate": 3.849838502118807e-05, "loss": 0.5936, "num_tokens": 1641948165.0, "step": 2144 }, { "epoch": 0.7859302006045616, "grad_norm": 0.15702479111230724, "learning_rate": 3.849667609723243e-05, "loss": 0.5282, "num_tokens": 1642713287.0, "step": 2145 }, { "epoch": 0.7862966016304845, "grad_norm": 0.2002640923838135, "learning_rate": 3.8494966243779135e-05, "loss": 0.5684, "num_tokens": 1643482565.0, "step": 2146 }, { "epoch": 0.7866630026564074, "grad_norm": 0.15692175198308725, "learning_rate": 3.849325546092452e-05, "loss": 0.5609, "num_tokens": 1644294280.0, "step": 2147 }, { "epoch": 0.7870294036823303, "grad_norm": 0.18547555204141647, "learning_rate": 3.849154374876498e-05, "loss": 0.5725, "num_tokens": 1645121855.0, "step": 2148 }, { "epoch": 0.7873958047082532, "grad_norm": 0.18404449770805353, "learning_rate": 3.848983110739697e-05, "loss": 0.5738, "num_tokens": 1645890812.0, "step": 2149 }, { "epoch": 0.7877622057341761, "grad_norm": 0.1813027814364638, "learning_rate": 3.8488117536916975e-05, "loss": 0.5361, "num_tokens": 1646671266.0, "step": 2150 }, { "epoch": 0.7881286067600989, "grad_norm": 0.1842203201894288, "learning_rate": 3.8486403037421556e-05, "loss": 0.581, "num_tokens": 1647424510.0, "step": 2151 }, { "epoch": 0.7884950077860218, "grad_norm": 0.19348598791433558, "learning_rate": 3.848468760900731e-05, "loss": 0.5979, "num_tokens": 1648176235.0, "step": 2152 }, { "epoch": 0.7888614088119447, "grad_norm": 0.1683981092151369, "learning_rate": 3.848297125177089e-05, "loss": 0.5447, "num_tokens": 1648910255.0, "step": 2153 }, { "epoch": 0.7892278098378676, "grad_norm": 0.21796108553664445, "learning_rate": 3.8481253965809e-05, "loss": 0.5887, "num_tokens": 1649634137.0, "step": 2154 }, { "epoch": 0.7895942108637904, "grad_norm": 0.16246730663259332, "learning_rate": 3.847953575121842e-05, "loss": 0.5434, "num_tokens": 1650436962.0, "step": 2155 }, { "epoch": 0.7899606118897133, "grad_norm": 0.2734164526914161, "learning_rate": 3.847781660809594e-05, "loss": 0.5629, "num_tokens": 1651182048.0, "step": 2156 }, { "epoch": 0.7903270129156361, "grad_norm": 0.1980002315138792, "learning_rate": 3.847609653653843e-05, "loss": 0.5454, "num_tokens": 1651961778.0, "step": 2157 }, { "epoch": 0.790693413941559, "grad_norm": 0.1614646483348579, "learning_rate": 3.847437553664281e-05, "loss": 0.5359, "num_tokens": 1652745709.0, "step": 2158 }, { "epoch": 0.7910598149674819, "grad_norm": 0.18460029187365115, "learning_rate": 3.847265360850606e-05, "loss": 0.5291, "num_tokens": 1653554391.0, "step": 2159 }, { "epoch": 0.7914262159934048, "grad_norm": 0.1753263306279607, "learning_rate": 3.847093075222517e-05, "loss": 0.5552, "num_tokens": 1654192619.0, "step": 2160 }, { "epoch": 0.7917926170193277, "grad_norm": 0.17055462475333427, "learning_rate": 3.846920696789724e-05, "loss": 0.5732, "num_tokens": 1654990612.0, "step": 2161 }, { "epoch": 0.7921590180452506, "grad_norm": 0.18437808507051012, "learning_rate": 3.846748225561939e-05, "loss": 0.5909, "num_tokens": 1655736797.0, "step": 2162 }, { "epoch": 0.7925254190711734, "grad_norm": 0.1717120633043978, "learning_rate": 3.84657566154888e-05, "loss": 0.5443, "num_tokens": 1656579540.0, "step": 2163 }, { "epoch": 0.7928918200970962, "grad_norm": 0.1896357294985568, "learning_rate": 3.846403004760269e-05, "loss": 0.5471, "num_tokens": 1657495701.0, "step": 2164 }, { "epoch": 0.7932582211230191, "grad_norm": 0.18579609458042834, "learning_rate": 3.846230255205835e-05, "loss": 0.6002, "num_tokens": 1658223157.0, "step": 2165 }, { "epoch": 0.793624622148942, "grad_norm": 0.19503930626762686, "learning_rate": 3.846057412895312e-05, "loss": 0.6282, "num_tokens": 1658935052.0, "step": 2166 }, { "epoch": 0.7939910231748649, "grad_norm": 0.17797723668437126, "learning_rate": 3.845884477838438e-05, "loss": 0.5727, "num_tokens": 1659617143.0, "step": 2167 }, { "epoch": 0.7943574242007878, "grad_norm": 0.16989743910643307, "learning_rate": 3.845711450044957e-05, "loss": 0.5827, "num_tokens": 1660404840.0, "step": 2168 }, { "epoch": 0.7947238252267106, "grad_norm": 0.1714847633027524, "learning_rate": 3.8455383295246186e-05, "loss": 0.5578, "num_tokens": 1661187235.0, "step": 2169 }, { "epoch": 0.7950902262526335, "grad_norm": 0.17070403114113042, "learning_rate": 3.8453651162871764e-05, "loss": 0.5692, "num_tokens": 1661897737.0, "step": 2170 }, { "epoch": 0.7954566272785564, "grad_norm": 0.17579540376284827, "learning_rate": 3.84519181034239e-05, "loss": 0.5409, "num_tokens": 1662694550.0, "step": 2171 }, { "epoch": 0.7958230283044793, "grad_norm": 0.16020882085883698, "learning_rate": 3.845018411700026e-05, "loss": 0.5324, "num_tokens": 1663460783.0, "step": 2172 }, { "epoch": 0.7961894293304022, "grad_norm": 0.1808341168338672, "learning_rate": 3.8448449203698525e-05, "loss": 0.5531, "num_tokens": 1664275158.0, "step": 2173 }, { "epoch": 0.796555830356325, "grad_norm": 0.1765072959922799, "learning_rate": 3.844671336361645e-05, "loss": 0.551, "num_tokens": 1665074952.0, "step": 2174 }, { "epoch": 0.7969222313822478, "grad_norm": 0.17388135637969945, "learning_rate": 3.8444976596851856e-05, "loss": 0.5265, "num_tokens": 1665861204.0, "step": 2175 }, { "epoch": 0.7972886324081707, "grad_norm": 0.2223054470182347, "learning_rate": 3.844323890350258e-05, "loss": 0.5437, "num_tokens": 1666654319.0, "step": 2176 }, { "epoch": 0.7976550334340936, "grad_norm": 0.17118990916623333, "learning_rate": 3.844150028366655e-05, "loss": 0.537, "num_tokens": 1667387341.0, "step": 2177 }, { "epoch": 0.7980214344600165, "grad_norm": 0.2293334910352518, "learning_rate": 3.843976073744172e-05, "loss": 0.5397, "num_tokens": 1668079894.0, "step": 2178 }, { "epoch": 0.7983878354859394, "grad_norm": 0.2174159785350426, "learning_rate": 3.84380202649261e-05, "loss": 0.5546, "num_tokens": 1668903328.0, "step": 2179 }, { "epoch": 0.7987542365118623, "grad_norm": 0.19159321134736415, "learning_rate": 3.843627886621776e-05, "loss": 0.5978, "num_tokens": 1669693380.0, "step": 2180 }, { "epoch": 0.7991206375377851, "grad_norm": 0.21334960116744078, "learning_rate": 3.8434536541414804e-05, "loss": 0.5513, "num_tokens": 1670390683.0, "step": 2181 }, { "epoch": 0.799487038563708, "grad_norm": 0.18918745045707122, "learning_rate": 3.843279329061543e-05, "loss": 0.5362, "num_tokens": 1671057306.0, "step": 2182 }, { "epoch": 0.7998534395896308, "grad_norm": 0.19023564088487332, "learning_rate": 3.843104911391784e-05, "loss": 0.5692, "num_tokens": 1671779285.0, "step": 2183 }, { "epoch": 0.8002198406155537, "grad_norm": 0.1998626605928889, "learning_rate": 3.842930401142032e-05, "loss": 0.5687, "num_tokens": 1672514833.0, "step": 2184 }, { "epoch": 0.8005862416414766, "grad_norm": 0.199753449855593, "learning_rate": 3.842755798322118e-05, "loss": 0.5793, "num_tokens": 1673264676.0, "step": 2185 }, { "epoch": 0.8009526426673995, "grad_norm": 0.1993216311609995, "learning_rate": 3.842581102941883e-05, "loss": 0.5949, "num_tokens": 1674012132.0, "step": 2186 }, { "epoch": 0.8013190436933223, "grad_norm": 0.16710872946425684, "learning_rate": 3.842406315011167e-05, "loss": 0.5635, "num_tokens": 1674744435.0, "step": 2187 }, { "epoch": 0.8016854447192452, "grad_norm": 0.18703292739870028, "learning_rate": 3.84223143453982e-05, "loss": 0.5476, "num_tokens": 1675576857.0, "step": 2188 }, { "epoch": 0.8020518457451681, "grad_norm": 0.19353660899846944, "learning_rate": 3.8420564615376955e-05, "loss": 0.5815, "num_tokens": 1676267775.0, "step": 2189 }, { "epoch": 0.802418246771091, "grad_norm": 0.1965324871895375, "learning_rate": 3.841881396014651e-05, "loss": 0.5469, "num_tokens": 1677011595.0, "step": 2190 }, { "epoch": 0.8027846477970139, "grad_norm": 0.18397704104023194, "learning_rate": 3.8417062379805526e-05, "loss": 0.6051, "num_tokens": 1677706465.0, "step": 2191 }, { "epoch": 0.8031510488229368, "grad_norm": 0.20465066888362624, "learning_rate": 3.841530987445267e-05, "loss": 0.5529, "num_tokens": 1678452848.0, "step": 2192 }, { "epoch": 0.8035174498488595, "grad_norm": 0.17241110788629185, "learning_rate": 3.841355644418671e-05, "loss": 0.5422, "num_tokens": 1679299673.0, "step": 2193 }, { "epoch": 0.8038838508747824, "grad_norm": 0.17594771214767443, "learning_rate": 3.8411802089106425e-05, "loss": 0.6061, "num_tokens": 1679976069.0, "step": 2194 }, { "epoch": 0.8042502519007053, "grad_norm": 0.18712132106632848, "learning_rate": 3.841004680931067e-05, "loss": 0.5595, "num_tokens": 1680658772.0, "step": 2195 }, { "epoch": 0.8046166529266282, "grad_norm": 0.20980597630441755, "learning_rate": 3.840829060489835e-05, "loss": 0.5502, "num_tokens": 1681483955.0, "step": 2196 }, { "epoch": 0.8049830539525511, "grad_norm": 0.19862429754286887, "learning_rate": 3.8406533475968416e-05, "loss": 0.5602, "num_tokens": 1682177334.0, "step": 2197 }, { "epoch": 0.805349454978474, "grad_norm": 0.19329165036512014, "learning_rate": 3.840477542261986e-05, "loss": 0.5577, "num_tokens": 1682882677.0, "step": 2198 }, { "epoch": 0.8057158560043968, "grad_norm": 0.1796587064233787, "learning_rate": 3.8403016444951756e-05, "loss": 0.5809, "num_tokens": 1683632061.0, "step": 2199 }, { "epoch": 0.8060822570303197, "grad_norm": 0.16796901157404384, "learning_rate": 3.84012565430632e-05, "loss": 0.5705, "num_tokens": 1684521613.0, "step": 2200 }, { "epoch": 0.8064486580562426, "grad_norm": 0.1838434358605601, "learning_rate": 3.839949571705336e-05, "loss": 0.5871, "num_tokens": 1685269952.0, "step": 2201 }, { "epoch": 0.8068150590821654, "grad_norm": 0.19298777063329417, "learning_rate": 3.839773396702145e-05, "loss": 0.558, "num_tokens": 1686069295.0, "step": 2202 }, { "epoch": 0.8071814601080883, "grad_norm": 0.17040604906978465, "learning_rate": 3.839597129306673e-05, "loss": 0.5447, "num_tokens": 1686754882.0, "step": 2203 }, { "epoch": 0.8075478611340112, "grad_norm": 0.22481624400508193, "learning_rate": 3.839420769528852e-05, "loss": 0.588, "num_tokens": 1687411927.0, "step": 2204 }, { "epoch": 0.807914262159934, "grad_norm": 0.2053677413947612, "learning_rate": 3.839244317378619e-05, "loss": 0.5765, "num_tokens": 1688162461.0, "step": 2205 }, { "epoch": 0.8082806631858569, "grad_norm": 0.20486573394812063, "learning_rate": 3.8390677728659146e-05, "loss": 0.5914, "num_tokens": 1688855334.0, "step": 2206 }, { "epoch": 0.8086470642117798, "grad_norm": 0.19978738184882616, "learning_rate": 3.838891136000689e-05, "loss": 0.5311, "num_tokens": 1689551549.0, "step": 2207 }, { "epoch": 0.8090134652377027, "grad_norm": 0.1586617545891741, "learning_rate": 3.838714406792892e-05, "loss": 0.5437, "num_tokens": 1690324682.0, "step": 2208 }, { "epoch": 0.8093798662636256, "grad_norm": 0.1988262860661221, "learning_rate": 3.838537585252483e-05, "loss": 0.576, "num_tokens": 1691062200.0, "step": 2209 }, { "epoch": 0.8097462672895485, "grad_norm": 0.2146653643493275, "learning_rate": 3.838360671389425e-05, "loss": 0.5837, "num_tokens": 1691899108.0, "step": 2210 }, { "epoch": 0.8101126683154712, "grad_norm": 0.18638545017081326, "learning_rate": 3.838183665213685e-05, "loss": 0.5532, "num_tokens": 1692660399.0, "step": 2211 }, { "epoch": 0.8104790693413941, "grad_norm": 0.1583214362434416, "learning_rate": 3.838006566735236e-05, "loss": 0.525, "num_tokens": 1693559031.0, "step": 2212 }, { "epoch": 0.810845470367317, "grad_norm": 0.165862782272274, "learning_rate": 3.837829375964059e-05, "loss": 0.5252, "num_tokens": 1694381671.0, "step": 2213 }, { "epoch": 0.8112118713932399, "grad_norm": 0.18831544128921976, "learning_rate": 3.837652092910135e-05, "loss": 0.5846, "num_tokens": 1695113942.0, "step": 2214 }, { "epoch": 0.8115782724191628, "grad_norm": 0.17521527326070593, "learning_rate": 3.8374747175834545e-05, "loss": 0.5453, "num_tokens": 1695859082.0, "step": 2215 }, { "epoch": 0.8119446734450857, "grad_norm": 0.18870217697522992, "learning_rate": 3.837297249994011e-05, "loss": 0.5685, "num_tokens": 1696661118.0, "step": 2216 }, { "epoch": 0.8123110744710085, "grad_norm": 0.14806995566280884, "learning_rate": 3.837119690151804e-05, "loss": 0.5734, "num_tokens": 1697495365.0, "step": 2217 }, { "epoch": 0.8126774754969314, "grad_norm": 0.20736441752430504, "learning_rate": 3.836942038066839e-05, "loss": 0.5289, "num_tokens": 1698225317.0, "step": 2218 }, { "epoch": 0.8130438765228543, "grad_norm": 0.1799822476572131, "learning_rate": 3.836764293749124e-05, "loss": 0.5868, "num_tokens": 1698957303.0, "step": 2219 }, { "epoch": 0.8134102775487771, "grad_norm": 0.1709683916098636, "learning_rate": 3.836586457208675e-05, "loss": 0.5241, "num_tokens": 1699729561.0, "step": 2220 }, { "epoch": 0.8137766785747, "grad_norm": 0.19973364447922498, "learning_rate": 3.8364085284555114e-05, "loss": 0.5359, "num_tokens": 1700553098.0, "step": 2221 }, { "epoch": 0.8141430796006229, "grad_norm": 0.17339357542777592, "learning_rate": 3.836230507499659e-05, "loss": 0.5761, "num_tokens": 1701335636.0, "step": 2222 }, { "epoch": 0.8145094806265457, "grad_norm": 0.17801139327732407, "learning_rate": 3.836052394351149e-05, "loss": 0.5606, "num_tokens": 1702078170.0, "step": 2223 }, { "epoch": 0.8148758816524686, "grad_norm": 0.17550273358830487, "learning_rate": 3.835874189020015e-05, "loss": 0.5405, "num_tokens": 1702912353.0, "step": 2224 }, { "epoch": 0.8152422826783915, "grad_norm": 0.2012907242611795, "learning_rate": 3.8356958915163e-05, "loss": 0.6258, "num_tokens": 1703544028.0, "step": 2225 }, { "epoch": 0.8156086837043144, "grad_norm": 0.1796165782361199, "learning_rate": 3.8355175018500494e-05, "loss": 0.5412, "num_tokens": 1704386859.0, "step": 2226 }, { "epoch": 0.8159750847302373, "grad_norm": 0.1605780075557802, "learning_rate": 3.835339020031315e-05, "loss": 0.5363, "num_tokens": 1705253612.0, "step": 2227 }, { "epoch": 0.8163414857561602, "grad_norm": 0.16046441845851014, "learning_rate": 3.835160446070152e-05, "loss": 0.5407, "num_tokens": 1706021566.0, "step": 2228 }, { "epoch": 0.816707886782083, "grad_norm": 0.18162458711721205, "learning_rate": 3.834981779976623e-05, "loss": 0.5299, "num_tokens": 1706815145.0, "step": 2229 }, { "epoch": 0.8170742878080058, "grad_norm": 0.18858641133795873, "learning_rate": 3.834803021760795e-05, "loss": 0.5406, "num_tokens": 1707453418.0, "step": 2230 }, { "epoch": 0.8174406888339287, "grad_norm": 0.23914494325094615, "learning_rate": 3.834624171432739e-05, "loss": 0.5751, "num_tokens": 1708164587.0, "step": 2231 }, { "epoch": 0.8178070898598516, "grad_norm": 0.2028127369962195, "learning_rate": 3.834445229002533e-05, "loss": 0.5485, "num_tokens": 1708951318.0, "step": 2232 }, { "epoch": 0.8181734908857745, "grad_norm": 0.1786940572561508, "learning_rate": 3.8342661944802594e-05, "loss": 0.5875, "num_tokens": 1709628134.0, "step": 2233 }, { "epoch": 0.8185398919116974, "grad_norm": 0.19875577535309927, "learning_rate": 3.8340870678760056e-05, "loss": 0.5569, "num_tokens": 1710404347.0, "step": 2234 }, { "epoch": 0.8189062929376202, "grad_norm": 0.17299566928185425, "learning_rate": 3.8339078491998654e-05, "loss": 0.5673, "num_tokens": 1711201263.0, "step": 2235 }, { "epoch": 0.8192726939635431, "grad_norm": 0.17763340942047146, "learning_rate": 3.8337285384619354e-05, "loss": 0.5889, "num_tokens": 1711957061.0, "step": 2236 }, { "epoch": 0.819639094989466, "grad_norm": 0.1747871908900048, "learning_rate": 3.83354913567232e-05, "loss": 0.6042, "num_tokens": 1712786555.0, "step": 2237 }, { "epoch": 0.8200054960153889, "grad_norm": 0.1654237955506603, "learning_rate": 3.833369640841126e-05, "loss": 0.5227, "num_tokens": 1713517837.0, "step": 2238 }, { "epoch": 0.8203718970413117, "grad_norm": 0.16826713369248156, "learning_rate": 3.833190053978468e-05, "loss": 0.5459, "num_tokens": 1714344966.0, "step": 2239 }, { "epoch": 0.8207382980672346, "grad_norm": 0.17087969436859032, "learning_rate": 3.833010375094465e-05, "loss": 0.5567, "num_tokens": 1715126698.0, "step": 2240 }, { "epoch": 0.8211046990931574, "grad_norm": 0.21557920827794433, "learning_rate": 3.8328306041992404e-05, "loss": 0.583, "num_tokens": 1715910317.0, "step": 2241 }, { "epoch": 0.8214711001190803, "grad_norm": 0.16747181299720568, "learning_rate": 3.832650741302923e-05, "loss": 0.5639, "num_tokens": 1716590188.0, "step": 2242 }, { "epoch": 0.8218375011450032, "grad_norm": 0.17590161966511778, "learning_rate": 3.8324707864156486e-05, "loss": 0.5526, "num_tokens": 1717363676.0, "step": 2243 }, { "epoch": 0.8222039021709261, "grad_norm": 0.18281297543361233, "learning_rate": 3.832290739547554e-05, "loss": 0.5616, "num_tokens": 1718242167.0, "step": 2244 }, { "epoch": 0.822570303196849, "grad_norm": 0.20048993820826835, "learning_rate": 3.8321106007087875e-05, "loss": 0.577, "num_tokens": 1719036547.0, "step": 2245 }, { "epoch": 0.8229367042227719, "grad_norm": 0.1971773069125562, "learning_rate": 3.831930369909495e-05, "loss": 0.5634, "num_tokens": 1719855288.0, "step": 2246 }, { "epoch": 0.8233031052486947, "grad_norm": 0.18349092527469285, "learning_rate": 3.8317500471598353e-05, "loss": 0.5682, "num_tokens": 1720705496.0, "step": 2247 }, { "epoch": 0.8236695062746175, "grad_norm": 0.19270805029254084, "learning_rate": 3.831569632469965e-05, "loss": 0.5639, "num_tokens": 1721492739.0, "step": 2248 }, { "epoch": 0.8240359073005404, "grad_norm": 0.15105074496181226, "learning_rate": 3.8313891258500525e-05, "loss": 0.5173, "num_tokens": 1722300265.0, "step": 2249 }, { "epoch": 0.8244023083264633, "grad_norm": 0.1904551247535506, "learning_rate": 3.831208527310266e-05, "loss": 0.5831, "num_tokens": 1723041565.0, "step": 2250 }, { "epoch": 0.8247687093523862, "grad_norm": 0.1746982017165658, "learning_rate": 3.831027836860783e-05, "loss": 0.5439, "num_tokens": 1723810719.0, "step": 2251 }, { "epoch": 0.8251351103783091, "grad_norm": 0.17383111999490602, "learning_rate": 3.830847054511784e-05, "loss": 0.5564, "num_tokens": 1724762682.0, "step": 2252 }, { "epoch": 0.8255015114042319, "grad_norm": 0.17641005852021657, "learning_rate": 3.8306661802734544e-05, "loss": 0.5558, "num_tokens": 1725553462.0, "step": 2253 }, { "epoch": 0.8258679124301548, "grad_norm": 0.1738413633160345, "learning_rate": 3.8304852141559855e-05, "loss": 0.5349, "num_tokens": 1726397271.0, "step": 2254 }, { "epoch": 0.8262343134560777, "grad_norm": 0.16329240822094682, "learning_rate": 3.830304156169575e-05, "loss": 0.5642, "num_tokens": 1727161836.0, "step": 2255 }, { "epoch": 0.8266007144820006, "grad_norm": 0.18886962400514232, "learning_rate": 3.830123006324424e-05, "loss": 0.6143, "num_tokens": 1727888633.0, "step": 2256 }, { "epoch": 0.8269671155079235, "grad_norm": 0.1796203742872129, "learning_rate": 3.829941764630738e-05, "loss": 0.542, "num_tokens": 1728667960.0, "step": 2257 }, { "epoch": 0.8273335165338463, "grad_norm": 0.1628824595254974, "learning_rate": 3.829760431098731e-05, "loss": 0.5834, "num_tokens": 1729417571.0, "step": 2258 }, { "epoch": 0.8276999175597691, "grad_norm": 0.16566843347278035, "learning_rate": 3.829579005738618e-05, "loss": 0.5947, "num_tokens": 1730296185.0, "step": 2259 }, { "epoch": 0.828066318585692, "grad_norm": 0.2059229729878456, "learning_rate": 3.829397488560623e-05, "loss": 0.6073, "num_tokens": 1730942430.0, "step": 2260 }, { "epoch": 0.8284327196116149, "grad_norm": 0.16777829018327137, "learning_rate": 3.8292158795749735e-05, "loss": 0.5565, "num_tokens": 1731801583.0, "step": 2261 }, { "epoch": 0.8287991206375378, "grad_norm": 0.15549224892934263, "learning_rate": 3.8290341787919006e-05, "loss": 0.5268, "num_tokens": 1732622437.0, "step": 2262 }, { "epoch": 0.8291655216634607, "grad_norm": 0.17062509833707795, "learning_rate": 3.828852386221644e-05, "loss": 0.5604, "num_tokens": 1733453304.0, "step": 2263 }, { "epoch": 0.8295319226893836, "grad_norm": 0.16839948614341865, "learning_rate": 3.8286705018744454e-05, "loss": 0.5526, "num_tokens": 1734125668.0, "step": 2264 }, { "epoch": 0.8298983237153064, "grad_norm": 0.1812462036282693, "learning_rate": 3.828488525760554e-05, "loss": 0.5629, "num_tokens": 1734985276.0, "step": 2265 }, { "epoch": 0.8302647247412293, "grad_norm": 0.19578042957389033, "learning_rate": 3.828306457890222e-05, "loss": 0.6187, "num_tokens": 1735633633.0, "step": 2266 }, { "epoch": 0.8306311257671521, "grad_norm": 0.18626842644271963, "learning_rate": 3.828124298273709e-05, "loss": 0.5626, "num_tokens": 1736399663.0, "step": 2267 }, { "epoch": 0.830997526793075, "grad_norm": 0.17631243399356833, "learning_rate": 3.827942046921278e-05, "loss": 0.532, "num_tokens": 1737103302.0, "step": 2268 }, { "epoch": 0.8313639278189979, "grad_norm": 0.17769325802202776, "learning_rate": 3.827759703843199e-05, "loss": 0.5258, "num_tokens": 1737928566.0, "step": 2269 }, { "epoch": 0.8317303288449208, "grad_norm": 0.17724380033076276, "learning_rate": 3.827577269049745e-05, "loss": 0.5533, "num_tokens": 1738665702.0, "step": 2270 }, { "epoch": 0.8320967298708436, "grad_norm": 0.1713991457203914, "learning_rate": 3.827394742551195e-05, "loss": 0.5563, "num_tokens": 1739415874.0, "step": 2271 }, { "epoch": 0.8324631308967665, "grad_norm": 0.1799820763187895, "learning_rate": 3.827212124357834e-05, "loss": 0.5691, "num_tokens": 1740203183.0, "step": 2272 }, { "epoch": 0.8328295319226894, "grad_norm": 0.1596689924966694, "learning_rate": 3.827029414479952e-05, "loss": 0.5468, "num_tokens": 1740999560.0, "step": 2273 }, { "epoch": 0.8331959329486123, "grad_norm": 0.1839843748905018, "learning_rate": 3.826846612927841e-05, "loss": 0.537, "num_tokens": 1741738508.0, "step": 2274 }, { "epoch": 0.8335623339745352, "grad_norm": 0.15388655141645585, "learning_rate": 3.826663719711804e-05, "loss": 0.5353, "num_tokens": 1742549784.0, "step": 2275 }, { "epoch": 0.833928735000458, "grad_norm": 0.2048959333987052, "learning_rate": 3.826480734842146e-05, "loss": 0.6271, "num_tokens": 1743238503.0, "step": 2276 }, { "epoch": 0.8342951360263808, "grad_norm": 0.16892008452785423, "learning_rate": 3.826297658329175e-05, "loss": 0.521, "num_tokens": 1744068430.0, "step": 2277 }, { "epoch": 0.8346615370523037, "grad_norm": 0.18091079876438318, "learning_rate": 3.8261144901832075e-05, "loss": 0.5997, "num_tokens": 1744733524.0, "step": 2278 }, { "epoch": 0.8350279380782266, "grad_norm": 0.2016950628628756, "learning_rate": 3.8259312304145646e-05, "loss": 0.5934, "num_tokens": 1745467796.0, "step": 2279 }, { "epoch": 0.8353943391041495, "grad_norm": 0.17926472303005325, "learning_rate": 3.82574787903357e-05, "loss": 0.5501, "num_tokens": 1746172020.0, "step": 2280 }, { "epoch": 0.8357607401300724, "grad_norm": 0.17132794399264345, "learning_rate": 3.825564436050558e-05, "loss": 0.5715, "num_tokens": 1746899700.0, "step": 2281 }, { "epoch": 0.8361271411559953, "grad_norm": 0.177019332715678, "learning_rate": 3.8253809014758615e-05, "loss": 0.5766, "num_tokens": 1747681921.0, "step": 2282 }, { "epoch": 0.8364935421819181, "grad_norm": 0.1901317751027385, "learning_rate": 3.8251972753198226e-05, "loss": 0.5909, "num_tokens": 1748325051.0, "step": 2283 }, { "epoch": 0.836859943207841, "grad_norm": 0.18040481236963196, "learning_rate": 3.825013557592788e-05, "loss": 0.5624, "num_tokens": 1749104153.0, "step": 2284 }, { "epoch": 0.8372263442337639, "grad_norm": 0.1617776793147378, "learning_rate": 3.824829748305109e-05, "loss": 0.5506, "num_tokens": 1749860347.0, "step": 2285 }, { "epoch": 0.8375927452596867, "grad_norm": 0.1843265272811794, "learning_rate": 3.824645847467142e-05, "loss": 0.5761, "num_tokens": 1750499688.0, "step": 2286 }, { "epoch": 0.8379591462856096, "grad_norm": 0.17216251258382742, "learning_rate": 3.8244618550892494e-05, "loss": 0.5509, "num_tokens": 1751240107.0, "step": 2287 }, { "epoch": 0.8383255473115325, "grad_norm": 0.17667074939615568, "learning_rate": 3.824277771181797e-05, "loss": 0.5521, "num_tokens": 1752086084.0, "step": 2288 }, { "epoch": 0.8386919483374553, "grad_norm": 0.17018843392209385, "learning_rate": 3.824093595755159e-05, "loss": 0.5986, "num_tokens": 1752817389.0, "step": 2289 }, { "epoch": 0.8390583493633782, "grad_norm": 0.19513913058081397, "learning_rate": 3.8239093288197104e-05, "loss": 0.581, "num_tokens": 1753582180.0, "step": 2290 }, { "epoch": 0.8394247503893011, "grad_norm": 0.17900778366147585, "learning_rate": 3.8237249703858346e-05, "loss": 0.5761, "num_tokens": 1754284739.0, "step": 2291 }, { "epoch": 0.839791151415224, "grad_norm": 0.1580493180753291, "learning_rate": 3.82354052046392e-05, "loss": 0.5631, "num_tokens": 1755144829.0, "step": 2292 }, { "epoch": 0.8401575524411469, "grad_norm": 0.1631154490184015, "learning_rate": 3.823355979064358e-05, "loss": 0.5417, "num_tokens": 1755942285.0, "step": 2293 }, { "epoch": 0.8405239534670698, "grad_norm": 0.18891163988178417, "learning_rate": 3.8231713461975473e-05, "loss": 0.5526, "num_tokens": 1756706982.0, "step": 2294 }, { "epoch": 0.8408903544929925, "grad_norm": 0.16652378393060954, "learning_rate": 3.8229866218738905e-05, "loss": 0.5607, "num_tokens": 1757404930.0, "step": 2295 }, { "epoch": 0.8412567555189154, "grad_norm": 0.16102501184751286, "learning_rate": 3.8228018061037956e-05, "loss": 0.5509, "num_tokens": 1758125157.0, "step": 2296 }, { "epoch": 0.8416231565448383, "grad_norm": 0.17516857224768176, "learning_rate": 3.8226168988976766e-05, "loss": 0.5947, "num_tokens": 1758855477.0, "step": 2297 }, { "epoch": 0.8419895575707612, "grad_norm": 0.16172947826761674, "learning_rate": 3.8224319002659525e-05, "loss": 0.5419, "num_tokens": 1759647555.0, "step": 2298 }, { "epoch": 0.8423559585966841, "grad_norm": 0.16990346675234672, "learning_rate": 3.822246810219046e-05, "loss": 0.5503, "num_tokens": 1760507604.0, "step": 2299 }, { "epoch": 0.842722359622607, "grad_norm": 0.1649602462384635, "learning_rate": 3.822061628767386e-05, "loss": 0.5754, "num_tokens": 1761204709.0, "step": 2300 }, { "epoch": 0.8430887606485298, "grad_norm": 0.17500603283819724, "learning_rate": 3.8218763559214065e-05, "loss": 0.5245, "num_tokens": 1761902816.0, "step": 2301 }, { "epoch": 0.8434551616744527, "grad_norm": 0.16879887960515444, "learning_rate": 3.8216909916915464e-05, "loss": 0.5378, "num_tokens": 1762723681.0, "step": 2302 }, { "epoch": 0.8438215627003756, "grad_norm": 0.17799429328106495, "learning_rate": 3.821505536088251e-05, "loss": 0.573, "num_tokens": 1763452149.0, "step": 2303 }, { "epoch": 0.8441879637262985, "grad_norm": 0.16616625032195703, "learning_rate": 3.821319989121969e-05, "loss": 0.5583, "num_tokens": 1764124193.0, "step": 2304 }, { "epoch": 0.8445543647522213, "grad_norm": 0.20880031064058022, "learning_rate": 3.8211343508031546e-05, "loss": 0.5667, "num_tokens": 1764918678.0, "step": 2305 }, { "epoch": 0.8449207657781442, "grad_norm": 0.17092627324961363, "learning_rate": 3.820948621142268e-05, "loss": 0.5195, "num_tokens": 1765643293.0, "step": 2306 }, { "epoch": 0.845287166804067, "grad_norm": 0.2016879254192219, "learning_rate": 3.8207628001497744e-05, "loss": 0.5532, "num_tokens": 1766386938.0, "step": 2307 }, { "epoch": 0.8456535678299899, "grad_norm": 0.1718227081703912, "learning_rate": 3.820576887836143e-05, "loss": 0.5829, "num_tokens": 1767160406.0, "step": 2308 }, { "epoch": 0.8460199688559128, "grad_norm": 0.1708393521983875, "learning_rate": 3.8203908842118486e-05, "loss": 0.5573, "num_tokens": 1768049383.0, "step": 2309 }, { "epoch": 0.8463863698818357, "grad_norm": 0.1782952041404469, "learning_rate": 3.820204789287373e-05, "loss": 0.5744, "num_tokens": 1768760516.0, "step": 2310 }, { "epoch": 0.8467527709077586, "grad_norm": 0.1872368199800736, "learning_rate": 3.8200186030732004e-05, "loss": 0.5742, "num_tokens": 1769491503.0, "step": 2311 }, { "epoch": 0.8471191719336815, "grad_norm": 0.16744414410383934, "learning_rate": 3.819832325579822e-05, "loss": 0.5832, "num_tokens": 1770252359.0, "step": 2312 }, { "epoch": 0.8474855729596042, "grad_norm": 0.19660848362777908, "learning_rate": 3.8196459568177335e-05, "loss": 0.6085, "num_tokens": 1771142433.0, "step": 2313 }, { "epoch": 0.8478519739855271, "grad_norm": 0.17334526690460594, "learning_rate": 3.819459496797435e-05, "loss": 0.5525, "num_tokens": 1771929448.0, "step": 2314 }, { "epoch": 0.84821837501145, "grad_norm": 0.18523409410347189, "learning_rate": 3.8192729455294335e-05, "loss": 0.5382, "num_tokens": 1772639585.0, "step": 2315 }, { "epoch": 0.8485847760373729, "grad_norm": 0.17394721614491576, "learning_rate": 3.8190863030242396e-05, "loss": 0.543, "num_tokens": 1773315645.0, "step": 2316 }, { "epoch": 0.8489511770632958, "grad_norm": 0.18628837187004757, "learning_rate": 3.8188995692923694e-05, "loss": 0.5445, "num_tokens": 1774257091.0, "step": 2317 }, { "epoch": 0.8493175780892186, "grad_norm": 0.18477372548426335, "learning_rate": 3.818712744344345e-05, "loss": 0.5281, "num_tokens": 1775164421.0, "step": 2318 }, { "epoch": 0.8496839791151415, "grad_norm": 0.16788985300526577, "learning_rate": 3.818525828190693e-05, "loss": 0.552, "num_tokens": 1775951286.0, "step": 2319 }, { "epoch": 0.8500503801410644, "grad_norm": 0.18845962002134464, "learning_rate": 3.818338820841944e-05, "loss": 0.526, "num_tokens": 1776731859.0, "step": 2320 }, { "epoch": 0.8504167811669873, "grad_norm": 0.15004135906646882, "learning_rate": 3.818151722308637e-05, "loss": 0.5561, "num_tokens": 1777507776.0, "step": 2321 }, { "epoch": 0.8507831821929102, "grad_norm": 0.15141186145824415, "learning_rate": 3.817964532601311e-05, "loss": 0.5363, "num_tokens": 1778332029.0, "step": 2322 }, { "epoch": 0.851149583218833, "grad_norm": 0.17634327882341463, "learning_rate": 3.817777251730515e-05, "loss": 0.5746, "num_tokens": 1779051269.0, "step": 2323 }, { "epoch": 0.8515159842447558, "grad_norm": 0.16874259404753125, "learning_rate": 3.8175898797068e-05, "loss": 0.552, "num_tokens": 1779850817.0, "step": 2324 }, { "epoch": 0.8518823852706787, "grad_norm": 0.16615275935572707, "learning_rate": 3.8174024165407255e-05, "loss": 0.558, "num_tokens": 1780524726.0, "step": 2325 }, { "epoch": 0.8522487862966016, "grad_norm": 0.18858830699611237, "learning_rate": 3.8172148622428525e-05, "loss": 0.5829, "num_tokens": 1781292700.0, "step": 2326 }, { "epoch": 0.8526151873225245, "grad_norm": 0.1844587694630251, "learning_rate": 3.817027216823749e-05, "loss": 0.5622, "num_tokens": 1782008251.0, "step": 2327 }, { "epoch": 0.8529815883484474, "grad_norm": 0.18039953360135533, "learning_rate": 3.816839480293988e-05, "loss": 0.55, "num_tokens": 1782579170.0, "step": 2328 }, { "epoch": 0.8533479893743703, "grad_norm": 0.1680777931381416, "learning_rate": 3.8166516526641466e-05, "loss": 0.5736, "num_tokens": 1783261172.0, "step": 2329 }, { "epoch": 0.8537143904002931, "grad_norm": 0.1827107415487931, "learning_rate": 3.8164637339448086e-05, "loss": 0.5308, "num_tokens": 1784039818.0, "step": 2330 }, { "epoch": 0.854080791426216, "grad_norm": 0.18333399852422794, "learning_rate": 3.816275724146563e-05, "loss": 0.5504, "num_tokens": 1784740740.0, "step": 2331 }, { "epoch": 0.8544471924521388, "grad_norm": 0.16734567374816464, "learning_rate": 3.816087623280001e-05, "loss": 0.5947, "num_tokens": 1785464202.0, "step": 2332 }, { "epoch": 0.8548135934780617, "grad_norm": 0.1749000097230894, "learning_rate": 3.815899431355723e-05, "loss": 0.5507, "num_tokens": 1786242482.0, "step": 2333 }, { "epoch": 0.8551799945039846, "grad_norm": 0.17078885590459075, "learning_rate": 3.815711148384331e-05, "loss": 0.5577, "num_tokens": 1786980933.0, "step": 2334 }, { "epoch": 0.8555463955299075, "grad_norm": 0.20452377853260897, "learning_rate": 3.8155227743764355e-05, "loss": 0.6122, "num_tokens": 1787628410.0, "step": 2335 }, { "epoch": 0.8559127965558303, "grad_norm": 0.19422361884350675, "learning_rate": 3.8153343093426484e-05, "loss": 0.5604, "num_tokens": 1788344335.0, "step": 2336 }, { "epoch": 0.8562791975817532, "grad_norm": 0.1631186747323115, "learning_rate": 3.815145753293591e-05, "loss": 0.5392, "num_tokens": 1789064657.0, "step": 2337 }, { "epoch": 0.8566455986076761, "grad_norm": 0.19809711575669897, "learning_rate": 3.814957106239885e-05, "loss": 0.573, "num_tokens": 1789804034.0, "step": 2338 }, { "epoch": 0.857011999633599, "grad_norm": 0.17852515605511063, "learning_rate": 3.814768368192161e-05, "loss": 0.5739, "num_tokens": 1790546538.0, "step": 2339 }, { "epoch": 0.8573784006595219, "grad_norm": 0.19893878811661383, "learning_rate": 3.814579539161053e-05, "loss": 0.5399, "num_tokens": 1791345989.0, "step": 2340 }, { "epoch": 0.8577448016854448, "grad_norm": 0.18514816178854715, "learning_rate": 3.814390619157201e-05, "loss": 0.5674, "num_tokens": 1792214082.0, "step": 2341 }, { "epoch": 0.8581112027113675, "grad_norm": 0.18047566707826765, "learning_rate": 3.814201608191248e-05, "loss": 0.5359, "num_tokens": 1793087892.0, "step": 2342 }, { "epoch": 0.8584776037372904, "grad_norm": 0.1745292953392888, "learning_rate": 3.8140125062738456e-05, "loss": 0.5765, "num_tokens": 1793789065.0, "step": 2343 }, { "epoch": 0.8588440047632133, "grad_norm": 0.17484883941597765, "learning_rate": 3.8138233134156484e-05, "loss": 0.5779, "num_tokens": 1794447692.0, "step": 2344 }, { "epoch": 0.8592104057891362, "grad_norm": 0.18687799720594972, "learning_rate": 3.813634029627316e-05, "loss": 0.56, "num_tokens": 1795306510.0, "step": 2345 }, { "epoch": 0.8595768068150591, "grad_norm": 0.18078791022742477, "learning_rate": 3.813444654919513e-05, "loss": 0.6099, "num_tokens": 1795947157.0, "step": 2346 }, { "epoch": 0.859943207840982, "grad_norm": 0.1759001403461035, "learning_rate": 3.8132551893029094e-05, "loss": 0.5606, "num_tokens": 1796787289.0, "step": 2347 }, { "epoch": 0.8603096088669048, "grad_norm": 0.19100185694196506, "learning_rate": 3.8130656327881816e-05, "loss": 0.5373, "num_tokens": 1797617888.0, "step": 2348 }, { "epoch": 0.8606760098928277, "grad_norm": 0.18053795238875586, "learning_rate": 3.81287598538601e-05, "loss": 0.5292, "num_tokens": 1798305764.0, "step": 2349 }, { "epoch": 0.8610424109187506, "grad_norm": 0.2001065941001377, "learning_rate": 3.8126862471070806e-05, "loss": 0.5816, "num_tokens": 1799107682.0, "step": 2350 }, { "epoch": 0.8614088119446734, "grad_norm": 0.1969115204119376, "learning_rate": 3.8124964179620824e-05, "loss": 0.558, "num_tokens": 1799816579.0, "step": 2351 }, { "epoch": 0.8617752129705963, "grad_norm": 0.23697803048558347, "learning_rate": 3.8123064979617126e-05, "loss": 0.5638, "num_tokens": 1800605672.0, "step": 2352 }, { "epoch": 0.8621416139965192, "grad_norm": 0.22767565924370697, "learning_rate": 3.812116487116672e-05, "loss": 0.5246, "num_tokens": 1801474165.0, "step": 2353 }, { "epoch": 0.862508015022442, "grad_norm": 0.1692003253184652, "learning_rate": 3.811926385437666e-05, "loss": 0.5628, "num_tokens": 1802316945.0, "step": 2354 }, { "epoch": 0.8628744160483649, "grad_norm": 0.20117361277337456, "learning_rate": 3.811736192935407e-05, "loss": 0.5561, "num_tokens": 1803011083.0, "step": 2355 }, { "epoch": 0.8632408170742878, "grad_norm": 0.21189826089509145, "learning_rate": 3.81154590962061e-05, "loss": 0.5749, "num_tokens": 1803763684.0, "step": 2356 }, { "epoch": 0.8636072181002107, "grad_norm": 0.19230266023920567, "learning_rate": 3.811355535503997e-05, "loss": 0.5245, "num_tokens": 1804456195.0, "step": 2357 }, { "epoch": 0.8639736191261336, "grad_norm": 0.16762615512180934, "learning_rate": 3.811165070596295e-05, "loss": 0.539, "num_tokens": 1805205077.0, "step": 2358 }, { "epoch": 0.8643400201520565, "grad_norm": 0.19980054126664054, "learning_rate": 3.810974514908235e-05, "loss": 0.5299, "num_tokens": 1806035569.0, "step": 2359 }, { "epoch": 0.8647064211779792, "grad_norm": 0.17489957112414042, "learning_rate": 3.810783868450554e-05, "loss": 0.5401, "num_tokens": 1806808636.0, "step": 2360 }, { "epoch": 0.8650728222039021, "grad_norm": 0.20261423064378736, "learning_rate": 3.810593131233994e-05, "loss": 0.5479, "num_tokens": 1807564292.0, "step": 2361 }, { "epoch": 0.865439223229825, "grad_norm": 0.19018965155681297, "learning_rate": 3.8104023032693015e-05, "loss": 0.5361, "num_tokens": 1808308185.0, "step": 2362 }, { "epoch": 0.8658056242557479, "grad_norm": 0.1980661479126474, "learning_rate": 3.810211384567229e-05, "loss": 0.5332, "num_tokens": 1809009793.0, "step": 2363 }, { "epoch": 0.8661720252816708, "grad_norm": 0.1858205639255198, "learning_rate": 3.810020375138534e-05, "loss": 0.5609, "num_tokens": 1809689985.0, "step": 2364 }, { "epoch": 0.8665384263075937, "grad_norm": 0.1740853452083846, "learning_rate": 3.809829274993978e-05, "loss": 0.5864, "num_tokens": 1810441799.0, "step": 2365 }, { "epoch": 0.8669048273335165, "grad_norm": 0.18650474284699897, "learning_rate": 3.80963808414433e-05, "loss": 0.5511, "num_tokens": 1811172154.0, "step": 2366 }, { "epoch": 0.8672712283594394, "grad_norm": 0.16047419497766344, "learning_rate": 3.809446802600361e-05, "loss": 0.5252, "num_tokens": 1811985498.0, "step": 2367 }, { "epoch": 0.8676376293853623, "grad_norm": 0.16151174057035186, "learning_rate": 3.809255430372849e-05, "loss": 0.5344, "num_tokens": 1812741029.0, "step": 2368 }, { "epoch": 0.8680040304112852, "grad_norm": 0.19889865799660175, "learning_rate": 3.809063967472577e-05, "loss": 0.5449, "num_tokens": 1813589758.0, "step": 2369 }, { "epoch": 0.868370431437208, "grad_norm": 0.1767385915035772, "learning_rate": 3.808872413910334e-05, "loss": 0.543, "num_tokens": 1814365295.0, "step": 2370 }, { "epoch": 0.8687368324631309, "grad_norm": 0.20492243865871848, "learning_rate": 3.80868076969691e-05, "loss": 0.5472, "num_tokens": 1815145411.0, "step": 2371 }, { "epoch": 0.8691032334890537, "grad_norm": 0.16239370094685007, "learning_rate": 3.808489034843107e-05, "loss": 0.5282, "num_tokens": 1815960599.0, "step": 2372 }, { "epoch": 0.8694696345149766, "grad_norm": 0.16613804590846637, "learning_rate": 3.8082972093597254e-05, "loss": 0.5257, "num_tokens": 1816710111.0, "step": 2373 }, { "epoch": 0.8698360355408995, "grad_norm": 0.17239306554754008, "learning_rate": 3.8081052932575746e-05, "loss": 0.5344, "num_tokens": 1817439644.0, "step": 2374 }, { "epoch": 0.8702024365668224, "grad_norm": 0.18302103700718242, "learning_rate": 3.807913286547467e-05, "loss": 0.5622, "num_tokens": 1818148010.0, "step": 2375 }, { "epoch": 0.8705688375927453, "grad_norm": 0.178083624679395, "learning_rate": 3.8077211892402226e-05, "loss": 0.5638, "num_tokens": 1818874979.0, "step": 2376 }, { "epoch": 0.8709352386186682, "grad_norm": 0.15667933228619985, "learning_rate": 3.807529001346664e-05, "loss": 0.5566, "num_tokens": 1819613935.0, "step": 2377 }, { "epoch": 0.871301639644591, "grad_norm": 0.19377155917068112, "learning_rate": 3.80733672287762e-05, "loss": 0.5873, "num_tokens": 1820389086.0, "step": 2378 }, { "epoch": 0.8716680406705138, "grad_norm": 0.17823341529324574, "learning_rate": 3.8071443538439255e-05, "loss": 0.5761, "num_tokens": 1821176249.0, "step": 2379 }, { "epoch": 0.8720344416964367, "grad_norm": 0.1785776371496104, "learning_rate": 3.806951894256418e-05, "loss": 0.5293, "num_tokens": 1821851072.0, "step": 2380 }, { "epoch": 0.8724008427223596, "grad_norm": 0.15583901811218182, "learning_rate": 3.8067593441259424e-05, "loss": 0.5355, "num_tokens": 1822710048.0, "step": 2381 }, { "epoch": 0.8727672437482825, "grad_norm": 0.16089811469768767, "learning_rate": 3.8065667034633477e-05, "loss": 0.5455, "num_tokens": 1823482969.0, "step": 2382 }, { "epoch": 0.8731336447742054, "grad_norm": 0.1731188247448997, "learning_rate": 3.8063739722794884e-05, "loss": 0.5374, "num_tokens": 1824239454.0, "step": 2383 }, { "epoch": 0.8735000458001282, "grad_norm": 0.16862035930080987, "learning_rate": 3.806181150585223e-05, "loss": 0.567, "num_tokens": 1824963027.0, "step": 2384 }, { "epoch": 0.8738664468260511, "grad_norm": 0.1711513011163839, "learning_rate": 3.805988238391416e-05, "loss": 0.5287, "num_tokens": 1825738441.0, "step": 2385 }, { "epoch": 0.874232847851974, "grad_norm": 0.17246496571057202, "learning_rate": 3.8057952357089395e-05, "loss": 0.5668, "num_tokens": 1826439540.0, "step": 2386 }, { "epoch": 0.8745992488778969, "grad_norm": 0.17450750733075868, "learning_rate": 3.8056021425486645e-05, "loss": 0.5671, "num_tokens": 1827180355.0, "step": 2387 }, { "epoch": 0.8749656499038198, "grad_norm": 0.17524404874259975, "learning_rate": 3.805408958921472e-05, "loss": 0.56, "num_tokens": 1828048419.0, "step": 2388 }, { "epoch": 0.8753320509297426, "grad_norm": 0.14803231659155763, "learning_rate": 3.805215684838248e-05, "loss": 0.5556, "num_tokens": 1828946421.0, "step": 2389 }, { "epoch": 0.8756984519556654, "grad_norm": 0.17435402117299834, "learning_rate": 3.8050223203098816e-05, "loss": 0.5606, "num_tokens": 1829627471.0, "step": 2390 }, { "epoch": 0.8760648529815883, "grad_norm": 0.17490960715718004, "learning_rate": 3.804828865347268e-05, "loss": 0.5551, "num_tokens": 1830363621.0, "step": 2391 }, { "epoch": 0.8764312540075112, "grad_norm": 0.1900502274843471, "learning_rate": 3.8046353199613065e-05, "loss": 0.5971, "num_tokens": 1831157556.0, "step": 2392 }, { "epoch": 0.8767976550334341, "grad_norm": 0.1764443376037142, "learning_rate": 3.8044416841629035e-05, "loss": 0.5738, "num_tokens": 1831963865.0, "step": 2393 }, { "epoch": 0.877164056059357, "grad_norm": 0.18133863320176954, "learning_rate": 3.804247957962969e-05, "loss": 0.5878, "num_tokens": 1832767695.0, "step": 2394 }, { "epoch": 0.8775304570852799, "grad_norm": 0.16077561307730454, "learning_rate": 3.8040541413724185e-05, "loss": 0.5125, "num_tokens": 1833621743.0, "step": 2395 }, { "epoch": 0.8778968581112027, "grad_norm": 0.1771801788712356, "learning_rate": 3.803860234402171e-05, "loss": 0.5334, "num_tokens": 1834434465.0, "step": 2396 }, { "epoch": 0.8782632591371256, "grad_norm": 0.16888252925533945, "learning_rate": 3.803666237063154e-05, "loss": 0.5252, "num_tokens": 1835275586.0, "step": 2397 }, { "epoch": 0.8786296601630484, "grad_norm": 0.1659317100764909, "learning_rate": 3.8034721493662986e-05, "loss": 0.5741, "num_tokens": 1836074571.0, "step": 2398 }, { "epoch": 0.8789960611889713, "grad_norm": 0.17784123191354703, "learning_rate": 3.8032779713225386e-05, "loss": 0.5705, "num_tokens": 1836797607.0, "step": 2399 }, { "epoch": 0.8793624622148942, "grad_norm": 0.17592774606103992, "learning_rate": 3.803083702942816e-05, "loss": 0.5788, "num_tokens": 1837524609.0, "step": 2400 }, { "epoch": 0.8797288632408171, "grad_norm": 0.21122253062393545, "learning_rate": 3.802889344238076e-05, "loss": 0.5824, "num_tokens": 1838218723.0, "step": 2401 }, { "epoch": 0.8800952642667399, "grad_norm": 0.20569513552180074, "learning_rate": 3.802694895219271e-05, "loss": 0.5764, "num_tokens": 1838951873.0, "step": 2402 }, { "epoch": 0.8804616652926628, "grad_norm": 0.16688748380434118, "learning_rate": 3.802500355897356e-05, "loss": 0.5191, "num_tokens": 1839606904.0, "step": 2403 }, { "epoch": 0.8808280663185857, "grad_norm": 0.2059275868326307, "learning_rate": 3.802305726283293e-05, "loss": 0.5572, "num_tokens": 1840350987.0, "step": 2404 }, { "epoch": 0.8811944673445086, "grad_norm": 0.19072005417420043, "learning_rate": 3.802111006388047e-05, "loss": 0.5745, "num_tokens": 1841082467.0, "step": 2405 }, { "epoch": 0.8815608683704315, "grad_norm": 0.18047149675350946, "learning_rate": 3.801916196222591e-05, "loss": 0.5983, "num_tokens": 1841872504.0, "step": 2406 }, { "epoch": 0.8819272693963544, "grad_norm": 0.18681691391914346, "learning_rate": 3.8017212957979e-05, "loss": 0.5636, "num_tokens": 1842618350.0, "step": 2407 }, { "epoch": 0.8822936704222771, "grad_norm": 0.19253761217620735, "learning_rate": 3.801526305124958e-05, "loss": 0.5306, "num_tokens": 1843401084.0, "step": 2408 }, { "epoch": 0.8826600714482, "grad_norm": 0.17632215986857153, "learning_rate": 3.801331224214749e-05, "loss": 0.5584, "num_tokens": 1844200842.0, "step": 2409 }, { "epoch": 0.8830264724741229, "grad_norm": 0.17260338236930226, "learning_rate": 3.8011360530782656e-05, "loss": 0.5459, "num_tokens": 1845105831.0, "step": 2410 }, { "epoch": 0.8833928735000458, "grad_norm": 0.17308128845889686, "learning_rate": 3.800940791726505e-05, "loss": 0.5799, "num_tokens": 1845889389.0, "step": 2411 }, { "epoch": 0.8837592745259687, "grad_norm": 0.1625288561666306, "learning_rate": 3.80074544017047e-05, "loss": 0.5428, "num_tokens": 1846626240.0, "step": 2412 }, { "epoch": 0.8841256755518916, "grad_norm": 0.15549717522528925, "learning_rate": 3.800549998421165e-05, "loss": 0.5559, "num_tokens": 1847387288.0, "step": 2413 }, { "epoch": 0.8844920765778144, "grad_norm": 0.18663518524509, "learning_rate": 3.800354466489604e-05, "loss": 0.5799, "num_tokens": 1848190138.0, "step": 2414 }, { "epoch": 0.8848584776037373, "grad_norm": 0.17334263390276727, "learning_rate": 3.800158844386804e-05, "loss": 0.544, "num_tokens": 1848909359.0, "step": 2415 }, { "epoch": 0.8852248786296602, "grad_norm": 0.14955977294958767, "learning_rate": 3.799963132123787e-05, "loss": 0.545, "num_tokens": 1849771110.0, "step": 2416 }, { "epoch": 0.885591279655583, "grad_norm": 0.183065916598821, "learning_rate": 3.79976732971158e-05, "loss": 0.5745, "num_tokens": 1850608548.0, "step": 2417 }, { "epoch": 0.8859576806815059, "grad_norm": 0.1593597482482216, "learning_rate": 3.7995714371612156e-05, "loss": 0.5498, "num_tokens": 1851378734.0, "step": 2418 }, { "epoch": 0.8863240817074288, "grad_norm": 0.1736962880262282, "learning_rate": 3.799375454483732e-05, "loss": 0.5618, "num_tokens": 1852093132.0, "step": 2419 }, { "epoch": 0.8866904827333516, "grad_norm": 0.17539968257681382, "learning_rate": 3.799179381690171e-05, "loss": 0.5862, "num_tokens": 1852850804.0, "step": 2420 }, { "epoch": 0.8870568837592745, "grad_norm": 0.19038181853702937, "learning_rate": 3.79898321879158e-05, "loss": 0.5523, "num_tokens": 1853593342.0, "step": 2421 }, { "epoch": 0.8874232847851974, "grad_norm": 0.17835337993538855, "learning_rate": 3.7987869657990126e-05, "loss": 0.5837, "num_tokens": 1854403093.0, "step": 2422 }, { "epoch": 0.8877896858111203, "grad_norm": 0.16643422460880086, "learning_rate": 3.798590622723525e-05, "loss": 0.5434, "num_tokens": 1855203532.0, "step": 2423 }, { "epoch": 0.8881560868370432, "grad_norm": 0.1989418830052116, "learning_rate": 3.7983941895761824e-05, "loss": 0.5763, "num_tokens": 1855917763.0, "step": 2424 }, { "epoch": 0.8885224878629661, "grad_norm": 0.16443856749850885, "learning_rate": 3.798197666368051e-05, "loss": 0.5452, "num_tokens": 1856628056.0, "step": 2425 }, { "epoch": 0.8888888888888888, "grad_norm": 0.18435037098459248, "learning_rate": 3.798001053110204e-05, "loss": 0.5864, "num_tokens": 1857294127.0, "step": 2426 }, { "epoch": 0.8892552899148117, "grad_norm": 0.1719943353441356, "learning_rate": 3.7978043498137195e-05, "loss": 0.5422, "num_tokens": 1858030058.0, "step": 2427 }, { "epoch": 0.8896216909407346, "grad_norm": 0.1793433608098383, "learning_rate": 3.797607556489681e-05, "loss": 0.5549, "num_tokens": 1858784580.0, "step": 2428 }, { "epoch": 0.8899880919666575, "grad_norm": 0.1826080387237739, "learning_rate": 3.7974106731491774e-05, "loss": 0.574, "num_tokens": 1859528180.0, "step": 2429 }, { "epoch": 0.8903544929925804, "grad_norm": 0.1831812221644986, "learning_rate": 3.797213699803301e-05, "loss": 0.6154, "num_tokens": 1860180376.0, "step": 2430 }, { "epoch": 0.8907208940185033, "grad_norm": 0.1913918956607796, "learning_rate": 3.79701663646315e-05, "loss": 0.5894, "num_tokens": 1860892989.0, "step": 2431 }, { "epoch": 0.8910872950444261, "grad_norm": 0.16806760902621443, "learning_rate": 3.7968194831398275e-05, "loss": 0.5552, "num_tokens": 1861579893.0, "step": 2432 }, { "epoch": 0.891453696070349, "grad_norm": 0.1791068771014105, "learning_rate": 3.7966222398444443e-05, "loss": 0.5852, "num_tokens": 1862279176.0, "step": 2433 }, { "epoch": 0.8918200970962719, "grad_norm": 0.19398566166625988, "learning_rate": 3.796424906588112e-05, "loss": 0.5397, "num_tokens": 1863042786.0, "step": 2434 }, { "epoch": 0.8921864981221947, "grad_norm": 0.18409753374194315, "learning_rate": 3.7962274833819494e-05, "loss": 0.5641, "num_tokens": 1863830950.0, "step": 2435 }, { "epoch": 0.8925528991481176, "grad_norm": 0.17745901805077233, "learning_rate": 3.7960299702370806e-05, "loss": 0.5396, "num_tokens": 1864551360.0, "step": 2436 }, { "epoch": 0.8929193001740405, "grad_norm": 0.2023539903738524, "learning_rate": 3.795832367164635e-05, "loss": 0.5501, "num_tokens": 1865421536.0, "step": 2437 }, { "epoch": 0.8932857011999633, "grad_norm": 0.15966791716145395, "learning_rate": 3.7956346741757445e-05, "loss": 0.5446, "num_tokens": 1866222182.0, "step": 2438 }, { "epoch": 0.8936521022258862, "grad_norm": 0.17229197488310075, "learning_rate": 3.79543689128155e-05, "loss": 0.5349, "num_tokens": 1867030078.0, "step": 2439 }, { "epoch": 0.8940185032518091, "grad_norm": 0.17233059010627688, "learning_rate": 3.795239018493195e-05, "loss": 0.5587, "num_tokens": 1867714409.0, "step": 2440 }, { "epoch": 0.894384904277732, "grad_norm": 0.17847736618015403, "learning_rate": 3.795041055821828e-05, "loss": 0.5708, "num_tokens": 1868546084.0, "step": 2441 }, { "epoch": 0.8947513053036549, "grad_norm": 0.22699138125484689, "learning_rate": 3.794843003278603e-05, "loss": 0.5695, "num_tokens": 1869254221.0, "step": 2442 }, { "epoch": 0.8951177063295778, "grad_norm": 0.19312766052887, "learning_rate": 3.794644860874681e-05, "loss": 0.6012, "num_tokens": 1869995537.0, "step": 2443 }, { "epoch": 0.8954841073555005, "grad_norm": 0.16720923104744143, "learning_rate": 3.794446628621224e-05, "loss": 0.5815, "num_tokens": 1870743061.0, "step": 2444 }, { "epoch": 0.8958505083814234, "grad_norm": 0.16673531800291083, "learning_rate": 3.7942483065294014e-05, "loss": 0.5616, "num_tokens": 1871413982.0, "step": 2445 }, { "epoch": 0.8962169094073463, "grad_norm": 0.1673176949867145, "learning_rate": 3.794049894610389e-05, "loss": 0.5496, "num_tokens": 1872234671.0, "step": 2446 }, { "epoch": 0.8965833104332692, "grad_norm": 0.1831647051664234, "learning_rate": 3.793851392875366e-05, "loss": 0.5745, "num_tokens": 1872880050.0, "step": 2447 }, { "epoch": 0.8969497114591921, "grad_norm": 0.163664265820138, "learning_rate": 3.793652801335516e-05, "loss": 0.5672, "num_tokens": 1873663552.0, "step": 2448 }, { "epoch": 0.897316112485115, "grad_norm": 0.1625819508252458, "learning_rate": 3.7934541200020284e-05, "loss": 0.5601, "num_tokens": 1874405857.0, "step": 2449 }, { "epoch": 0.8976825135110378, "grad_norm": 0.17917432935771008, "learning_rate": 3.7932553488860994e-05, "loss": 0.5534, "num_tokens": 1875203461.0, "step": 2450 }, { "epoch": 0.8980489145369607, "grad_norm": 0.18356640974102825, "learning_rate": 3.793056487998928e-05, "loss": 0.545, "num_tokens": 1875934076.0, "step": 2451 }, { "epoch": 0.8984153155628836, "grad_norm": 0.15484810522588882, "learning_rate": 3.7928575373517175e-05, "loss": 0.5468, "num_tokens": 1876752089.0, "step": 2452 }, { "epoch": 0.8987817165888065, "grad_norm": 0.1867064632141737, "learning_rate": 3.792658496955679e-05, "loss": 0.5644, "num_tokens": 1877485303.0, "step": 2453 }, { "epoch": 0.8991481176147293, "grad_norm": 0.16845884520139692, "learning_rate": 3.792459366822028e-05, "loss": 0.5495, "num_tokens": 1878126983.0, "step": 2454 }, { "epoch": 0.8995145186406522, "grad_norm": 0.20944069960608722, "learning_rate": 3.7922601469619824e-05, "loss": 0.5389, "num_tokens": 1878874283.0, "step": 2455 }, { "epoch": 0.899880919666575, "grad_norm": 0.18511547121759148, "learning_rate": 3.792060837386769e-05, "loss": 0.5986, "num_tokens": 1879670288.0, "step": 2456 }, { "epoch": 0.9002473206924979, "grad_norm": 0.1762113679691568, "learning_rate": 3.791861438107618e-05, "loss": 0.5477, "num_tokens": 1880453987.0, "step": 2457 }, { "epoch": 0.9006137217184208, "grad_norm": 0.18843059400874757, "learning_rate": 3.791661949135761e-05, "loss": 0.5217, "num_tokens": 1881190823.0, "step": 2458 }, { "epoch": 0.9009801227443437, "grad_norm": 0.18642852940574445, "learning_rate": 3.7914623704824424e-05, "loss": 0.5493, "num_tokens": 1882081865.0, "step": 2459 }, { "epoch": 0.9013465237702666, "grad_norm": 0.1933564799129779, "learning_rate": 3.791262702158906e-05, "loss": 0.5821, "num_tokens": 1882948075.0, "step": 2460 }, { "epoch": 0.9017129247961895, "grad_norm": 0.18728075363343447, "learning_rate": 3.7910629441764005e-05, "loss": 0.5433, "num_tokens": 1883783813.0, "step": 2461 }, { "epoch": 0.9020793258221123, "grad_norm": 0.1688537635572885, "learning_rate": 3.790863096546183e-05, "loss": 0.5639, "num_tokens": 1884475557.0, "step": 2462 }, { "epoch": 0.9024457268480351, "grad_norm": 0.19885246706573292, "learning_rate": 3.7906631592795126e-05, "loss": 0.5577, "num_tokens": 1885209275.0, "step": 2463 }, { "epoch": 0.902812127873958, "grad_norm": 0.21867570837516698, "learning_rate": 3.790463132387656e-05, "loss": 0.5525, "num_tokens": 1885930017.0, "step": 2464 }, { "epoch": 0.9031785288998809, "grad_norm": 0.15802318115191102, "learning_rate": 3.790263015881882e-05, "loss": 0.5092, "num_tokens": 1886770343.0, "step": 2465 }, { "epoch": 0.9035449299258038, "grad_norm": 0.17900572863647965, "learning_rate": 3.790062809773468e-05, "loss": 0.5474, "num_tokens": 1887568921.0, "step": 2466 }, { "epoch": 0.9039113309517267, "grad_norm": 0.18609342742375357, "learning_rate": 3.789862514073692e-05, "loss": 0.5492, "num_tokens": 1888391481.0, "step": 2467 }, { "epoch": 0.9042777319776495, "grad_norm": 0.17392683852331645, "learning_rate": 3.789662128793842e-05, "loss": 0.5571, "num_tokens": 1889075530.0, "step": 2468 }, { "epoch": 0.9046441330035724, "grad_norm": 0.18076380471058626, "learning_rate": 3.789461653945207e-05, "loss": 0.5563, "num_tokens": 1889836006.0, "step": 2469 }, { "epoch": 0.9050105340294953, "grad_norm": 0.19900407964512837, "learning_rate": 3.7892610895390835e-05, "loss": 0.5962, "num_tokens": 1890555364.0, "step": 2470 }, { "epoch": 0.9053769350554182, "grad_norm": 0.15997663092266964, "learning_rate": 3.789060435586773e-05, "loss": 0.5312, "num_tokens": 1891373030.0, "step": 2471 }, { "epoch": 0.905743336081341, "grad_norm": 0.16177170952549227, "learning_rate": 3.788859692099579e-05, "loss": 0.5804, "num_tokens": 1892057242.0, "step": 2472 }, { "epoch": 0.906109737107264, "grad_norm": 0.1924448364579353, "learning_rate": 3.788658859088814e-05, "loss": 0.5373, "num_tokens": 1892809238.0, "step": 2473 }, { "epoch": 0.9064761381331867, "grad_norm": 0.1921813267483528, "learning_rate": 3.788457936565793e-05, "loss": 0.5813, "num_tokens": 1893488008.0, "step": 2474 }, { "epoch": 0.9068425391591096, "grad_norm": 0.17246630041313785, "learning_rate": 3.7882569245418375e-05, "loss": 0.5614, "num_tokens": 1894235180.0, "step": 2475 }, { "epoch": 0.9072089401850325, "grad_norm": 0.19666465708826716, "learning_rate": 3.7880558230282735e-05, "loss": 0.519, "num_tokens": 1895018507.0, "step": 2476 }, { "epoch": 0.9075753412109554, "grad_norm": 0.14772042958842338, "learning_rate": 3.787854632036432e-05, "loss": 0.5448, "num_tokens": 1895845896.0, "step": 2477 }, { "epoch": 0.9079417422368783, "grad_norm": 0.1718985711567184, "learning_rate": 3.787653351577648e-05, "loss": 0.5305, "num_tokens": 1896621807.0, "step": 2478 }, { "epoch": 0.9083081432628012, "grad_norm": 0.16231736990206622, "learning_rate": 3.787451981663263e-05, "loss": 0.5667, "num_tokens": 1897427556.0, "step": 2479 }, { "epoch": 0.908674544288724, "grad_norm": 0.1656303302116806, "learning_rate": 3.787250522304624e-05, "loss": 0.5799, "num_tokens": 1898114584.0, "step": 2480 }, { "epoch": 0.9090409453146469, "grad_norm": 0.18294839614701458, "learning_rate": 3.7870489735130814e-05, "loss": 0.5491, "num_tokens": 1898864174.0, "step": 2481 }, { "epoch": 0.9094073463405697, "grad_norm": 0.15888412097139148, "learning_rate": 3.786847335299991e-05, "loss": 0.5681, "num_tokens": 1899617736.0, "step": 2482 }, { "epoch": 0.9097737473664926, "grad_norm": 0.16506076003889694, "learning_rate": 3.7866456076767145e-05, "loss": 0.5755, "num_tokens": 1900310208.0, "step": 2483 }, { "epoch": 0.9101401483924155, "grad_norm": 0.15719647845144571, "learning_rate": 3.786443790654619e-05, "loss": 0.5075, "num_tokens": 1901141024.0, "step": 2484 }, { "epoch": 0.9105065494183384, "grad_norm": 0.1611209255422428, "learning_rate": 3.7862418842450744e-05, "loss": 0.5656, "num_tokens": 1901966056.0, "step": 2485 }, { "epoch": 0.9108729504442612, "grad_norm": 0.1440663377871398, "learning_rate": 3.786039888459458e-05, "loss": 0.5374, "num_tokens": 1902838240.0, "step": 2486 }, { "epoch": 0.9112393514701841, "grad_norm": 0.176277627733382, "learning_rate": 3.785837803309149e-05, "loss": 0.582, "num_tokens": 1903536256.0, "step": 2487 }, { "epoch": 0.911605752496107, "grad_norm": 0.16624664149147844, "learning_rate": 3.785635628805537e-05, "loss": 0.5402, "num_tokens": 1904245190.0, "step": 2488 }, { "epoch": 0.9119721535220299, "grad_norm": 0.1993923279239986, "learning_rate": 3.7854333649600116e-05, "loss": 0.5767, "num_tokens": 1904882932.0, "step": 2489 }, { "epoch": 0.9123385545479528, "grad_norm": 0.16288216981201414, "learning_rate": 3.7852310117839695e-05, "loss": 0.5471, "num_tokens": 1905685655.0, "step": 2490 }, { "epoch": 0.9127049555738757, "grad_norm": 0.1598292306518652, "learning_rate": 3.785028569288813e-05, "loss": 0.5336, "num_tokens": 1906598458.0, "step": 2491 }, { "epoch": 0.9130713565997984, "grad_norm": 0.16441791182207124, "learning_rate": 3.7848260374859464e-05, "loss": 0.5532, "num_tokens": 1907369391.0, "step": 2492 }, { "epoch": 0.9134377576257213, "grad_norm": 0.15680348132268415, "learning_rate": 3.784623416386784e-05, "loss": 0.5079, "num_tokens": 1908135158.0, "step": 2493 }, { "epoch": 0.9138041586516442, "grad_norm": 0.16168572191128222, "learning_rate": 3.784420706002741e-05, "loss": 0.5315, "num_tokens": 1908821236.0, "step": 2494 }, { "epoch": 0.9141705596775671, "grad_norm": 0.1718442538900975, "learning_rate": 3.784217906345238e-05, "loss": 0.5344, "num_tokens": 1909453038.0, "step": 2495 }, { "epoch": 0.91453696070349, "grad_norm": 0.15625072311000504, "learning_rate": 3.7840150174257046e-05, "loss": 0.5071, "num_tokens": 1910148594.0, "step": 2496 }, { "epoch": 0.9149033617294129, "grad_norm": 0.1453009903379273, "learning_rate": 3.783812039255569e-05, "loss": 0.5015, "num_tokens": 1910921944.0, "step": 2497 }, { "epoch": 0.9152697627553357, "grad_norm": 0.18839242993435584, "learning_rate": 3.7836089718462716e-05, "loss": 0.56, "num_tokens": 1911616056.0, "step": 2498 }, { "epoch": 0.9156361637812586, "grad_norm": 0.20151526786014165, "learning_rate": 3.783405815209251e-05, "loss": 0.5916, "num_tokens": 1912289158.0, "step": 2499 }, { "epoch": 0.9160025648071815, "grad_norm": 0.14769131981089287, "learning_rate": 3.783202569355955e-05, "loss": 0.5516, "num_tokens": 1913060580.0, "step": 2500 }, { "epoch": 0.9163689658331043, "grad_norm": 0.18244044952763422, "learning_rate": 3.782999234297836e-05, "loss": 0.5646, "num_tokens": 1913787718.0, "step": 2501 }, { "epoch": 0.9167353668590272, "grad_norm": 0.16116846730568798, "learning_rate": 3.78279581004635e-05, "loss": 0.5319, "num_tokens": 1914514815.0, "step": 2502 }, { "epoch": 0.9171017678849501, "grad_norm": 0.16838154521168688, "learning_rate": 3.782592296612959e-05, "loss": 0.5094, "num_tokens": 1915243982.0, "step": 2503 }, { "epoch": 0.9174681689108729, "grad_norm": 0.16776282568158707, "learning_rate": 3.782388694009131e-05, "loss": 0.603, "num_tokens": 1915913152.0, "step": 2504 }, { "epoch": 0.9178345699367958, "grad_norm": 0.1551806297730787, "learning_rate": 3.7821850022463356e-05, "loss": 0.5527, "num_tokens": 1916639173.0, "step": 2505 }, { "epoch": 0.9182009709627187, "grad_norm": 0.15740366563007863, "learning_rate": 3.7819812213360524e-05, "loss": 0.5368, "num_tokens": 1917549187.0, "step": 2506 }, { "epoch": 0.9185673719886416, "grad_norm": 0.14811516694488736, "learning_rate": 3.7817773512897615e-05, "loss": 0.5568, "num_tokens": 1918334908.0, "step": 2507 }, { "epoch": 0.9189337730145645, "grad_norm": 0.1667906384992464, "learning_rate": 3.7815733921189504e-05, "loss": 0.5847, "num_tokens": 1919021029.0, "step": 2508 }, { "epoch": 0.9193001740404874, "grad_norm": 0.1669650790852399, "learning_rate": 3.7813693438351105e-05, "loss": 0.5907, "num_tokens": 1919742084.0, "step": 2509 }, { "epoch": 0.9196665750664101, "grad_norm": 0.1715770224088313, "learning_rate": 3.781165206449741e-05, "loss": 0.5478, "num_tokens": 1920525533.0, "step": 2510 }, { "epoch": 0.920032976092333, "grad_norm": 0.16872950789132415, "learning_rate": 3.780960979974341e-05, "loss": 0.5849, "num_tokens": 1921247990.0, "step": 2511 }, { "epoch": 0.9203993771182559, "grad_norm": 0.17140749071762487, "learning_rate": 3.7807566644204193e-05, "loss": 0.5784, "num_tokens": 1921947279.0, "step": 2512 }, { "epoch": 0.9207657781441788, "grad_norm": 0.17499201004179318, "learning_rate": 3.780552259799487e-05, "loss": 0.5807, "num_tokens": 1922722886.0, "step": 2513 }, { "epoch": 0.9211321791701017, "grad_norm": 0.17985108495791494, "learning_rate": 3.780347766123063e-05, "loss": 0.5596, "num_tokens": 1923451916.0, "step": 2514 }, { "epoch": 0.9214985801960246, "grad_norm": 0.16771863814761478, "learning_rate": 3.780143183402667e-05, "loss": 0.5511, "num_tokens": 1924162483.0, "step": 2515 }, { "epoch": 0.9218649812219474, "grad_norm": 0.17304917750200857, "learning_rate": 3.779938511649827e-05, "loss": 0.546, "num_tokens": 1924892090.0, "step": 2516 }, { "epoch": 0.9222313822478703, "grad_norm": 0.16041176748673475, "learning_rate": 3.779733750876076e-05, "loss": 0.5435, "num_tokens": 1925674851.0, "step": 2517 }, { "epoch": 0.9225977832737932, "grad_norm": 0.1737088926786358, "learning_rate": 3.77952890109295e-05, "loss": 0.553, "num_tokens": 1926322684.0, "step": 2518 }, { "epoch": 0.922964184299716, "grad_norm": 0.15808382025719842, "learning_rate": 3.779323962311992e-05, "loss": 0.5436, "num_tokens": 1927142031.0, "step": 2519 }, { "epoch": 0.9233305853256389, "grad_norm": 0.15266582656511213, "learning_rate": 3.779118934544749e-05, "loss": 0.5199, "num_tokens": 1927997948.0, "step": 2520 }, { "epoch": 0.9236969863515618, "grad_norm": 0.1571351448057999, "learning_rate": 3.778913817802773e-05, "loss": 0.5187, "num_tokens": 1928711696.0, "step": 2521 }, { "epoch": 0.9240633873774846, "grad_norm": 0.16786519907588052, "learning_rate": 3.778708612097621e-05, "loss": 0.5767, "num_tokens": 1929470402.0, "step": 2522 }, { "epoch": 0.9244297884034075, "grad_norm": 0.1792168367034148, "learning_rate": 3.778503317440856e-05, "loss": 0.6285, "num_tokens": 1930314479.0, "step": 2523 }, { "epoch": 0.9247961894293304, "grad_norm": 0.15754477427863348, "learning_rate": 3.778297933844045e-05, "loss": 0.5673, "num_tokens": 1931107604.0, "step": 2524 }, { "epoch": 0.9251625904552533, "grad_norm": 0.15076865211297427, "learning_rate": 3.778092461318759e-05, "loss": 0.5598, "num_tokens": 1931896218.0, "step": 2525 }, { "epoch": 0.9255289914811762, "grad_norm": 0.1656250911795971, "learning_rate": 3.7778868998765765e-05, "loss": 0.5552, "num_tokens": 1932672347.0, "step": 2526 }, { "epoch": 0.9258953925070991, "grad_norm": 0.17205511352933267, "learning_rate": 3.77768124952908e-05, "loss": 0.5615, "num_tokens": 1933391526.0, "step": 2527 }, { "epoch": 0.9262617935330218, "grad_norm": 0.1578973890047386, "learning_rate": 3.777475510287856e-05, "loss": 0.5356, "num_tokens": 1934165884.0, "step": 2528 }, { "epoch": 0.9266281945589447, "grad_norm": 0.15416101849409683, "learning_rate": 3.777269682164497e-05, "loss": 0.5351, "num_tokens": 1935022923.0, "step": 2529 }, { "epoch": 0.9269945955848676, "grad_norm": 0.16527863608999363, "learning_rate": 3.7770637651706e-05, "loss": 0.5628, "num_tokens": 1935694394.0, "step": 2530 }, { "epoch": 0.9273609966107905, "grad_norm": 0.15294132608509448, "learning_rate": 3.776857759317769e-05, "loss": 0.5716, "num_tokens": 1936444989.0, "step": 2531 }, { "epoch": 0.9277273976367134, "grad_norm": 0.16155823367415945, "learning_rate": 3.776651664617609e-05, "loss": 0.5352, "num_tokens": 1937251131.0, "step": 2532 }, { "epoch": 0.9280937986626363, "grad_norm": 0.15145549478431955, "learning_rate": 3.776445481081733e-05, "loss": 0.541, "num_tokens": 1938113370.0, "step": 2533 }, { "epoch": 0.9284601996885591, "grad_norm": 0.1809175131993074, "learning_rate": 3.776239208721759e-05, "loss": 0.5667, "num_tokens": 1938853501.0, "step": 2534 }, { "epoch": 0.928826600714482, "grad_norm": 0.1561073550338385, "learning_rate": 3.776032847549308e-05, "loss": 0.5791, "num_tokens": 1939682292.0, "step": 2535 }, { "epoch": 0.9291930017404049, "grad_norm": 0.19720716735685104, "learning_rate": 3.7758263975760094e-05, "loss": 0.5747, "num_tokens": 1940449385.0, "step": 2536 }, { "epoch": 0.9295594027663278, "grad_norm": 0.1620375576309438, "learning_rate": 3.775619858813494e-05, "loss": 0.528, "num_tokens": 1941217234.0, "step": 2537 }, { "epoch": 0.9299258037922506, "grad_norm": 0.16390708668522183, "learning_rate": 3.775413231273399e-05, "loss": 0.5358, "num_tokens": 1942070991.0, "step": 2538 }, { "epoch": 0.9302922048181735, "grad_norm": 0.16825218553875576, "learning_rate": 3.775206514967368e-05, "loss": 0.5628, "num_tokens": 1942801637.0, "step": 2539 }, { "epoch": 0.9306586058440963, "grad_norm": 0.20301887799976598, "learning_rate": 3.774999709907047e-05, "loss": 0.5718, "num_tokens": 1943581261.0, "step": 2540 }, { "epoch": 0.9310250068700192, "grad_norm": 0.15383185154226495, "learning_rate": 3.7747928161040894e-05, "loss": 0.5289, "num_tokens": 1944357325.0, "step": 2541 }, { "epoch": 0.9313914078959421, "grad_norm": 0.17223745006644442, "learning_rate": 3.774585833570152e-05, "loss": 0.5373, "num_tokens": 1945081392.0, "step": 2542 }, { "epoch": 0.931757808921865, "grad_norm": 0.1639165136026701, "learning_rate": 3.7743787623168963e-05, "loss": 0.5373, "num_tokens": 1945880607.0, "step": 2543 }, { "epoch": 0.9321242099477879, "grad_norm": 0.18643954529240397, "learning_rate": 3.774171602355991e-05, "loss": 0.5827, "num_tokens": 1946547697.0, "step": 2544 }, { "epoch": 0.9324906109737108, "grad_norm": 0.17853463516567725, "learning_rate": 3.7739643536991083e-05, "loss": 0.5698, "num_tokens": 1947325517.0, "step": 2545 }, { "epoch": 0.9328570119996336, "grad_norm": 0.17467702990850184, "learning_rate": 3.7737570163579254e-05, "loss": 0.5392, "num_tokens": 1948047352.0, "step": 2546 }, { "epoch": 0.9332234130255564, "grad_norm": 0.1821245888550301, "learning_rate": 3.773549590344123e-05, "loss": 0.5767, "num_tokens": 1948719642.0, "step": 2547 }, { "epoch": 0.9335898140514793, "grad_norm": 0.16850370605779613, "learning_rate": 3.773342075669391e-05, "loss": 0.5295, "num_tokens": 1949489591.0, "step": 2548 }, { "epoch": 0.9339562150774022, "grad_norm": 0.19328299290247297, "learning_rate": 3.7731344723454204e-05, "loss": 0.5756, "num_tokens": 1950210843.0, "step": 2549 }, { "epoch": 0.9343226161033251, "grad_norm": 0.159854379848784, "learning_rate": 3.772926780383908e-05, "loss": 0.5278, "num_tokens": 1950989687.0, "step": 2550 }, { "epoch": 0.934689017129248, "grad_norm": 0.18215896334201934, "learning_rate": 3.7727189997965575e-05, "loss": 0.5693, "num_tokens": 1951746397.0, "step": 2551 }, { "epoch": 0.9350554181551708, "grad_norm": 0.14851116362487332, "learning_rate": 3.772511130595075e-05, "loss": 0.5573, "num_tokens": 1952556887.0, "step": 2552 }, { "epoch": 0.9354218191810937, "grad_norm": 0.17181535037182835, "learning_rate": 3.772303172791174e-05, "loss": 0.5572, "num_tokens": 1953203696.0, "step": 2553 }, { "epoch": 0.9357882202070166, "grad_norm": 0.1721071568420864, "learning_rate": 3.77209512639657e-05, "loss": 0.5609, "num_tokens": 1954010561.0, "step": 2554 }, { "epoch": 0.9361546212329395, "grad_norm": 0.16338605862035235, "learning_rate": 3.771886991422987e-05, "loss": 0.5548, "num_tokens": 1954647297.0, "step": 2555 }, { "epoch": 0.9365210222588624, "grad_norm": 0.18414992690074614, "learning_rate": 3.771678767882152e-05, "loss": 0.5684, "num_tokens": 1955434935.0, "step": 2556 }, { "epoch": 0.9368874232847852, "grad_norm": 0.1712189369824638, "learning_rate": 3.7714704557857957e-05, "loss": 0.5501, "num_tokens": 1956172415.0, "step": 2557 }, { "epoch": 0.937253824310708, "grad_norm": 0.19363231737941644, "learning_rate": 3.7712620551456575e-05, "loss": 0.5445, "num_tokens": 1956963773.0, "step": 2558 }, { "epoch": 0.9376202253366309, "grad_norm": 0.16727359495141, "learning_rate": 3.771053565973478e-05, "loss": 0.5209, "num_tokens": 1957710410.0, "step": 2559 }, { "epoch": 0.9379866263625538, "grad_norm": 0.1671048530244807, "learning_rate": 3.7708449882810054e-05, "loss": 0.5529, "num_tokens": 1958508977.0, "step": 2560 }, { "epoch": 0.9383530273884767, "grad_norm": 0.21649357189782434, "learning_rate": 3.770636322079991e-05, "loss": 0.551, "num_tokens": 1959169309.0, "step": 2561 }, { "epoch": 0.9387194284143996, "grad_norm": 0.18046580112201102, "learning_rate": 3.7704275673821936e-05, "loss": 0.5532, "num_tokens": 1959897714.0, "step": 2562 }, { "epoch": 0.9390858294403225, "grad_norm": 0.18262463883796337, "learning_rate": 3.770218724199373e-05, "loss": 0.553, "num_tokens": 1960605696.0, "step": 2563 }, { "epoch": 0.9394522304662453, "grad_norm": 0.1753221225715318, "learning_rate": 3.770009792543299e-05, "loss": 0.5694, "num_tokens": 1961404146.0, "step": 2564 }, { "epoch": 0.9398186314921682, "grad_norm": 0.17692650520858286, "learning_rate": 3.769800772425742e-05, "loss": 0.5965, "num_tokens": 1962120543.0, "step": 2565 }, { "epoch": 0.940185032518091, "grad_norm": 0.16469060359700585, "learning_rate": 3.7695916638584796e-05, "loss": 0.5505, "num_tokens": 1963007677.0, "step": 2566 }, { "epoch": 0.9405514335440139, "grad_norm": 0.17515815555832576, "learning_rate": 3.7693824668532945e-05, "loss": 0.5477, "num_tokens": 1963850812.0, "step": 2567 }, { "epoch": 0.9409178345699368, "grad_norm": 0.16716086469975744, "learning_rate": 3.7691731814219726e-05, "loss": 0.5683, "num_tokens": 1964629383.0, "step": 2568 }, { "epoch": 0.9412842355958597, "grad_norm": 0.166238575276799, "learning_rate": 3.768963807576307e-05, "loss": 0.5505, "num_tokens": 1965367770.0, "step": 2569 }, { "epoch": 0.9416506366217825, "grad_norm": 0.17070048256136983, "learning_rate": 3.7687543453280946e-05, "loss": 0.5445, "num_tokens": 1966105338.0, "step": 2570 }, { "epoch": 0.9420170376477054, "grad_norm": 0.17985259057109473, "learning_rate": 3.7685447946891374e-05, "loss": 0.5427, "num_tokens": 1967000469.0, "step": 2571 }, { "epoch": 0.9423834386736283, "grad_norm": 0.16600212669116696, "learning_rate": 3.7683351556712416e-05, "loss": 0.5878, "num_tokens": 1967597569.0, "step": 2572 }, { "epoch": 0.9427498396995512, "grad_norm": 0.20082777736258528, "learning_rate": 3.76812542828622e-05, "loss": 0.5905, "num_tokens": 1968373761.0, "step": 2573 }, { "epoch": 0.9431162407254741, "grad_norm": 0.16464948606542903, "learning_rate": 3.7679156125458905e-05, "loss": 0.528, "num_tokens": 1969144702.0, "step": 2574 }, { "epoch": 0.943482641751397, "grad_norm": 0.1863804054439373, "learning_rate": 3.7677057084620734e-05, "loss": 0.5893, "num_tokens": 1969872042.0, "step": 2575 }, { "epoch": 0.9438490427773197, "grad_norm": 0.17177641949012817, "learning_rate": 3.767495716046597e-05, "loss": 0.5268, "num_tokens": 1970642890.0, "step": 2576 }, { "epoch": 0.9442154438032426, "grad_norm": 0.17601158290796445, "learning_rate": 3.7672856353112914e-05, "loss": 0.5375, "num_tokens": 1971391290.0, "step": 2577 }, { "epoch": 0.9445818448291655, "grad_norm": 0.17264265595413877, "learning_rate": 3.767075466267995e-05, "loss": 0.5765, "num_tokens": 1972155329.0, "step": 2578 }, { "epoch": 0.9449482458550884, "grad_norm": 0.16329375058604548, "learning_rate": 3.76686520892855e-05, "loss": 0.551, "num_tokens": 1972985664.0, "step": 2579 }, { "epoch": 0.9453146468810113, "grad_norm": 0.1811272148806615, "learning_rate": 3.766654863304803e-05, "loss": 0.5312, "num_tokens": 1973822912.0, "step": 2580 }, { "epoch": 0.9456810479069342, "grad_norm": 0.18007752778500988, "learning_rate": 3.766444429408604e-05, "loss": 0.5695, "num_tokens": 1974643148.0, "step": 2581 }, { "epoch": 0.946047448932857, "grad_norm": 0.17888696728977432, "learning_rate": 3.766233907251813e-05, "loss": 0.5584, "num_tokens": 1975450545.0, "step": 2582 }, { "epoch": 0.9464138499587799, "grad_norm": 0.17205624100444392, "learning_rate": 3.766023296846288e-05, "loss": 0.5983, "num_tokens": 1976108160.0, "step": 2583 }, { "epoch": 0.9467802509847028, "grad_norm": 0.18169147831106822, "learning_rate": 3.7658125982038994e-05, "loss": 0.5644, "num_tokens": 1976953187.0, "step": 2584 }, { "epoch": 0.9471466520106256, "grad_norm": 0.15508447033194622, "learning_rate": 3.765601811336517e-05, "loss": 0.5555, "num_tokens": 1977771974.0, "step": 2585 }, { "epoch": 0.9475130530365485, "grad_norm": 0.16177415986804833, "learning_rate": 3.7653909362560175e-05, "loss": 0.5271, "num_tokens": 1978635434.0, "step": 2586 }, { "epoch": 0.9478794540624714, "grad_norm": 0.17058432927517145, "learning_rate": 3.765179972974283e-05, "loss": 0.565, "num_tokens": 1979327986.0, "step": 2587 }, { "epoch": 0.9482458550883942, "grad_norm": 0.15105605257679994, "learning_rate": 3.7649689215032e-05, "loss": 0.5088, "num_tokens": 1980095261.0, "step": 2588 }, { "epoch": 0.9486122561143171, "grad_norm": 0.16551815779729936, "learning_rate": 3.76475778185466e-05, "loss": 0.5444, "num_tokens": 1981034828.0, "step": 2589 }, { "epoch": 0.94897865714024, "grad_norm": 0.1934123269776675, "learning_rate": 3.76454655404056e-05, "loss": 0.5766, "num_tokens": 1981791293.0, "step": 2590 }, { "epoch": 0.9493450581661629, "grad_norm": 0.18627801163704166, "learning_rate": 3.7643352380728e-05, "loss": 0.5462, "num_tokens": 1982518609.0, "step": 2591 }, { "epoch": 0.9497114591920858, "grad_norm": 0.1713698974425174, "learning_rate": 3.764123833963289e-05, "loss": 0.5342, "num_tokens": 1983233155.0, "step": 2592 }, { "epoch": 0.9500778602180087, "grad_norm": 0.17067154569418555, "learning_rate": 3.763912341723936e-05, "loss": 0.5352, "num_tokens": 1984062731.0, "step": 2593 }, { "epoch": 0.9504442612439314, "grad_norm": 0.17741331980178096, "learning_rate": 3.763700761366659e-05, "loss": 0.5403, "num_tokens": 1984787207.0, "step": 2594 }, { "epoch": 0.9508106622698543, "grad_norm": 0.17493343091797522, "learning_rate": 3.76348909290338e-05, "loss": 0.5401, "num_tokens": 1985545994.0, "step": 2595 }, { "epoch": 0.9511770632957772, "grad_norm": 0.17251319625324857, "learning_rate": 3.763277336346023e-05, "loss": 0.5148, "num_tokens": 1986255476.0, "step": 2596 }, { "epoch": 0.9515434643217001, "grad_norm": 0.1636524659511774, "learning_rate": 3.763065491706521e-05, "loss": 0.5117, "num_tokens": 1987157855.0, "step": 2597 }, { "epoch": 0.951909865347623, "grad_norm": 0.18481303098082266, "learning_rate": 3.76285355899681e-05, "loss": 0.5474, "num_tokens": 1987819717.0, "step": 2598 }, { "epoch": 0.9522762663735459, "grad_norm": 0.16243842167679934, "learning_rate": 3.7626415382288314e-05, "loss": 0.5563, "num_tokens": 1988575014.0, "step": 2599 }, { "epoch": 0.9526426673994687, "grad_norm": 0.16997144634107172, "learning_rate": 3.762429429414531e-05, "loss": 0.5487, "num_tokens": 1989428636.0, "step": 2600 }, { "epoch": 0.9530090684253916, "grad_norm": 0.1889142462192695, "learning_rate": 3.7622172325658605e-05, "loss": 0.5401, "num_tokens": 1990201856.0, "step": 2601 }, { "epoch": 0.9533754694513145, "grad_norm": 0.15449648674246158, "learning_rate": 3.7620049476947755e-05, "loss": 0.5272, "num_tokens": 1991016093.0, "step": 2602 }, { "epoch": 0.9537418704772374, "grad_norm": 0.1996773182953543, "learning_rate": 3.761792574813238e-05, "loss": 0.5429, "num_tokens": 1991870933.0, "step": 2603 }, { "epoch": 0.9541082715031602, "grad_norm": 0.2126982648007531, "learning_rate": 3.761580113933212e-05, "loss": 0.5653, "num_tokens": 1992585872.0, "step": 2604 }, { "epoch": 0.9544746725290831, "grad_norm": 0.16635332159136676, "learning_rate": 3.761367565066671e-05, "loss": 0.5641, "num_tokens": 1993309579.0, "step": 2605 }, { "epoch": 0.9548410735550059, "grad_norm": 0.20683764640992094, "learning_rate": 3.761154928225589e-05, "loss": 0.586, "num_tokens": 1993923134.0, "step": 2606 }, { "epoch": 0.9552074745809288, "grad_norm": 0.19888595686950492, "learning_rate": 3.760942203421949e-05, "loss": 0.5485, "num_tokens": 1994765763.0, "step": 2607 }, { "epoch": 0.9555738756068517, "grad_norm": 0.17316522315908942, "learning_rate": 3.760729390667734e-05, "loss": 0.5893, "num_tokens": 1995573064.0, "step": 2608 }, { "epoch": 0.9559402766327746, "grad_norm": 0.21495316065750614, "learning_rate": 3.7605164899749376e-05, "loss": 0.5709, "num_tokens": 1996296423.0, "step": 2609 }, { "epoch": 0.9563066776586975, "grad_norm": 0.20175896928691334, "learning_rate": 3.7603035013555544e-05, "loss": 0.567, "num_tokens": 1997089379.0, "step": 2610 }, { "epoch": 0.9566730786846204, "grad_norm": 0.17684236098163023, "learning_rate": 3.7600904248215855e-05, "loss": 0.5812, "num_tokens": 1997803916.0, "step": 2611 }, { "epoch": 0.9570394797105432, "grad_norm": 0.2292592035748412, "learning_rate": 3.759877260385036e-05, "loss": 0.594, "num_tokens": 1998606276.0, "step": 2612 }, { "epoch": 0.957405880736466, "grad_norm": 0.17612332634504305, "learning_rate": 3.759664008057916e-05, "loss": 0.5876, "num_tokens": 1999360852.0, "step": 2613 }, { "epoch": 0.9577722817623889, "grad_norm": 0.1708455775154065, "learning_rate": 3.759450667852243e-05, "loss": 0.5504, "num_tokens": 2000072013.0, "step": 2614 }, { "epoch": 0.9581386827883118, "grad_norm": 0.2051522155875703, "learning_rate": 3.759237239780036e-05, "loss": 0.5359, "num_tokens": 2000902394.0, "step": 2615 }, { "epoch": 0.9585050838142347, "grad_norm": 0.18134983496291657, "learning_rate": 3.759023723853322e-05, "loss": 0.5974, "num_tokens": 2001514356.0, "step": 2616 }, { "epoch": 0.9588714848401576, "grad_norm": 0.19196688854866828, "learning_rate": 3.75881012008413e-05, "loss": 0.5691, "num_tokens": 2002201406.0, "step": 2617 }, { "epoch": 0.9592378858660804, "grad_norm": 0.17799764103385357, "learning_rate": 3.758596428484496e-05, "loss": 0.6004, "num_tokens": 2002928816.0, "step": 2618 }, { "epoch": 0.9596042868920033, "grad_norm": 0.17049851954613682, "learning_rate": 3.75838264906646e-05, "loss": 0.5349, "num_tokens": 2003705406.0, "step": 2619 }, { "epoch": 0.9599706879179262, "grad_norm": 0.15740125731410087, "learning_rate": 3.758168781842067e-05, "loss": 0.5472, "num_tokens": 2004408440.0, "step": 2620 }, { "epoch": 0.9603370889438491, "grad_norm": 0.16336572666326543, "learning_rate": 3.7579548268233684e-05, "loss": 0.5492, "num_tokens": 2005060521.0, "step": 2621 }, { "epoch": 0.960703489969772, "grad_norm": 0.17050948866980592, "learning_rate": 3.75774078402242e-05, "loss": 0.5656, "num_tokens": 2005774143.0, "step": 2622 }, { "epoch": 0.9610698909956948, "grad_norm": 0.16890287800122686, "learning_rate": 3.757526653451279e-05, "loss": 0.5737, "num_tokens": 2006588678.0, "step": 2623 }, { "epoch": 0.9614362920216176, "grad_norm": 0.15268647430317456, "learning_rate": 3.757312435122013e-05, "loss": 0.5561, "num_tokens": 2007437650.0, "step": 2624 }, { "epoch": 0.9618026930475405, "grad_norm": 0.18002377983509543, "learning_rate": 3.7570981290466915e-05, "loss": 0.5511, "num_tokens": 2008094855.0, "step": 2625 }, { "epoch": 0.9621690940734634, "grad_norm": 0.15090309729279547, "learning_rate": 3.7568837352373885e-05, "loss": 0.5057, "num_tokens": 2008813276.0, "step": 2626 }, { "epoch": 0.9625354950993863, "grad_norm": 0.14492085316517286, "learning_rate": 3.7566692537061856e-05, "loss": 0.55, "num_tokens": 2009537154.0, "step": 2627 }, { "epoch": 0.9629018961253092, "grad_norm": 0.15132663859872483, "learning_rate": 3.756454684465166e-05, "loss": 0.5367, "num_tokens": 2010330104.0, "step": 2628 }, { "epoch": 0.9632682971512321, "grad_norm": 0.16216430182897354, "learning_rate": 3.756240027526421e-05, "loss": 0.543, "num_tokens": 2011155616.0, "step": 2629 }, { "epoch": 0.9636346981771549, "grad_norm": 0.14504636415689987, "learning_rate": 3.756025282902044e-05, "loss": 0.5431, "num_tokens": 2011932961.0, "step": 2630 }, { "epoch": 0.9640010992030777, "grad_norm": 0.16896677090640155, "learning_rate": 3.7558104506041355e-05, "loss": 0.5711, "num_tokens": 2012748167.0, "step": 2631 }, { "epoch": 0.9643675002290006, "grad_norm": 0.17644322293274806, "learning_rate": 3.7555955306448e-05, "loss": 0.5483, "num_tokens": 2013464129.0, "step": 2632 }, { "epoch": 0.9647339012549235, "grad_norm": 0.17256888428726977, "learning_rate": 3.7553805230361465e-05, "loss": 0.5627, "num_tokens": 2014188668.0, "step": 2633 }, { "epoch": 0.9651003022808464, "grad_norm": 0.16328665034003306, "learning_rate": 3.755165427790291e-05, "loss": 0.5761, "num_tokens": 2014945491.0, "step": 2634 }, { "epoch": 0.9654667033067693, "grad_norm": 0.19295962984722922, "learning_rate": 3.7549502449193514e-05, "loss": 0.5565, "num_tokens": 2015675669.0, "step": 2635 }, { "epoch": 0.9658331043326921, "grad_norm": 0.1569930623727823, "learning_rate": 3.754734974435453e-05, "loss": 0.5451, "num_tokens": 2016419650.0, "step": 2636 }, { "epoch": 0.966199505358615, "grad_norm": 0.17363025371516236, "learning_rate": 3.754519616350724e-05, "loss": 0.5884, "num_tokens": 2017114019.0, "step": 2637 }, { "epoch": 0.9665659063845379, "grad_norm": 0.18898342216952804, "learning_rate": 3.7543041706772994e-05, "loss": 0.5538, "num_tokens": 2017795636.0, "step": 2638 }, { "epoch": 0.9669323074104608, "grad_norm": 0.1659053435777532, "learning_rate": 3.754088637427318e-05, "loss": 0.5255, "num_tokens": 2018585471.0, "step": 2639 }, { "epoch": 0.9672987084363837, "grad_norm": 0.19635263972579445, "learning_rate": 3.753873016612925e-05, "loss": 0.546, "num_tokens": 2019363665.0, "step": 2640 }, { "epoch": 0.9676651094623065, "grad_norm": 0.18226977594785218, "learning_rate": 3.753657308246269e-05, "loss": 0.532, "num_tokens": 2020140096.0, "step": 2641 }, { "epoch": 0.9680315104882293, "grad_norm": 0.19929610378607787, "learning_rate": 3.753441512339503e-05, "loss": 0.536, "num_tokens": 2021007540.0, "step": 2642 }, { "epoch": 0.9683979115141522, "grad_norm": 0.17899150564855726, "learning_rate": 3.753225628904788e-05, "loss": 0.5455, "num_tokens": 2021933958.0, "step": 2643 }, { "epoch": 0.9687643125400751, "grad_norm": 0.18940510119980697, "learning_rate": 3.7530096579542854e-05, "loss": 0.5245, "num_tokens": 2022723846.0, "step": 2644 }, { "epoch": 0.969130713565998, "grad_norm": 0.17140000168699257, "learning_rate": 3.752793599500165e-05, "loss": 0.5376, "num_tokens": 2023404548.0, "step": 2645 }, { "epoch": 0.9694971145919209, "grad_norm": 0.1748922329503326, "learning_rate": 3.7525774535546015e-05, "loss": 0.5431, "num_tokens": 2024114871.0, "step": 2646 }, { "epoch": 0.9698635156178437, "grad_norm": 0.1677249594139352, "learning_rate": 3.752361220129772e-05, "loss": 0.5435, "num_tokens": 2024871901.0, "step": 2647 }, { "epoch": 0.9702299166437666, "grad_norm": 0.1722558249432404, "learning_rate": 3.7521448992378605e-05, "loss": 0.536, "num_tokens": 2025619482.0, "step": 2648 }, { "epoch": 0.9705963176696895, "grad_norm": 0.19174138914622246, "learning_rate": 3.751928490891056e-05, "loss": 0.5438, "num_tokens": 2026333927.0, "step": 2649 }, { "epoch": 0.9709627186956123, "grad_norm": 0.18184954462890493, "learning_rate": 3.751711995101552e-05, "loss": 0.5475, "num_tokens": 2027091600.0, "step": 2650 }, { "epoch": 0.9713291197215352, "grad_norm": 0.16689623239876225, "learning_rate": 3.7514954118815456e-05, "loss": 0.5202, "num_tokens": 2027891802.0, "step": 2651 }, { "epoch": 0.9716955207474581, "grad_norm": 0.19671457483821703, "learning_rate": 3.751278741243241e-05, "loss": 0.549, "num_tokens": 2028718430.0, "step": 2652 }, { "epoch": 0.9720619217733809, "grad_norm": 0.18012402748854364, "learning_rate": 3.7510619831988475e-05, "loss": 0.5552, "num_tokens": 2029466332.0, "step": 2653 }, { "epoch": 0.9724283227993038, "grad_norm": 0.17604627856754182, "learning_rate": 3.750845137760577e-05, "loss": 0.5686, "num_tokens": 2030242961.0, "step": 2654 }, { "epoch": 0.9727947238252267, "grad_norm": 0.2027087326797253, "learning_rate": 3.750628204940647e-05, "loss": 0.5596, "num_tokens": 2031054097.0, "step": 2655 }, { "epoch": 0.9731611248511496, "grad_norm": 0.1722019359549401, "learning_rate": 3.750411184751282e-05, "loss": 0.5285, "num_tokens": 2031810178.0, "step": 2656 }, { "epoch": 0.9735275258770725, "grad_norm": 0.21919709257250494, "learning_rate": 3.750194077204708e-05, "loss": 0.5405, "num_tokens": 2032647198.0, "step": 2657 }, { "epoch": 0.9738939269029954, "grad_norm": 0.18743425706309297, "learning_rate": 3.74997688231316e-05, "loss": 0.5435, "num_tokens": 2033429391.0, "step": 2658 }, { "epoch": 0.9742603279289181, "grad_norm": 0.19112411210343094, "learning_rate": 3.7497596000888735e-05, "loss": 0.546, "num_tokens": 2034233866.0, "step": 2659 }, { "epoch": 0.974626728954841, "grad_norm": 0.19023336699433793, "learning_rate": 3.7495422305440935e-05, "loss": 0.5429, "num_tokens": 2034978213.0, "step": 2660 }, { "epoch": 0.9749931299807639, "grad_norm": 0.2140879051785515, "learning_rate": 3.749324773691065e-05, "loss": 0.532, "num_tokens": 2035746961.0, "step": 2661 }, { "epoch": 0.9753595310066868, "grad_norm": 0.1680611149855662, "learning_rate": 3.7491072295420434e-05, "loss": 0.5799, "num_tokens": 2036511766.0, "step": 2662 }, { "epoch": 0.9757259320326097, "grad_norm": 0.1868114283648724, "learning_rate": 3.748889598109284e-05, "loss": 0.5454, "num_tokens": 2037416153.0, "step": 2663 }, { "epoch": 0.9760923330585326, "grad_norm": 0.1818432400706982, "learning_rate": 3.74867187940505e-05, "loss": 0.5515, "num_tokens": 2038201050.0, "step": 2664 }, { "epoch": 0.9764587340844554, "grad_norm": 0.16441818975777056, "learning_rate": 3.748454073441608e-05, "loss": 0.5513, "num_tokens": 2038973829.0, "step": 2665 }, { "epoch": 0.9768251351103783, "grad_norm": 0.2016303622309546, "learning_rate": 3.748236180231231e-05, "loss": 0.5682, "num_tokens": 2039707942.0, "step": 2666 }, { "epoch": 0.9771915361363012, "grad_norm": 0.17068707379296064, "learning_rate": 3.748018199786196e-05, "loss": 0.5492, "num_tokens": 2040513338.0, "step": 2667 }, { "epoch": 0.977557937162224, "grad_norm": 0.19866066739870186, "learning_rate": 3.747800132118784e-05, "loss": 0.5357, "num_tokens": 2041139365.0, "step": 2668 }, { "epoch": 0.977924338188147, "grad_norm": 0.1854171758818911, "learning_rate": 3.747581977241283e-05, "loss": 0.5569, "num_tokens": 2041922761.0, "step": 2669 }, { "epoch": 0.9782907392140698, "grad_norm": 0.23771948121061173, "learning_rate": 3.7473637351659844e-05, "loss": 0.5531, "num_tokens": 2042704180.0, "step": 2670 }, { "epoch": 0.9786571402399926, "grad_norm": 0.17394023532722602, "learning_rate": 3.7471454059051853e-05, "loss": 0.5647, "num_tokens": 2043575836.0, "step": 2671 }, { "epoch": 0.9790235412659155, "grad_norm": 0.168668378314355, "learning_rate": 3.746926989471187e-05, "loss": 0.5776, "num_tokens": 2044331181.0, "step": 2672 }, { "epoch": 0.9793899422918384, "grad_norm": 0.18314928110173828, "learning_rate": 3.746708485876295e-05, "loss": 0.5103, "num_tokens": 2045268267.0, "step": 2673 }, { "epoch": 0.9797563433177613, "grad_norm": 0.1829187109967729, "learning_rate": 3.746489895132824e-05, "loss": 0.5632, "num_tokens": 2046045810.0, "step": 2674 }, { "epoch": 0.9801227443436842, "grad_norm": 0.1596490679782415, "learning_rate": 3.746271217253087e-05, "loss": 0.5458, "num_tokens": 2046850038.0, "step": 2675 }, { "epoch": 0.9804891453696071, "grad_norm": 0.18947158890015192, "learning_rate": 3.7460524522494066e-05, "loss": 0.5685, "num_tokens": 2047612440.0, "step": 2676 }, { "epoch": 0.9808555463955299, "grad_norm": 0.19672363534804987, "learning_rate": 3.74583360013411e-05, "loss": 0.5613, "num_tokens": 2048381323.0, "step": 2677 }, { "epoch": 0.9812219474214527, "grad_norm": 0.17059585029022217, "learning_rate": 3.7456146609195265e-05, "loss": 0.5616, "num_tokens": 2049116410.0, "step": 2678 }, { "epoch": 0.9815883484473756, "grad_norm": 0.2246518556096526, "learning_rate": 3.745395634617994e-05, "loss": 0.5345, "num_tokens": 2049746808.0, "step": 2679 }, { "epoch": 0.9819547494732985, "grad_norm": 0.19016255606363197, "learning_rate": 3.745176521241851e-05, "loss": 0.5849, "num_tokens": 2050428380.0, "step": 2680 }, { "epoch": 0.9823211504992214, "grad_norm": 0.2319734318056936, "learning_rate": 3.744957320803446e-05, "loss": 0.5698, "num_tokens": 2051158953.0, "step": 2681 }, { "epoch": 0.9826875515251443, "grad_norm": 0.21591313483026192, "learning_rate": 3.744738033315128e-05, "loss": 0.5455, "num_tokens": 2051991912.0, "step": 2682 }, { "epoch": 0.9830539525510671, "grad_norm": 0.19822739613244442, "learning_rate": 3.744518658789253e-05, "loss": 0.4991, "num_tokens": 2052798860.0, "step": 2683 }, { "epoch": 0.98342035357699, "grad_norm": 0.19144671819905268, "learning_rate": 3.7442991972381825e-05, "loss": 0.5035, "num_tokens": 2053586797.0, "step": 2684 }, { "epoch": 0.9837867546029129, "grad_norm": 0.1773479289043394, "learning_rate": 3.744079648674281e-05, "loss": 0.5884, "num_tokens": 2054346293.0, "step": 2685 }, { "epoch": 0.9841531556288358, "grad_norm": 0.22288596389727086, "learning_rate": 3.743860013109919e-05, "loss": 0.551, "num_tokens": 2055144001.0, "step": 2686 }, { "epoch": 0.9845195566547587, "grad_norm": 0.1635383445102445, "learning_rate": 3.743640290557471e-05, "loss": 0.547, "num_tokens": 2055826056.0, "step": 2687 }, { "epoch": 0.9848859576806815, "grad_norm": 0.1789184255979525, "learning_rate": 3.743420481029319e-05, "loss": 0.5311, "num_tokens": 2056632756.0, "step": 2688 }, { "epoch": 0.9852523587066043, "grad_norm": 0.17092223194909315, "learning_rate": 3.7432005845378465e-05, "loss": 0.5289, "num_tokens": 2057416971.0, "step": 2689 }, { "epoch": 0.9856187597325272, "grad_norm": 0.18015660359882213, "learning_rate": 3.742980601095445e-05, "loss": 0.5354, "num_tokens": 2058176317.0, "step": 2690 }, { "epoch": 0.9859851607584501, "grad_norm": 0.1618176047288596, "learning_rate": 3.742760530714508e-05, "loss": 0.5602, "num_tokens": 2058872723.0, "step": 2691 }, { "epoch": 0.986351561784373, "grad_norm": 0.18231753122987251, "learning_rate": 3.742540373407435e-05, "loss": 0.5633, "num_tokens": 2059628116.0, "step": 2692 }, { "epoch": 0.9867179628102959, "grad_norm": 0.17907226426946776, "learning_rate": 3.742320129186632e-05, "loss": 0.552, "num_tokens": 2060348755.0, "step": 2693 }, { "epoch": 0.9870843638362188, "grad_norm": 0.19020410046537614, "learning_rate": 3.7420997980645074e-05, "loss": 0.5652, "num_tokens": 2060963441.0, "step": 2694 }, { "epoch": 0.9874507648621416, "grad_norm": 0.2012969418325286, "learning_rate": 3.7418793800534764e-05, "loss": 0.5535, "num_tokens": 2061649241.0, "step": 2695 }, { "epoch": 0.9878171658880645, "grad_norm": 0.17683826916710732, "learning_rate": 3.741658875165958e-05, "loss": 0.5452, "num_tokens": 2062442268.0, "step": 2696 }, { "epoch": 0.9881835669139873, "grad_norm": 0.20819695712595468, "learning_rate": 3.741438283414377e-05, "loss": 0.579, "num_tokens": 2063257649.0, "step": 2697 }, { "epoch": 0.9885499679399102, "grad_norm": 0.1754547866610915, "learning_rate": 3.741217604811162e-05, "loss": 0.5207, "num_tokens": 2064072461.0, "step": 2698 }, { "epoch": 0.9889163689658331, "grad_norm": 0.1928191412348134, "learning_rate": 3.740996839368747e-05, "loss": 0.5392, "num_tokens": 2064809761.0, "step": 2699 }, { "epoch": 0.989282769991756, "grad_norm": 0.17817391201010208, "learning_rate": 3.7407759870995705e-05, "loss": 0.5551, "num_tokens": 2065578282.0, "step": 2700 }, { "epoch": 0.9896491710176788, "grad_norm": 0.2141917935234414, "learning_rate": 3.740555048016078e-05, "loss": 0.5272, "num_tokens": 2066322139.0, "step": 2701 }, { "epoch": 0.9900155720436017, "grad_norm": 0.19449836324693343, "learning_rate": 3.740334022130716e-05, "loss": 0.5155, "num_tokens": 2067067944.0, "step": 2702 }, { "epoch": 0.9903819730695246, "grad_norm": 0.1572541162689839, "learning_rate": 3.740112909455939e-05, "loss": 0.5231, "num_tokens": 2067993596.0, "step": 2703 }, { "epoch": 0.9907483740954475, "grad_norm": 0.23518324137344268, "learning_rate": 3.739891710004207e-05, "loss": 0.5755, "num_tokens": 2068705313.0, "step": 2704 }, { "epoch": 0.9911147751213704, "grad_norm": 0.17174220765382595, "learning_rate": 3.7396704237879816e-05, "loss": 0.5518, "num_tokens": 2069541911.0, "step": 2705 }, { "epoch": 0.9914811761472933, "grad_norm": 0.21804257975601227, "learning_rate": 3.7394490508197314e-05, "loss": 0.5975, "num_tokens": 2070160204.0, "step": 2706 }, { "epoch": 0.991847577173216, "grad_norm": 0.18372823759411314, "learning_rate": 3.7392275911119293e-05, "loss": 0.513, "num_tokens": 2070932941.0, "step": 2707 }, { "epoch": 0.9922139781991389, "grad_norm": 0.19036995456385555, "learning_rate": 3.739006044677054e-05, "loss": 0.5201, "num_tokens": 2071789751.0, "step": 2708 }, { "epoch": 0.9925803792250618, "grad_norm": 0.2281137083026567, "learning_rate": 3.738784411527589e-05, "loss": 0.5476, "num_tokens": 2072433594.0, "step": 2709 }, { "epoch": 0.9929467802509847, "grad_norm": 0.19730990073519186, "learning_rate": 3.7385626916760206e-05, "loss": 0.5656, "num_tokens": 2073275295.0, "step": 2710 }, { "epoch": 0.9933131812769076, "grad_norm": 0.23742913738507027, "learning_rate": 3.738340885134842e-05, "loss": 0.5623, "num_tokens": 2074003112.0, "step": 2711 }, { "epoch": 0.9936795823028305, "grad_norm": 0.16708438825300537, "learning_rate": 3.738118991916551e-05, "loss": 0.5177, "num_tokens": 2074843494.0, "step": 2712 }, { "epoch": 0.9940459833287533, "grad_norm": 0.24229806566243084, "learning_rate": 3.73789701203365e-05, "loss": 0.5453, "num_tokens": 2075579685.0, "step": 2713 }, { "epoch": 0.9944123843546762, "grad_norm": 0.24920976242830328, "learning_rate": 3.737674945498646e-05, "loss": 0.5329, "num_tokens": 2076473790.0, "step": 2714 }, { "epoch": 0.994778785380599, "grad_norm": 0.1905054350100517, "learning_rate": 3.737452792324053e-05, "loss": 0.5478, "num_tokens": 2077191740.0, "step": 2715 }, { "epoch": 0.9951451864065219, "grad_norm": 0.24572904083909058, "learning_rate": 3.737230552522386e-05, "loss": 0.543, "num_tokens": 2078040639.0, "step": 2716 }, { "epoch": 0.9955115874324448, "grad_norm": 0.15910714464599465, "learning_rate": 3.7370082261061674e-05, "loss": 0.5276, "num_tokens": 2078756982.0, "step": 2717 }, { "epoch": 0.9958779884583677, "grad_norm": 0.18618334330553798, "learning_rate": 3.7367858130879245e-05, "loss": 0.5515, "num_tokens": 2079487759.0, "step": 2718 }, { "epoch": 0.9962443894842905, "grad_norm": 0.23204663011479842, "learning_rate": 3.7365633134801896e-05, "loss": 0.5857, "num_tokens": 2080204088.0, "step": 2719 }, { "epoch": 0.9966107905102134, "grad_norm": 0.18271980560891216, "learning_rate": 3.7363407272954986e-05, "loss": 0.5469, "num_tokens": 2081022240.0, "step": 2720 }, { "epoch": 0.9969771915361363, "grad_norm": 0.21119172898792396, "learning_rate": 3.736118054546393e-05, "loss": 0.5643, "num_tokens": 2081831909.0, "step": 2721 }, { "epoch": 0.9973435925620592, "grad_norm": 0.17033261194719254, "learning_rate": 3.7358952952454195e-05, "loss": 0.4988, "num_tokens": 2082660180.0, "step": 2722 }, { "epoch": 0.9977099935879821, "grad_norm": 0.17786509953145166, "learning_rate": 3.735672449405129e-05, "loss": 0.4884, "num_tokens": 2083374508.0, "step": 2723 }, { "epoch": 0.998076394613905, "grad_norm": 0.20522325836556096, "learning_rate": 3.7354495170380775e-05, "loss": 0.5568, "num_tokens": 2084177128.0, "step": 2724 }, { "epoch": 0.9984427956398277, "grad_norm": 0.18051462710063187, "learning_rate": 3.7352264981568274e-05, "loss": 0.5705, "num_tokens": 2084927081.0, "step": 2725 }, { "epoch": 0.9988091966657506, "grad_norm": 0.23906629213302816, "learning_rate": 3.735003392773943e-05, "loss": 0.5678, "num_tokens": 2085734935.0, "step": 2726 }, { "epoch": 0.9991755976916735, "grad_norm": 0.182651158926682, "learning_rate": 3.734780200901995e-05, "loss": 0.5907, "num_tokens": 2086539945.0, "step": 2727 }, { "epoch": 0.9995419987175964, "grad_norm": 0.19256012243099332, "learning_rate": 3.734556922553561e-05, "loss": 0.5799, "num_tokens": 2087307222.0, "step": 2728 }, { "epoch": 0.9999083997435193, "grad_norm": 0.19024309999715208, "learning_rate": 3.73433355774122e-05, "loss": 0.5377, "num_tokens": 2088070552.0, "step": 2729 }, { "epoch": 1.0, "grad_norm": 0.19024309999715208, "learning_rate": 3.7341101064775574e-05, "loss": 0.5891, "num_tokens": 2088345337.0, "step": 2730 }, { "epoch": 1.0003664010259228, "grad_norm": 0.36458557071603853, "learning_rate": 3.7338865687751634e-05, "loss": 0.5111, "num_tokens": 2088969508.0, "step": 2731 }, { "epoch": 1.0007328020518458, "grad_norm": 0.18213673144708153, "learning_rate": 3.7336629446466334e-05, "loss": 0.5036, "num_tokens": 2089779208.0, "step": 2732 }, { "epoch": 1.0010992030777686, "grad_norm": 0.1713330886432346, "learning_rate": 3.733439234104567e-05, "loss": 0.5396, "num_tokens": 2090466489.0, "step": 2733 }, { "epoch": 1.0014656041036916, "grad_norm": 0.18834856328312063, "learning_rate": 3.73321543716157e-05, "loss": 0.5559, "num_tokens": 2091243927.0, "step": 2734 }, { "epoch": 1.0018320051296143, "grad_norm": 0.18772435926922554, "learning_rate": 3.732991553830252e-05, "loss": 0.4854, "num_tokens": 2092063147.0, "step": 2735 }, { "epoch": 1.0021984061555371, "grad_norm": 0.18590215723446038, "learning_rate": 3.7327675841232266e-05, "loss": 0.4862, "num_tokens": 2092821646.0, "step": 2736 }, { "epoch": 1.0025648071814601, "grad_norm": 0.22138942151429225, "learning_rate": 3.732543528053114e-05, "loss": 0.519, "num_tokens": 2093600685.0, "step": 2737 }, { "epoch": 1.002931208207383, "grad_norm": 0.22442167613066608, "learning_rate": 3.732319385632538e-05, "loss": 0.4864, "num_tokens": 2094423994.0, "step": 2738 }, { "epoch": 1.003297609233306, "grad_norm": 0.2224019335123366, "learning_rate": 3.732095156874129e-05, "loss": 0.4777, "num_tokens": 2095215928.0, "step": 2739 }, { "epoch": 1.0036640102592287, "grad_norm": 0.2690800705050918, "learning_rate": 3.73187084179052e-05, "loss": 0.5487, "num_tokens": 2095882872.0, "step": 2740 }, { "epoch": 1.0040304112851517, "grad_norm": 0.1854857056226777, "learning_rate": 3.7316464403943496e-05, "loss": 0.5351, "num_tokens": 2096582122.0, "step": 2741 }, { "epoch": 1.0043968123110745, "grad_norm": 0.18166912512591665, "learning_rate": 3.7314219526982634e-05, "loss": 0.5272, "num_tokens": 2097255550.0, "step": 2742 }, { "epoch": 1.0047632133369973, "grad_norm": 0.17606942328778236, "learning_rate": 3.731197378714908e-05, "loss": 0.4775, "num_tokens": 2098038130.0, "step": 2743 }, { "epoch": 1.0051296143629203, "grad_norm": 0.14684534372164826, "learning_rate": 3.730972718456938e-05, "loss": 0.4808, "num_tokens": 2098874128.0, "step": 2744 }, { "epoch": 1.005496015388843, "grad_norm": 0.1704479320658339, "learning_rate": 3.730747971937012e-05, "loss": 0.5192, "num_tokens": 2099588194.0, "step": 2745 }, { "epoch": 1.005862416414766, "grad_norm": 0.17219027367647413, "learning_rate": 3.730523139167792e-05, "loss": 0.509, "num_tokens": 2100243850.0, "step": 2746 }, { "epoch": 1.0062288174406888, "grad_norm": 0.17437760498384677, "learning_rate": 3.730298220161947e-05, "loss": 0.5633, "num_tokens": 2100873979.0, "step": 2747 }, { "epoch": 1.0065952184666116, "grad_norm": 0.18972814841707303, "learning_rate": 3.73007321493215e-05, "loss": 0.5329, "num_tokens": 2101506714.0, "step": 2748 }, { "epoch": 1.0069616194925346, "grad_norm": 0.16648032945815092, "learning_rate": 3.729848123491079e-05, "loss": 0.5089, "num_tokens": 2102236855.0, "step": 2749 }, { "epoch": 1.0073280205184574, "grad_norm": 0.13707642130361672, "learning_rate": 3.7296229458514164e-05, "loss": 0.4932, "num_tokens": 2103133382.0, "step": 2750 }, { "epoch": 1.0076944215443804, "grad_norm": 0.1891857047805903, "learning_rate": 3.729397682025849e-05, "loss": 0.509, "num_tokens": 2103905288.0, "step": 2751 }, { "epoch": 1.0080608225703032, "grad_norm": 0.1684991486428666, "learning_rate": 3.72917233202707e-05, "loss": 0.5308, "num_tokens": 2104679866.0, "step": 2752 }, { "epoch": 1.0084272235962262, "grad_norm": 0.16913691147516394, "learning_rate": 3.728946895867778e-05, "loss": 0.5031, "num_tokens": 2105401615.0, "step": 2753 }, { "epoch": 1.008793624622149, "grad_norm": 0.14717992883394299, "learning_rate": 3.728721373560672e-05, "loss": 0.473, "num_tokens": 2106065701.0, "step": 2754 }, { "epoch": 1.0091600256480717, "grad_norm": 0.16144113425635742, "learning_rate": 3.728495765118461e-05, "loss": 0.4569, "num_tokens": 2106752992.0, "step": 2755 }, { "epoch": 1.0095264266739947, "grad_norm": 0.16233034488006728, "learning_rate": 3.7282700705538555e-05, "loss": 0.5027, "num_tokens": 2107600033.0, "step": 2756 }, { "epoch": 1.0098928276999175, "grad_norm": 0.15332846621706914, "learning_rate": 3.7280442898795745e-05, "loss": 0.4765, "num_tokens": 2108421215.0, "step": 2757 }, { "epoch": 1.0102592287258405, "grad_norm": 0.1610259741484892, "learning_rate": 3.727818423108337e-05, "loss": 0.4909, "num_tokens": 2109070313.0, "step": 2758 }, { "epoch": 1.0106256297517633, "grad_norm": 0.1724125883760334, "learning_rate": 3.7275924702528705e-05, "loss": 0.5179, "num_tokens": 2109783347.0, "step": 2759 }, { "epoch": 1.010992030777686, "grad_norm": 0.16929137435831612, "learning_rate": 3.7273664313259066e-05, "loss": 0.523, "num_tokens": 2110593860.0, "step": 2760 }, { "epoch": 1.011358431803609, "grad_norm": 0.1629740522576631, "learning_rate": 3.7271403063401804e-05, "loss": 0.5476, "num_tokens": 2111364705.0, "step": 2761 }, { "epoch": 1.0117248328295319, "grad_norm": 0.1622669046866306, "learning_rate": 3.726914095308433e-05, "loss": 0.501, "num_tokens": 2112111641.0, "step": 2762 }, { "epoch": 1.0120912338554549, "grad_norm": 0.19641106551427542, "learning_rate": 3.7266877982434116e-05, "loss": 0.4999, "num_tokens": 2112852116.0, "step": 2763 }, { "epoch": 1.0124576348813776, "grad_norm": 0.18490535259326127, "learning_rate": 3.726461415157865e-05, "loss": 0.5399, "num_tokens": 2113628033.0, "step": 2764 }, { "epoch": 1.0128240359073006, "grad_norm": 0.1666456957051732, "learning_rate": 3.726234946064549e-05, "loss": 0.5028, "num_tokens": 2114310101.0, "step": 2765 }, { "epoch": 1.0131904369332234, "grad_norm": 0.1639383490054749, "learning_rate": 3.726008390976224e-05, "loss": 0.4747, "num_tokens": 2115186186.0, "step": 2766 }, { "epoch": 1.0135568379591462, "grad_norm": 0.1520003653870732, "learning_rate": 3.725781749905656e-05, "loss": 0.5123, "num_tokens": 2116008943.0, "step": 2767 }, { "epoch": 1.0139232389850692, "grad_norm": 0.17047345585549734, "learning_rate": 3.7255550228656144e-05, "loss": 0.5158, "num_tokens": 2116749556.0, "step": 2768 }, { "epoch": 1.014289640010992, "grad_norm": 0.17153569781385286, "learning_rate": 3.725328209868873e-05, "loss": 0.4947, "num_tokens": 2117508425.0, "step": 2769 }, { "epoch": 1.014656041036915, "grad_norm": 0.16546147673089123, "learning_rate": 3.7251013109282126e-05, "loss": 0.4997, "num_tokens": 2118371445.0, "step": 2770 }, { "epoch": 1.0150224420628378, "grad_norm": 0.1702736257628676, "learning_rate": 3.724874326056417e-05, "loss": 0.4826, "num_tokens": 2119109367.0, "step": 2771 }, { "epoch": 1.0153888430887605, "grad_norm": 0.1534805862600555, "learning_rate": 3.724647255266278e-05, "loss": 0.5158, "num_tokens": 2119873412.0, "step": 2772 }, { "epoch": 1.0157552441146835, "grad_norm": 0.20834728863600632, "learning_rate": 3.724420098570587e-05, "loss": 0.5194, "num_tokens": 2120523283.0, "step": 2773 }, { "epoch": 1.0161216451406063, "grad_norm": 0.16476781627790232, "learning_rate": 3.724192855982143e-05, "loss": 0.5469, "num_tokens": 2121300642.0, "step": 2774 }, { "epoch": 1.0164880461665293, "grad_norm": 0.17697940284569602, "learning_rate": 3.7239655275137517e-05, "loss": 0.5205, "num_tokens": 2121999359.0, "step": 2775 }, { "epoch": 1.016854447192452, "grad_norm": 0.17869936918423965, "learning_rate": 3.7237381131782204e-05, "loss": 0.5231, "num_tokens": 2122812641.0, "step": 2776 }, { "epoch": 1.017220848218375, "grad_norm": 0.1596966423216333, "learning_rate": 3.7235106129883634e-05, "loss": 0.5175, "num_tokens": 2123730294.0, "step": 2777 }, { "epoch": 1.0175872492442979, "grad_norm": 0.1507640019590851, "learning_rate": 3.723283026956999e-05, "loss": 0.495, "num_tokens": 2124537719.0, "step": 2778 }, { "epoch": 1.0179536502702207, "grad_norm": 0.17938979158018645, "learning_rate": 3.72305535509695e-05, "loss": 0.5381, "num_tokens": 2125298968.0, "step": 2779 }, { "epoch": 1.0183200512961437, "grad_norm": 0.1675105975614111, "learning_rate": 3.7228275974210454e-05, "loss": 0.4967, "num_tokens": 2126218168.0, "step": 2780 }, { "epoch": 1.0186864523220664, "grad_norm": 0.21158789201554823, "learning_rate": 3.722599753942117e-05, "loss": 0.4953, "num_tokens": 2126960177.0, "step": 2781 }, { "epoch": 1.0190528533479895, "grad_norm": 0.1884865631527741, "learning_rate": 3.7223718246730035e-05, "loss": 0.5275, "num_tokens": 2127733944.0, "step": 2782 }, { "epoch": 1.0194192543739122, "grad_norm": 0.20813998780624246, "learning_rate": 3.7221438096265464e-05, "loss": 0.5196, "num_tokens": 2128474247.0, "step": 2783 }, { "epoch": 1.019785655399835, "grad_norm": 0.18191083675301303, "learning_rate": 3.721915708815594e-05, "loss": 0.4928, "num_tokens": 2129235745.0, "step": 2784 }, { "epoch": 1.020152056425758, "grad_norm": 0.17984066932908474, "learning_rate": 3.721687522252998e-05, "loss": 0.5095, "num_tokens": 2129883400.0, "step": 2785 }, { "epoch": 1.0205184574516808, "grad_norm": 0.20059976776784838, "learning_rate": 3.7214592499516165e-05, "loss": 0.4878, "num_tokens": 2130687399.0, "step": 2786 }, { "epoch": 1.0208848584776038, "grad_norm": 0.1548157029791772, "learning_rate": 3.7212308919243094e-05, "loss": 0.494, "num_tokens": 2131412216.0, "step": 2787 }, { "epoch": 1.0212512595035266, "grad_norm": 0.2532589249026752, "learning_rate": 3.7210024481839446e-05, "loss": 0.5081, "num_tokens": 2132167652.0, "step": 2788 }, { "epoch": 1.0216176605294496, "grad_norm": 0.15000734056597606, "learning_rate": 3.7207739187433944e-05, "loss": 0.5261, "num_tokens": 2132924571.0, "step": 2789 }, { "epoch": 1.0219840615553724, "grad_norm": 0.20932667713998263, "learning_rate": 3.7205453036155343e-05, "loss": 0.5034, "num_tokens": 2133697151.0, "step": 2790 }, { "epoch": 1.0223504625812951, "grad_norm": 0.16796389664555667, "learning_rate": 3.720316602813246e-05, "loss": 0.4944, "num_tokens": 2134443609.0, "step": 2791 }, { "epoch": 1.0227168636072181, "grad_norm": 0.1984475255856214, "learning_rate": 3.720087816349415e-05, "loss": 0.5016, "num_tokens": 2135246226.0, "step": 2792 }, { "epoch": 1.023083264633141, "grad_norm": 0.2122201985050522, "learning_rate": 3.7198589442369316e-05, "loss": 0.5218, "num_tokens": 2136027360.0, "step": 2793 }, { "epoch": 1.023449665659064, "grad_norm": 0.14946780848889984, "learning_rate": 3.7196299864886923e-05, "loss": 0.5106, "num_tokens": 2136746578.0, "step": 2794 }, { "epoch": 1.0238160666849867, "grad_norm": 0.20947511356444762, "learning_rate": 3.7194009431175984e-05, "loss": 0.5361, "num_tokens": 2137512810.0, "step": 2795 }, { "epoch": 1.0241824677109095, "grad_norm": 0.15233066890513408, "learning_rate": 3.7191718141365544e-05, "loss": 0.4859, "num_tokens": 2138250935.0, "step": 2796 }, { "epoch": 1.0245488687368325, "grad_norm": 0.1710319480398681, "learning_rate": 3.7189425995584706e-05, "loss": 0.5088, "num_tokens": 2139017701.0, "step": 2797 }, { "epoch": 1.0249152697627553, "grad_norm": 0.1599136735152738, "learning_rate": 3.718713299396261e-05, "loss": 0.5126, "num_tokens": 2139747061.0, "step": 2798 }, { "epoch": 1.0252816707886783, "grad_norm": 0.18967899599457066, "learning_rate": 3.718483913662847e-05, "loss": 0.5095, "num_tokens": 2140621828.0, "step": 2799 }, { "epoch": 1.025648071814601, "grad_norm": 0.16923901237710545, "learning_rate": 3.718254442371153e-05, "loss": 0.5018, "num_tokens": 2141387703.0, "step": 2800 }, { "epoch": 1.026014472840524, "grad_norm": 0.16002222108579164, "learning_rate": 3.718024885534107e-05, "loss": 0.486, "num_tokens": 2142125463.0, "step": 2801 }, { "epoch": 1.0263808738664468, "grad_norm": 0.16208988149429335, "learning_rate": 3.717795243164644e-05, "loss": 0.4778, "num_tokens": 2142973039.0, "step": 2802 }, { "epoch": 1.0267472748923696, "grad_norm": 0.13994965734943718, "learning_rate": 3.7175655152757046e-05, "loss": 0.4741, "num_tokens": 2143870701.0, "step": 2803 }, { "epoch": 1.0271136759182926, "grad_norm": 0.14183386482800828, "learning_rate": 3.717335701880231e-05, "loss": 0.5171, "num_tokens": 2144543318.0, "step": 2804 }, { "epoch": 1.0274800769442154, "grad_norm": 0.21116450888009514, "learning_rate": 3.717105802991172e-05, "loss": 0.4727, "num_tokens": 2145308139.0, "step": 2805 }, { "epoch": 1.0278464779701384, "grad_norm": 0.15350002420403708, "learning_rate": 3.716875818621482e-05, "loss": 0.49, "num_tokens": 2146049707.0, "step": 2806 }, { "epoch": 1.0282128789960612, "grad_norm": 0.16922433545587384, "learning_rate": 3.716645748784119e-05, "loss": 0.5133, "num_tokens": 2146755211.0, "step": 2807 }, { "epoch": 1.028579280021984, "grad_norm": 0.16877531050885614, "learning_rate": 3.716415593492046e-05, "loss": 0.5066, "num_tokens": 2147461130.0, "step": 2808 }, { "epoch": 1.028945681047907, "grad_norm": 0.18088786757093397, "learning_rate": 3.716185352758231e-05, "loss": 0.5045, "num_tokens": 2148164344.0, "step": 2809 }, { "epoch": 1.0293120820738297, "grad_norm": 0.1633604724228161, "learning_rate": 3.715955026595647e-05, "loss": 0.5028, "num_tokens": 2148996422.0, "step": 2810 }, { "epoch": 1.0296784830997527, "grad_norm": 0.19339531104218363, "learning_rate": 3.715724615017272e-05, "loss": 0.5032, "num_tokens": 2149680037.0, "step": 2811 }, { "epoch": 1.0300448841256755, "grad_norm": 0.16734842133407768, "learning_rate": 3.715494118036087e-05, "loss": 0.5543, "num_tokens": 2150372954.0, "step": 2812 }, { "epoch": 1.0304112851515985, "grad_norm": 0.18736521966918576, "learning_rate": 3.71526353566508e-05, "loss": 0.5247, "num_tokens": 2151152231.0, "step": 2813 }, { "epoch": 1.0307776861775213, "grad_norm": 0.15914479813922577, "learning_rate": 3.715032867917244e-05, "loss": 0.5005, "num_tokens": 2151976937.0, "step": 2814 }, { "epoch": 1.031144087203444, "grad_norm": 0.15947977418263892, "learning_rate": 3.714802114805575e-05, "loss": 0.4609, "num_tokens": 2152759415.0, "step": 2815 }, { "epoch": 1.031510488229367, "grad_norm": 0.19939046102466285, "learning_rate": 3.714571276343075e-05, "loss": 0.512, "num_tokens": 2153674402.0, "step": 2816 }, { "epoch": 1.0318768892552899, "grad_norm": 0.16499126530834485, "learning_rate": 3.71434035254275e-05, "loss": 0.5192, "num_tokens": 2154513181.0, "step": 2817 }, { "epoch": 1.0322432902812129, "grad_norm": 0.16480915024203133, "learning_rate": 3.714109343417612e-05, "loss": 0.5238, "num_tokens": 2155306476.0, "step": 2818 }, { "epoch": 1.0326096913071356, "grad_norm": 0.19241270362227703, "learning_rate": 3.7138782489806765e-05, "loss": 0.4963, "num_tokens": 2156114828.0, "step": 2819 }, { "epoch": 1.0329760923330584, "grad_norm": 0.1506907078174128, "learning_rate": 3.713647069244965e-05, "loss": 0.5418, "num_tokens": 2156938938.0, "step": 2820 }, { "epoch": 1.0333424933589814, "grad_norm": 0.24977744002983762, "learning_rate": 3.713415804223502e-05, "loss": 0.5179, "num_tokens": 2157660595.0, "step": 2821 }, { "epoch": 1.0337088943849042, "grad_norm": 0.15999487765602488, "learning_rate": 3.7131844539293195e-05, "loss": 0.4789, "num_tokens": 2158374268.0, "step": 2822 }, { "epoch": 1.0340752954108272, "grad_norm": 0.15787880269915844, "learning_rate": 3.712953018375452e-05, "loss": 0.4953, "num_tokens": 2159054407.0, "step": 2823 }, { "epoch": 1.03444169643675, "grad_norm": 0.17391599311271996, "learning_rate": 3.712721497574939e-05, "loss": 0.5058, "num_tokens": 2159839605.0, "step": 2824 }, { "epoch": 1.034808097462673, "grad_norm": 0.14348824874632957, "learning_rate": 3.7124898915408275e-05, "loss": 0.4873, "num_tokens": 2160725152.0, "step": 2825 }, { "epoch": 1.0351744984885958, "grad_norm": 0.15846392225296066, "learning_rate": 3.712258200286166e-05, "loss": 0.4662, "num_tokens": 2161494620.0, "step": 2826 }, { "epoch": 1.0355408995145186, "grad_norm": 0.16074494015870885, "learning_rate": 3.712026423824007e-05, "loss": 0.521, "num_tokens": 2162144951.0, "step": 2827 }, { "epoch": 1.0359073005404416, "grad_norm": 0.1521848173007835, "learning_rate": 3.7117945621674135e-05, "loss": 0.4744, "num_tokens": 2162814941.0, "step": 2828 }, { "epoch": 1.0362737015663643, "grad_norm": 0.1974089004536749, "learning_rate": 3.7115626153294476e-05, "loss": 0.4841, "num_tokens": 2163618980.0, "step": 2829 }, { "epoch": 1.0366401025922873, "grad_norm": 0.14723384875482678, "learning_rate": 3.7113305833231795e-05, "loss": 0.4595, "num_tokens": 2164380515.0, "step": 2830 }, { "epoch": 1.0370065036182101, "grad_norm": 0.16363447977266465, "learning_rate": 3.711098466161681e-05, "loss": 0.5059, "num_tokens": 2165049271.0, "step": 2831 }, { "epoch": 1.037372904644133, "grad_norm": 0.17888599081829815, "learning_rate": 3.7108662638580316e-05, "loss": 0.5115, "num_tokens": 2165811973.0, "step": 2832 }, { "epoch": 1.037739305670056, "grad_norm": 0.1364846985180609, "learning_rate": 3.710633976425316e-05, "loss": 0.4955, "num_tokens": 2166647992.0, "step": 2833 }, { "epoch": 1.0381057066959787, "grad_norm": 0.17566636472506728, "learning_rate": 3.71040160387662e-05, "loss": 0.5166, "num_tokens": 2167361333.0, "step": 2834 }, { "epoch": 1.0384721077219017, "grad_norm": 0.16204857412296006, "learning_rate": 3.7101691462250375e-05, "loss": 0.4858, "num_tokens": 2168049766.0, "step": 2835 }, { "epoch": 1.0388385087478245, "grad_norm": 0.17113153587312838, "learning_rate": 3.7099366034836676e-05, "loss": 0.4861, "num_tokens": 2168851289.0, "step": 2836 }, { "epoch": 1.0392049097737475, "grad_norm": 0.1418191133540418, "learning_rate": 3.70970397566561e-05, "loss": 0.5131, "num_tokens": 2169559298.0, "step": 2837 }, { "epoch": 1.0395713107996702, "grad_norm": 0.1638794499996644, "learning_rate": 3.7094712627839735e-05, "loss": 0.4831, "num_tokens": 2170369106.0, "step": 2838 }, { "epoch": 1.039937711825593, "grad_norm": 0.15992043075939427, "learning_rate": 3.709238464851871e-05, "loss": 0.5329, "num_tokens": 2171101301.0, "step": 2839 }, { "epoch": 1.040304112851516, "grad_norm": 0.1561604564065243, "learning_rate": 3.7090055818824185e-05, "loss": 0.471, "num_tokens": 2171845735.0, "step": 2840 }, { "epoch": 1.0406705138774388, "grad_norm": 0.15110310463566828, "learning_rate": 3.708772613888738e-05, "loss": 0.4861, "num_tokens": 2172638484.0, "step": 2841 }, { "epoch": 1.0410369149033618, "grad_norm": 0.16046418282933309, "learning_rate": 3.708539560883955e-05, "loss": 0.5233, "num_tokens": 2173314088.0, "step": 2842 }, { "epoch": 1.0414033159292846, "grad_norm": 0.15219233561182843, "learning_rate": 3.7083064228812024e-05, "loss": 0.489, "num_tokens": 2174123984.0, "step": 2843 }, { "epoch": 1.0417697169552074, "grad_norm": 0.15664192529983142, "learning_rate": 3.708073199893615e-05, "loss": 0.5554, "num_tokens": 2174900148.0, "step": 2844 }, { "epoch": 1.0421361179811304, "grad_norm": 0.15462554444881507, "learning_rate": 3.707839891934334e-05, "loss": 0.5112, "num_tokens": 2175571069.0, "step": 2845 }, { "epoch": 1.0425025190070532, "grad_norm": 0.16391806744216234, "learning_rate": 3.707606499016505e-05, "loss": 0.5113, "num_tokens": 2176229764.0, "step": 2846 }, { "epoch": 1.0428689200329762, "grad_norm": 0.16437812370225724, "learning_rate": 3.707373021153279e-05, "loss": 0.5305, "num_tokens": 2176942623.0, "step": 2847 }, { "epoch": 1.043235321058899, "grad_norm": 0.1714218205136982, "learning_rate": 3.7071394583578103e-05, "loss": 0.4834, "num_tokens": 2177707180.0, "step": 2848 }, { "epoch": 1.043601722084822, "grad_norm": 0.18773384243173774, "learning_rate": 3.7069058106432596e-05, "loss": 0.5275, "num_tokens": 2178432810.0, "step": 2849 }, { "epoch": 1.0439681231107447, "grad_norm": 0.16050365059226163, "learning_rate": 3.706672078022791e-05, "loss": 0.5249, "num_tokens": 2179356568.0, "step": 2850 }, { "epoch": 1.0443345241366675, "grad_norm": 0.1604864471131983, "learning_rate": 3.706438260509574e-05, "loss": 0.5227, "num_tokens": 2180106298.0, "step": 2851 }, { "epoch": 1.0447009251625905, "grad_norm": 0.19565711210198417, "learning_rate": 3.7062043581167834e-05, "loss": 0.5359, "num_tokens": 2180776671.0, "step": 2852 }, { "epoch": 1.0450673261885133, "grad_norm": 0.1650228320615684, "learning_rate": 3.7059703708575995e-05, "loss": 0.5222, "num_tokens": 2181487863.0, "step": 2853 }, { "epoch": 1.0454337272144363, "grad_norm": 0.15038974578705294, "learning_rate": 3.705736298745204e-05, "loss": 0.51, "num_tokens": 2182186487.0, "step": 2854 }, { "epoch": 1.045800128240359, "grad_norm": 0.18610632664355564, "learning_rate": 3.7055021417927866e-05, "loss": 0.5143, "num_tokens": 2182862016.0, "step": 2855 }, { "epoch": 1.0461665292662818, "grad_norm": 0.17385950628131194, "learning_rate": 3.705267900013541e-05, "loss": 0.5251, "num_tokens": 2183632958.0, "step": 2856 }, { "epoch": 1.0465329302922048, "grad_norm": 0.18693588855644508, "learning_rate": 3.705033573420665e-05, "loss": 0.486, "num_tokens": 2184436958.0, "step": 2857 }, { "epoch": 1.0468993313181276, "grad_norm": 0.17539902092025195, "learning_rate": 3.704799162027362e-05, "loss": 0.5268, "num_tokens": 2185127873.0, "step": 2858 }, { "epoch": 1.0472657323440506, "grad_norm": 0.1613214920366636, "learning_rate": 3.704564665846839e-05, "loss": 0.5185, "num_tokens": 2185873204.0, "step": 2859 }, { "epoch": 1.0476321333699734, "grad_norm": 0.17868794723723322, "learning_rate": 3.70433008489231e-05, "loss": 0.5017, "num_tokens": 2186618053.0, "step": 2860 }, { "epoch": 1.0479985343958964, "grad_norm": 0.159264924500862, "learning_rate": 3.7040954191769913e-05, "loss": 0.4957, "num_tokens": 2187340324.0, "step": 2861 }, { "epoch": 1.0483649354218192, "grad_norm": 0.15211213162245021, "learning_rate": 3.703860668714105e-05, "loss": 0.4904, "num_tokens": 2188108358.0, "step": 2862 }, { "epoch": 1.048731336447742, "grad_norm": 0.17165847295050782, "learning_rate": 3.7036258335168784e-05, "loss": 0.4972, "num_tokens": 2188780843.0, "step": 2863 }, { "epoch": 1.049097737473665, "grad_norm": 0.17673778429383125, "learning_rate": 3.703390913598543e-05, "loss": 0.5177, "num_tokens": 2189579071.0, "step": 2864 }, { "epoch": 1.0494641384995878, "grad_norm": 0.150123163332228, "learning_rate": 3.7031559089723355e-05, "loss": 0.4907, "num_tokens": 2190402304.0, "step": 2865 }, { "epoch": 1.0498305395255108, "grad_norm": 0.1787240750409366, "learning_rate": 3.702920819651497e-05, "loss": 0.5068, "num_tokens": 2191209563.0, "step": 2866 }, { "epoch": 1.0501969405514335, "grad_norm": 0.16627730738131727, "learning_rate": 3.702685645649273e-05, "loss": 0.4928, "num_tokens": 2192083765.0, "step": 2867 }, { "epoch": 1.0505633415773563, "grad_norm": 0.18567194148825594, "learning_rate": 3.702450386978915e-05, "loss": 0.5455, "num_tokens": 2192733253.0, "step": 2868 }, { "epoch": 1.0509297426032793, "grad_norm": 0.17246305928916078, "learning_rate": 3.702215043653679e-05, "loss": 0.5277, "num_tokens": 2193418993.0, "step": 2869 }, { "epoch": 1.051296143629202, "grad_norm": 0.17386374189270354, "learning_rate": 3.7019796156868235e-05, "loss": 0.5374, "num_tokens": 2194089837.0, "step": 2870 }, { "epoch": 1.051662544655125, "grad_norm": 0.1629825911527305, "learning_rate": 3.7017441030916155e-05, "loss": 0.4931, "num_tokens": 2194898763.0, "step": 2871 }, { "epoch": 1.0520289456810479, "grad_norm": 0.1617345188971316, "learning_rate": 3.7015085058813234e-05, "loss": 0.4921, "num_tokens": 2195768083.0, "step": 2872 }, { "epoch": 1.0523953467069709, "grad_norm": 0.15348797010851875, "learning_rate": 3.701272824069222e-05, "loss": 0.5035, "num_tokens": 2196681255.0, "step": 2873 }, { "epoch": 1.0527617477328937, "grad_norm": 0.15091830397056338, "learning_rate": 3.7010370576685914e-05, "loss": 0.4989, "num_tokens": 2197385207.0, "step": 2874 }, { "epoch": 1.0531281487588164, "grad_norm": 0.17415667677784036, "learning_rate": 3.700801206692716e-05, "loss": 0.4995, "num_tokens": 2198199938.0, "step": 2875 }, { "epoch": 1.0534945497847394, "grad_norm": 0.15061141500789627, "learning_rate": 3.700565271154884e-05, "loss": 0.5054, "num_tokens": 2198892694.0, "step": 2876 }, { "epoch": 1.0538609508106622, "grad_norm": 0.15902145260686207, "learning_rate": 3.700329251068389e-05, "loss": 0.5259, "num_tokens": 2199628888.0, "step": 2877 }, { "epoch": 1.0542273518365852, "grad_norm": 0.14783244600428846, "learning_rate": 3.70009314644653e-05, "loss": 0.4786, "num_tokens": 2200455060.0, "step": 2878 }, { "epoch": 1.054593752862508, "grad_norm": 0.1928664275548707, "learning_rate": 3.6998569573026093e-05, "loss": 0.5212, "num_tokens": 2201168287.0, "step": 2879 }, { "epoch": 1.0549601538884308, "grad_norm": 0.17684746183362787, "learning_rate": 3.699620683649936e-05, "loss": 0.5112, "num_tokens": 2201857424.0, "step": 2880 }, { "epoch": 1.0553265549143538, "grad_norm": 0.16193077684431528, "learning_rate": 3.699384325501822e-05, "loss": 0.4873, "num_tokens": 2202599934.0, "step": 2881 }, { "epoch": 1.0556929559402766, "grad_norm": 0.16577579292128708, "learning_rate": 3.699147882871585e-05, "loss": 0.4963, "num_tokens": 2203378211.0, "step": 2882 }, { "epoch": 1.0560593569661996, "grad_norm": 0.1578069912557328, "learning_rate": 3.698911355772547e-05, "loss": 0.5197, "num_tokens": 2204102275.0, "step": 2883 }, { "epoch": 1.0564257579921223, "grad_norm": 0.15101014385902048, "learning_rate": 3.698674744218037e-05, "loss": 0.5038, "num_tokens": 2204903567.0, "step": 2884 }, { "epoch": 1.0567921590180454, "grad_norm": 0.15115724248202583, "learning_rate": 3.6984380482213834e-05, "loss": 0.5056, "num_tokens": 2205641836.0, "step": 2885 }, { "epoch": 1.0571585600439681, "grad_norm": 0.15835560038636928, "learning_rate": 3.6982012677959254e-05, "loss": 0.5124, "num_tokens": 2206338322.0, "step": 2886 }, { "epoch": 1.057524961069891, "grad_norm": 0.1886362900042711, "learning_rate": 3.6979644029550026e-05, "loss": 0.501, "num_tokens": 2207051879.0, "step": 2887 }, { "epoch": 1.057891362095814, "grad_norm": 0.15398642796278156, "learning_rate": 3.697727453711963e-05, "loss": 0.4766, "num_tokens": 2207883831.0, "step": 2888 }, { "epoch": 1.0582577631217367, "grad_norm": 0.17488302001393247, "learning_rate": 3.697490420080155e-05, "loss": 0.5232, "num_tokens": 2208678804.0, "step": 2889 }, { "epoch": 1.0586241641476597, "grad_norm": 0.1768357618320109, "learning_rate": 3.697253302072936e-05, "loss": 0.535, "num_tokens": 2209408311.0, "step": 2890 }, { "epoch": 1.0589905651735825, "grad_norm": 0.16015161715670634, "learning_rate": 3.6970160997036646e-05, "loss": 0.5081, "num_tokens": 2210163482.0, "step": 2891 }, { "epoch": 1.0593569661995053, "grad_norm": 0.18783870608078002, "learning_rate": 3.696778812985708e-05, "loss": 0.4957, "num_tokens": 2210930422.0, "step": 2892 }, { "epoch": 1.0597233672254283, "grad_norm": 0.15390950223288136, "learning_rate": 3.696541441932435e-05, "loss": 0.5112, "num_tokens": 2211601437.0, "step": 2893 }, { "epoch": 1.060089768251351, "grad_norm": 0.20405799299104735, "learning_rate": 3.696303986557219e-05, "loss": 0.4789, "num_tokens": 2212346787.0, "step": 2894 }, { "epoch": 1.060456169277274, "grad_norm": 0.1414288988830007, "learning_rate": 3.696066446873441e-05, "loss": 0.5191, "num_tokens": 2213078276.0, "step": 2895 }, { "epoch": 1.0608225703031968, "grad_norm": 0.18509141844786828, "learning_rate": 3.695828822894484e-05, "loss": 0.4944, "num_tokens": 2213902399.0, "step": 2896 }, { "epoch": 1.0611889713291198, "grad_norm": 0.15539011455213522, "learning_rate": 3.6955911146337376e-05, "loss": 0.5013, "num_tokens": 2214638620.0, "step": 2897 }, { "epoch": 1.0615553723550426, "grad_norm": 0.16497355838898062, "learning_rate": 3.6953533221045955e-05, "loss": 0.5083, "num_tokens": 2215474295.0, "step": 2898 }, { "epoch": 1.0619217733809654, "grad_norm": 0.166712715761867, "learning_rate": 3.695115445320455e-05, "loss": 0.5311, "num_tokens": 2216170269.0, "step": 2899 }, { "epoch": 1.0622881744068884, "grad_norm": 0.16388603935689663, "learning_rate": 3.6948774842947196e-05, "loss": 0.4842, "num_tokens": 2217062065.0, "step": 2900 }, { "epoch": 1.0626545754328112, "grad_norm": 0.14842429620754646, "learning_rate": 3.6946394390407974e-05, "loss": 0.4776, "num_tokens": 2217875685.0, "step": 2901 }, { "epoch": 1.0630209764587342, "grad_norm": 0.17245537222437454, "learning_rate": 3.6944013095721004e-05, "loss": 0.5033, "num_tokens": 2218707434.0, "step": 2902 }, { "epoch": 1.063387377484657, "grad_norm": 0.15861382159433735, "learning_rate": 3.694163095902047e-05, "loss": 0.4925, "num_tokens": 2219311851.0, "step": 2903 }, { "epoch": 1.0637537785105797, "grad_norm": 0.1866375756829911, "learning_rate": 3.693924798044058e-05, "loss": 0.4992, "num_tokens": 2220225520.0, "step": 2904 }, { "epoch": 1.0641201795365027, "grad_norm": 0.13458554583180576, "learning_rate": 3.6936864160115604e-05, "loss": 0.4941, "num_tokens": 2221058301.0, "step": 2905 }, { "epoch": 1.0644865805624255, "grad_norm": 0.1703376320656851, "learning_rate": 3.6934479498179866e-05, "loss": 0.5204, "num_tokens": 2221840113.0, "step": 2906 }, { "epoch": 1.0648529815883485, "grad_norm": 0.17293402675972996, "learning_rate": 3.693209399476772e-05, "loss": 0.5149, "num_tokens": 2222477220.0, "step": 2907 }, { "epoch": 1.0652193826142713, "grad_norm": 0.14474595039622087, "learning_rate": 3.692970765001358e-05, "loss": 0.4823, "num_tokens": 2223155799.0, "step": 2908 }, { "epoch": 1.0655857836401943, "grad_norm": 0.15925602703579114, "learning_rate": 3.6927320464051895e-05, "loss": 0.5152, "num_tokens": 2223827107.0, "step": 2909 }, { "epoch": 1.065952184666117, "grad_norm": 0.1844479910716243, "learning_rate": 3.692493243701717e-05, "loss": 0.4936, "num_tokens": 2224613105.0, "step": 2910 }, { "epoch": 1.0663185856920399, "grad_norm": 0.15319699069039625, "learning_rate": 3.692254356904398e-05, "loss": 0.5523, "num_tokens": 2225395739.0, "step": 2911 }, { "epoch": 1.0666849867179629, "grad_norm": 0.16318823075824504, "learning_rate": 3.692015386026691e-05, "loss": 0.5424, "num_tokens": 2226110340.0, "step": 2912 }, { "epoch": 1.0670513877438856, "grad_norm": 0.1501633624427817, "learning_rate": 3.69177633108206e-05, "loss": 0.4992, "num_tokens": 2226871060.0, "step": 2913 }, { "epoch": 1.0674177887698086, "grad_norm": 0.16591885835959833, "learning_rate": 3.6915371920839756e-05, "loss": 0.4908, "num_tokens": 2227477117.0, "step": 2914 }, { "epoch": 1.0677841897957314, "grad_norm": 0.1596044003888804, "learning_rate": 3.69129796904591e-05, "loss": 0.5289, "num_tokens": 2228273927.0, "step": 2915 }, { "epoch": 1.0681505908216542, "grad_norm": 0.1413867877466681, "learning_rate": 3.691058661981345e-05, "loss": 0.5287, "num_tokens": 2229090630.0, "step": 2916 }, { "epoch": 1.0685169918475772, "grad_norm": 0.1855344575692447, "learning_rate": 3.6908192709037624e-05, "loss": 0.477, "num_tokens": 2229796918.0, "step": 2917 }, { "epoch": 1.0688833928735, "grad_norm": 0.14741659928299025, "learning_rate": 3.690579795826651e-05, "loss": 0.5173, "num_tokens": 2230594849.0, "step": 2918 }, { "epoch": 1.069249793899423, "grad_norm": 0.1617231011753368, "learning_rate": 3.6903402367635035e-05, "loss": 0.4979, "num_tokens": 2231258261.0, "step": 2919 }, { "epoch": 1.0696161949253458, "grad_norm": 0.1543552525563131, "learning_rate": 3.690100593727819e-05, "loss": 0.5128, "num_tokens": 2231990135.0, "step": 2920 }, { "epoch": 1.0699825959512688, "grad_norm": 0.1914003854959595, "learning_rate": 3.689860866733098e-05, "loss": 0.5362, "num_tokens": 2232663611.0, "step": 2921 }, { "epoch": 1.0703489969771915, "grad_norm": 0.15307086127270375, "learning_rate": 3.6896210557928495e-05, "loss": 0.5216, "num_tokens": 2233390268.0, "step": 2922 }, { "epoch": 1.0707153980031143, "grad_norm": 0.1690683463278635, "learning_rate": 3.6893811609205854e-05, "loss": 0.5372, "num_tokens": 2234305024.0, "step": 2923 }, { "epoch": 1.0710817990290373, "grad_norm": 0.17983385054421736, "learning_rate": 3.689141182129822e-05, "loss": 0.5511, "num_tokens": 2234956502.0, "step": 2924 }, { "epoch": 1.07144820005496, "grad_norm": 0.18239820142194224, "learning_rate": 3.688901119434081e-05, "loss": 0.5296, "num_tokens": 2235628632.0, "step": 2925 }, { "epoch": 1.0718146010808831, "grad_norm": 0.17382960926605243, "learning_rate": 3.688660972846888e-05, "loss": 0.4863, "num_tokens": 2236409856.0, "step": 2926 }, { "epoch": 1.072181002106806, "grad_norm": 0.1729730510358241, "learning_rate": 3.688420742381775e-05, "loss": 0.5266, "num_tokens": 2237067934.0, "step": 2927 }, { "epoch": 1.0725474031327287, "grad_norm": 0.19441918553527632, "learning_rate": 3.6881804280522764e-05, "loss": 0.4683, "num_tokens": 2237949916.0, "step": 2928 }, { "epoch": 1.0729138041586517, "grad_norm": 0.17349594719309253, "learning_rate": 3.687940029871935e-05, "loss": 0.4969, "num_tokens": 2238771431.0, "step": 2929 }, { "epoch": 1.0732802051845745, "grad_norm": 0.1815443413356029, "learning_rate": 3.6876995478542925e-05, "loss": 0.5023, "num_tokens": 2239530421.0, "step": 2930 }, { "epoch": 1.0736466062104975, "grad_norm": 0.17937867880958322, "learning_rate": 3.687458982012902e-05, "loss": 0.5148, "num_tokens": 2240211202.0, "step": 2931 }, { "epoch": 1.0740130072364202, "grad_norm": 0.19578067362336554, "learning_rate": 3.687218332361315e-05, "loss": 0.5345, "num_tokens": 2240899015.0, "step": 2932 }, { "epoch": 1.0743794082623432, "grad_norm": 0.17082029532802598, "learning_rate": 3.686977598913093e-05, "loss": 0.5393, "num_tokens": 2241528561.0, "step": 2933 }, { "epoch": 1.074745809288266, "grad_norm": 0.17428533552797076, "learning_rate": 3.6867367816817996e-05, "loss": 0.522, "num_tokens": 2242280566.0, "step": 2934 }, { "epoch": 1.0751122103141888, "grad_norm": 0.17131679447966022, "learning_rate": 3.686495880681004e-05, "loss": 0.4955, "num_tokens": 2243002449.0, "step": 2935 }, { "epoch": 1.0754786113401118, "grad_norm": 0.14650412529069406, "learning_rate": 3.686254895924278e-05, "loss": 0.5009, "num_tokens": 2243760822.0, "step": 2936 }, { "epoch": 1.0758450123660346, "grad_norm": 0.17979211515361082, "learning_rate": 3.686013827425201e-05, "loss": 0.4837, "num_tokens": 2244503569.0, "step": 2937 }, { "epoch": 1.0762114133919576, "grad_norm": 0.15322176184419795, "learning_rate": 3.685772675197356e-05, "loss": 0.5117, "num_tokens": 2245218599.0, "step": 2938 }, { "epoch": 1.0765778144178804, "grad_norm": 0.24342009015023397, "learning_rate": 3.68553143925433e-05, "loss": 0.5395, "num_tokens": 2245878040.0, "step": 2939 }, { "epoch": 1.0769442154438031, "grad_norm": 0.17834283310850396, "learning_rate": 3.685290119609716e-05, "loss": 0.5189, "num_tokens": 2246628838.0, "step": 2940 }, { "epoch": 1.0773106164697261, "grad_norm": 0.18440342752035366, "learning_rate": 3.6850487162771096e-05, "loss": 0.5226, "num_tokens": 2247300373.0, "step": 2941 }, { "epoch": 1.077677017495649, "grad_norm": 0.16105541898118456, "learning_rate": 3.6848072292701146e-05, "loss": 0.519, "num_tokens": 2248205761.0, "step": 2942 }, { "epoch": 1.078043418521572, "grad_norm": 0.1769673534880614, "learning_rate": 3.684565658602337e-05, "loss": 0.4883, "num_tokens": 2249112652.0, "step": 2943 }, { "epoch": 1.0784098195474947, "grad_norm": 0.19881053166741608, "learning_rate": 3.684324004287386e-05, "loss": 0.5051, "num_tokens": 2249850571.0, "step": 2944 }, { "epoch": 1.0787762205734177, "grad_norm": 0.17179820619123065, "learning_rate": 3.68408226633888e-05, "loss": 0.4775, "num_tokens": 2250571553.0, "step": 2945 }, { "epoch": 1.0791426215993405, "grad_norm": 0.1656564229779607, "learning_rate": 3.683840444770438e-05, "loss": 0.5226, "num_tokens": 2251329845.0, "step": 2946 }, { "epoch": 1.0795090226252633, "grad_norm": 0.16598942290625243, "learning_rate": 3.6835985395956864e-05, "loss": 0.4979, "num_tokens": 2252088427.0, "step": 2947 }, { "epoch": 1.0798754236511863, "grad_norm": 0.16538603016961853, "learning_rate": 3.6833565508282554e-05, "loss": 0.5144, "num_tokens": 2252779537.0, "step": 2948 }, { "epoch": 1.080241824677109, "grad_norm": 0.17810572115062762, "learning_rate": 3.6831144784817775e-05, "loss": 0.4649, "num_tokens": 2253485044.0, "step": 2949 }, { "epoch": 1.080608225703032, "grad_norm": 0.17278361921068172, "learning_rate": 3.6828723225698954e-05, "loss": 0.4964, "num_tokens": 2254216301.0, "step": 2950 }, { "epoch": 1.0809746267289548, "grad_norm": 0.15307012041239237, "learning_rate": 3.682630083106251e-05, "loss": 0.4996, "num_tokens": 2254978893.0, "step": 2951 }, { "epoch": 1.0813410277548776, "grad_norm": 0.16995109920391457, "learning_rate": 3.682387760104494e-05, "loss": 0.5381, "num_tokens": 2255832901.0, "step": 2952 }, { "epoch": 1.0817074287808006, "grad_norm": 0.18219793462670725, "learning_rate": 3.682145353578277e-05, "loss": 0.4992, "num_tokens": 2256485267.0, "step": 2953 }, { "epoch": 1.0820738298067234, "grad_norm": 0.16181185822040384, "learning_rate": 3.68190286354126e-05, "loss": 0.4986, "num_tokens": 2257305777.0, "step": 2954 }, { "epoch": 1.0824402308326464, "grad_norm": 0.1663307682211384, "learning_rate": 3.681660290007104e-05, "loss": 0.5493, "num_tokens": 2257978457.0, "step": 2955 }, { "epoch": 1.0828066318585692, "grad_norm": 0.18711938078602908, "learning_rate": 3.681417632989478e-05, "loss": 0.4907, "num_tokens": 2258745662.0, "step": 2956 }, { "epoch": 1.083173032884492, "grad_norm": 0.15264300605690442, "learning_rate": 3.6811748925020555e-05, "loss": 0.5365, "num_tokens": 2259487715.0, "step": 2957 }, { "epoch": 1.083539433910415, "grad_norm": 0.17603759945855932, "learning_rate": 3.680932068558511e-05, "loss": 0.4878, "num_tokens": 2260400439.0, "step": 2958 }, { "epoch": 1.0839058349363377, "grad_norm": 0.16297867969202456, "learning_rate": 3.6806891611725274e-05, "loss": 0.5108, "num_tokens": 2261221928.0, "step": 2959 }, { "epoch": 1.0842722359622607, "grad_norm": 0.16452689367027626, "learning_rate": 3.680446170357792e-05, "loss": 0.5038, "num_tokens": 2262016506.0, "step": 2960 }, { "epoch": 1.0846386369881835, "grad_norm": 0.15653514503630653, "learning_rate": 3.680203096127995e-05, "loss": 0.4877, "num_tokens": 2262858160.0, "step": 2961 }, { "epoch": 1.0850050380141065, "grad_norm": 0.17121247540805426, "learning_rate": 3.6799599384968326e-05, "loss": 0.4812, "num_tokens": 2263636971.0, "step": 2962 }, { "epoch": 1.0853714390400293, "grad_norm": 0.14853217018691578, "learning_rate": 3.679716697478005e-05, "loss": 0.5369, "num_tokens": 2264464413.0, "step": 2963 }, { "epoch": 1.085737840065952, "grad_norm": 0.1718022121432431, "learning_rate": 3.6794733730852186e-05, "loss": 0.4911, "num_tokens": 2265254074.0, "step": 2964 }, { "epoch": 1.086104241091875, "grad_norm": 0.1391068208399496, "learning_rate": 3.679229965332183e-05, "loss": 0.4876, "num_tokens": 2266118457.0, "step": 2965 }, { "epoch": 1.0864706421177979, "grad_norm": 0.18343959168563181, "learning_rate": 3.678986474232613e-05, "loss": 0.512, "num_tokens": 2266763462.0, "step": 2966 }, { "epoch": 1.0868370431437209, "grad_norm": 0.1690213032806933, "learning_rate": 3.678742899800227e-05, "loss": 0.5358, "num_tokens": 2267569106.0, "step": 2967 }, { "epoch": 1.0872034441696437, "grad_norm": 0.19536717451571717, "learning_rate": 3.6784992420487496e-05, "loss": 0.5086, "num_tokens": 2268378187.0, "step": 2968 }, { "epoch": 1.0875698451955667, "grad_norm": 0.1574246849540293, "learning_rate": 3.678255500991911e-05, "loss": 0.5234, "num_tokens": 2269117847.0, "step": 2969 }, { "epoch": 1.0879362462214894, "grad_norm": 0.16085197825241437, "learning_rate": 3.6780116766434416e-05, "loss": 0.504, "num_tokens": 2269861830.0, "step": 2970 }, { "epoch": 1.0883026472474122, "grad_norm": 0.16776282742868912, "learning_rate": 3.677767769017082e-05, "loss": 0.5137, "num_tokens": 2270538131.0, "step": 2971 }, { "epoch": 1.0886690482733352, "grad_norm": 0.1611521535749844, "learning_rate": 3.677523778126575e-05, "loss": 0.4841, "num_tokens": 2271234758.0, "step": 2972 }, { "epoch": 1.089035449299258, "grad_norm": 0.16189004610282365, "learning_rate": 3.6772797039856674e-05, "loss": 0.5259, "num_tokens": 2271986387.0, "step": 2973 }, { "epoch": 1.089401850325181, "grad_norm": 0.15644862255853284, "learning_rate": 3.677035546608112e-05, "loss": 0.4994, "num_tokens": 2272743916.0, "step": 2974 }, { "epoch": 1.0897682513511038, "grad_norm": 0.1559892486897835, "learning_rate": 3.676791306007665e-05, "loss": 0.5216, "num_tokens": 2273457589.0, "step": 2975 }, { "epoch": 1.0901346523770266, "grad_norm": 0.15744666455789474, "learning_rate": 3.6765469821980885e-05, "loss": 0.5063, "num_tokens": 2274335620.0, "step": 2976 }, { "epoch": 1.0905010534029496, "grad_norm": 0.1555946125836143, "learning_rate": 3.6763025751931485e-05, "loss": 0.5155, "num_tokens": 2275169730.0, "step": 2977 }, { "epoch": 1.0908674544288723, "grad_norm": 0.1438495502248134, "learning_rate": 3.676058085006616e-05, "loss": 0.4744, "num_tokens": 2275966453.0, "step": 2978 }, { "epoch": 1.0912338554547953, "grad_norm": 0.19294312631649116, "learning_rate": 3.6758135116522676e-05, "loss": 0.5226, "num_tokens": 2276583124.0, "step": 2979 }, { "epoch": 1.0916002564807181, "grad_norm": 0.1525678496697054, "learning_rate": 3.675568855143882e-05, "loss": 0.5096, "num_tokens": 2277375419.0, "step": 2980 }, { "epoch": 1.091966657506641, "grad_norm": 0.15764013263159884, "learning_rate": 3.6753241154952465e-05, "loss": 0.492, "num_tokens": 2278244892.0, "step": 2981 }, { "epoch": 1.092333058532564, "grad_norm": 0.17144961348361118, "learning_rate": 3.675079292720149e-05, "loss": 0.5188, "num_tokens": 2279022884.0, "step": 2982 }, { "epoch": 1.0926994595584867, "grad_norm": 0.13798417304908783, "learning_rate": 3.6748343868323845e-05, "loss": 0.4952, "num_tokens": 2279750745.0, "step": 2983 }, { "epoch": 1.0930658605844097, "grad_norm": 0.17192177354638724, "learning_rate": 3.674589397845752e-05, "loss": 0.5391, "num_tokens": 2280534117.0, "step": 2984 }, { "epoch": 1.0934322616103325, "grad_norm": 0.12991182696922998, "learning_rate": 3.674344325774056e-05, "loss": 0.5108, "num_tokens": 2281316125.0, "step": 2985 }, { "epoch": 1.0937986626362555, "grad_norm": 0.18609728777452086, "learning_rate": 3.674099170631104e-05, "loss": 0.5285, "num_tokens": 2282069579.0, "step": 2986 }, { "epoch": 1.0941650636621782, "grad_norm": 0.15677491511446437, "learning_rate": 3.673853932430709e-05, "loss": 0.522, "num_tokens": 2282832912.0, "step": 2987 }, { "epoch": 1.094531464688101, "grad_norm": 0.16301420391596175, "learning_rate": 3.67360861118669e-05, "loss": 0.4959, "num_tokens": 2283584620.0, "step": 2988 }, { "epoch": 1.094897865714024, "grad_norm": 0.15615991247046804, "learning_rate": 3.6733632069128694e-05, "loss": 0.4901, "num_tokens": 2284293946.0, "step": 2989 }, { "epoch": 1.0952642667399468, "grad_norm": 0.14684782791297127, "learning_rate": 3.6731177196230736e-05, "loss": 0.5236, "num_tokens": 2285064302.0, "step": 2990 }, { "epoch": 1.0956306677658698, "grad_norm": 0.15429609815158585, "learning_rate": 3.672872149331134e-05, "loss": 0.4718, "num_tokens": 2285898797.0, "step": 2991 }, { "epoch": 1.0959970687917926, "grad_norm": 0.15631256805954394, "learning_rate": 3.672626496050889e-05, "loss": 0.4877, "num_tokens": 2286593789.0, "step": 2992 }, { "epoch": 1.0963634698177156, "grad_norm": 0.15115433696576033, "learning_rate": 3.6723807597961786e-05, "loss": 0.5227, "num_tokens": 2287383540.0, "step": 2993 }, { "epoch": 1.0967298708436384, "grad_norm": 0.13905616260977277, "learning_rate": 3.672134940580848e-05, "loss": 0.5089, "num_tokens": 2288154497.0, "step": 2994 }, { "epoch": 1.0970962718695612, "grad_norm": 0.1602704437376978, "learning_rate": 3.6718890384187485e-05, "loss": 0.5067, "num_tokens": 2288962537.0, "step": 2995 }, { "epoch": 1.0974626728954842, "grad_norm": 0.15843361786803234, "learning_rate": 3.671643053323736e-05, "loss": 0.5093, "num_tokens": 2289681102.0, "step": 2996 }, { "epoch": 1.097829073921407, "grad_norm": 0.1714112736004304, "learning_rate": 3.67139698530967e-05, "loss": 0.5053, "num_tokens": 2290487877.0, "step": 2997 }, { "epoch": 1.09819547494733, "grad_norm": 0.15144232778713562, "learning_rate": 3.671150834390415e-05, "loss": 0.5419, "num_tokens": 2291092017.0, "step": 2998 }, { "epoch": 1.0985618759732527, "grad_norm": 0.1785447569773189, "learning_rate": 3.67090460057984e-05, "loss": 0.5265, "num_tokens": 2291871222.0, "step": 2999 }, { "epoch": 1.0989282769991755, "grad_norm": 0.15100800405291046, "learning_rate": 3.670658283891819e-05, "loss": 0.5403, "num_tokens": 2292677611.0, "step": 3000 }, { "epoch": 1.0992946780250985, "grad_norm": 0.17783007878524135, "learning_rate": 3.670411884340231e-05, "loss": 0.529, "num_tokens": 2293368382.0, "step": 3001 }, { "epoch": 1.0996610790510213, "grad_norm": 0.161266113100266, "learning_rate": 3.670165401938959e-05, "loss": 0.4866, "num_tokens": 2294086736.0, "step": 3002 }, { "epoch": 1.1000274800769443, "grad_norm": 0.17888083810720795, "learning_rate": 3.6699188367018905e-05, "loss": 0.5536, "num_tokens": 2294730775.0, "step": 3003 }, { "epoch": 1.100393881102867, "grad_norm": 0.17007877442876934, "learning_rate": 3.669672188642919e-05, "loss": 0.5162, "num_tokens": 2295495339.0, "step": 3004 }, { "epoch": 1.1007602821287898, "grad_norm": 0.16502493933381623, "learning_rate": 3.669425457775941e-05, "loss": 0.5127, "num_tokens": 2296263984.0, "step": 3005 }, { "epoch": 1.1011266831547128, "grad_norm": 0.1856682240907268, "learning_rate": 3.669178644114859e-05, "loss": 0.526, "num_tokens": 2297064288.0, "step": 3006 }, { "epoch": 1.1014930841806356, "grad_norm": 0.16698818144917624, "learning_rate": 3.668931747673579e-05, "loss": 0.5231, "num_tokens": 2297859088.0, "step": 3007 }, { "epoch": 1.1018594852065586, "grad_norm": 0.19496318416778785, "learning_rate": 3.668684768466013e-05, "loss": 0.503, "num_tokens": 2298684709.0, "step": 3008 }, { "epoch": 1.1022258862324814, "grad_norm": 0.16547376225564306, "learning_rate": 3.668437706506077e-05, "loss": 0.4913, "num_tokens": 2299445884.0, "step": 3009 }, { "epoch": 1.1025922872584044, "grad_norm": 0.16365512782392966, "learning_rate": 3.668190561807691e-05, "loss": 0.4393, "num_tokens": 2300172502.0, "step": 3010 }, { "epoch": 1.1029586882843272, "grad_norm": 0.16230763688949998, "learning_rate": 3.66794333438478e-05, "loss": 0.5379, "num_tokens": 2300780958.0, "step": 3011 }, { "epoch": 1.10332508931025, "grad_norm": 0.15457358291897957, "learning_rate": 3.667696024251274e-05, "loss": 0.4837, "num_tokens": 2301660225.0, "step": 3012 }, { "epoch": 1.103691490336173, "grad_norm": 0.1695692377185406, "learning_rate": 3.6674486314211084e-05, "loss": 0.4979, "num_tokens": 2302367153.0, "step": 3013 }, { "epoch": 1.1040578913620958, "grad_norm": 0.1550309423901393, "learning_rate": 3.6672011559082214e-05, "loss": 0.5215, "num_tokens": 2303149300.0, "step": 3014 }, { "epoch": 1.1044242923880188, "grad_norm": 0.15607521220100243, "learning_rate": 3.6669535977265585e-05, "loss": 0.4779, "num_tokens": 2303821583.0, "step": 3015 }, { "epoch": 1.1047906934139415, "grad_norm": 0.15592734077905035, "learning_rate": 3.666705956890067e-05, "loss": 0.5226, "num_tokens": 2304523776.0, "step": 3016 }, { "epoch": 1.1051570944398645, "grad_norm": 0.154873875756747, "learning_rate": 3.666458233412699e-05, "loss": 0.5336, "num_tokens": 2305199000.0, "step": 3017 }, { "epoch": 1.1055234954657873, "grad_norm": 0.1635548877768989, "learning_rate": 3.666210427308415e-05, "loss": 0.4599, "num_tokens": 2306009202.0, "step": 3018 }, { "epoch": 1.10588989649171, "grad_norm": 0.17116622167673484, "learning_rate": 3.665962538591176e-05, "loss": 0.5279, "num_tokens": 2306764067.0, "step": 3019 }, { "epoch": 1.106256297517633, "grad_norm": 0.17659317938997093, "learning_rate": 3.665714567274948e-05, "loss": 0.4869, "num_tokens": 2307460383.0, "step": 3020 }, { "epoch": 1.1066226985435559, "grad_norm": 0.1745002726370407, "learning_rate": 3.665466513373706e-05, "loss": 0.5238, "num_tokens": 2308213524.0, "step": 3021 }, { "epoch": 1.1069890995694789, "grad_norm": 0.17582818866412903, "learning_rate": 3.665218376901424e-05, "loss": 0.471, "num_tokens": 2308972884.0, "step": 3022 }, { "epoch": 1.1073555005954017, "grad_norm": 0.17564500568502223, "learning_rate": 3.664970157872084e-05, "loss": 0.5202, "num_tokens": 2309645947.0, "step": 3023 }, { "epoch": 1.1077219016213244, "grad_norm": 0.1830155037193055, "learning_rate": 3.664721856299671e-05, "loss": 0.5162, "num_tokens": 2310360277.0, "step": 3024 }, { "epoch": 1.1080883026472474, "grad_norm": 0.15382452021389376, "learning_rate": 3.6644734721981765e-05, "loss": 0.492, "num_tokens": 2311125846.0, "step": 3025 }, { "epoch": 1.1084547036731702, "grad_norm": 0.19578612136977408, "learning_rate": 3.664225005581595e-05, "loss": 0.5159, "num_tokens": 2311814749.0, "step": 3026 }, { "epoch": 1.1088211046990932, "grad_norm": 0.1582929721510193, "learning_rate": 3.663976456463927e-05, "loss": 0.5515, "num_tokens": 2312561894.0, "step": 3027 }, { "epoch": 1.109187505725016, "grad_norm": 0.1885695656304994, "learning_rate": 3.663727824859175e-05, "loss": 0.5048, "num_tokens": 2313219197.0, "step": 3028 }, { "epoch": 1.1095539067509388, "grad_norm": 0.1416749884494884, "learning_rate": 3.66347911078135e-05, "loss": 0.5048, "num_tokens": 2313985110.0, "step": 3029 }, { "epoch": 1.1099203077768618, "grad_norm": 0.17937343834161648, "learning_rate": 3.663230314244465e-05, "loss": 0.4996, "num_tokens": 2314640580.0, "step": 3030 }, { "epoch": 1.1102867088027846, "grad_norm": 0.17568433281303136, "learning_rate": 3.662981435262538e-05, "loss": 0.5395, "num_tokens": 2315317117.0, "step": 3031 }, { "epoch": 1.1106531098287076, "grad_norm": 0.16763454628401475, "learning_rate": 3.662732473849593e-05, "loss": 0.5032, "num_tokens": 2316229335.0, "step": 3032 }, { "epoch": 1.1110195108546304, "grad_norm": 0.18505696359242174, "learning_rate": 3.6624834300196556e-05, "loss": 0.5103, "num_tokens": 2316925720.0, "step": 3033 }, { "epoch": 1.1113859118805534, "grad_norm": 0.16002401612911868, "learning_rate": 3.6622343037867605e-05, "loss": 0.4863, "num_tokens": 2317698448.0, "step": 3034 }, { "epoch": 1.1117523129064761, "grad_norm": 0.16383058588798521, "learning_rate": 3.661985095164943e-05, "loss": 0.4789, "num_tokens": 2318486805.0, "step": 3035 }, { "epoch": 1.112118713932399, "grad_norm": 0.1873516285939085, "learning_rate": 3.661735804168245e-05, "loss": 0.524, "num_tokens": 2319257333.0, "step": 3036 }, { "epoch": 1.112485114958322, "grad_norm": 0.1706094893658435, "learning_rate": 3.6614864308107126e-05, "loss": 0.5492, "num_tokens": 2320066235.0, "step": 3037 }, { "epoch": 1.1128515159842447, "grad_norm": 0.1832710865122934, "learning_rate": 3.6612369751063976e-05, "loss": 0.5232, "num_tokens": 2320781805.0, "step": 3038 }, { "epoch": 1.1132179170101677, "grad_norm": 0.1886463925263613, "learning_rate": 3.660987437069354e-05, "loss": 0.5162, "num_tokens": 2321362187.0, "step": 3039 }, { "epoch": 1.1135843180360905, "grad_norm": 0.17918271275008552, "learning_rate": 3.6607378167136424e-05, "loss": 0.516, "num_tokens": 2322179530.0, "step": 3040 }, { "epoch": 1.1139507190620135, "grad_norm": 0.1830449699976922, "learning_rate": 3.6604881140533276e-05, "loss": 0.5192, "num_tokens": 2322866508.0, "step": 3041 }, { "epoch": 1.1143171200879363, "grad_norm": 0.1859985452954444, "learning_rate": 3.66023832910248e-05, "loss": 0.4869, "num_tokens": 2323627993.0, "step": 3042 }, { "epoch": 1.114683521113859, "grad_norm": 0.1802866878911855, "learning_rate": 3.6599884618751725e-05, "loss": 0.5096, "num_tokens": 2324436242.0, "step": 3043 }, { "epoch": 1.115049922139782, "grad_norm": 0.14351267096637285, "learning_rate": 3.659738512385483e-05, "loss": 0.494, "num_tokens": 2325185935.0, "step": 3044 }, { "epoch": 1.1154163231657048, "grad_norm": 0.17430090765836218, "learning_rate": 3.659488480647497e-05, "loss": 0.4963, "num_tokens": 2325885931.0, "step": 3045 }, { "epoch": 1.1157827241916278, "grad_norm": 0.160379890897826, "learning_rate": 3.6592383666753e-05, "loss": 0.4892, "num_tokens": 2326568077.0, "step": 3046 }, { "epoch": 1.1161491252175506, "grad_norm": 0.15730284714700052, "learning_rate": 3.6589881704829866e-05, "loss": 0.4924, "num_tokens": 2327354704.0, "step": 3047 }, { "epoch": 1.1165155262434734, "grad_norm": 0.19029220916253023, "learning_rate": 3.658737892084654e-05, "loss": 0.5519, "num_tokens": 2328057013.0, "step": 3048 }, { "epoch": 1.1168819272693964, "grad_norm": 0.18494497283899025, "learning_rate": 3.6584875314944015e-05, "loss": 0.5313, "num_tokens": 2328738892.0, "step": 3049 }, { "epoch": 1.1172483282953192, "grad_norm": 0.17726952919085903, "learning_rate": 3.6582370887263375e-05, "loss": 0.4827, "num_tokens": 2329431097.0, "step": 3050 }, { "epoch": 1.1176147293212422, "grad_norm": 0.17103344601293646, "learning_rate": 3.6579865637945733e-05, "loss": 0.5209, "num_tokens": 2330146084.0, "step": 3051 }, { "epoch": 1.117981130347165, "grad_norm": 0.17522939725207512, "learning_rate": 3.657735956713224e-05, "loss": 0.4928, "num_tokens": 2330940772.0, "step": 3052 }, { "epoch": 1.1183475313730877, "grad_norm": 0.16394660122839802, "learning_rate": 3.6574852674964096e-05, "loss": 0.5201, "num_tokens": 2331711289.0, "step": 3053 }, { "epoch": 1.1187139323990107, "grad_norm": 0.1534062473542433, "learning_rate": 3.657234496158256e-05, "loss": 0.4627, "num_tokens": 2332521896.0, "step": 3054 }, { "epoch": 1.1190803334249335, "grad_norm": 0.16092853443673347, "learning_rate": 3.656983642712892e-05, "loss": 0.4927, "num_tokens": 2333315973.0, "step": 3055 }, { "epoch": 1.1194467344508565, "grad_norm": 0.13505121855581897, "learning_rate": 3.656732707174452e-05, "loss": 0.5194, "num_tokens": 2334193673.0, "step": 3056 }, { "epoch": 1.1198131354767793, "grad_norm": 0.17352961829446328, "learning_rate": 3.656481689557076e-05, "loss": 0.4829, "num_tokens": 2334888191.0, "step": 3057 }, { "epoch": 1.1201795365027023, "grad_norm": 0.1736675811557616, "learning_rate": 3.656230589874905e-05, "loss": 0.5257, "num_tokens": 2335586171.0, "step": 3058 }, { "epoch": 1.120545937528625, "grad_norm": 0.18265272790304918, "learning_rate": 3.65597940814209e-05, "loss": 0.544, "num_tokens": 2336355619.0, "step": 3059 }, { "epoch": 1.1209123385545479, "grad_norm": 0.1779080446046906, "learning_rate": 3.6557281443727816e-05, "loss": 0.5276, "num_tokens": 2337018925.0, "step": 3060 }, { "epoch": 1.1212787395804709, "grad_norm": 0.15469286900639867, "learning_rate": 3.6554767985811375e-05, "loss": 0.5188, "num_tokens": 2337755987.0, "step": 3061 }, { "epoch": 1.1216451406063936, "grad_norm": 0.1889388148834371, "learning_rate": 3.6552253707813206e-05, "loss": 0.4869, "num_tokens": 2338482786.0, "step": 3062 }, { "epoch": 1.1220115416323166, "grad_norm": 0.1589800729745525, "learning_rate": 3.654973860987497e-05, "loss": 0.5148, "num_tokens": 2339227749.0, "step": 3063 }, { "epoch": 1.1223779426582394, "grad_norm": 0.15880172544655075, "learning_rate": 3.654722269213837e-05, "loss": 0.4789, "num_tokens": 2339969196.0, "step": 3064 }, { "epoch": 1.1227443436841624, "grad_norm": 0.1606903416736503, "learning_rate": 3.654470595474518e-05, "loss": 0.5027, "num_tokens": 2340738483.0, "step": 3065 }, { "epoch": 1.1231107447100852, "grad_norm": 0.15098053910589748, "learning_rate": 3.6542188397837184e-05, "loss": 0.4858, "num_tokens": 2341442160.0, "step": 3066 }, { "epoch": 1.123477145736008, "grad_norm": 0.15398811387092765, "learning_rate": 3.653967002155625e-05, "loss": 0.489, "num_tokens": 2342295224.0, "step": 3067 }, { "epoch": 1.123843546761931, "grad_norm": 0.14115225672019632, "learning_rate": 3.6537150826044273e-05, "loss": 0.4978, "num_tokens": 2343136581.0, "step": 3068 }, { "epoch": 1.1242099477878538, "grad_norm": 0.17291521105564323, "learning_rate": 3.653463081144319e-05, "loss": 0.5226, "num_tokens": 2343783842.0, "step": 3069 }, { "epoch": 1.1245763488137768, "grad_norm": 0.15247060536420992, "learning_rate": 3.653210997789499e-05, "loss": 0.5247, "num_tokens": 2344511181.0, "step": 3070 }, { "epoch": 1.1249427498396996, "grad_norm": 0.16011354491437027, "learning_rate": 3.652958832554172e-05, "loss": 0.4904, "num_tokens": 2345187829.0, "step": 3071 }, { "epoch": 1.1253091508656223, "grad_norm": 0.17314222275113733, "learning_rate": 3.652706585452545e-05, "loss": 0.5423, "num_tokens": 2345964662.0, "step": 3072 }, { "epoch": 1.1256755518915453, "grad_norm": 0.14517825062112352, "learning_rate": 3.65245425649883e-05, "loss": 0.5331, "num_tokens": 2346652863.0, "step": 3073 }, { "epoch": 1.1260419529174681, "grad_norm": 0.15809693891533225, "learning_rate": 3.652201845707246e-05, "loss": 0.52, "num_tokens": 2347437843.0, "step": 3074 }, { "epoch": 1.1264083539433911, "grad_norm": 0.16217136804140586, "learning_rate": 3.6519493530920145e-05, "loss": 0.5136, "num_tokens": 2348266049.0, "step": 3075 }, { "epoch": 1.126774754969314, "grad_norm": 0.16960583768897855, "learning_rate": 3.6516967786673616e-05, "loss": 0.583, "num_tokens": 2348986759.0, "step": 3076 }, { "epoch": 1.1271411559952367, "grad_norm": 0.16218320198859065, "learning_rate": 3.651444122447519e-05, "loss": 0.4925, "num_tokens": 2349662996.0, "step": 3077 }, { "epoch": 1.1275075570211597, "grad_norm": 0.1530345733928176, "learning_rate": 3.6511913844467224e-05, "loss": 0.4966, "num_tokens": 2350397531.0, "step": 3078 }, { "epoch": 1.1278739580470825, "grad_norm": 0.14108737385947384, "learning_rate": 3.6509385646792114e-05, "loss": 0.5017, "num_tokens": 2351192104.0, "step": 3079 }, { "epoch": 1.1282403590730055, "grad_norm": 0.19259782660666408, "learning_rate": 3.650685663159233e-05, "loss": 0.5035, "num_tokens": 2351945587.0, "step": 3080 }, { "epoch": 1.1286067600989282, "grad_norm": 0.15838951006703647, "learning_rate": 3.650432679901033e-05, "loss": 0.5261, "num_tokens": 2352717347.0, "step": 3081 }, { "epoch": 1.1289731611248512, "grad_norm": 0.17265268269066286, "learning_rate": 3.65017961491887e-05, "loss": 0.5205, "num_tokens": 2353572978.0, "step": 3082 }, { "epoch": 1.129339562150774, "grad_norm": 0.14808301547061511, "learning_rate": 3.649926468227001e-05, "loss": 0.5166, "num_tokens": 2354354218.0, "step": 3083 }, { "epoch": 1.1297059631766968, "grad_norm": 0.1566891751468377, "learning_rate": 3.649673239839689e-05, "loss": 0.51, "num_tokens": 2355125983.0, "step": 3084 }, { "epoch": 1.1300723642026198, "grad_norm": 0.16645867188268887, "learning_rate": 3.649419929771203e-05, "loss": 0.5281, "num_tokens": 2355913275.0, "step": 3085 }, { "epoch": 1.1304387652285426, "grad_norm": 0.1869602427916607, "learning_rate": 3.6491665380358145e-05, "loss": 0.5235, "num_tokens": 2356654366.0, "step": 3086 }, { "epoch": 1.1308051662544656, "grad_norm": 0.16067011602548237, "learning_rate": 3.6489130646478013e-05, "loss": 0.5116, "num_tokens": 2357373416.0, "step": 3087 }, { "epoch": 1.1311715672803884, "grad_norm": 0.16528170818309923, "learning_rate": 3.648659509621445e-05, "loss": 0.5105, "num_tokens": 2358126898.0, "step": 3088 }, { "epoch": 1.1315379683063114, "grad_norm": 0.2057849398670138, "learning_rate": 3.648405872971033e-05, "loss": 0.5144, "num_tokens": 2358902505.0, "step": 3089 }, { "epoch": 1.1319043693322342, "grad_norm": 0.17461634364998557, "learning_rate": 3.648152154710855e-05, "loss": 0.526, "num_tokens": 2359687473.0, "step": 3090 }, { "epoch": 1.132270770358157, "grad_norm": 0.16747639671184272, "learning_rate": 3.647898354855208e-05, "loss": 0.5208, "num_tokens": 2360538989.0, "step": 3091 }, { "epoch": 1.13263717138408, "grad_norm": 0.20071175538102534, "learning_rate": 3.647644473418391e-05, "loss": 0.5165, "num_tokens": 2361359531.0, "step": 3092 }, { "epoch": 1.1330035724100027, "grad_norm": 0.16252069247062836, "learning_rate": 3.647390510414709e-05, "loss": 0.5083, "num_tokens": 2362044425.0, "step": 3093 }, { "epoch": 1.1333699734359257, "grad_norm": 0.17737115325784295, "learning_rate": 3.6471364658584724e-05, "loss": 0.544, "num_tokens": 2362798421.0, "step": 3094 }, { "epoch": 1.1337363744618485, "grad_norm": 0.18460246501781652, "learning_rate": 3.6468823397639944e-05, "loss": 0.4818, "num_tokens": 2363540371.0, "step": 3095 }, { "epoch": 1.1341027754877713, "grad_norm": 0.14936876881856484, "learning_rate": 3.6466281321455944e-05, "loss": 0.5335, "num_tokens": 2364307689.0, "step": 3096 }, { "epoch": 1.1344691765136943, "grad_norm": 0.18725449972234978, "learning_rate": 3.646373843017594e-05, "loss": 0.494, "num_tokens": 2365088846.0, "step": 3097 }, { "epoch": 1.134835577539617, "grad_norm": 0.14772550477088048, "learning_rate": 3.646119472394322e-05, "loss": 0.5112, "num_tokens": 2365876588.0, "step": 3098 }, { "epoch": 1.13520197856554, "grad_norm": 0.14975391193777582, "learning_rate": 3.645865020290112e-05, "loss": 0.4936, "num_tokens": 2366569286.0, "step": 3099 }, { "epoch": 1.1355683795914628, "grad_norm": 0.17309057452095528, "learning_rate": 3.645610486719299e-05, "loss": 0.531, "num_tokens": 2367267925.0, "step": 3100 }, { "epoch": 1.1359347806173856, "grad_norm": 0.14503779630793454, "learning_rate": 3.6453558716962254e-05, "loss": 0.5039, "num_tokens": 2368033127.0, "step": 3101 }, { "epoch": 1.1363011816433086, "grad_norm": 0.15271440912659665, "learning_rate": 3.6451011752352374e-05, "loss": 0.5281, "num_tokens": 2368870843.0, "step": 3102 }, { "epoch": 1.1366675826692314, "grad_norm": 0.15530557767317527, "learning_rate": 3.644846397350686e-05, "loss": 0.5416, "num_tokens": 2369675731.0, "step": 3103 }, { "epoch": 1.1370339836951544, "grad_norm": 0.1739090388832535, "learning_rate": 3.6445915380569264e-05, "loss": 0.5018, "num_tokens": 2370474950.0, "step": 3104 }, { "epoch": 1.1374003847210772, "grad_norm": 0.19381873526366122, "learning_rate": 3.644336597368318e-05, "loss": 0.5282, "num_tokens": 2371202442.0, "step": 3105 }, { "epoch": 1.1377667857470002, "grad_norm": 0.14585967293797353, "learning_rate": 3.644081575299226e-05, "loss": 0.5203, "num_tokens": 2371969791.0, "step": 3106 }, { "epoch": 1.138133186772923, "grad_norm": 0.22032627996613846, "learning_rate": 3.643826471864019e-05, "loss": 0.5138, "num_tokens": 2372720722.0, "step": 3107 }, { "epoch": 1.1384995877988457, "grad_norm": 0.13732453449298426, "learning_rate": 3.643571287077071e-05, "loss": 0.4826, "num_tokens": 2373605083.0, "step": 3108 }, { "epoch": 1.1388659888247687, "grad_norm": 0.14102170997128738, "learning_rate": 3.64331602095276e-05, "loss": 0.5255, "num_tokens": 2374454569.0, "step": 3109 }, { "epoch": 1.1392323898506915, "grad_norm": 0.1921412062773475, "learning_rate": 3.643060673505469e-05, "loss": 0.5074, "num_tokens": 2375267398.0, "step": 3110 }, { "epoch": 1.1395987908766145, "grad_norm": 0.16740382973075624, "learning_rate": 3.642805244749585e-05, "loss": 0.5239, "num_tokens": 2376056216.0, "step": 3111 }, { "epoch": 1.1399651919025373, "grad_norm": 0.13635393534790913, "learning_rate": 3.642549734699501e-05, "loss": 0.4926, "num_tokens": 2376863749.0, "step": 3112 }, { "epoch": 1.1403315929284603, "grad_norm": 0.16639629427647096, "learning_rate": 3.6422941433696135e-05, "loss": 0.4978, "num_tokens": 2377649044.0, "step": 3113 }, { "epoch": 1.140697993954383, "grad_norm": 0.1598982294073684, "learning_rate": 3.642038470774322e-05, "loss": 0.5229, "num_tokens": 2378462971.0, "step": 3114 }, { "epoch": 1.1410643949803059, "grad_norm": 0.1438091048627707, "learning_rate": 3.641782716928035e-05, "loss": 0.4908, "num_tokens": 2379265940.0, "step": 3115 }, { "epoch": 1.1414307960062289, "grad_norm": 0.13666275771459577, "learning_rate": 3.641526881845161e-05, "loss": 0.5115, "num_tokens": 2380048760.0, "step": 3116 }, { "epoch": 1.1417971970321517, "grad_norm": 0.14383444197525785, "learning_rate": 3.6412709655401144e-05, "loss": 0.4988, "num_tokens": 2380710483.0, "step": 3117 }, { "epoch": 1.1421635980580747, "grad_norm": 0.16576245786913682, "learning_rate": 3.641014968027316e-05, "loss": 0.4921, "num_tokens": 2381455447.0, "step": 3118 }, { "epoch": 1.1425299990839974, "grad_norm": 0.14340194080571478, "learning_rate": 3.64075888932119e-05, "loss": 0.4897, "num_tokens": 2382286940.0, "step": 3119 }, { "epoch": 1.1428964001099202, "grad_norm": 0.15240907743121185, "learning_rate": 3.640502729436164e-05, "loss": 0.5093, "num_tokens": 2383149242.0, "step": 3120 }, { "epoch": 1.1432628011358432, "grad_norm": 0.14462456480105043, "learning_rate": 3.6402464883866714e-05, "loss": 0.4847, "num_tokens": 2384087624.0, "step": 3121 }, { "epoch": 1.143629202161766, "grad_norm": 0.15513634010228633, "learning_rate": 3.6399901661871504e-05, "loss": 0.4864, "num_tokens": 2384833210.0, "step": 3122 }, { "epoch": 1.143995603187689, "grad_norm": 0.14313510928705706, "learning_rate": 3.639733762852043e-05, "loss": 0.5266, "num_tokens": 2385615750.0, "step": 3123 }, { "epoch": 1.1443620042136118, "grad_norm": 0.15320231628575426, "learning_rate": 3.6394772783957964e-05, "loss": 0.4636, "num_tokens": 2386380999.0, "step": 3124 }, { "epoch": 1.1447284052395346, "grad_norm": 0.15749877635370915, "learning_rate": 3.639220712832862e-05, "loss": 0.5567, "num_tokens": 2387121873.0, "step": 3125 }, { "epoch": 1.1450948062654576, "grad_norm": 0.15646362036941244, "learning_rate": 3.638964066177696e-05, "loss": 0.5036, "num_tokens": 2387852242.0, "step": 3126 }, { "epoch": 1.1454612072913803, "grad_norm": 0.14937722186824698, "learning_rate": 3.638707338444759e-05, "loss": 0.512, "num_tokens": 2388701001.0, "step": 3127 }, { "epoch": 1.1458276083173033, "grad_norm": 0.14820814191593318, "learning_rate": 3.638450529648516e-05, "loss": 0.4679, "num_tokens": 2389405017.0, "step": 3128 }, { "epoch": 1.1461940093432261, "grad_norm": 0.1427931945985469, "learning_rate": 3.6381936398034365e-05, "loss": 0.5162, "num_tokens": 2390089927.0, "step": 3129 }, { "epoch": 1.1465604103691491, "grad_norm": 0.17591166768100802, "learning_rate": 3.6379366689239965e-05, "loss": 0.5093, "num_tokens": 2390846082.0, "step": 3130 }, { "epoch": 1.146926811395072, "grad_norm": 0.1526996485108812, "learning_rate": 3.637679617024673e-05, "loss": 0.53, "num_tokens": 2391563236.0, "step": 3131 }, { "epoch": 1.1472932124209947, "grad_norm": 0.16685168589922764, "learning_rate": 3.637422484119949e-05, "loss": 0.5269, "num_tokens": 2392297465.0, "step": 3132 }, { "epoch": 1.1476596134469177, "grad_norm": 0.14946036584108963, "learning_rate": 3.6371652702243146e-05, "loss": 0.4546, "num_tokens": 2393126094.0, "step": 3133 }, { "epoch": 1.1480260144728405, "grad_norm": 0.1690973423929808, "learning_rate": 3.636907975352262e-05, "loss": 0.5176, "num_tokens": 2393790753.0, "step": 3134 }, { "epoch": 1.1483924154987635, "grad_norm": 0.154838363095658, "learning_rate": 3.636650599518287e-05, "loss": 0.5021, "num_tokens": 2394535520.0, "step": 3135 }, { "epoch": 1.1487588165246863, "grad_norm": 0.14822947891917837, "learning_rate": 3.636393142736892e-05, "loss": 0.4957, "num_tokens": 2395217291.0, "step": 3136 }, { "epoch": 1.1491252175506093, "grad_norm": 0.13970370814085406, "learning_rate": 3.6361356050225835e-05, "loss": 0.503, "num_tokens": 2396011855.0, "step": 3137 }, { "epoch": 1.149491618576532, "grad_norm": 0.14655155805256848, "learning_rate": 3.6358779863898726e-05, "loss": 0.5105, "num_tokens": 2396866161.0, "step": 3138 }, { "epoch": 1.1498580196024548, "grad_norm": 0.14973155383957584, "learning_rate": 3.6356202868532746e-05, "loss": 0.4861, "num_tokens": 2397671114.0, "step": 3139 }, { "epoch": 1.1502244206283778, "grad_norm": 0.150835027071625, "learning_rate": 3.635362506427309e-05, "loss": 0.5155, "num_tokens": 2398333166.0, "step": 3140 }, { "epoch": 1.1505908216543006, "grad_norm": 0.16934147058925622, "learning_rate": 3.6351046451265006e-05, "loss": 0.4969, "num_tokens": 2399115410.0, "step": 3141 }, { "epoch": 1.1509572226802236, "grad_norm": 0.1349866126960968, "learning_rate": 3.634846702965378e-05, "loss": 0.4762, "num_tokens": 2399990799.0, "step": 3142 }, { "epoch": 1.1513236237061464, "grad_norm": 0.1484780576190422, "learning_rate": 3.634588679958475e-05, "loss": 0.4944, "num_tokens": 2400845224.0, "step": 3143 }, { "epoch": 1.1516900247320692, "grad_norm": 0.15196102881986392, "learning_rate": 3.634330576120332e-05, "loss": 0.5427, "num_tokens": 2401560477.0, "step": 3144 }, { "epoch": 1.1520564257579922, "grad_norm": 0.17607656203030791, "learning_rate": 3.634072391465488e-05, "loss": 0.4967, "num_tokens": 2402359364.0, "step": 3145 }, { "epoch": 1.152422826783915, "grad_norm": 0.16598506525052492, "learning_rate": 3.6338141260084925e-05, "loss": 0.5354, "num_tokens": 2403107389.0, "step": 3146 }, { "epoch": 1.152789227809838, "grad_norm": 0.1449692060089203, "learning_rate": 3.6335557797638974e-05, "loss": 0.4979, "num_tokens": 2403939864.0, "step": 3147 }, { "epoch": 1.1531556288357607, "grad_norm": 0.20864225961602154, "learning_rate": 3.6332973527462573e-05, "loss": 0.5298, "num_tokens": 2404793197.0, "step": 3148 }, { "epoch": 1.1535220298616835, "grad_norm": 0.15165704453113443, "learning_rate": 3.633038844970136e-05, "loss": 0.5255, "num_tokens": 2405512689.0, "step": 3149 }, { "epoch": 1.1538884308876065, "grad_norm": 0.1722218944292062, "learning_rate": 3.632780256450097e-05, "loss": 0.5205, "num_tokens": 2406209107.0, "step": 3150 }, { "epoch": 1.1542548319135293, "grad_norm": 0.1551282068138993, "learning_rate": 3.632521587200711e-05, "loss": 0.4544, "num_tokens": 2406991246.0, "step": 3151 }, { "epoch": 1.1546212329394523, "grad_norm": 0.1660287566779273, "learning_rate": 3.6322628372365516e-05, "loss": 0.5315, "num_tokens": 2407813213.0, "step": 3152 }, { "epoch": 1.154987633965375, "grad_norm": 0.14623083933885855, "learning_rate": 3.632004006572199e-05, "loss": 0.4947, "num_tokens": 2408605390.0, "step": 3153 }, { "epoch": 1.1553540349912979, "grad_norm": 0.13872636511857975, "learning_rate": 3.631745095222237e-05, "loss": 0.5346, "num_tokens": 2409426584.0, "step": 3154 }, { "epoch": 1.1557204360172209, "grad_norm": 0.16204031795268956, "learning_rate": 3.631486103201254e-05, "loss": 0.4992, "num_tokens": 2410262397.0, "step": 3155 }, { "epoch": 1.1560868370431436, "grad_norm": 0.1612263663380658, "learning_rate": 3.631227030523842e-05, "loss": 0.472, "num_tokens": 2411007397.0, "step": 3156 }, { "epoch": 1.1564532380690666, "grad_norm": 0.16198062585176795, "learning_rate": 3.630967877204598e-05, "loss": 0.5245, "num_tokens": 2411599019.0, "step": 3157 }, { "epoch": 1.1568196390949894, "grad_norm": 0.15680136775074036, "learning_rate": 3.630708643258126e-05, "loss": 0.4963, "num_tokens": 2412292432.0, "step": 3158 }, { "epoch": 1.1571860401209124, "grad_norm": 0.15924512362585658, "learning_rate": 3.63044932869903e-05, "loss": 0.4907, "num_tokens": 2413188731.0, "step": 3159 }, { "epoch": 1.1575524411468352, "grad_norm": 0.16740817973019248, "learning_rate": 3.6301899335419216e-05, "loss": 0.5161, "num_tokens": 2413997866.0, "step": 3160 }, { "epoch": 1.1579188421727582, "grad_norm": 0.13130877057988805, "learning_rate": 3.629930457801417e-05, "loss": 0.4881, "num_tokens": 2414818279.0, "step": 3161 }, { "epoch": 1.158285243198681, "grad_norm": 0.1584534781463497, "learning_rate": 3.629670901492135e-05, "loss": 0.492, "num_tokens": 2415590788.0, "step": 3162 }, { "epoch": 1.1586516442246038, "grad_norm": 0.1566201975364299, "learning_rate": 3.629411264628703e-05, "loss": 0.5117, "num_tokens": 2416426924.0, "step": 3163 }, { "epoch": 1.1590180452505268, "grad_norm": 0.1560940844241621, "learning_rate": 3.6291515472257464e-05, "loss": 0.4811, "num_tokens": 2417048647.0, "step": 3164 }, { "epoch": 1.1593844462764495, "grad_norm": 0.15234067076425511, "learning_rate": 3.6288917492979014e-05, "loss": 0.533, "num_tokens": 2417741878.0, "step": 3165 }, { "epoch": 1.1597508473023725, "grad_norm": 0.1712224294065582, "learning_rate": 3.628631870859805e-05, "loss": 0.5109, "num_tokens": 2418406420.0, "step": 3166 }, { "epoch": 1.1601172483282953, "grad_norm": 0.14113949713081556, "learning_rate": 3.6283719119261004e-05, "loss": 0.5075, "num_tokens": 2419062127.0, "step": 3167 }, { "epoch": 1.160483649354218, "grad_norm": 0.17951461083356746, "learning_rate": 3.628111872511435e-05, "loss": 0.5436, "num_tokens": 2419808464.0, "step": 3168 }, { "epoch": 1.160850050380141, "grad_norm": 0.15108387416295235, "learning_rate": 3.62785175263046e-05, "loss": 0.4922, "num_tokens": 2420662262.0, "step": 3169 }, { "epoch": 1.1612164514060639, "grad_norm": 0.15132914832166747, "learning_rate": 3.627591552297833e-05, "loss": 0.4887, "num_tokens": 2421525299.0, "step": 3170 }, { "epoch": 1.161582852431987, "grad_norm": 0.1486865220200341, "learning_rate": 3.627331271528213e-05, "loss": 0.493, "num_tokens": 2422259053.0, "step": 3171 }, { "epoch": 1.1619492534579097, "grad_norm": 0.16517931298117838, "learning_rate": 3.6270709103362674e-05, "loss": 0.4896, "num_tokens": 2422964712.0, "step": 3172 }, { "epoch": 1.1623156544838324, "grad_norm": 0.16229118682036828, "learning_rate": 3.6268104687366644e-05, "loss": 0.5099, "num_tokens": 2423655083.0, "step": 3173 }, { "epoch": 1.1626820555097555, "grad_norm": 0.17095344683596497, "learning_rate": 3.6265499467440794e-05, "loss": 0.5333, "num_tokens": 2424346033.0, "step": 3174 }, { "epoch": 1.1630484565356782, "grad_norm": 0.1583023788552321, "learning_rate": 3.626289344373191e-05, "loss": 0.4842, "num_tokens": 2425168358.0, "step": 3175 }, { "epoch": 1.1634148575616012, "grad_norm": 0.13948806704244057, "learning_rate": 3.626028661638683e-05, "loss": 0.5132, "num_tokens": 2425916927.0, "step": 3176 }, { "epoch": 1.163781258587524, "grad_norm": 0.17185041535782664, "learning_rate": 3.6257678985552436e-05, "loss": 0.505, "num_tokens": 2426598333.0, "step": 3177 }, { "epoch": 1.1641476596134468, "grad_norm": 0.1664420094155238, "learning_rate": 3.6255070551375646e-05, "loss": 0.5267, "num_tokens": 2427321077.0, "step": 3178 }, { "epoch": 1.1645140606393698, "grad_norm": 0.16492828884443753, "learning_rate": 3.625246131400345e-05, "loss": 0.4742, "num_tokens": 2428089906.0, "step": 3179 }, { "epoch": 1.1648804616652926, "grad_norm": 0.1685046588327957, "learning_rate": 3.624985127358283e-05, "loss": 0.5382, "num_tokens": 2428827551.0, "step": 3180 }, { "epoch": 1.1652468626912156, "grad_norm": 0.1406424387733573, "learning_rate": 3.624724043026088e-05, "loss": 0.5447, "num_tokens": 2429679185.0, "step": 3181 }, { "epoch": 1.1656132637171384, "grad_norm": 0.17405558383504657, "learning_rate": 3.6244628784184693e-05, "loss": 0.5496, "num_tokens": 2430428500.0, "step": 3182 }, { "epoch": 1.1659796647430614, "grad_norm": 0.13646755331312765, "learning_rate": 3.624201633550143e-05, "loss": 0.5054, "num_tokens": 2431216463.0, "step": 3183 }, { "epoch": 1.1663460657689841, "grad_norm": 0.15930147360586022, "learning_rate": 3.623940308435827e-05, "loss": 0.5215, "num_tokens": 2431973377.0, "step": 3184 }, { "epoch": 1.1667124667949071, "grad_norm": 0.13639366238424244, "learning_rate": 3.6236789030902474e-05, "loss": 0.4833, "num_tokens": 2432737458.0, "step": 3185 }, { "epoch": 1.16707886782083, "grad_norm": 0.1597350593823826, "learning_rate": 3.623417417528132e-05, "loss": 0.5046, "num_tokens": 2433567728.0, "step": 3186 }, { "epoch": 1.1674452688467527, "grad_norm": 0.14938842508234038, "learning_rate": 3.623155851764214e-05, "loss": 0.4991, "num_tokens": 2434355707.0, "step": 3187 }, { "epoch": 1.1678116698726757, "grad_norm": 0.14881367517353564, "learning_rate": 3.622894205813232e-05, "loss": 0.5231, "num_tokens": 2435096610.0, "step": 3188 }, { "epoch": 1.1681780708985985, "grad_norm": 0.13431558110475167, "learning_rate": 3.622632479689928e-05, "loss": 0.5278, "num_tokens": 2436025509.0, "step": 3189 }, { "epoch": 1.1685444719245215, "grad_norm": 0.16361308684237744, "learning_rate": 3.622370673409048e-05, "loss": 0.5077, "num_tokens": 2436811271.0, "step": 3190 }, { "epoch": 1.1689108729504443, "grad_norm": 0.1496023037267571, "learning_rate": 3.622108786985344e-05, "loss": 0.4979, "num_tokens": 2437613282.0, "step": 3191 }, { "epoch": 1.169277273976367, "grad_norm": 0.1542363601288864, "learning_rate": 3.621846820433573e-05, "loss": 0.4825, "num_tokens": 2438463434.0, "step": 3192 }, { "epoch": 1.16964367500229, "grad_norm": 0.16053939988503738, "learning_rate": 3.621584773768493e-05, "loss": 0.4732, "num_tokens": 2439334689.0, "step": 3193 }, { "epoch": 1.1700100760282128, "grad_norm": 0.15522496871638727, "learning_rate": 3.621322647004871e-05, "loss": 0.5107, "num_tokens": 2440149848.0, "step": 3194 }, { "epoch": 1.1703764770541358, "grad_norm": 0.15087000329306155, "learning_rate": 3.621060440157476e-05, "loss": 0.4867, "num_tokens": 2440932001.0, "step": 3195 }, { "epoch": 1.1707428780800586, "grad_norm": 0.15126308333891178, "learning_rate": 3.620798153241081e-05, "loss": 0.4864, "num_tokens": 2441635465.0, "step": 3196 }, { "epoch": 1.1711092791059814, "grad_norm": 0.13737149145927968, "learning_rate": 3.620535786270465e-05, "loss": 0.4808, "num_tokens": 2442347026.0, "step": 3197 }, { "epoch": 1.1714756801319044, "grad_norm": 0.16271370834622637, "learning_rate": 3.62027333926041e-05, "loss": 0.5096, "num_tokens": 2443145046.0, "step": 3198 }, { "epoch": 1.1718420811578272, "grad_norm": 0.1524565345980606, "learning_rate": 3.620010812225706e-05, "loss": 0.4955, "num_tokens": 2443997840.0, "step": 3199 }, { "epoch": 1.1722084821837502, "grad_norm": 0.15478972525308207, "learning_rate": 3.619748205181142e-05, "loss": 0.4871, "num_tokens": 2444648055.0, "step": 3200 }, { "epoch": 1.172574883209673, "grad_norm": 0.18921871869523096, "learning_rate": 3.6194855181415163e-05, "loss": 0.5638, "num_tokens": 2445261921.0, "step": 3201 }, { "epoch": 1.1729412842355957, "grad_norm": 0.1537981919830211, "learning_rate": 3.6192227511216296e-05, "loss": 0.476, "num_tokens": 2446049295.0, "step": 3202 }, { "epoch": 1.1733076852615187, "grad_norm": 0.17593033479466047, "learning_rate": 3.618959904136287e-05, "loss": 0.5084, "num_tokens": 2446835011.0, "step": 3203 }, { "epoch": 1.1736740862874415, "grad_norm": 0.16127811851853704, "learning_rate": 3.6186969772002985e-05, "loss": 0.4965, "num_tokens": 2447672872.0, "step": 3204 }, { "epoch": 1.1740404873133645, "grad_norm": 0.17464786692401443, "learning_rate": 3.618433970328478e-05, "loss": 0.5207, "num_tokens": 2448483017.0, "step": 3205 }, { "epoch": 1.1744068883392873, "grad_norm": 0.17333214161411176, "learning_rate": 3.618170883535647e-05, "loss": 0.4944, "num_tokens": 2449237203.0, "step": 3206 }, { "epoch": 1.1747732893652103, "grad_norm": 0.16961389506539995, "learning_rate": 3.617907716836626e-05, "loss": 0.4974, "num_tokens": 2449994165.0, "step": 3207 }, { "epoch": 1.175139690391133, "grad_norm": 0.18106536060862571, "learning_rate": 3.617644470246245e-05, "loss": 0.4919, "num_tokens": 2450653249.0, "step": 3208 }, { "epoch": 1.175506091417056, "grad_norm": 0.180442761880159, "learning_rate": 3.6173811437793355e-05, "loss": 0.5092, "num_tokens": 2451382513.0, "step": 3209 }, { "epoch": 1.1758724924429789, "grad_norm": 0.17717279350632875, "learning_rate": 3.6171177374507344e-05, "loss": 0.5203, "num_tokens": 2452115090.0, "step": 3210 }, { "epoch": 1.1762388934689016, "grad_norm": 0.16381336005200303, "learning_rate": 3.616854251275284e-05, "loss": 0.5155, "num_tokens": 2452872932.0, "step": 3211 }, { "epoch": 1.1766052944948246, "grad_norm": 0.17706106976894806, "learning_rate": 3.61659068526783e-05, "loss": 0.486, "num_tokens": 2453789263.0, "step": 3212 }, { "epoch": 1.1769716955207474, "grad_norm": 0.2099454899270782, "learning_rate": 3.6163270394432226e-05, "loss": 0.5158, "num_tokens": 2454533986.0, "step": 3213 }, { "epoch": 1.1773380965466704, "grad_norm": 0.16051126792354553, "learning_rate": 3.6160633138163174e-05, "loss": 0.4892, "num_tokens": 2455203259.0, "step": 3214 }, { "epoch": 1.1777044975725932, "grad_norm": 0.16857061117350733, "learning_rate": 3.615799508401973e-05, "loss": 0.5204, "num_tokens": 2455983917.0, "step": 3215 }, { "epoch": 1.178070898598516, "grad_norm": 0.15761364738349462, "learning_rate": 3.615535623215054e-05, "loss": 0.5227, "num_tokens": 2456802942.0, "step": 3216 }, { "epoch": 1.178437299624439, "grad_norm": 0.1781681874016192, "learning_rate": 3.615271658270429e-05, "loss": 0.5188, "num_tokens": 2457503782.0, "step": 3217 }, { "epoch": 1.1788037006503618, "grad_norm": 0.15305256358898423, "learning_rate": 3.61500761358297e-05, "loss": 0.4998, "num_tokens": 2458194659.0, "step": 3218 }, { "epoch": 1.1791701016762848, "grad_norm": 0.15567981200283398, "learning_rate": 3.6147434891675567e-05, "loss": 0.5097, "num_tokens": 2458979368.0, "step": 3219 }, { "epoch": 1.1795365027022076, "grad_norm": 0.1476956536942097, "learning_rate": 3.6144792850390686e-05, "loss": 0.4862, "num_tokens": 2459759308.0, "step": 3220 }, { "epoch": 1.1799029037281303, "grad_norm": 0.15514359662740393, "learning_rate": 3.614215001212394e-05, "loss": 0.5123, "num_tokens": 2460474201.0, "step": 3221 }, { "epoch": 1.1802693047540533, "grad_norm": 0.1628246867725646, "learning_rate": 3.613950637702422e-05, "loss": 0.5251, "num_tokens": 2461251359.0, "step": 3222 }, { "epoch": 1.1806357057799761, "grad_norm": 0.1579496590621572, "learning_rate": 3.61368619452405e-05, "loss": 0.531, "num_tokens": 2462039083.0, "step": 3223 }, { "epoch": 1.1810021068058991, "grad_norm": 0.15516860508594119, "learning_rate": 3.613421671692177e-05, "loss": 0.4907, "num_tokens": 2462821126.0, "step": 3224 }, { "epoch": 1.181368507831822, "grad_norm": 0.1609178487073482, "learning_rate": 3.613157069221707e-05, "loss": 0.5093, "num_tokens": 2463636344.0, "step": 3225 }, { "epoch": 1.1817349088577447, "grad_norm": 0.1567334438673625, "learning_rate": 3.612892387127549e-05, "loss": 0.4944, "num_tokens": 2464403312.0, "step": 3226 }, { "epoch": 1.1821013098836677, "grad_norm": 0.14767050807029827, "learning_rate": 3.6126276254246176e-05, "loss": 0.5188, "num_tokens": 2465151510.0, "step": 3227 }, { "epoch": 1.1824677109095905, "grad_norm": 0.14327058450792682, "learning_rate": 3.6123627841278295e-05, "loss": 0.4749, "num_tokens": 2465932751.0, "step": 3228 }, { "epoch": 1.1828341119355135, "grad_norm": 0.17755246246743828, "learning_rate": 3.6120978632521076e-05, "loss": 0.4991, "num_tokens": 2466647826.0, "step": 3229 }, { "epoch": 1.1832005129614362, "grad_norm": 0.1528648674539233, "learning_rate": 3.611832862812377e-05, "loss": 0.5342, "num_tokens": 2467294954.0, "step": 3230 }, { "epoch": 1.1835669139873592, "grad_norm": 0.15935360830717302, "learning_rate": 3.611567782823572e-05, "loss": 0.5073, "num_tokens": 2467965777.0, "step": 3231 }, { "epoch": 1.183933315013282, "grad_norm": 0.18303827049874052, "learning_rate": 3.611302623300627e-05, "loss": 0.4938, "num_tokens": 2468694289.0, "step": 3232 }, { "epoch": 1.184299716039205, "grad_norm": 0.1539973535987448, "learning_rate": 3.611037384258482e-05, "loss": 0.4923, "num_tokens": 2469430284.0, "step": 3233 }, { "epoch": 1.1846661170651278, "grad_norm": 0.17788219474915737, "learning_rate": 3.6107720657120816e-05, "loss": 0.5448, "num_tokens": 2470132757.0, "step": 3234 }, { "epoch": 1.1850325180910506, "grad_norm": 0.16638894358625264, "learning_rate": 3.610506667676376e-05, "loss": 0.5017, "num_tokens": 2470902090.0, "step": 3235 }, { "epoch": 1.1853989191169736, "grad_norm": 0.14964539570903518, "learning_rate": 3.610241190166318e-05, "loss": 0.5379, "num_tokens": 2471702430.0, "step": 3236 }, { "epoch": 1.1857653201428964, "grad_norm": 0.16208477398169546, "learning_rate": 3.609975633196866e-05, "loss": 0.5236, "num_tokens": 2472438717.0, "step": 3237 }, { "epoch": 1.1861317211688194, "grad_norm": 0.1371736485921193, "learning_rate": 3.609709996782984e-05, "loss": 0.496, "num_tokens": 2473244967.0, "step": 3238 }, { "epoch": 1.1864981221947422, "grad_norm": 0.15329329469369835, "learning_rate": 3.609444280939637e-05, "loss": 0.4942, "num_tokens": 2474034864.0, "step": 3239 }, { "epoch": 1.186864523220665, "grad_norm": 0.1562838234088394, "learning_rate": 3.609178485681798e-05, "loss": 0.537, "num_tokens": 2474759028.0, "step": 3240 }, { "epoch": 1.187230924246588, "grad_norm": 0.14423691998204224, "learning_rate": 3.608912611024443e-05, "loss": 0.488, "num_tokens": 2475529064.0, "step": 3241 }, { "epoch": 1.1875973252725107, "grad_norm": 0.15185722444956615, "learning_rate": 3.6086466569825526e-05, "loss": 0.513, "num_tokens": 2476308894.0, "step": 3242 }, { "epoch": 1.1879637262984337, "grad_norm": 0.16636314539356847, "learning_rate": 3.6083806235711114e-05, "loss": 0.5195, "num_tokens": 2477230769.0, "step": 3243 }, { "epoch": 1.1883301273243565, "grad_norm": 0.1816680238984646, "learning_rate": 3.608114510805109e-05, "loss": 0.5272, "num_tokens": 2477838918.0, "step": 3244 }, { "epoch": 1.1886965283502793, "grad_norm": 0.17232310473169768, "learning_rate": 3.6078483186995395e-05, "loss": 0.5111, "num_tokens": 2478570341.0, "step": 3245 }, { "epoch": 1.1890629293762023, "grad_norm": 0.1733141521443877, "learning_rate": 3.607582047269402e-05, "loss": 0.4928, "num_tokens": 2479452335.0, "step": 3246 }, { "epoch": 1.189429330402125, "grad_norm": 0.14335047729143466, "learning_rate": 3.607315696529699e-05, "loss": 0.4845, "num_tokens": 2480209754.0, "step": 3247 }, { "epoch": 1.189795731428048, "grad_norm": 0.15935503721522784, "learning_rate": 3.607049266495436e-05, "loss": 0.5246, "num_tokens": 2480945455.0, "step": 3248 }, { "epoch": 1.1901621324539708, "grad_norm": 0.1733658662536143, "learning_rate": 3.6067827571816284e-05, "loss": 0.5332, "num_tokens": 2481671805.0, "step": 3249 }, { "epoch": 1.1905285334798936, "grad_norm": 0.17973753872133286, "learning_rate": 3.606516168603291e-05, "loss": 0.4928, "num_tokens": 2482453863.0, "step": 3250 }, { "epoch": 1.1908949345058166, "grad_norm": 0.1541676440267342, "learning_rate": 3.606249500775443e-05, "loss": 0.5103, "num_tokens": 2483277190.0, "step": 3251 }, { "epoch": 1.1912613355317394, "grad_norm": 0.2118178830480349, "learning_rate": 3.605982753713113e-05, "loss": 0.5074, "num_tokens": 2483974764.0, "step": 3252 }, { "epoch": 1.1916277365576624, "grad_norm": 0.17797626242773407, "learning_rate": 3.605715927431327e-05, "loss": 0.5459, "num_tokens": 2484668198.0, "step": 3253 }, { "epoch": 1.1919941375835852, "grad_norm": 0.19587383001468592, "learning_rate": 3.605449021945122e-05, "loss": 0.5154, "num_tokens": 2485280117.0, "step": 3254 }, { "epoch": 1.1923605386095082, "grad_norm": 0.17348192921735373, "learning_rate": 3.605182037269536e-05, "loss": 0.507, "num_tokens": 2485985647.0, "step": 3255 }, { "epoch": 1.192726939635431, "grad_norm": 0.16389853844867314, "learning_rate": 3.604914973419612e-05, "loss": 0.4809, "num_tokens": 2486771433.0, "step": 3256 }, { "epoch": 1.193093340661354, "grad_norm": 0.16160891254832846, "learning_rate": 3.6046478304103965e-05, "loss": 0.5062, "num_tokens": 2487478268.0, "step": 3257 }, { "epoch": 1.1934597416872768, "grad_norm": 0.16389952741621736, "learning_rate": 3.6043806082569434e-05, "loss": 0.5202, "num_tokens": 2488168325.0, "step": 3258 }, { "epoch": 1.1938261427131995, "grad_norm": 0.1814049992395626, "learning_rate": 3.604113306974308e-05, "loss": 0.5313, "num_tokens": 2488935048.0, "step": 3259 }, { "epoch": 1.1941925437391225, "grad_norm": 0.14374345453626308, "learning_rate": 3.6038459265775505e-05, "loss": 0.5013, "num_tokens": 2489732347.0, "step": 3260 }, { "epoch": 1.1945589447650453, "grad_norm": 0.17675689156132154, "learning_rate": 3.603578467081739e-05, "loss": 0.5313, "num_tokens": 2490548084.0, "step": 3261 }, { "epoch": 1.1949253457909683, "grad_norm": 0.162450401946062, "learning_rate": 3.603310928501941e-05, "loss": 0.511, "num_tokens": 2491283499.0, "step": 3262 }, { "epoch": 1.195291746816891, "grad_norm": 0.20515238498654212, "learning_rate": 3.603043310853232e-05, "loss": 0.5311, "num_tokens": 2492052389.0, "step": 3263 }, { "epoch": 1.1956581478428139, "grad_norm": 0.14548081078018194, "learning_rate": 3.60277561415069e-05, "loss": 0.4864, "num_tokens": 2492836574.0, "step": 3264 }, { "epoch": 1.1960245488687369, "grad_norm": 0.16801638347081635, "learning_rate": 3.602507838409399e-05, "loss": 0.5196, "num_tokens": 2493603772.0, "step": 3265 }, { "epoch": 1.1963909498946597, "grad_norm": 0.16895441072213924, "learning_rate": 3.602239983644446e-05, "loss": 0.5074, "num_tokens": 2494331215.0, "step": 3266 }, { "epoch": 1.1967573509205827, "grad_norm": 0.19214521169709886, "learning_rate": 3.601972049870925e-05, "loss": 0.5638, "num_tokens": 2495026900.0, "step": 3267 }, { "epoch": 1.1971237519465054, "grad_norm": 0.17778072616811277, "learning_rate": 3.601704037103929e-05, "loss": 0.5022, "num_tokens": 2495731706.0, "step": 3268 }, { "epoch": 1.1974901529724282, "grad_norm": 0.16815217182629408, "learning_rate": 3.601435945358562e-05, "loss": 0.503, "num_tokens": 2496596604.0, "step": 3269 }, { "epoch": 1.1978565539983512, "grad_norm": 0.1572677862513932, "learning_rate": 3.6011677746499294e-05, "loss": 0.5271, "num_tokens": 2497356992.0, "step": 3270 }, { "epoch": 1.198222955024274, "grad_norm": 0.18523233685806056, "learning_rate": 3.6008995249931396e-05, "loss": 0.5534, "num_tokens": 2498013674.0, "step": 3271 }, { "epoch": 1.198589356050197, "grad_norm": 0.15922375899532268, "learning_rate": 3.600631196403308e-05, "loss": 0.5246, "num_tokens": 2498816719.0, "step": 3272 }, { "epoch": 1.1989557570761198, "grad_norm": 0.16770311676946226, "learning_rate": 3.600362788895553e-05, "loss": 0.4745, "num_tokens": 2499573838.0, "step": 3273 }, { "epoch": 1.1993221581020426, "grad_norm": 0.15061486060505394, "learning_rate": 3.600094302484999e-05, "loss": 0.5248, "num_tokens": 2500226335.0, "step": 3274 }, { "epoch": 1.1996885591279656, "grad_norm": 0.1716370978907056, "learning_rate": 3.5998257371867724e-05, "loss": 0.4834, "num_tokens": 2500967283.0, "step": 3275 }, { "epoch": 1.2000549601538884, "grad_norm": 0.15597110282883728, "learning_rate": 3.599557093016006e-05, "loss": 0.5489, "num_tokens": 2501662386.0, "step": 3276 }, { "epoch": 1.2004213611798114, "grad_norm": 0.16457916547812193, "learning_rate": 3.5992883699878366e-05, "loss": 0.493, "num_tokens": 2502390014.0, "step": 3277 }, { "epoch": 1.2007877622057341, "grad_norm": 0.16549561263681878, "learning_rate": 3.599019568117405e-05, "loss": 0.4878, "num_tokens": 2503139118.0, "step": 3278 }, { "epoch": 1.2011541632316571, "grad_norm": 0.17667590502187452, "learning_rate": 3.5987506874198565e-05, "loss": 0.5327, "num_tokens": 2503795587.0, "step": 3279 }, { "epoch": 1.20152056425758, "grad_norm": 0.16107257285991464, "learning_rate": 3.5984817279103415e-05, "loss": 0.5147, "num_tokens": 2504549532.0, "step": 3280 }, { "epoch": 1.201886965283503, "grad_norm": 0.14681848650920212, "learning_rate": 3.5982126896040146e-05, "loss": 0.4747, "num_tokens": 2505291213.0, "step": 3281 }, { "epoch": 1.2022533663094257, "grad_norm": 0.15270049551893308, "learning_rate": 3.5979435725160335e-05, "loss": 0.5124, "num_tokens": 2506101024.0, "step": 3282 }, { "epoch": 1.2026197673353485, "grad_norm": 0.14015993487668235, "learning_rate": 3.597674376661563e-05, "loss": 0.4706, "num_tokens": 2506987250.0, "step": 3283 }, { "epoch": 1.2029861683612715, "grad_norm": 0.14390218858042636, "learning_rate": 3.59740510205577e-05, "loss": 0.4955, "num_tokens": 2507739322.0, "step": 3284 }, { "epoch": 1.2033525693871943, "grad_norm": 0.15035429659121674, "learning_rate": 3.597135748713826e-05, "loss": 0.5189, "num_tokens": 2508550646.0, "step": 3285 }, { "epoch": 1.2037189704131173, "grad_norm": 0.1461065058325464, "learning_rate": 3.5968663166509085e-05, "loss": 0.5437, "num_tokens": 2509386834.0, "step": 3286 }, { "epoch": 1.20408537143904, "grad_norm": 0.14807277527429719, "learning_rate": 3.5965968058821986e-05, "loss": 0.5075, "num_tokens": 2510187546.0, "step": 3287 }, { "epoch": 1.2044517724649628, "grad_norm": 0.14837928521458493, "learning_rate": 3.596327216422882e-05, "loss": 0.5078, "num_tokens": 2510958287.0, "step": 3288 }, { "epoch": 1.2048181734908858, "grad_norm": 0.16806202643371107, "learning_rate": 3.596057548288147e-05, "loss": 0.536, "num_tokens": 2511727932.0, "step": 3289 }, { "epoch": 1.2051845745168086, "grad_norm": 0.15316144860018743, "learning_rate": 3.595787801493189e-05, "loss": 0.4876, "num_tokens": 2512610154.0, "step": 3290 }, { "epoch": 1.2055509755427316, "grad_norm": 0.13341769854326416, "learning_rate": 3.595517976053208e-05, "loss": 0.5332, "num_tokens": 2513502328.0, "step": 3291 }, { "epoch": 1.2059173765686544, "grad_norm": 0.1569865913278078, "learning_rate": 3.5952480719834054e-05, "loss": 0.5079, "num_tokens": 2514304606.0, "step": 3292 }, { "epoch": 1.2062837775945772, "grad_norm": 0.1574106090849673, "learning_rate": 3.594978089298989e-05, "loss": 0.5328, "num_tokens": 2515121803.0, "step": 3293 }, { "epoch": 1.2066501786205002, "grad_norm": 0.16983432084282665, "learning_rate": 3.5947080280151714e-05, "loss": 0.5387, "num_tokens": 2515834909.0, "step": 3294 }, { "epoch": 1.207016579646423, "grad_norm": 0.15244367431529685, "learning_rate": 3.594437888147169e-05, "loss": 0.4984, "num_tokens": 2516473407.0, "step": 3295 }, { "epoch": 1.207382980672346, "grad_norm": 0.15049938658868176, "learning_rate": 3.5941676697102027e-05, "loss": 0.4938, "num_tokens": 2517256303.0, "step": 3296 }, { "epoch": 1.2077493816982687, "grad_norm": 0.16455963471307447, "learning_rate": 3.593897372719498e-05, "loss": 0.5003, "num_tokens": 2518063663.0, "step": 3297 }, { "epoch": 1.2081157827241915, "grad_norm": 0.14242253540707916, "learning_rate": 3.593626997190284e-05, "loss": 0.4982, "num_tokens": 2518838196.0, "step": 3298 }, { "epoch": 1.2084821837501145, "grad_norm": 0.14852748143042838, "learning_rate": 3.593356543137796e-05, "loss": 0.5117, "num_tokens": 2519524648.0, "step": 3299 }, { "epoch": 1.2088485847760373, "grad_norm": 0.15555279579557107, "learning_rate": 3.59308601057727e-05, "loss": 0.4955, "num_tokens": 2520315840.0, "step": 3300 }, { "epoch": 1.2092149858019603, "grad_norm": 0.14506163933091532, "learning_rate": 3.5928153995239526e-05, "loss": 0.5139, "num_tokens": 2521061051.0, "step": 3301 }, { "epoch": 1.209581386827883, "grad_norm": 0.1466720737271116, "learning_rate": 3.592544709993089e-05, "loss": 0.5302, "num_tokens": 2521754822.0, "step": 3302 }, { "epoch": 1.209947787853806, "grad_norm": 0.15297050412514315, "learning_rate": 3.592273941999932e-05, "loss": 0.4964, "num_tokens": 2522506896.0, "step": 3303 }, { "epoch": 1.2103141888797289, "grad_norm": 0.1357179285059211, "learning_rate": 3.592003095559738e-05, "loss": 0.5044, "num_tokens": 2523274578.0, "step": 3304 }, { "epoch": 1.2106805899056519, "grad_norm": 0.15156610905300333, "learning_rate": 3.5917321706877665e-05, "loss": 0.5193, "num_tokens": 2524016460.0, "step": 3305 }, { "epoch": 1.2110469909315746, "grad_norm": 0.17231382142685617, "learning_rate": 3.591461167399284e-05, "loss": 0.524, "num_tokens": 2524712932.0, "step": 3306 }, { "epoch": 1.2114133919574974, "grad_norm": 0.1400575926324851, "learning_rate": 3.591190085709559e-05, "loss": 0.4801, "num_tokens": 2525428629.0, "step": 3307 }, { "epoch": 1.2117797929834204, "grad_norm": 0.15238661260843706, "learning_rate": 3.590918925633866e-05, "loss": 0.4888, "num_tokens": 2526190133.0, "step": 3308 }, { "epoch": 1.2121461940093432, "grad_norm": 0.18017405630588218, "learning_rate": 3.590647687187484e-05, "loss": 0.512, "num_tokens": 2526977305.0, "step": 3309 }, { "epoch": 1.2125125950352662, "grad_norm": 0.15252341021655105, "learning_rate": 3.590376370385694e-05, "loss": 0.4962, "num_tokens": 2527762997.0, "step": 3310 }, { "epoch": 1.212878996061189, "grad_norm": 0.16972167164857166, "learning_rate": 3.5901049752437854e-05, "loss": 0.4836, "num_tokens": 2528622343.0, "step": 3311 }, { "epoch": 1.2132453970871118, "grad_norm": 0.1446177224710291, "learning_rate": 3.5898335017770484e-05, "loss": 0.4743, "num_tokens": 2529439863.0, "step": 3312 }, { "epoch": 1.2136117981130348, "grad_norm": 0.15213217479779118, "learning_rate": 3.58956195000078e-05, "loss": 0.5106, "num_tokens": 2530321414.0, "step": 3313 }, { "epoch": 1.2139781991389575, "grad_norm": 0.13934916989798027, "learning_rate": 3.58929031993028e-05, "loss": 0.5085, "num_tokens": 2530986482.0, "step": 3314 }, { "epoch": 1.2143446001648805, "grad_norm": 0.16090736192620164, "learning_rate": 3.5890186115808526e-05, "loss": 0.4906, "num_tokens": 2531738298.0, "step": 3315 }, { "epoch": 1.2147110011908033, "grad_norm": 0.17274116344280346, "learning_rate": 3.588746824967808e-05, "loss": 0.5183, "num_tokens": 2532525896.0, "step": 3316 }, { "epoch": 1.215077402216726, "grad_norm": 0.15544269008998407, "learning_rate": 3.588474960106461e-05, "loss": 0.5341, "num_tokens": 2533351543.0, "step": 3317 }, { "epoch": 1.2154438032426491, "grad_norm": 0.1844457304571539, "learning_rate": 3.588203017012127e-05, "loss": 0.5126, "num_tokens": 2534186380.0, "step": 3318 }, { "epoch": 1.215810204268572, "grad_norm": 0.15007014357349788, "learning_rate": 3.587930995700131e-05, "loss": 0.4997, "num_tokens": 2534945964.0, "step": 3319 }, { "epoch": 1.216176605294495, "grad_norm": 0.18143992040496093, "learning_rate": 3.5876588961857976e-05, "loss": 0.5055, "num_tokens": 2535591685.0, "step": 3320 }, { "epoch": 1.2165430063204177, "grad_norm": 0.15667977826422375, "learning_rate": 3.5873867184844604e-05, "loss": 0.4801, "num_tokens": 2536396587.0, "step": 3321 }, { "epoch": 1.2169094073463405, "grad_norm": 0.14740260802699373, "learning_rate": 3.587114462611454e-05, "loss": 0.5389, "num_tokens": 2537213760.0, "step": 3322 }, { "epoch": 1.2172758083722635, "grad_norm": 0.17779079784075638, "learning_rate": 3.5868421285821186e-05, "loss": 0.5249, "num_tokens": 2538040083.0, "step": 3323 }, { "epoch": 1.2176422093981862, "grad_norm": 0.14817482159130843, "learning_rate": 3.586569716411799e-05, "loss": 0.5159, "num_tokens": 2538860910.0, "step": 3324 }, { "epoch": 1.2180086104241092, "grad_norm": 0.1628711364758715, "learning_rate": 3.586297226115844e-05, "loss": 0.5041, "num_tokens": 2539605586.0, "step": 3325 }, { "epoch": 1.218375011450032, "grad_norm": 0.13912224987502012, "learning_rate": 3.586024657709606e-05, "loss": 0.4822, "num_tokens": 2540371960.0, "step": 3326 }, { "epoch": 1.218741412475955, "grad_norm": 0.15116441463976185, "learning_rate": 3.585752011208445e-05, "loss": 0.4867, "num_tokens": 2541172439.0, "step": 3327 }, { "epoch": 1.2191078135018778, "grad_norm": 0.16618266085481684, "learning_rate": 3.585479286627721e-05, "loss": 0.4856, "num_tokens": 2542034077.0, "step": 3328 }, { "epoch": 1.2194742145278006, "grad_norm": 0.15921830778135496, "learning_rate": 3.5852064839828014e-05, "loss": 0.5227, "num_tokens": 2542757008.0, "step": 3329 }, { "epoch": 1.2198406155537236, "grad_norm": 0.14666549130481313, "learning_rate": 3.584933603289057e-05, "loss": 0.4911, "num_tokens": 2543732441.0, "step": 3330 }, { "epoch": 1.2202070165796464, "grad_norm": 0.14404135604478197, "learning_rate": 3.584660644561864e-05, "loss": 0.4944, "num_tokens": 2544503723.0, "step": 3331 }, { "epoch": 1.2205734176055694, "grad_norm": 0.17340854067577818, "learning_rate": 3.5843876078166e-05, "loss": 0.5287, "num_tokens": 2545202123.0, "step": 3332 }, { "epoch": 1.2209398186314921, "grad_norm": 0.15720889733395663, "learning_rate": 3.5841144930686516e-05, "loss": 0.4853, "num_tokens": 2545932606.0, "step": 3333 }, { "epoch": 1.2213062196574151, "grad_norm": 0.15689977087809528, "learning_rate": 3.583841300333406e-05, "loss": 0.4927, "num_tokens": 2546656583.0, "step": 3334 }, { "epoch": 1.221672620683338, "grad_norm": 0.18581811541806262, "learning_rate": 3.583568029626256e-05, "loss": 0.4978, "num_tokens": 2547393755.0, "step": 3335 }, { "epoch": 1.2220390217092607, "grad_norm": 0.14344147759655682, "learning_rate": 3.583294680962599e-05, "loss": 0.5063, "num_tokens": 2548198597.0, "step": 3336 }, { "epoch": 1.2224054227351837, "grad_norm": 0.1705674583860248, "learning_rate": 3.5830212543578373e-05, "loss": 0.467, "num_tokens": 2548907802.0, "step": 3337 }, { "epoch": 1.2227718237611065, "grad_norm": 0.15920841139624278, "learning_rate": 3.5827477498273764e-05, "loss": 0.5239, "num_tokens": 2549599138.0, "step": 3338 }, { "epoch": 1.2231382247870295, "grad_norm": 0.14478849696514925, "learning_rate": 3.582474167386627e-05, "loss": 0.4965, "num_tokens": 2550460025.0, "step": 3339 }, { "epoch": 1.2235046258129523, "grad_norm": 0.1451767425027202, "learning_rate": 3.582200507051004e-05, "loss": 0.5274, "num_tokens": 2551193463.0, "step": 3340 }, { "epoch": 1.223871026838875, "grad_norm": 0.14766208721761756, "learning_rate": 3.581926768835927e-05, "loss": 0.498, "num_tokens": 2551975672.0, "step": 3341 }, { "epoch": 1.224237427864798, "grad_norm": 0.13238262000341083, "learning_rate": 3.581652952756819e-05, "loss": 0.4604, "num_tokens": 2552701689.0, "step": 3342 }, { "epoch": 1.2246038288907208, "grad_norm": 0.1436656260416302, "learning_rate": 3.5813790588291086e-05, "loss": 0.5246, "num_tokens": 2553565337.0, "step": 3343 }, { "epoch": 1.2249702299166438, "grad_norm": 0.15248521116483618, "learning_rate": 3.581105087068227e-05, "loss": 0.4757, "num_tokens": 2554289241.0, "step": 3344 }, { "epoch": 1.2253366309425666, "grad_norm": 0.1531099619302745, "learning_rate": 3.580831037489613e-05, "loss": 0.4876, "num_tokens": 2555127397.0, "step": 3345 }, { "epoch": 1.2257030319684894, "grad_norm": 0.16066241291679004, "learning_rate": 3.580556910108707e-05, "loss": 0.5034, "num_tokens": 2555911453.0, "step": 3346 }, { "epoch": 1.2260694329944124, "grad_norm": 0.138237983170002, "learning_rate": 3.580282704940954e-05, "loss": 0.474, "num_tokens": 2556583185.0, "step": 3347 }, { "epoch": 1.2264358340203352, "grad_norm": 0.16989675915942742, "learning_rate": 3.580008422001804e-05, "loss": 0.5437, "num_tokens": 2557261393.0, "step": 3348 }, { "epoch": 1.2268022350462582, "grad_norm": 0.15059709263436902, "learning_rate": 3.579734061306712e-05, "loss": 0.4897, "num_tokens": 2558018941.0, "step": 3349 }, { "epoch": 1.227168636072181, "grad_norm": 0.1671140671231693, "learning_rate": 3.5794596228711364e-05, "loss": 0.5141, "num_tokens": 2558839622.0, "step": 3350 }, { "epoch": 1.227535037098104, "grad_norm": 0.1300815405112637, "learning_rate": 3.5791851067105404e-05, "loss": 0.4735, "num_tokens": 2559693140.0, "step": 3351 }, { "epoch": 1.2279014381240267, "grad_norm": 0.14356969783820495, "learning_rate": 3.578910512840391e-05, "loss": 0.5017, "num_tokens": 2560435146.0, "step": 3352 }, { "epoch": 1.2282678391499495, "grad_norm": 0.1472976267952271, "learning_rate": 3.5786358412761614e-05, "loss": 0.4868, "num_tokens": 2561280799.0, "step": 3353 }, { "epoch": 1.2286342401758725, "grad_norm": 0.15195386837532282, "learning_rate": 3.5783610920333255e-05, "loss": 0.5115, "num_tokens": 2562021697.0, "step": 3354 }, { "epoch": 1.2290006412017953, "grad_norm": 0.14642287497008083, "learning_rate": 3.578086265127366e-05, "loss": 0.4811, "num_tokens": 2562841515.0, "step": 3355 }, { "epoch": 1.2293670422277183, "grad_norm": 0.1446040299955227, "learning_rate": 3.577811360573767e-05, "loss": 0.4776, "num_tokens": 2563574134.0, "step": 3356 }, { "epoch": 1.229733443253641, "grad_norm": 0.16514762660623697, "learning_rate": 3.577536378388019e-05, "loss": 0.523, "num_tokens": 2564334223.0, "step": 3357 }, { "epoch": 1.230099844279564, "grad_norm": 0.15518282931625516, "learning_rate": 3.5772613185856145e-05, "loss": 0.5212, "num_tokens": 2565153642.0, "step": 3358 }, { "epoch": 1.2304662453054869, "grad_norm": 0.14523300686637192, "learning_rate": 3.576986181182052e-05, "loss": 0.5013, "num_tokens": 2565926830.0, "step": 3359 }, { "epoch": 1.2308326463314097, "grad_norm": 0.13503607812948223, "learning_rate": 3.576710966192833e-05, "loss": 0.5256, "num_tokens": 2566837785.0, "step": 3360 }, { "epoch": 1.2311990473573327, "grad_norm": 0.15268788059718436, "learning_rate": 3.5764356736334664e-05, "loss": 0.506, "num_tokens": 2567545761.0, "step": 3361 }, { "epoch": 1.2315654483832554, "grad_norm": 0.17349550729215052, "learning_rate": 3.576160303519462e-05, "loss": 0.533, "num_tokens": 2568397370.0, "step": 3362 }, { "epoch": 1.2319318494091784, "grad_norm": 0.16030475434995659, "learning_rate": 3.575884855866336e-05, "loss": 0.5392, "num_tokens": 2569106662.0, "step": 3363 }, { "epoch": 1.2322982504351012, "grad_norm": 0.1819587689555269, "learning_rate": 3.575609330689608e-05, "loss": 0.4813, "num_tokens": 2569807267.0, "step": 3364 }, { "epoch": 1.232664651461024, "grad_norm": 0.1533116041048282, "learning_rate": 3.5753337280048015e-05, "loss": 0.5331, "num_tokens": 2570638229.0, "step": 3365 }, { "epoch": 1.233031052486947, "grad_norm": 1.9791393205877474, "learning_rate": 3.575058047827447e-05, "loss": 0.5135, "num_tokens": 2571402864.0, "step": 3366 }, { "epoch": 1.2333974535128698, "grad_norm": 0.19663717083976584, "learning_rate": 3.5747822901730774e-05, "loss": 0.5459, "num_tokens": 2572157089.0, "step": 3367 }, { "epoch": 1.2337638545387928, "grad_norm": 0.1609705279808731, "learning_rate": 3.574506455057229e-05, "loss": 0.5332, "num_tokens": 2572972048.0, "step": 3368 }, { "epoch": 1.2341302555647156, "grad_norm": 0.16426588789848626, "learning_rate": 3.5742305424954436e-05, "loss": 0.4802, "num_tokens": 2573804168.0, "step": 3369 }, { "epoch": 1.2344966565906383, "grad_norm": 0.1645963462786887, "learning_rate": 3.5739545525032676e-05, "loss": 0.4896, "num_tokens": 2574642420.0, "step": 3370 }, { "epoch": 1.2348630576165613, "grad_norm": 0.14670675485013343, "learning_rate": 3.573678485096253e-05, "loss": 0.5013, "num_tokens": 2575462027.0, "step": 3371 }, { "epoch": 1.2352294586424841, "grad_norm": 0.1612204134451886, "learning_rate": 3.5734023402899526e-05, "loss": 0.5402, "num_tokens": 2576156985.0, "step": 3372 }, { "epoch": 1.2355958596684071, "grad_norm": 2.803993549376901, "learning_rate": 3.5731261180999267e-05, "loss": 0.5066, "num_tokens": 2576953201.0, "step": 3373 }, { "epoch": 1.23596226069433, "grad_norm": 0.20538837668660656, "learning_rate": 3.5728498185417386e-05, "loss": 0.4864, "num_tokens": 2577823599.0, "step": 3374 }, { "epoch": 1.236328661720253, "grad_norm": 0.1632872705970136, "learning_rate": 3.572573441630957e-05, "loss": 0.5268, "num_tokens": 2578574858.0, "step": 3375 }, { "epoch": 1.2366950627461757, "grad_norm": 0.1772075341235687, "learning_rate": 3.572296987383153e-05, "loss": 0.5087, "num_tokens": 2579400858.0, "step": 3376 }, { "epoch": 1.2370614637720985, "grad_norm": 0.19889139611221926, "learning_rate": 3.572020455813904e-05, "loss": 0.4735, "num_tokens": 2580200391.0, "step": 3377 }, { "epoch": 1.2374278647980215, "grad_norm": 0.14894983312806861, "learning_rate": 3.571743846938791e-05, "loss": 0.522, "num_tokens": 2581032538.0, "step": 3378 }, { "epoch": 1.2377942658239443, "grad_norm": 0.18625853385159452, "learning_rate": 3.5714671607734e-05, "loss": 0.5253, "num_tokens": 2581778974.0, "step": 3379 }, { "epoch": 1.2381606668498673, "grad_norm": 0.1965409451366317, "learning_rate": 3.57119039733332e-05, "loss": 0.5197, "num_tokens": 2582557464.0, "step": 3380 }, { "epoch": 1.23852706787579, "grad_norm": 0.18017740640370744, "learning_rate": 3.5709135566341444e-05, "loss": 0.5004, "num_tokens": 2583248287.0, "step": 3381 }, { "epoch": 1.238893468901713, "grad_norm": 0.16204656039014478, "learning_rate": 3.570636638691473e-05, "loss": 0.5134, "num_tokens": 2584060823.0, "step": 3382 }, { "epoch": 1.2392598699276358, "grad_norm": 0.1632789008370845, "learning_rate": 3.570359643520908e-05, "loss": 0.473, "num_tokens": 2584983300.0, "step": 3383 }, { "epoch": 1.2396262709535586, "grad_norm": 0.2344927984055559, "learning_rate": 3.570082571138057e-05, "loss": 0.549, "num_tokens": 2585672896.0, "step": 3384 }, { "epoch": 1.2399926719794816, "grad_norm": 0.1739731323040099, "learning_rate": 3.569805421558532e-05, "loss": 0.5094, "num_tokens": 2586462063.0, "step": 3385 }, { "epoch": 1.2403590730054044, "grad_norm": 0.16995699402095604, "learning_rate": 3.569528194797946e-05, "loss": 0.5096, "num_tokens": 2587168772.0, "step": 3386 }, { "epoch": 1.2407254740313274, "grad_norm": 0.17727658064001228, "learning_rate": 3.5692508908719226e-05, "loss": 0.5206, "num_tokens": 2587936297.0, "step": 3387 }, { "epoch": 1.2410918750572502, "grad_norm": 0.1768186479520692, "learning_rate": 3.568973509796085e-05, "loss": 0.5103, "num_tokens": 2588716202.0, "step": 3388 }, { "epoch": 1.241458276083173, "grad_norm": 0.18346191960768568, "learning_rate": 3.568696051586062e-05, "loss": 0.5393, "num_tokens": 2589325000.0, "step": 3389 }, { "epoch": 1.241824677109096, "grad_norm": 0.14808443982381042, "learning_rate": 3.568418516257487e-05, "loss": 0.5056, "num_tokens": 2590009260.0, "step": 3390 }, { "epoch": 1.2421910781350187, "grad_norm": 0.24421031934939683, "learning_rate": 3.568140903825997e-05, "loss": 0.4977, "num_tokens": 2590796597.0, "step": 3391 }, { "epoch": 1.2425574791609417, "grad_norm": 0.15430891887490306, "learning_rate": 3.567863214307235e-05, "loss": 0.4951, "num_tokens": 2591548270.0, "step": 3392 }, { "epoch": 1.2429238801868645, "grad_norm": 0.1593434622835198, "learning_rate": 3.567585447716848e-05, "loss": 0.4977, "num_tokens": 2592173790.0, "step": 3393 }, { "epoch": 1.2432902812127873, "grad_norm": 0.2113998907907304, "learning_rate": 3.5673076040704836e-05, "loss": 0.5352, "num_tokens": 2592879670.0, "step": 3394 }, { "epoch": 1.2436566822387103, "grad_norm": 0.20047538126568304, "learning_rate": 3.5670296833837995e-05, "loss": 0.5606, "num_tokens": 2593542685.0, "step": 3395 }, { "epoch": 1.244023083264633, "grad_norm": 0.15345216314355115, "learning_rate": 3.566751685672454e-05, "loss": 0.5333, "num_tokens": 2594375411.0, "step": 3396 }, { "epoch": 1.244389484290556, "grad_norm": 0.19866103546112668, "learning_rate": 3.566473610952111e-05, "loss": 0.5408, "num_tokens": 2595130495.0, "step": 3397 }, { "epoch": 1.2447558853164788, "grad_norm": 0.15853070864638685, "learning_rate": 3.566195459238439e-05, "loss": 0.5664, "num_tokens": 2595893824.0, "step": 3398 }, { "epoch": 1.2451222863424019, "grad_norm": 0.1604408187372082, "learning_rate": 3.565917230547109e-05, "loss": 0.4993, "num_tokens": 2596734368.0, "step": 3399 }, { "epoch": 1.2454886873683246, "grad_norm": 0.14816525735393768, "learning_rate": 3.5656389248937985e-05, "loss": 0.5095, "num_tokens": 2597520969.0, "step": 3400 }, { "epoch": 1.2458550883942474, "grad_norm": 0.156397127682123, "learning_rate": 3.565360542294189e-05, "loss": 0.4785, "num_tokens": 2598440730.0, "step": 3401 }, { "epoch": 1.2462214894201704, "grad_norm": 0.12396112727710094, "learning_rate": 3.565082082763964e-05, "loss": 0.476, "num_tokens": 2599323575.0, "step": 3402 }, { "epoch": 1.2465878904460932, "grad_norm": 0.1959694899685424, "learning_rate": 3.564803546318816e-05, "loss": 0.5058, "num_tokens": 2600041612.0, "step": 3403 }, { "epoch": 1.2469542914720162, "grad_norm": 0.1687538979695197, "learning_rate": 3.564524932974436e-05, "loss": 0.5117, "num_tokens": 2600769742.0, "step": 3404 }, { "epoch": 1.247320692497939, "grad_norm": 0.1582223225697369, "learning_rate": 3.5642462427465245e-05, "loss": 0.5466, "num_tokens": 2601464276.0, "step": 3405 }, { "epoch": 1.247687093523862, "grad_norm": 0.1586736710736246, "learning_rate": 3.563967475650784e-05, "loss": 0.4782, "num_tokens": 2602210158.0, "step": 3406 }, { "epoch": 1.2480534945497848, "grad_norm": 0.1370899498591728, "learning_rate": 3.56368863170292e-05, "loss": 0.4989, "num_tokens": 2602927314.0, "step": 3407 }, { "epoch": 1.2484198955757075, "grad_norm": 0.16077453133528768, "learning_rate": 3.5634097109186453e-05, "loss": 0.5074, "num_tokens": 2603735606.0, "step": 3408 }, { "epoch": 1.2487862966016305, "grad_norm": 0.15860963986003893, "learning_rate": 3.5631307133136745e-05, "loss": 0.4973, "num_tokens": 2604529203.0, "step": 3409 }, { "epoch": 1.2491526976275533, "grad_norm": 0.14302250562550967, "learning_rate": 3.562851638903729e-05, "loss": 0.5129, "num_tokens": 2605376614.0, "step": 3410 }, { "epoch": 1.2495190986534763, "grad_norm": 0.15536043743118594, "learning_rate": 3.562572487704532e-05, "loss": 0.5075, "num_tokens": 2606193319.0, "step": 3411 }, { "epoch": 1.249885499679399, "grad_norm": 0.15601299734859975, "learning_rate": 3.5622932597318115e-05, "loss": 0.5326, "num_tokens": 2606920664.0, "step": 3412 }, { "epoch": 1.2502519007053219, "grad_norm": 0.15380803483743288, "learning_rate": 3.5620139550013026e-05, "loss": 0.5026, "num_tokens": 2607684621.0, "step": 3413 }, { "epoch": 1.2506183017312449, "grad_norm": 0.14538753125152215, "learning_rate": 3.5617345735287406e-05, "loss": 0.541, "num_tokens": 2608400027.0, "step": 3414 }, { "epoch": 1.2509847027571677, "grad_norm": 0.17741859110034167, "learning_rate": 3.561455115329869e-05, "loss": 0.5269, "num_tokens": 2609125238.0, "step": 3415 }, { "epoch": 1.2513511037830907, "grad_norm": 0.16341274862701974, "learning_rate": 3.561175580420431e-05, "loss": 0.5226, "num_tokens": 2609904998.0, "step": 3416 }, { "epoch": 1.2517175048090134, "grad_norm": 0.1539083168255901, "learning_rate": 3.560895968816179e-05, "loss": 0.5183, "num_tokens": 2610757093.0, "step": 3417 }, { "epoch": 1.2520839058349362, "grad_norm": 0.1627039635381501, "learning_rate": 3.560616280532868e-05, "loss": 0.5089, "num_tokens": 2611588417.0, "step": 3418 }, { "epoch": 1.2524503068608592, "grad_norm": 0.1424871792863475, "learning_rate": 3.5603365155862556e-05, "loss": 0.4778, "num_tokens": 2612412211.0, "step": 3419 }, { "epoch": 1.252816707886782, "grad_norm": 0.14363795101380505, "learning_rate": 3.560056673992105e-05, "loss": 0.4907, "num_tokens": 2613194183.0, "step": 3420 }, { "epoch": 1.253183108912705, "grad_norm": 0.16186922104378396, "learning_rate": 3.5597767557661843e-05, "loss": 0.481, "num_tokens": 2613925335.0, "step": 3421 }, { "epoch": 1.2535495099386278, "grad_norm": 0.1661920643365492, "learning_rate": 3.559496760924266e-05, "loss": 0.5427, "num_tokens": 2614724601.0, "step": 3422 }, { "epoch": 1.2539159109645506, "grad_norm": 0.14992860260020577, "learning_rate": 3.559216689482125e-05, "loss": 0.5137, "num_tokens": 2615480128.0, "step": 3423 }, { "epoch": 1.2542823119904736, "grad_norm": 0.17307984664570789, "learning_rate": 3.558936541455542e-05, "loss": 0.5232, "num_tokens": 2616141709.0, "step": 3424 }, { "epoch": 1.2546487130163966, "grad_norm": 0.13851030002374087, "learning_rate": 3.558656316860302e-05, "loss": 0.4996, "num_tokens": 2616971207.0, "step": 3425 }, { "epoch": 1.2550151140423194, "grad_norm": 0.20247437216890735, "learning_rate": 3.558376015712195e-05, "loss": 0.5175, "num_tokens": 2617853125.0, "step": 3426 }, { "epoch": 1.2553815150682421, "grad_norm": 0.15804540217488489, "learning_rate": 3.558095638027013e-05, "loss": 0.5198, "num_tokens": 2618520964.0, "step": 3427 }, { "epoch": 1.2557479160941651, "grad_norm": 0.1538139680255514, "learning_rate": 3.557815183820555e-05, "loss": 0.4814, "num_tokens": 2619320126.0, "step": 3428 }, { "epoch": 1.256114317120088, "grad_norm": 0.15712542788658224, "learning_rate": 3.5575346531086226e-05, "loss": 0.5284, "num_tokens": 2620050479.0, "step": 3429 }, { "epoch": 1.256480718146011, "grad_norm": 0.17298169609389175, "learning_rate": 3.557254045907022e-05, "loss": 0.5293, "num_tokens": 2620816503.0, "step": 3430 }, { "epoch": 1.2568471191719337, "grad_norm": 0.14231590707906805, "learning_rate": 3.556973362231563e-05, "loss": 0.5238, "num_tokens": 2621621814.0, "step": 3431 }, { "epoch": 1.2572135201978565, "grad_norm": 0.17060244661845614, "learning_rate": 3.556692602098063e-05, "loss": 0.4755, "num_tokens": 2622345231.0, "step": 3432 }, { "epoch": 1.2575799212237795, "grad_norm": 0.16460145008852756, "learning_rate": 3.5564117655223394e-05, "loss": 0.4846, "num_tokens": 2623105667.0, "step": 3433 }, { "epoch": 1.2579463222497023, "grad_norm": 0.13734922490436627, "learning_rate": 3.556130852520216e-05, "loss": 0.4823, "num_tokens": 2623848781.0, "step": 3434 }, { "epoch": 1.2583127232756253, "grad_norm": 0.1491061330370432, "learning_rate": 3.5558498631075205e-05, "loss": 0.4905, "num_tokens": 2624630764.0, "step": 3435 }, { "epoch": 1.258679124301548, "grad_norm": 0.17345194690289906, "learning_rate": 3.555568797300087e-05, "loss": 0.5331, "num_tokens": 2625457534.0, "step": 3436 }, { "epoch": 1.2590455253274708, "grad_norm": 0.1425053196529374, "learning_rate": 3.5552876551137495e-05, "loss": 0.5082, "num_tokens": 2626280155.0, "step": 3437 }, { "epoch": 1.2594119263533938, "grad_norm": 0.16923450200480897, "learning_rate": 3.55500643656435e-05, "loss": 0.5121, "num_tokens": 2627044028.0, "step": 3438 }, { "epoch": 1.2597783273793166, "grad_norm": 0.15918294752335835, "learning_rate": 3.5547251416677344e-05, "loss": 0.485, "num_tokens": 2627842810.0, "step": 3439 }, { "epoch": 1.2601447284052396, "grad_norm": 0.15484406474159468, "learning_rate": 3.554443770439751e-05, "loss": 0.5035, "num_tokens": 2628663555.0, "step": 3440 }, { "epoch": 1.2605111294311624, "grad_norm": 0.16283216301457898, "learning_rate": 3.554162322896254e-05, "loss": 0.5231, "num_tokens": 2629397685.0, "step": 3441 }, { "epoch": 1.2608775304570852, "grad_norm": 0.17034152620265752, "learning_rate": 3.5538807990531006e-05, "loss": 0.4954, "num_tokens": 2630157905.0, "step": 3442 }, { "epoch": 1.2612439314830082, "grad_norm": 0.16700689955122425, "learning_rate": 3.5535991989261545e-05, "loss": 0.5001, "num_tokens": 2630900421.0, "step": 3443 }, { "epoch": 1.261610332508931, "grad_norm": 0.15798690345438446, "learning_rate": 3.5533175225312816e-05, "loss": 0.5212, "num_tokens": 2631718504.0, "step": 3444 }, { "epoch": 1.261976733534854, "grad_norm": 0.20132215617749125, "learning_rate": 3.553035769884352e-05, "loss": 0.5241, "num_tokens": 2632439804.0, "step": 3445 }, { "epoch": 1.2623431345607767, "grad_norm": 0.14093941917622688, "learning_rate": 3.552753941001243e-05, "loss": 0.5001, "num_tokens": 2633163525.0, "step": 3446 }, { "epoch": 1.2627095355866995, "grad_norm": 0.18131891134968822, "learning_rate": 3.5524720358978327e-05, "loss": 0.5086, "num_tokens": 2633876563.0, "step": 3447 }, { "epoch": 1.2630759366126225, "grad_norm": 0.17230217966081177, "learning_rate": 3.5521900545900045e-05, "loss": 0.5132, "num_tokens": 2634550167.0, "step": 3448 }, { "epoch": 1.2634423376385455, "grad_norm": 0.1763074175650782, "learning_rate": 3.551907997093647e-05, "loss": 0.4869, "num_tokens": 2635281500.0, "step": 3449 }, { "epoch": 1.2638087386644683, "grad_norm": 0.19123048435770473, "learning_rate": 3.551625863424654e-05, "loss": 0.487, "num_tokens": 2635964299.0, "step": 3450 }, { "epoch": 1.264175139690391, "grad_norm": 0.15598485024169548, "learning_rate": 3.55134365359892e-05, "loss": 0.5219, "num_tokens": 2636795655.0, "step": 3451 }, { "epoch": 1.264541540716314, "grad_norm": 0.14574747484822176, "learning_rate": 3.551061367632347e-05, "loss": 0.5077, "num_tokens": 2637616224.0, "step": 3452 }, { "epoch": 1.2649079417422369, "grad_norm": 0.17172740588673996, "learning_rate": 3.55077900554084e-05, "loss": 0.47, "num_tokens": 2638462377.0, "step": 3453 }, { "epoch": 1.2652743427681599, "grad_norm": 0.16643254458046214, "learning_rate": 3.5504965673403096e-05, "loss": 0.4936, "num_tokens": 2639114373.0, "step": 3454 }, { "epoch": 1.2656407437940826, "grad_norm": 0.16320071858158813, "learning_rate": 3.550214053046668e-05, "loss": 0.5355, "num_tokens": 2639883365.0, "step": 3455 }, { "epoch": 1.2660071448200054, "grad_norm": 0.1550360325304111, "learning_rate": 3.549931462675835e-05, "loss": 0.4838, "num_tokens": 2640769037.0, "step": 3456 }, { "epoch": 1.2663735458459284, "grad_norm": 0.18677601014880715, "learning_rate": 3.549648796243731e-05, "loss": 0.4778, "num_tokens": 2641562598.0, "step": 3457 }, { "epoch": 1.2667399468718512, "grad_norm": 0.14502410351908632, "learning_rate": 3.549366053766284e-05, "loss": 0.4865, "num_tokens": 2642308703.0, "step": 3458 }, { "epoch": 1.2671063478977742, "grad_norm": 0.17495102418163774, "learning_rate": 3.549083235259426e-05, "loss": 0.5043, "num_tokens": 2643115185.0, "step": 3459 }, { "epoch": 1.267472748923697, "grad_norm": 0.1708438441730871, "learning_rate": 3.548800340739091e-05, "loss": 0.5115, "num_tokens": 2644062025.0, "step": 3460 }, { "epoch": 1.2678391499496198, "grad_norm": 0.16618264901906757, "learning_rate": 3.5485173702212183e-05, "loss": 0.5198, "num_tokens": 2644828276.0, "step": 3461 }, { "epoch": 1.2682055509755428, "grad_norm": 0.154339264378888, "learning_rate": 3.548234323721752e-05, "loss": 0.4869, "num_tokens": 2645676202.0, "step": 3462 }, { "epoch": 1.2685719520014656, "grad_norm": 0.183828504383785, "learning_rate": 3.54795120125664e-05, "loss": 0.5365, "num_tokens": 2646532692.0, "step": 3463 }, { "epoch": 1.2689383530273886, "grad_norm": 0.1354206758570525, "learning_rate": 3.547668002841836e-05, "loss": 0.5233, "num_tokens": 2647368230.0, "step": 3464 }, { "epoch": 1.2693047540533113, "grad_norm": 0.18125097878328103, "learning_rate": 3.547384728493295e-05, "loss": 0.4977, "num_tokens": 2648318249.0, "step": 3465 }, { "epoch": 1.2696711550792341, "grad_norm": 0.15197626973718578, "learning_rate": 3.547101378226979e-05, "loss": 0.5226, "num_tokens": 2649093379.0, "step": 3466 }, { "epoch": 1.2700375561051571, "grad_norm": 0.15578593770133026, "learning_rate": 3.546817952058854e-05, "loss": 0.4938, "num_tokens": 2649858604.0, "step": 3467 }, { "epoch": 1.27040395713108, "grad_norm": 0.1701992495885922, "learning_rate": 3.5465344500048876e-05, "loss": 0.5238, "num_tokens": 2650565663.0, "step": 3468 }, { "epoch": 1.270770358157003, "grad_norm": 0.13770204070421568, "learning_rate": 3.546250872081055e-05, "loss": 0.4893, "num_tokens": 2651305794.0, "step": 3469 }, { "epoch": 1.2711367591829257, "grad_norm": 0.16755764737010861, "learning_rate": 3.545967218303333e-05, "loss": 0.5388, "num_tokens": 2652083455.0, "step": 3470 }, { "epoch": 1.2715031602088485, "grad_norm": 0.16894854684544075, "learning_rate": 3.545683488687705e-05, "loss": 0.5008, "num_tokens": 2652885284.0, "step": 3471 }, { "epoch": 1.2718695612347715, "grad_norm": 0.1377369077324564, "learning_rate": 3.5453996832501574e-05, "loss": 0.4854, "num_tokens": 2653722852.0, "step": 3472 }, { "epoch": 1.2722359622606945, "grad_norm": 0.18971224975709036, "learning_rate": 3.545115802006681e-05, "loss": 0.5134, "num_tokens": 2654487469.0, "step": 3473 }, { "epoch": 1.2726023632866172, "grad_norm": 0.14944454754384856, "learning_rate": 3.544831844973271e-05, "loss": 0.5266, "num_tokens": 2655175567.0, "step": 3474 }, { "epoch": 1.27296876431254, "grad_norm": 0.17661607244041497, "learning_rate": 3.544547812165926e-05, "loss": 0.5204, "num_tokens": 2656039586.0, "step": 3475 }, { "epoch": 1.273335165338463, "grad_norm": 0.14016181981158368, "learning_rate": 3.5442637036006516e-05, "loss": 0.4771, "num_tokens": 2656901295.0, "step": 3476 }, { "epoch": 1.2737015663643858, "grad_norm": 0.1684674988805778, "learning_rate": 3.543979519293454e-05, "loss": 0.4996, "num_tokens": 2657635983.0, "step": 3477 }, { "epoch": 1.2740679673903088, "grad_norm": 0.180394460903341, "learning_rate": 3.543695259260347e-05, "loss": 0.5205, "num_tokens": 2658521235.0, "step": 3478 }, { "epoch": 1.2744343684162316, "grad_norm": 0.17153970603394886, "learning_rate": 3.543410923517345e-05, "loss": 0.5104, "num_tokens": 2659156903.0, "step": 3479 }, { "epoch": 1.2748007694421544, "grad_norm": 0.1651613399279492, "learning_rate": 3.5431265120804705e-05, "loss": 0.5459, "num_tokens": 2659828917.0, "step": 3480 }, { "epoch": 1.2751671704680774, "grad_norm": 0.16453697372044954, "learning_rate": 3.542842024965748e-05, "loss": 0.5114, "num_tokens": 2660504885.0, "step": 3481 }, { "epoch": 1.2755335714940002, "grad_norm": 0.1552200697453295, "learning_rate": 3.542557462189207e-05, "loss": 0.5011, "num_tokens": 2661215595.0, "step": 3482 }, { "epoch": 1.2758999725199232, "grad_norm": 0.2032897998353576, "learning_rate": 3.542272823766881e-05, "loss": 0.5359, "num_tokens": 2661840872.0, "step": 3483 }, { "epoch": 1.276266373545846, "grad_norm": 0.16398010901526128, "learning_rate": 3.541988109714807e-05, "loss": 0.5694, "num_tokens": 2662515542.0, "step": 3484 }, { "epoch": 1.2766327745717687, "grad_norm": 0.16725835382276233, "learning_rate": 3.5417033200490286e-05, "loss": 0.5254, "num_tokens": 2663211869.0, "step": 3485 }, { "epoch": 1.2769991755976917, "grad_norm": 0.16901887171298763, "learning_rate": 3.541418454785591e-05, "loss": 0.4939, "num_tokens": 2664040313.0, "step": 3486 }, { "epoch": 1.2773655766236145, "grad_norm": 0.16826684290638652, "learning_rate": 3.541133513940545e-05, "loss": 0.5233, "num_tokens": 2664716624.0, "step": 3487 }, { "epoch": 1.2777319776495375, "grad_norm": 0.19240375334197055, "learning_rate": 3.540848497529945e-05, "loss": 0.5159, "num_tokens": 2665463180.0, "step": 3488 }, { "epoch": 1.2780983786754603, "grad_norm": 0.15085494272343172, "learning_rate": 3.540563405569851e-05, "loss": 0.4923, "num_tokens": 2666321115.0, "step": 3489 }, { "epoch": 1.278464779701383, "grad_norm": 0.17205275894318, "learning_rate": 3.540278238076326e-05, "loss": 0.48, "num_tokens": 2666987872.0, "step": 3490 }, { "epoch": 1.278831180727306, "grad_norm": 0.14823021627721739, "learning_rate": 3.539992995065438e-05, "loss": 0.5024, "num_tokens": 2667714031.0, "step": 3491 }, { "epoch": 1.2791975817532288, "grad_norm": 0.1819339471064415, "learning_rate": 3.539707676553259e-05, "loss": 0.542, "num_tokens": 2668398181.0, "step": 3492 }, { "epoch": 1.2795639827791518, "grad_norm": 0.15359902106729656, "learning_rate": 3.539422282555864e-05, "loss": 0.4627, "num_tokens": 2669213728.0, "step": 3493 }, { "epoch": 1.2799303838050746, "grad_norm": 0.14918189024602627, "learning_rate": 3.5391368130893344e-05, "loss": 0.4788, "num_tokens": 2669982815.0, "step": 3494 }, { "epoch": 1.2802967848309974, "grad_norm": 0.158835786860451, "learning_rate": 3.538851268169755e-05, "loss": 0.4857, "num_tokens": 2670743344.0, "step": 3495 }, { "epoch": 1.2806631858569204, "grad_norm": 0.19143227403662688, "learning_rate": 3.538565647813214e-05, "loss": 0.5333, "num_tokens": 2671538733.0, "step": 3496 }, { "epoch": 1.2810295868828434, "grad_norm": 0.15318026114804223, "learning_rate": 3.538279952035804e-05, "loss": 0.5012, "num_tokens": 2672324382.0, "step": 3497 }, { "epoch": 1.2813959879087662, "grad_norm": 0.1654696134301764, "learning_rate": 3.5379941808536244e-05, "loss": 0.5256, "num_tokens": 2673070789.0, "step": 3498 }, { "epoch": 1.281762388934689, "grad_norm": 0.15867061052699707, "learning_rate": 3.5377083342827746e-05, "loss": 0.5216, "num_tokens": 2673824221.0, "step": 3499 }, { "epoch": 1.282128789960612, "grad_norm": 0.140309881117112, "learning_rate": 3.537422412339362e-05, "loss": 0.5265, "num_tokens": 2674547447.0, "step": 3500 }, { "epoch": 1.2824951909865347, "grad_norm": 0.16510251826749697, "learning_rate": 3.537136415039496e-05, "loss": 0.5066, "num_tokens": 2675231271.0, "step": 3501 }, { "epoch": 1.2828615920124578, "grad_norm": 0.15260149756355829, "learning_rate": 3.536850342399292e-05, "loss": 0.5507, "num_tokens": 2675876425.0, "step": 3502 }, { "epoch": 1.2832279930383805, "grad_norm": 0.16872172332616045, "learning_rate": 3.536564194434867e-05, "loss": 0.5179, "num_tokens": 2676674133.0, "step": 3503 }, { "epoch": 1.2835943940643033, "grad_norm": 0.14887083644145885, "learning_rate": 3.536277971162345e-05, "loss": 0.4855, "num_tokens": 2677392923.0, "step": 3504 }, { "epoch": 1.2839607950902263, "grad_norm": 0.1622880267849333, "learning_rate": 3.535991672597853e-05, "loss": 0.5593, "num_tokens": 2678154041.0, "step": 3505 }, { "epoch": 1.284327196116149, "grad_norm": 0.15260637312496605, "learning_rate": 3.535705298757523e-05, "loss": 0.516, "num_tokens": 2678922586.0, "step": 3506 }, { "epoch": 1.284693597142072, "grad_norm": 0.14569302625084332, "learning_rate": 3.535418849657489e-05, "loss": 0.501, "num_tokens": 2679609729.0, "step": 3507 }, { "epoch": 1.2850599981679949, "grad_norm": 0.153088048794695, "learning_rate": 3.535132325313892e-05, "loss": 0.5149, "num_tokens": 2680419884.0, "step": 3508 }, { "epoch": 1.2854263991939177, "grad_norm": 0.1451030369622614, "learning_rate": 3.534845725742876e-05, "loss": 0.5173, "num_tokens": 2681148705.0, "step": 3509 }, { "epoch": 1.2857928002198407, "grad_norm": 0.141445891108748, "learning_rate": 3.534559050960589e-05, "loss": 0.4965, "num_tokens": 2681855672.0, "step": 3510 }, { "epoch": 1.2861592012457634, "grad_norm": 0.1461580581666764, "learning_rate": 3.5342723009831836e-05, "loss": 0.5097, "num_tokens": 2682693626.0, "step": 3511 }, { "epoch": 1.2865256022716864, "grad_norm": 0.14335120039395674, "learning_rate": 3.533985475826816e-05, "loss": 0.4916, "num_tokens": 2683474327.0, "step": 3512 }, { "epoch": 1.2868920032976092, "grad_norm": 0.15298820030845692, "learning_rate": 3.5336985755076485e-05, "loss": 0.4835, "num_tokens": 2684243984.0, "step": 3513 }, { "epoch": 1.287258404323532, "grad_norm": 0.160847974615074, "learning_rate": 3.5334116000418456e-05, "loss": 0.5277, "num_tokens": 2685055786.0, "step": 3514 }, { "epoch": 1.287624805349455, "grad_norm": 0.14359253256221546, "learning_rate": 3.5331245494455776e-05, "loss": 0.5261, "num_tokens": 2685801927.0, "step": 3515 }, { "epoch": 1.2879912063753778, "grad_norm": 0.15193356600360405, "learning_rate": 3.532837423735018e-05, "loss": 0.503, "num_tokens": 2686599979.0, "step": 3516 }, { "epoch": 1.2883576074013008, "grad_norm": 0.16786411688429737, "learning_rate": 3.5325502229263424e-05, "loss": 0.4917, "num_tokens": 2687374802.0, "step": 3517 }, { "epoch": 1.2887240084272236, "grad_norm": 0.1304490146794175, "learning_rate": 3.532262947035736e-05, "loss": 0.4992, "num_tokens": 2688220190.0, "step": 3518 }, { "epoch": 1.2890904094531463, "grad_norm": 0.15830866129935808, "learning_rate": 3.5319755960793847e-05, "loss": 0.4798, "num_tokens": 2688982411.0, "step": 3519 }, { "epoch": 1.2894568104790693, "grad_norm": 0.15774007968030038, "learning_rate": 3.531688170073478e-05, "loss": 0.5033, "num_tokens": 2689751822.0, "step": 3520 }, { "epoch": 1.2898232115049924, "grad_norm": 0.15014869761878533, "learning_rate": 3.531400669034212e-05, "loss": 0.4818, "num_tokens": 2690585213.0, "step": 3521 }, { "epoch": 1.2901896125309151, "grad_norm": 0.14321198233043314, "learning_rate": 3.531113092977785e-05, "loss": 0.4926, "num_tokens": 2691277292.0, "step": 3522 }, { "epoch": 1.290556013556838, "grad_norm": 0.1817089101224106, "learning_rate": 3.5308254419204e-05, "loss": 0.4975, "num_tokens": 2691963352.0, "step": 3523 }, { "epoch": 1.290922414582761, "grad_norm": 0.1453961584304028, "learning_rate": 3.5305377158782655e-05, "loss": 0.4907, "num_tokens": 2692757160.0, "step": 3524 }, { "epoch": 1.2912888156086837, "grad_norm": 0.16695892059579248, "learning_rate": 3.530249914867593e-05, "loss": 0.4853, "num_tokens": 2693470432.0, "step": 3525 }, { "epoch": 1.2916552166346067, "grad_norm": 0.1418643936926641, "learning_rate": 3.529962038904599e-05, "loss": 0.4737, "num_tokens": 2694268792.0, "step": 3526 }, { "epoch": 1.2920216176605295, "grad_norm": 0.16119081134441848, "learning_rate": 3.5296740880055035e-05, "loss": 0.513, "num_tokens": 2694964009.0, "step": 3527 }, { "epoch": 1.2923880186864523, "grad_norm": 0.14759855668610997, "learning_rate": 3.529386062186529e-05, "loss": 0.5023, "num_tokens": 2695729923.0, "step": 3528 }, { "epoch": 1.2927544197123753, "grad_norm": 0.16116164152929613, "learning_rate": 3.529097961463907e-05, "loss": 0.495, "num_tokens": 2696424573.0, "step": 3529 }, { "epoch": 1.293120820738298, "grad_norm": 0.15379304855013406, "learning_rate": 3.5288097858538694e-05, "loss": 0.4938, "num_tokens": 2697191476.0, "step": 3530 }, { "epoch": 1.293487221764221, "grad_norm": 0.1507191597563598, "learning_rate": 3.528521535372653e-05, "loss": 0.5014, "num_tokens": 2698014204.0, "step": 3531 }, { "epoch": 1.2938536227901438, "grad_norm": 0.16165529525155464, "learning_rate": 3.528233210036499e-05, "loss": 0.5174, "num_tokens": 2698796981.0, "step": 3532 }, { "epoch": 1.2942200238160666, "grad_norm": 0.1748628471445497, "learning_rate": 3.5279448098616534e-05, "loss": 0.5289, "num_tokens": 2699515192.0, "step": 3533 }, { "epoch": 1.2945864248419896, "grad_norm": 0.17155915535370564, "learning_rate": 3.527656334864366e-05, "loss": 0.5471, "num_tokens": 2700233746.0, "step": 3534 }, { "epoch": 1.2949528258679124, "grad_norm": 0.17469150305632314, "learning_rate": 3.5273677850608915e-05, "loss": 0.4711, "num_tokens": 2700983924.0, "step": 3535 }, { "epoch": 1.2953192268938354, "grad_norm": 0.15442694971562962, "learning_rate": 3.5270791604674856e-05, "loss": 0.4844, "num_tokens": 2701841194.0, "step": 3536 }, { "epoch": 1.2956856279197582, "grad_norm": 0.15610677163788186, "learning_rate": 3.526790461100413e-05, "loss": 0.52, "num_tokens": 2702637527.0, "step": 3537 }, { "epoch": 1.296052028945681, "grad_norm": 0.16137184372245267, "learning_rate": 3.5265016869759396e-05, "loss": 0.5007, "num_tokens": 2703392367.0, "step": 3538 }, { "epoch": 1.296418429971604, "grad_norm": 0.15516885372879954, "learning_rate": 3.5262128381103365e-05, "loss": 0.4672, "num_tokens": 2704029667.0, "step": 3539 }, { "epoch": 1.2967848309975267, "grad_norm": 0.16473293571242695, "learning_rate": 3.5259239145198784e-05, "loss": 0.5044, "num_tokens": 2704973204.0, "step": 3540 }, { "epoch": 1.2971512320234497, "grad_norm": 0.164992985816893, "learning_rate": 3.525634916220845e-05, "loss": 0.513, "num_tokens": 2705709113.0, "step": 3541 }, { "epoch": 1.2975176330493725, "grad_norm": 0.1555344849219509, "learning_rate": 3.52534584322952e-05, "loss": 0.5136, "num_tokens": 2706494480.0, "step": 3542 }, { "epoch": 1.2978840340752953, "grad_norm": 0.18117880244917392, "learning_rate": 3.52505669556219e-05, "loss": 0.5068, "num_tokens": 2707286856.0, "step": 3543 }, { "epoch": 1.2982504351012183, "grad_norm": 0.1629599678746739, "learning_rate": 3.524767473235148e-05, "loss": 0.5117, "num_tokens": 2708043797.0, "step": 3544 }, { "epoch": 1.2986168361271413, "grad_norm": 0.17673775406595044, "learning_rate": 3.52447817626469e-05, "loss": 0.4949, "num_tokens": 2708791467.0, "step": 3545 }, { "epoch": 1.298983237153064, "grad_norm": 0.15337011254892666, "learning_rate": 3.524188804667115e-05, "loss": 0.4728, "num_tokens": 2709623142.0, "step": 3546 }, { "epoch": 1.2993496381789869, "grad_norm": 0.18829810754915413, "learning_rate": 3.523899358458728e-05, "loss": 0.5238, "num_tokens": 2710408949.0, "step": 3547 }, { "epoch": 1.2997160392049099, "grad_norm": 0.16420290462738904, "learning_rate": 3.52360983765584e-05, "loss": 0.5034, "num_tokens": 2711098300.0, "step": 3548 }, { "epoch": 1.3000824402308326, "grad_norm": 0.1515701297918223, "learning_rate": 3.523320242274761e-05, "loss": 0.5123, "num_tokens": 2711892019.0, "step": 3549 }, { "epoch": 1.3004488412567556, "grad_norm": 0.17116847954622566, "learning_rate": 3.523030572331809e-05, "loss": 0.5221, "num_tokens": 2712696169.0, "step": 3550 }, { "epoch": 1.3008152422826784, "grad_norm": 0.165614522526582, "learning_rate": 3.522740827843306e-05, "loss": 0.522, "num_tokens": 2713386627.0, "step": 3551 }, { "epoch": 1.3011816433086012, "grad_norm": 0.17314487703572629, "learning_rate": 3.5224510088255766e-05, "loss": 0.509, "num_tokens": 2714009151.0, "step": 3552 }, { "epoch": 1.3015480443345242, "grad_norm": 0.17609127791456464, "learning_rate": 3.522161115294951e-05, "loss": 0.5199, "num_tokens": 2714655605.0, "step": 3553 }, { "epoch": 1.301914445360447, "grad_norm": 0.16130224255180314, "learning_rate": 3.5218711472677635e-05, "loss": 0.4695, "num_tokens": 2715371981.0, "step": 3554 }, { "epoch": 1.30228084638637, "grad_norm": 0.14843188235969773, "learning_rate": 3.5215811047603515e-05, "loss": 0.4997, "num_tokens": 2716192991.0, "step": 3555 }, { "epoch": 1.3026472474122928, "grad_norm": 0.1553459129798879, "learning_rate": 3.5212909877890575e-05, "loss": 0.4856, "num_tokens": 2717063249.0, "step": 3556 }, { "epoch": 1.3030136484382155, "grad_norm": 0.16354436330343225, "learning_rate": 3.521000796370228e-05, "loss": 0.5199, "num_tokens": 2717838458.0, "step": 3557 }, { "epoch": 1.3033800494641385, "grad_norm": 0.18223733914074003, "learning_rate": 3.520710530520214e-05, "loss": 0.5244, "num_tokens": 2718632904.0, "step": 3558 }, { "epoch": 1.3037464504900613, "grad_norm": 0.14658327512935063, "learning_rate": 3.52042019025537e-05, "loss": 0.4978, "num_tokens": 2719423931.0, "step": 3559 }, { "epoch": 1.3041128515159843, "grad_norm": 0.16345996157844975, "learning_rate": 3.520129775592056e-05, "loss": 0.5129, "num_tokens": 2720166617.0, "step": 3560 }, { "epoch": 1.304479252541907, "grad_norm": 0.1490558463211656, "learning_rate": 3.519839286546634e-05, "loss": 0.5125, "num_tokens": 2720931706.0, "step": 3561 }, { "epoch": 1.3048456535678299, "grad_norm": 0.14982344012190982, "learning_rate": 3.519548723135472e-05, "loss": 0.5209, "num_tokens": 2721710643.0, "step": 3562 }, { "epoch": 1.305212054593753, "grad_norm": 0.16773143211422775, "learning_rate": 3.519258085374942e-05, "loss": 0.4916, "num_tokens": 2722454616.0, "step": 3563 }, { "epoch": 1.3055784556196757, "grad_norm": 0.15716611238960082, "learning_rate": 3.518967373281419e-05, "loss": 0.5142, "num_tokens": 2723252254.0, "step": 3564 }, { "epoch": 1.3059448566455987, "grad_norm": 0.1478727596526105, "learning_rate": 3.518676586871284e-05, "loss": 0.4839, "num_tokens": 2724073894.0, "step": 3565 }, { "epoch": 1.3063112576715215, "grad_norm": 0.14653524035756244, "learning_rate": 3.518385726160922e-05, "loss": 0.5014, "num_tokens": 2724953227.0, "step": 3566 }, { "epoch": 1.3066776586974442, "grad_norm": 0.15311961390500356, "learning_rate": 3.518094791166718e-05, "loss": 0.5077, "num_tokens": 2725759003.0, "step": 3567 }, { "epoch": 1.3070440597233672, "grad_norm": 0.13848019646413895, "learning_rate": 3.517803781905069e-05, "loss": 0.4971, "num_tokens": 2726573359.0, "step": 3568 }, { "epoch": 1.30741046074929, "grad_norm": 0.15100715603160234, "learning_rate": 3.517512698392368e-05, "loss": 0.5077, "num_tokens": 2727295183.0, "step": 3569 }, { "epoch": 1.307776861775213, "grad_norm": 0.14519762822729718, "learning_rate": 3.5172215406450186e-05, "loss": 0.5506, "num_tokens": 2728074830.0, "step": 3570 }, { "epoch": 1.3081432628011358, "grad_norm": 0.1585196071890998, "learning_rate": 3.516930308679425e-05, "loss": 0.5037, "num_tokens": 2728840114.0, "step": 3571 }, { "epoch": 1.3085096638270586, "grad_norm": 0.14643074925108598, "learning_rate": 3.516639002511996e-05, "loss": 0.5488, "num_tokens": 2729552475.0, "step": 3572 }, { "epoch": 1.3088760648529816, "grad_norm": 0.17248983165838944, "learning_rate": 3.516347622159145e-05, "loss": 0.5575, "num_tokens": 2730300984.0, "step": 3573 }, { "epoch": 1.3092424658789046, "grad_norm": 0.1311687265689311, "learning_rate": 3.516056167637291e-05, "loss": 0.5145, "num_tokens": 2731216353.0, "step": 3574 }, { "epoch": 1.3096088669048274, "grad_norm": 0.16338085875950065, "learning_rate": 3.515764638962855e-05, "loss": 0.5047, "num_tokens": 2731976467.0, "step": 3575 }, { "epoch": 1.3099752679307501, "grad_norm": 0.13901978057986092, "learning_rate": 3.515473036152263e-05, "loss": 0.5094, "num_tokens": 2732771028.0, "step": 3576 }, { "epoch": 1.3103416689566731, "grad_norm": 0.16881030297667463, "learning_rate": 3.515181359221946e-05, "loss": 0.5049, "num_tokens": 2733451210.0, "step": 3577 }, { "epoch": 1.310708069982596, "grad_norm": 0.1574738342497701, "learning_rate": 3.5148896081883374e-05, "loss": 0.5078, "num_tokens": 2734224906.0, "step": 3578 }, { "epoch": 1.311074471008519, "grad_norm": 0.15052843897813636, "learning_rate": 3.514597783067877e-05, "loss": 0.5015, "num_tokens": 2735001381.0, "step": 3579 }, { "epoch": 1.3114408720344417, "grad_norm": 0.15426186225625743, "learning_rate": 3.514305883877006e-05, "loss": 0.5025, "num_tokens": 2735714201.0, "step": 3580 }, { "epoch": 1.3118072730603645, "grad_norm": 0.1597692134086471, "learning_rate": 3.514013910632172e-05, "loss": 0.5118, "num_tokens": 2736380221.0, "step": 3581 }, { "epoch": 1.3121736740862875, "grad_norm": 0.14778035946581222, "learning_rate": 3.513721863349827e-05, "loss": 0.4966, "num_tokens": 2737180719.0, "step": 3582 }, { "epoch": 1.3125400751122103, "grad_norm": 0.14584943901807937, "learning_rate": 3.5134297420464246e-05, "loss": 0.5061, "num_tokens": 2738038149.0, "step": 3583 }, { "epoch": 1.3129064761381333, "grad_norm": 0.1560410557402379, "learning_rate": 3.513137546738425e-05, "loss": 0.4781, "num_tokens": 2738919123.0, "step": 3584 }, { "epoch": 1.313272877164056, "grad_norm": 0.1468780742847058, "learning_rate": 3.512845277442292e-05, "loss": 0.5385, "num_tokens": 2739736188.0, "step": 3585 }, { "epoch": 1.3136392781899788, "grad_norm": 0.16205128626818885, "learning_rate": 3.5125529341744937e-05, "loss": 0.5426, "num_tokens": 2740410712.0, "step": 3586 }, { "epoch": 1.3140056792159018, "grad_norm": 0.1425068986194546, "learning_rate": 3.512260516951502e-05, "loss": 0.4902, "num_tokens": 2741189912.0, "step": 3587 }, { "epoch": 1.3143720802418246, "grad_norm": 0.1559429066302026, "learning_rate": 3.511968025789792e-05, "loss": 0.5172, "num_tokens": 2741886303.0, "step": 3588 }, { "epoch": 1.3147384812677476, "grad_norm": 0.15301559542404516, "learning_rate": 3.511675460705844e-05, "loss": 0.4777, "num_tokens": 2742610852.0, "step": 3589 }, { "epoch": 1.3151048822936704, "grad_norm": 0.15177667795339864, "learning_rate": 3.5113828217161445e-05, "loss": 0.5636, "num_tokens": 2743387660.0, "step": 3590 }, { "epoch": 1.3154712833195932, "grad_norm": 0.1580917606238413, "learning_rate": 3.51109010883718e-05, "loss": 0.5344, "num_tokens": 2744053319.0, "step": 3591 }, { "epoch": 1.3158376843455162, "grad_norm": 0.17299530793445003, "learning_rate": 3.5107973220854445e-05, "loss": 0.5034, "num_tokens": 2744869905.0, "step": 3592 }, { "epoch": 1.316204085371439, "grad_norm": 0.14860420953760764, "learning_rate": 3.5105044614774335e-05, "loss": 0.5102, "num_tokens": 2745594342.0, "step": 3593 }, { "epoch": 1.316570486397362, "grad_norm": 0.16864432147390693, "learning_rate": 3.510211527029649e-05, "loss": 0.5398, "num_tokens": 2746394656.0, "step": 3594 }, { "epoch": 1.3169368874232847, "grad_norm": 0.14045319185484778, "learning_rate": 3.509918518758597e-05, "loss": 0.5124, "num_tokens": 2747178838.0, "step": 3595 }, { "epoch": 1.3173032884492075, "grad_norm": 0.17533002347634394, "learning_rate": 3.509625436680786e-05, "loss": 0.5389, "num_tokens": 2747917118.0, "step": 3596 }, { "epoch": 1.3176696894751305, "grad_norm": 0.18319889357811267, "learning_rate": 3.50933228081273e-05, "loss": 0.5062, "num_tokens": 2748725519.0, "step": 3597 }, { "epoch": 1.3180360905010535, "grad_norm": 0.13556109499686653, "learning_rate": 3.509039051170946e-05, "loss": 0.5316, "num_tokens": 2749580291.0, "step": 3598 }, { "epoch": 1.3184024915269763, "grad_norm": 0.20766419135908637, "learning_rate": 3.508745747771956e-05, "loss": 0.5019, "num_tokens": 2750429939.0, "step": 3599 }, { "epoch": 1.318768892552899, "grad_norm": 0.16653884861359763, "learning_rate": 3.508452370632287e-05, "loss": 0.5155, "num_tokens": 2751117341.0, "step": 3600 }, { "epoch": 1.319135293578822, "grad_norm": 0.15513495615223885, "learning_rate": 3.508158919768468e-05, "loss": 0.5164, "num_tokens": 2751917889.0, "step": 3601 }, { "epoch": 1.3195016946047449, "grad_norm": 0.18608013305614637, "learning_rate": 3.5078653951970346e-05, "loss": 0.4891, "num_tokens": 2752778808.0, "step": 3602 }, { "epoch": 1.3198680956306679, "grad_norm": 0.20400841184010535, "learning_rate": 3.507571796934525e-05, "loss": 0.4975, "num_tokens": 2753629371.0, "step": 3603 }, { "epoch": 1.3202344966565907, "grad_norm": 0.15863476249872802, "learning_rate": 3.507278124997481e-05, "loss": 0.5083, "num_tokens": 2754324930.0, "step": 3604 }, { "epoch": 1.3206008976825134, "grad_norm": 0.18497956227328471, "learning_rate": 3.5069843794024506e-05, "loss": 0.5024, "num_tokens": 2755069002.0, "step": 3605 }, { "epoch": 1.3209672987084364, "grad_norm": 0.19318084620367654, "learning_rate": 3.506690560165982e-05, "loss": 0.5048, "num_tokens": 2755800480.0, "step": 3606 }, { "epoch": 1.3213336997343592, "grad_norm": 0.1648972456991225, "learning_rate": 3.5063966673046344e-05, "loss": 0.5259, "num_tokens": 2756516290.0, "step": 3607 }, { "epoch": 1.3217001007602822, "grad_norm": 0.17097232093994086, "learning_rate": 3.506102700834965e-05, "loss": 0.4588, "num_tokens": 2757334109.0, "step": 3608 }, { "epoch": 1.322066501786205, "grad_norm": 0.176388650530444, "learning_rate": 3.505808660773537e-05, "loss": 0.5352, "num_tokens": 2758000286.0, "step": 3609 }, { "epoch": 1.3224329028121278, "grad_norm": 0.1715075486249078, "learning_rate": 3.505514547136918e-05, "loss": 0.5184, "num_tokens": 2758806120.0, "step": 3610 }, { "epoch": 1.3227993038380508, "grad_norm": 0.15395601648640617, "learning_rate": 3.50522035994168e-05, "loss": 0.5088, "num_tokens": 2759645321.0, "step": 3611 }, { "epoch": 1.3231657048639736, "grad_norm": 0.17492876938526683, "learning_rate": 3.5049260992043986e-05, "loss": 0.5205, "num_tokens": 2760463651.0, "step": 3612 }, { "epoch": 1.3235321058898966, "grad_norm": 0.1438143178019472, "learning_rate": 3.5046317649416546e-05, "loss": 0.5031, "num_tokens": 2761255784.0, "step": 3613 }, { "epoch": 1.3238985069158193, "grad_norm": 0.17124269641162368, "learning_rate": 3.504337357170031e-05, "loss": 0.511, "num_tokens": 2762004125.0, "step": 3614 }, { "epoch": 1.3242649079417421, "grad_norm": 0.16392957418365145, "learning_rate": 3.504042875906116e-05, "loss": 0.5328, "num_tokens": 2762786853.0, "step": 3615 }, { "epoch": 1.3246313089676651, "grad_norm": 0.1565329973963637, "learning_rate": 3.503748321166504e-05, "loss": 0.4956, "num_tokens": 2763545638.0, "step": 3616 }, { "epoch": 1.324997709993588, "grad_norm": 0.15149314856927382, "learning_rate": 3.5034536929677894e-05, "loss": 0.4819, "num_tokens": 2764432970.0, "step": 3617 }, { "epoch": 1.325364111019511, "grad_norm": 0.14118159405277195, "learning_rate": 3.503158991326573e-05, "loss": 0.4774, "num_tokens": 2765182649.0, "step": 3618 }, { "epoch": 1.3257305120454337, "grad_norm": 0.1534273129460677, "learning_rate": 3.502864216259461e-05, "loss": 0.5108, "num_tokens": 2766118566.0, "step": 3619 }, { "epoch": 1.3260969130713565, "grad_norm": 0.14639269197066132, "learning_rate": 3.5025693677830615e-05, "loss": 0.4706, "num_tokens": 2766879496.0, "step": 3620 }, { "epoch": 1.3264633140972795, "grad_norm": 0.16219508146745504, "learning_rate": 3.5022744459139874e-05, "loss": 0.4857, "num_tokens": 2767770174.0, "step": 3621 }, { "epoch": 1.3268297151232025, "grad_norm": 0.1356925968932526, "learning_rate": 3.501979450668856e-05, "loss": 0.4933, "num_tokens": 2768620606.0, "step": 3622 }, { "epoch": 1.3271961161491252, "grad_norm": 0.1618784975911005, "learning_rate": 3.50168438206429e-05, "loss": 0.5274, "num_tokens": 2769457551.0, "step": 3623 }, { "epoch": 1.327562517175048, "grad_norm": 0.1467164018386529, "learning_rate": 3.501389240116913e-05, "loss": 0.4946, "num_tokens": 2770163568.0, "step": 3624 }, { "epoch": 1.327928918200971, "grad_norm": 0.17100105175461672, "learning_rate": 3.501094024843356e-05, "loss": 0.5127, "num_tokens": 2770973945.0, "step": 3625 }, { "epoch": 1.3282953192268938, "grad_norm": 0.14370665382332556, "learning_rate": 3.500798736260252e-05, "loss": 0.4724, "num_tokens": 2771760977.0, "step": 3626 }, { "epoch": 1.3286617202528168, "grad_norm": 0.14467354633396043, "learning_rate": 3.500503374384239e-05, "loss": 0.4854, "num_tokens": 2772655160.0, "step": 3627 }, { "epoch": 1.3290281212787396, "grad_norm": 0.1538553454491182, "learning_rate": 3.500207939231959e-05, "loss": 0.4992, "num_tokens": 2773470175.0, "step": 3628 }, { "epoch": 1.3293945223046624, "grad_norm": 0.15335600803216093, "learning_rate": 3.499912430820059e-05, "loss": 0.535, "num_tokens": 2774237145.0, "step": 3629 }, { "epoch": 1.3297609233305854, "grad_norm": 0.15833154626215715, "learning_rate": 3.499616849165188e-05, "loss": 0.4863, "num_tokens": 2774913434.0, "step": 3630 }, { "epoch": 1.3301273243565082, "grad_norm": 0.16099878781935867, "learning_rate": 3.499321194284001e-05, "loss": 0.5142, "num_tokens": 2775767333.0, "step": 3631 }, { "epoch": 1.3304937253824312, "grad_norm": 0.1514954758580131, "learning_rate": 3.4990254661931566e-05, "loss": 0.4963, "num_tokens": 2776416838.0, "step": 3632 }, { "epoch": 1.330860126408354, "grad_norm": 0.1524294828695685, "learning_rate": 3.498729664909318e-05, "loss": 0.4621, "num_tokens": 2777161700.0, "step": 3633 }, { "epoch": 1.3312265274342767, "grad_norm": 0.15093568089837686, "learning_rate": 3.498433790449151e-05, "loss": 0.4899, "num_tokens": 2777832941.0, "step": 3634 }, { "epoch": 1.3315929284601997, "grad_norm": 0.17864547117410073, "learning_rate": 3.498137842829326e-05, "loss": 0.5127, "num_tokens": 2778430430.0, "step": 3635 }, { "epoch": 1.3319593294861225, "grad_norm": 0.20295113868798734, "learning_rate": 3.497841822066521e-05, "loss": 0.4804, "num_tokens": 2779195265.0, "step": 3636 }, { "epoch": 1.3323257305120455, "grad_norm": 0.15134529940718872, "learning_rate": 3.4975457281774116e-05, "loss": 0.5536, "num_tokens": 2779912273.0, "step": 3637 }, { "epoch": 1.3326921315379683, "grad_norm": 0.1823234772897079, "learning_rate": 3.497249561178683e-05, "loss": 0.5074, "num_tokens": 2780655480.0, "step": 3638 }, { "epoch": 1.333058532563891, "grad_norm": 0.16110294869026337, "learning_rate": 3.4969533210870216e-05, "loss": 0.493, "num_tokens": 2781452351.0, "step": 3639 }, { "epoch": 1.333424933589814, "grad_norm": 0.1560405929850572, "learning_rate": 3.49665700791912e-05, "loss": 0.4867, "num_tokens": 2782332935.0, "step": 3640 }, { "epoch": 1.3337913346157368, "grad_norm": 0.13833847972632152, "learning_rate": 3.4963606216916744e-05, "loss": 0.5046, "num_tokens": 2783092310.0, "step": 3641 }, { "epoch": 1.3341577356416598, "grad_norm": 0.1403800897857359, "learning_rate": 3.496064162421381e-05, "loss": 0.5233, "num_tokens": 2783818624.0, "step": 3642 }, { "epoch": 1.3345241366675826, "grad_norm": 0.15443753534269003, "learning_rate": 3.4957676301249484e-05, "loss": 0.4975, "num_tokens": 2784521389.0, "step": 3643 }, { "epoch": 1.3348905376935054, "grad_norm": 0.1456893021631341, "learning_rate": 3.495471024819082e-05, "loss": 0.4675, "num_tokens": 2785363976.0, "step": 3644 }, { "epoch": 1.3352569387194284, "grad_norm": 0.1352454965012065, "learning_rate": 3.495174346520493e-05, "loss": 0.5072, "num_tokens": 2786077247.0, "step": 3645 }, { "epoch": 1.3356233397453514, "grad_norm": 0.13348343683111472, "learning_rate": 3.4948775952458996e-05, "loss": 0.4731, "num_tokens": 2786848205.0, "step": 3646 }, { "epoch": 1.3359897407712742, "grad_norm": 0.1767219960961897, "learning_rate": 3.4945807710120215e-05, "loss": 0.5441, "num_tokens": 2787513515.0, "step": 3647 }, { "epoch": 1.336356141797197, "grad_norm": 0.13721445549977168, "learning_rate": 3.4942838738355826e-05, "loss": 0.4955, "num_tokens": 2788312207.0, "step": 3648 }, { "epoch": 1.33672254282312, "grad_norm": 0.1532821453139843, "learning_rate": 3.4939869037333115e-05, "loss": 0.5268, "num_tokens": 2789062920.0, "step": 3649 }, { "epoch": 1.3370889438490428, "grad_norm": 0.18417099639331896, "learning_rate": 3.4936898607219416e-05, "loss": 0.5421, "num_tokens": 2789759050.0, "step": 3650 }, { "epoch": 1.3374553448749658, "grad_norm": 0.13178314955481116, "learning_rate": 3.4933927448182095e-05, "loss": 0.5141, "num_tokens": 2790554461.0, "step": 3651 }, { "epoch": 1.3378217459008885, "grad_norm": 0.16051729229463235, "learning_rate": 3.4930955560388546e-05, "loss": 0.4874, "num_tokens": 2791207080.0, "step": 3652 }, { "epoch": 1.3381881469268113, "grad_norm": 0.15661002494458326, "learning_rate": 3.492798294400623e-05, "loss": 0.4836, "num_tokens": 2791981391.0, "step": 3653 }, { "epoch": 1.3385545479527343, "grad_norm": 0.1528637438558291, "learning_rate": 3.4925009599202653e-05, "loss": 0.519, "num_tokens": 2792677224.0, "step": 3654 }, { "epoch": 1.338920948978657, "grad_norm": 0.17461214646261328, "learning_rate": 3.4922035526145314e-05, "loss": 0.514, "num_tokens": 2793353081.0, "step": 3655 }, { "epoch": 1.33928735000458, "grad_norm": 0.14459114431568335, "learning_rate": 3.491906072500181e-05, "loss": 0.5194, "num_tokens": 2794132119.0, "step": 3656 }, { "epoch": 1.3396537510305029, "grad_norm": 0.15965550289305655, "learning_rate": 3.4916085195939756e-05, "loss": 0.4814, "num_tokens": 2795003675.0, "step": 3657 }, { "epoch": 1.3400201520564257, "grad_norm": 0.1295541551377429, "learning_rate": 3.491310893912679e-05, "loss": 0.4827, "num_tokens": 2795883459.0, "step": 3658 }, { "epoch": 1.3403865530823487, "grad_norm": 0.14003019511755382, "learning_rate": 3.491013195473062e-05, "loss": 0.512, "num_tokens": 2796683218.0, "step": 3659 }, { "epoch": 1.3407529541082714, "grad_norm": 0.16246879884649532, "learning_rate": 3.4907154242918984e-05, "loss": 0.4766, "num_tokens": 2797543608.0, "step": 3660 }, { "epoch": 1.3411193551341944, "grad_norm": 0.131582208233919, "learning_rate": 3.4904175803859646e-05, "loss": 0.5161, "num_tokens": 2798334134.0, "step": 3661 }, { "epoch": 1.3414857561601172, "grad_norm": 0.16686659674695298, "learning_rate": 3.490119663772044e-05, "loss": 0.4505, "num_tokens": 2799019107.0, "step": 3662 }, { "epoch": 1.34185215718604, "grad_norm": 0.1713612022392549, "learning_rate": 3.489821674466921e-05, "loss": 0.5413, "num_tokens": 2799631580.0, "step": 3663 }, { "epoch": 1.342218558211963, "grad_norm": 0.1518945902597345, "learning_rate": 3.489523612487388e-05, "loss": 0.4744, "num_tokens": 2800370156.0, "step": 3664 }, { "epoch": 1.3425849592378858, "grad_norm": 0.15199435976467496, "learning_rate": 3.489225477850237e-05, "loss": 0.5093, "num_tokens": 2801168269.0, "step": 3665 }, { "epoch": 1.3429513602638088, "grad_norm": 0.132227220670133, "learning_rate": 3.488927270572267e-05, "loss": 0.4887, "num_tokens": 2801952896.0, "step": 3666 }, { "epoch": 1.3433177612897316, "grad_norm": 0.13366584561657785, "learning_rate": 3.4886289906702815e-05, "loss": 0.4609, "num_tokens": 2802811482.0, "step": 3667 }, { "epoch": 1.3436841623156544, "grad_norm": 0.13730855218177887, "learning_rate": 3.488330638161085e-05, "loss": 0.4866, "num_tokens": 2803608873.0, "step": 3668 }, { "epoch": 1.3440505633415774, "grad_norm": 0.15090366000701666, "learning_rate": 3.4880322130614885e-05, "loss": 0.5079, "num_tokens": 2804354921.0, "step": 3669 }, { "epoch": 1.3444169643675004, "grad_norm": 0.13086664095857237, "learning_rate": 3.487733715388308e-05, "loss": 0.4918, "num_tokens": 2805227260.0, "step": 3670 }, { "epoch": 1.3447833653934231, "grad_norm": 0.14226747956904426, "learning_rate": 3.487435145158361e-05, "loss": 0.5297, "num_tokens": 2806071448.0, "step": 3671 }, { "epoch": 1.345149766419346, "grad_norm": 0.1527692277120353, "learning_rate": 3.48713650238847e-05, "loss": 0.5207, "num_tokens": 2806837425.0, "step": 3672 }, { "epoch": 1.345516167445269, "grad_norm": 0.14964038497796228, "learning_rate": 3.486837787095463e-05, "loss": 0.5437, "num_tokens": 2807545342.0, "step": 3673 }, { "epoch": 1.3458825684711917, "grad_norm": 0.1507903021219133, "learning_rate": 3.48653899929617e-05, "loss": 0.4933, "num_tokens": 2808342085.0, "step": 3674 }, { "epoch": 1.3462489694971147, "grad_norm": 0.15202028254714026, "learning_rate": 3.4862401390074274e-05, "loss": 0.5334, "num_tokens": 2809118201.0, "step": 3675 }, { "epoch": 1.3466153705230375, "grad_norm": 0.1591349636076323, "learning_rate": 3.485941206246072e-05, "loss": 0.5083, "num_tokens": 2809891097.0, "step": 3676 }, { "epoch": 1.3469817715489603, "grad_norm": 0.14458664818258896, "learning_rate": 3.485642201028949e-05, "loss": 0.4934, "num_tokens": 2810643705.0, "step": 3677 }, { "epoch": 1.3473481725748833, "grad_norm": 0.14757549338316725, "learning_rate": 3.485343123372906e-05, "loss": 0.5054, "num_tokens": 2811435002.0, "step": 3678 }, { "epoch": 1.347714573600806, "grad_norm": 0.15937249248676125, "learning_rate": 3.485043973294793e-05, "loss": 0.5284, "num_tokens": 2812078171.0, "step": 3679 }, { "epoch": 1.348080974626729, "grad_norm": 0.1269432848423171, "learning_rate": 3.484744750811466e-05, "loss": 0.4797, "num_tokens": 2812835866.0, "step": 3680 }, { "epoch": 1.3484473756526518, "grad_norm": 0.15262774065685336, "learning_rate": 3.484445455939785e-05, "loss": 0.4976, "num_tokens": 2813590271.0, "step": 3681 }, { "epoch": 1.3488137766785746, "grad_norm": 0.16366422699628266, "learning_rate": 3.484146088696612e-05, "loss": 0.5467, "num_tokens": 2814297465.0, "step": 3682 }, { "epoch": 1.3491801777044976, "grad_norm": 0.16916362493124049, "learning_rate": 3.483846649098817e-05, "loss": 0.4943, "num_tokens": 2815046076.0, "step": 3683 }, { "epoch": 1.3495465787304204, "grad_norm": 0.13952552309195357, "learning_rate": 3.4835471371632707e-05, "loss": 0.4698, "num_tokens": 2815858654.0, "step": 3684 }, { "epoch": 1.3499129797563434, "grad_norm": 0.13697479655602252, "learning_rate": 3.483247552906849e-05, "loss": 0.5266, "num_tokens": 2816670937.0, "step": 3685 }, { "epoch": 1.3502793807822662, "grad_norm": 0.1621596098173753, "learning_rate": 3.482947896346431e-05, "loss": 0.5022, "num_tokens": 2817478123.0, "step": 3686 }, { "epoch": 1.350645781808189, "grad_norm": 0.1521825780554357, "learning_rate": 3.4826481674989027e-05, "loss": 0.4692, "num_tokens": 2818371285.0, "step": 3687 }, { "epoch": 1.351012182834112, "grad_norm": 0.13615864116819848, "learning_rate": 3.48234836638115e-05, "loss": 0.5386, "num_tokens": 2819179969.0, "step": 3688 }, { "epoch": 1.3513785838600347, "grad_norm": 0.15929765750953126, "learning_rate": 3.4820484930100655e-05, "loss": 0.5396, "num_tokens": 2819869737.0, "step": 3689 }, { "epoch": 1.3517449848859577, "grad_norm": 0.15302102107797794, "learning_rate": 3.481748547402548e-05, "loss": 0.4657, "num_tokens": 2820707203.0, "step": 3690 }, { "epoch": 1.3521113859118805, "grad_norm": 0.14805849993079542, "learning_rate": 3.4814485295754946e-05, "loss": 0.5018, "num_tokens": 2821425837.0, "step": 3691 }, { "epoch": 1.3524777869378033, "grad_norm": 0.15676609006669434, "learning_rate": 3.481148439545811e-05, "loss": 0.484, "num_tokens": 2822187849.0, "step": 3692 }, { "epoch": 1.3528441879637263, "grad_norm": 0.13862326989189724, "learning_rate": 3.480848277330406e-05, "loss": 0.5172, "num_tokens": 2822902201.0, "step": 3693 }, { "epoch": 1.3532105889896493, "grad_norm": 0.134846099634904, "learning_rate": 3.480548042946191e-05, "loss": 0.508, "num_tokens": 2823625359.0, "step": 3694 }, { "epoch": 1.353576990015572, "grad_norm": 0.16936602065691397, "learning_rate": 3.480247736410083e-05, "loss": 0.5104, "num_tokens": 2824446982.0, "step": 3695 }, { "epoch": 1.3539433910414949, "grad_norm": 0.15642173522624872, "learning_rate": 3.479947357739003e-05, "loss": 0.5318, "num_tokens": 2825149251.0, "step": 3696 }, { "epoch": 1.3543097920674179, "grad_norm": 0.1435419146215537, "learning_rate": 3.4796469069498755e-05, "loss": 0.5386, "num_tokens": 2825913353.0, "step": 3697 }, { "epoch": 1.3546761930933406, "grad_norm": 0.160616286909887, "learning_rate": 3.47934638405963e-05, "loss": 0.5387, "num_tokens": 2826643794.0, "step": 3698 }, { "epoch": 1.3550425941192636, "grad_norm": 0.16268682894471806, "learning_rate": 3.4790457890851975e-05, "loss": 0.5359, "num_tokens": 2827358872.0, "step": 3699 }, { "epoch": 1.3554089951451864, "grad_norm": 0.15552653139693856, "learning_rate": 3.4787451220435166e-05, "loss": 0.54, "num_tokens": 2828011898.0, "step": 3700 }, { "epoch": 1.3557753961711092, "grad_norm": 0.14484235287391603, "learning_rate": 3.478444382951528e-05, "loss": 0.5054, "num_tokens": 2828852592.0, "step": 3701 }, { "epoch": 1.3561417971970322, "grad_norm": 0.15659681236283077, "learning_rate": 3.4781435718261755e-05, "loss": 0.5317, "num_tokens": 2829549494.0, "step": 3702 }, { "epoch": 1.356508198222955, "grad_norm": 0.1535837145238838, "learning_rate": 3.4778426886844096e-05, "loss": 0.5208, "num_tokens": 2830332576.0, "step": 3703 }, { "epoch": 1.356874599248878, "grad_norm": 0.15872913053359236, "learning_rate": 3.477541733543182e-05, "loss": 0.5227, "num_tokens": 2831162206.0, "step": 3704 }, { "epoch": 1.3572410002748008, "grad_norm": 0.14186489998018215, "learning_rate": 3.477240706419452e-05, "loss": 0.4984, "num_tokens": 2831989528.0, "step": 3705 }, { "epoch": 1.3576074013007235, "grad_norm": 0.14331916678324605, "learning_rate": 3.476939607330178e-05, "loss": 0.4963, "num_tokens": 2832776389.0, "step": 3706 }, { "epoch": 1.3579738023266466, "grad_norm": 0.14608256470324568, "learning_rate": 3.4766384362923276e-05, "loss": 0.5269, "num_tokens": 2833518750.0, "step": 3707 }, { "epoch": 1.3583402033525693, "grad_norm": 0.1476459648839185, "learning_rate": 3.47633719332287e-05, "loss": 0.4689, "num_tokens": 2834320933.0, "step": 3708 }, { "epoch": 1.3587066043784923, "grad_norm": 0.14530872316875038, "learning_rate": 3.476035878438778e-05, "loss": 0.5056, "num_tokens": 2835199973.0, "step": 3709 }, { "epoch": 1.3590730054044151, "grad_norm": 0.1431505278438103, "learning_rate": 3.475734491657027e-05, "loss": 0.4879, "num_tokens": 2835955854.0, "step": 3710 }, { "epoch": 1.359439406430338, "grad_norm": 0.15197387256350448, "learning_rate": 3.475433032994603e-05, "loss": 0.4984, "num_tokens": 2836727589.0, "step": 3711 }, { "epoch": 1.359805807456261, "grad_norm": 0.1728526183285273, "learning_rate": 3.475131502468487e-05, "loss": 0.5437, "num_tokens": 2837527240.0, "step": 3712 }, { "epoch": 1.3601722084821837, "grad_norm": 0.13488643283766716, "learning_rate": 3.4748299000956725e-05, "loss": 0.4952, "num_tokens": 2838372251.0, "step": 3713 }, { "epoch": 1.3605386095081067, "grad_norm": 0.16586305242992955, "learning_rate": 3.47452822589315e-05, "loss": 0.4757, "num_tokens": 2839231698.0, "step": 3714 }, { "epoch": 1.3609050105340295, "grad_norm": 0.1402270326977971, "learning_rate": 3.474226479877919e-05, "loss": 0.5103, "num_tokens": 2839953950.0, "step": 3715 }, { "epoch": 1.3612714115599522, "grad_norm": 0.14761382276519813, "learning_rate": 3.473924662066981e-05, "loss": 0.5136, "num_tokens": 2840635985.0, "step": 3716 }, { "epoch": 1.3616378125858752, "grad_norm": 0.16029175549344848, "learning_rate": 3.4736227724773406e-05, "loss": 0.4962, "num_tokens": 2841440246.0, "step": 3717 }, { "epoch": 1.3620042136117982, "grad_norm": 0.12938480996442098, "learning_rate": 3.473320811126009e-05, "loss": 0.5023, "num_tokens": 2842175569.0, "step": 3718 }, { "epoch": 1.362370614637721, "grad_norm": 0.15537412945316265, "learning_rate": 3.4730187780300005e-05, "loss": 0.5126, "num_tokens": 2842992988.0, "step": 3719 }, { "epoch": 1.3627370156636438, "grad_norm": 0.14647998355466196, "learning_rate": 3.472716673206332e-05, "loss": 0.501, "num_tokens": 2843827132.0, "step": 3720 }, { "epoch": 1.3631034166895668, "grad_norm": 0.1517011989997817, "learning_rate": 3.472414496672025e-05, "loss": 0.5128, "num_tokens": 2844593997.0, "step": 3721 }, { "epoch": 1.3634698177154896, "grad_norm": 0.17293373636145873, "learning_rate": 3.4721122484441055e-05, "loss": 0.4795, "num_tokens": 2845474464.0, "step": 3722 }, { "epoch": 1.3638362187414126, "grad_norm": 0.14607877320729645, "learning_rate": 3.4718099285396055e-05, "loss": 0.4866, "num_tokens": 2846222755.0, "step": 3723 }, { "epoch": 1.3642026197673354, "grad_norm": 0.13893006206924274, "learning_rate": 3.4715075369755564e-05, "loss": 0.477, "num_tokens": 2847031566.0, "step": 3724 }, { "epoch": 1.3645690207932581, "grad_norm": 0.16529558335577835, "learning_rate": 3.4712050737689985e-05, "loss": 0.5047, "num_tokens": 2847774750.0, "step": 3725 }, { "epoch": 1.3649354218191811, "grad_norm": 0.15949847851510796, "learning_rate": 3.470902538936973e-05, "loss": 0.5196, "num_tokens": 2848561514.0, "step": 3726 }, { "epoch": 1.365301822845104, "grad_norm": 0.18347128339975963, "learning_rate": 3.470599932496526e-05, "loss": 0.4936, "num_tokens": 2849503495.0, "step": 3727 }, { "epoch": 1.365668223871027, "grad_norm": 0.15727127827795695, "learning_rate": 3.4702972544647084e-05, "loss": 0.5147, "num_tokens": 2850245984.0, "step": 3728 }, { "epoch": 1.3660346248969497, "grad_norm": 0.21263161588737672, "learning_rate": 3.4699945048585735e-05, "loss": 0.4942, "num_tokens": 2850977083.0, "step": 3729 }, { "epoch": 1.3664010259228725, "grad_norm": 0.15229364064138837, "learning_rate": 3.469691683695179e-05, "loss": 0.521, "num_tokens": 2851785127.0, "step": 3730 }, { "epoch": 1.3667674269487955, "grad_norm": 0.16406029829238777, "learning_rate": 3.46938879099159e-05, "loss": 0.5448, "num_tokens": 2852421552.0, "step": 3731 }, { "epoch": 1.3671338279747183, "grad_norm": 0.20992368314998358, "learning_rate": 3.4690858267648705e-05, "loss": 0.5334, "num_tokens": 2853263311.0, "step": 3732 }, { "epoch": 1.3675002290006413, "grad_norm": 0.15270848492861075, "learning_rate": 3.4687827910320915e-05, "loss": 0.5106, "num_tokens": 2854059978.0, "step": 3733 }, { "epoch": 1.367866630026564, "grad_norm": 0.1643169285497761, "learning_rate": 3.4684796838103275e-05, "loss": 0.5212, "num_tokens": 2854911272.0, "step": 3734 }, { "epoch": 1.3682330310524868, "grad_norm": 0.1856310088686298, "learning_rate": 3.4681765051166565e-05, "loss": 0.5106, "num_tokens": 2855804038.0, "step": 3735 }, { "epoch": 1.3685994320784098, "grad_norm": 0.1322917983689209, "learning_rate": 3.4678732549681616e-05, "loss": 0.4902, "num_tokens": 2856618350.0, "step": 3736 }, { "epoch": 1.3689658331043326, "grad_norm": 0.15613075324548778, "learning_rate": 3.4675699333819284e-05, "loss": 0.4855, "num_tokens": 2857377284.0, "step": 3737 }, { "epoch": 1.3693322341302556, "grad_norm": 0.16189172162104065, "learning_rate": 3.4672665403750494e-05, "loss": 0.483, "num_tokens": 2858180853.0, "step": 3738 }, { "epoch": 1.3696986351561784, "grad_norm": 0.14410963062303056, "learning_rate": 3.4669630759646164e-05, "loss": 0.4917, "num_tokens": 2858894922.0, "step": 3739 }, { "epoch": 1.3700650361821012, "grad_norm": 0.15335309753715307, "learning_rate": 3.46665954016773e-05, "loss": 0.5162, "num_tokens": 2859681698.0, "step": 3740 }, { "epoch": 1.3704314372080242, "grad_norm": 0.15171311060350287, "learning_rate": 3.466355933001492e-05, "loss": 0.4791, "num_tokens": 2860404699.0, "step": 3741 }, { "epoch": 1.3707978382339472, "grad_norm": 0.1402876158937031, "learning_rate": 3.466052254483009e-05, "loss": 0.5259, "num_tokens": 2861194341.0, "step": 3742 }, { "epoch": 1.37116423925987, "grad_norm": 0.14151941672878401, "learning_rate": 3.465748504629392e-05, "loss": 0.4842, "num_tokens": 2861915482.0, "step": 3743 }, { "epoch": 1.3715306402857927, "grad_norm": 0.1775531626677163, "learning_rate": 3.465444683457755e-05, "loss": 0.5504, "num_tokens": 2862706984.0, "step": 3744 }, { "epoch": 1.3718970413117157, "grad_norm": 0.16082520229807334, "learning_rate": 3.465140790985217e-05, "loss": 0.4985, "num_tokens": 2863471972.0, "step": 3745 }, { "epoch": 1.3722634423376385, "grad_norm": 0.15133848886543222, "learning_rate": 3.464836827228901e-05, "loss": 0.4956, "num_tokens": 2864124035.0, "step": 3746 }, { "epoch": 1.3726298433635615, "grad_norm": 0.16487455952400945, "learning_rate": 3.464532792205933e-05, "loss": 0.5346, "num_tokens": 2864790549.0, "step": 3747 }, { "epoch": 1.3729962443894843, "grad_norm": 0.13683782039741835, "learning_rate": 3.464228685933445e-05, "loss": 0.481, "num_tokens": 2865535763.0, "step": 3748 }, { "epoch": 1.373362645415407, "grad_norm": 0.16720531140950254, "learning_rate": 3.4639245084285696e-05, "loss": 0.476, "num_tokens": 2866132322.0, "step": 3749 }, { "epoch": 1.37372904644133, "grad_norm": 0.15438949411970943, "learning_rate": 3.463620259708448e-05, "loss": 0.5233, "num_tokens": 2866961083.0, "step": 3750 }, { "epoch": 1.3740954474672529, "grad_norm": 0.1650393505680212, "learning_rate": 3.463315939790221e-05, "loss": 0.5256, "num_tokens": 2867615961.0, "step": 3751 }, { "epoch": 1.3744618484931759, "grad_norm": 0.1459216675740849, "learning_rate": 3.4630115486910364e-05, "loss": 0.4776, "num_tokens": 2868378208.0, "step": 3752 }, { "epoch": 1.3748282495190987, "grad_norm": 0.14277491174497584, "learning_rate": 3.4627070864280446e-05, "loss": 0.486, "num_tokens": 2869236982.0, "step": 3753 }, { "epoch": 1.3751946505450214, "grad_norm": 0.14415907059437572, "learning_rate": 3.462402553018401e-05, "loss": 0.4972, "num_tokens": 2869968422.0, "step": 3754 }, { "epoch": 1.3755610515709444, "grad_norm": 0.18770242566779857, "learning_rate": 3.4620979484792635e-05, "loss": 0.5319, "num_tokens": 2870670202.0, "step": 3755 }, { "epoch": 1.3759274525968672, "grad_norm": 0.15204225392384657, "learning_rate": 3.4617932728277946e-05, "loss": 0.5067, "num_tokens": 2871454555.0, "step": 3756 }, { "epoch": 1.3762938536227902, "grad_norm": 0.15093965225633407, "learning_rate": 3.461488526081163e-05, "loss": 0.5209, "num_tokens": 2872234960.0, "step": 3757 }, { "epoch": 1.376660254648713, "grad_norm": 0.16820833979550662, "learning_rate": 3.4611837082565384e-05, "loss": 0.5105, "num_tokens": 2872928430.0, "step": 3758 }, { "epoch": 1.3770266556746358, "grad_norm": 0.15098684344178484, "learning_rate": 3.460878819371094e-05, "loss": 0.5048, "num_tokens": 2873699985.0, "step": 3759 }, { "epoch": 1.3773930567005588, "grad_norm": 0.14117917737398664, "learning_rate": 3.460573859442011e-05, "loss": 0.4784, "num_tokens": 2874500241.0, "step": 3760 }, { "epoch": 1.3777594577264816, "grad_norm": 0.15794156878329543, "learning_rate": 3.460268828486472e-05, "loss": 0.5246, "num_tokens": 2875399516.0, "step": 3761 }, { "epoch": 1.3781258587524046, "grad_norm": 0.15894374214524887, "learning_rate": 3.459963726521663e-05, "loss": 0.493, "num_tokens": 2876183992.0, "step": 3762 }, { "epoch": 1.3784922597783273, "grad_norm": 0.1636459066673849, "learning_rate": 3.459658553564775e-05, "loss": 0.4826, "num_tokens": 2876913717.0, "step": 3763 }, { "epoch": 1.3788586608042501, "grad_norm": 0.16157947379290502, "learning_rate": 3.4593533096330025e-05, "loss": 0.538, "num_tokens": 2877615861.0, "step": 3764 }, { "epoch": 1.3792250618301731, "grad_norm": 0.16947695153657694, "learning_rate": 3.4590479947435445e-05, "loss": 0.4945, "num_tokens": 2878409102.0, "step": 3765 }, { "epoch": 1.3795914628560961, "grad_norm": 0.18004808631634414, "learning_rate": 3.4587426089136047e-05, "loss": 0.4765, "num_tokens": 2879227515.0, "step": 3766 }, { "epoch": 1.379957863882019, "grad_norm": 0.1597758394510207, "learning_rate": 3.4584371521603886e-05, "loss": 0.5186, "num_tokens": 2879883347.0, "step": 3767 }, { "epoch": 1.3803242649079417, "grad_norm": 0.17034833052356077, "learning_rate": 3.458131624501108e-05, "loss": 0.5121, "num_tokens": 2880537726.0, "step": 3768 }, { "epoch": 1.3806906659338647, "grad_norm": 0.1910652747837545, "learning_rate": 3.457826025952978e-05, "loss": 0.4817, "num_tokens": 2881479392.0, "step": 3769 }, { "epoch": 1.3810570669597875, "grad_norm": 0.1648461920642381, "learning_rate": 3.457520356533216e-05, "loss": 0.4911, "num_tokens": 2882154298.0, "step": 3770 }, { "epoch": 1.3814234679857105, "grad_norm": 0.18435071501118025, "learning_rate": 3.457214616259046e-05, "loss": 0.557, "num_tokens": 2882804861.0, "step": 3771 }, { "epoch": 1.3817898690116333, "grad_norm": 0.16931405593874593, "learning_rate": 3.4569088051476945e-05, "loss": 0.4817, "num_tokens": 2883558993.0, "step": 3772 }, { "epoch": 1.382156270037556, "grad_norm": 0.14863424618385057, "learning_rate": 3.4566029232163925e-05, "loss": 0.5243, "num_tokens": 2884283168.0, "step": 3773 }, { "epoch": 1.382522671063479, "grad_norm": 0.1485109222586298, "learning_rate": 3.456296970482373e-05, "loss": 0.5078, "num_tokens": 2884962345.0, "step": 3774 }, { "epoch": 1.3828890720894018, "grad_norm": 0.18101754808313691, "learning_rate": 3.455990946962878e-05, "loss": 0.5762, "num_tokens": 2885603375.0, "step": 3775 }, { "epoch": 1.3832554731153248, "grad_norm": 0.1649503881425592, "learning_rate": 3.455684852675147e-05, "loss": 0.5156, "num_tokens": 2886364984.0, "step": 3776 }, { "epoch": 1.3836218741412476, "grad_norm": 0.13651004245666726, "learning_rate": 3.455378687636429e-05, "loss": 0.5081, "num_tokens": 2887131105.0, "step": 3777 }, { "epoch": 1.3839882751671704, "grad_norm": 0.16635264008388684, "learning_rate": 3.455072451863974e-05, "loss": 0.5269, "num_tokens": 2887955067.0, "step": 3778 }, { "epoch": 1.3843546761930934, "grad_norm": 0.16078458035113077, "learning_rate": 3.454766145375037e-05, "loss": 0.5212, "num_tokens": 2888652895.0, "step": 3779 }, { "epoch": 1.3847210772190162, "grad_norm": 0.1788484552862155, "learning_rate": 3.454459768186876e-05, "loss": 0.5212, "num_tokens": 2889466072.0, "step": 3780 }, { "epoch": 1.3850874782449392, "grad_norm": 0.13668924449659542, "learning_rate": 3.454153320316754e-05, "loss": 0.5175, "num_tokens": 2890097920.0, "step": 3781 }, { "epoch": 1.385453879270862, "grad_norm": 0.17586666972495033, "learning_rate": 3.453846801781938e-05, "loss": 0.4968, "num_tokens": 2890964396.0, "step": 3782 }, { "epoch": 1.3858202802967847, "grad_norm": 0.14765006277933987, "learning_rate": 3.453540212599698e-05, "loss": 0.5284, "num_tokens": 2891749791.0, "step": 3783 }, { "epoch": 1.3861866813227077, "grad_norm": 0.16685913402684555, "learning_rate": 3.45323355278731e-05, "loss": 0.5132, "num_tokens": 2892456119.0, "step": 3784 }, { "epoch": 1.3865530823486305, "grad_norm": 0.15424327876365282, "learning_rate": 3.45292682236205e-05, "loss": 0.5359, "num_tokens": 2893158587.0, "step": 3785 }, { "epoch": 1.3869194833745535, "grad_norm": 0.15034460125980859, "learning_rate": 3.4526200213412026e-05, "loss": 0.504, "num_tokens": 2893987529.0, "step": 3786 }, { "epoch": 1.3872858844004763, "grad_norm": 0.1457278183876305, "learning_rate": 3.4523131497420545e-05, "loss": 0.5297, "num_tokens": 2894767923.0, "step": 3787 }, { "epoch": 1.387652285426399, "grad_norm": 0.16689880459250508, "learning_rate": 3.452006207581896e-05, "loss": 0.4703, "num_tokens": 2895576225.0, "step": 3788 }, { "epoch": 1.388018686452322, "grad_norm": 0.14126622520844007, "learning_rate": 3.45169919487802e-05, "loss": 0.5067, "num_tokens": 2896327354.0, "step": 3789 }, { "epoch": 1.388385087478245, "grad_norm": 0.19751861492285153, "learning_rate": 3.4513921116477275e-05, "loss": 0.524, "num_tokens": 2897072957.0, "step": 3790 }, { "epoch": 1.3887514885041679, "grad_norm": 0.17135263185182356, "learning_rate": 3.45108495790832e-05, "loss": 0.4902, "num_tokens": 2897775706.0, "step": 3791 }, { "epoch": 1.3891178895300906, "grad_norm": 0.1644454993742216, "learning_rate": 3.450777733677103e-05, "loss": 0.5303, "num_tokens": 2898610296.0, "step": 3792 }, { "epoch": 1.3894842905560136, "grad_norm": 0.1658288720879303, "learning_rate": 3.450470438971387e-05, "loss": 0.5059, "num_tokens": 2899368377.0, "step": 3793 }, { "epoch": 1.3898506915819364, "grad_norm": 0.16465658569781794, "learning_rate": 3.4501630738084877e-05, "loss": 0.4744, "num_tokens": 2900115273.0, "step": 3794 }, { "epoch": 1.3902170926078594, "grad_norm": 0.16582280906868302, "learning_rate": 3.449855638205723e-05, "loss": 0.5101, "num_tokens": 2900939944.0, "step": 3795 }, { "epoch": 1.3905834936337822, "grad_norm": 0.16723928696698243, "learning_rate": 3.4495481321804144e-05, "loss": 0.5039, "num_tokens": 2901768061.0, "step": 3796 }, { "epoch": 1.390949894659705, "grad_norm": 0.16176740769910475, "learning_rate": 3.4492405557498894e-05, "loss": 0.524, "num_tokens": 2902554997.0, "step": 3797 }, { "epoch": 1.391316295685628, "grad_norm": 0.15681544124778868, "learning_rate": 3.4489329089314774e-05, "loss": 0.509, "num_tokens": 2903306390.0, "step": 3798 }, { "epoch": 1.3916826967115508, "grad_norm": 0.1481013294232558, "learning_rate": 3.448625191742512e-05, "loss": 0.5084, "num_tokens": 2904044887.0, "step": 3799 }, { "epoch": 1.3920490977374738, "grad_norm": 0.17753648780618225, "learning_rate": 3.448317404200334e-05, "loss": 0.5439, "num_tokens": 2904708618.0, "step": 3800 }, { "epoch": 1.3924154987633965, "grad_norm": 0.14981120830679537, "learning_rate": 3.448009546322282e-05, "loss": 0.5234, "num_tokens": 2905562510.0, "step": 3801 }, { "epoch": 1.3927818997893193, "grad_norm": 0.1737479037898839, "learning_rate": 3.447701618125705e-05, "loss": 0.5226, "num_tokens": 2906201836.0, "step": 3802 }, { "epoch": 1.3931483008152423, "grad_norm": 0.17172960429205145, "learning_rate": 3.447393619627951e-05, "loss": 0.5256, "num_tokens": 2906936848.0, "step": 3803 }, { "epoch": 1.393514701841165, "grad_norm": 0.18849803898880302, "learning_rate": 3.4470855508463754e-05, "loss": 0.4963, "num_tokens": 2907836245.0, "step": 3804 }, { "epoch": 1.393881102867088, "grad_norm": 0.14915673416433822, "learning_rate": 3.446777411798335e-05, "loss": 0.481, "num_tokens": 2908570156.0, "step": 3805 }, { "epoch": 1.3942475038930109, "grad_norm": 0.18131325652001537, "learning_rate": 3.446469202501194e-05, "loss": 0.4848, "num_tokens": 2909292915.0, "step": 3806 }, { "epoch": 1.3946139049189337, "grad_norm": 0.17179076162225637, "learning_rate": 3.4461609229723155e-05, "loss": 0.4825, "num_tokens": 2910123831.0, "step": 3807 }, { "epoch": 1.3949803059448567, "grad_norm": 0.1616751911397021, "learning_rate": 3.445852573229071e-05, "loss": 0.4899, "num_tokens": 2910934120.0, "step": 3808 }, { "epoch": 1.3953467069707794, "grad_norm": 0.16097558463285816, "learning_rate": 3.445544153288835e-05, "loss": 0.5572, "num_tokens": 2911580072.0, "step": 3809 }, { "epoch": 1.3957131079967025, "grad_norm": 0.18578529734525456, "learning_rate": 3.445235663168983e-05, "loss": 0.534, "num_tokens": 2912320429.0, "step": 3810 }, { "epoch": 1.3960795090226252, "grad_norm": 0.1739566144206175, "learning_rate": 3.444927102886899e-05, "loss": 0.5293, "num_tokens": 2913114789.0, "step": 3811 }, { "epoch": 1.396445910048548, "grad_norm": 0.15536744490920346, "learning_rate": 3.444618472459967e-05, "loss": 0.5144, "num_tokens": 2913845123.0, "step": 3812 }, { "epoch": 1.396812311074471, "grad_norm": 0.21480641479931753, "learning_rate": 3.4443097719055784e-05, "loss": 0.5461, "num_tokens": 2914509916.0, "step": 3813 }, { "epoch": 1.397178712100394, "grad_norm": 0.14569288288752125, "learning_rate": 3.444001001241125e-05, "loss": 0.4982, "num_tokens": 2915401136.0, "step": 3814 }, { "epoch": 1.3975451131263168, "grad_norm": 0.12796177855464175, "learning_rate": 3.443692160484005e-05, "loss": 0.5047, "num_tokens": 2916254011.0, "step": 3815 }, { "epoch": 1.3979115141522396, "grad_norm": 0.1692155667503309, "learning_rate": 3.44338324965162e-05, "loss": 0.5013, "num_tokens": 2917049366.0, "step": 3816 }, { "epoch": 1.3982779151781626, "grad_norm": 0.15704961988291694, "learning_rate": 3.4430742687613754e-05, "loss": 0.5162, "num_tokens": 2917820792.0, "step": 3817 }, { "epoch": 1.3986443162040854, "grad_norm": 0.15950779934582346, "learning_rate": 3.442765217830681e-05, "loss": 0.5261, "num_tokens": 2918561293.0, "step": 3818 }, { "epoch": 1.3990107172300084, "grad_norm": 0.15102199402060304, "learning_rate": 3.44245609687695e-05, "loss": 0.4896, "num_tokens": 2919304829.0, "step": 3819 }, { "epoch": 1.3993771182559311, "grad_norm": 0.1487377910557712, "learning_rate": 3.442146905917598e-05, "loss": 0.5212, "num_tokens": 2920168537.0, "step": 3820 }, { "epoch": 1.399743519281854, "grad_norm": 0.14086698757831254, "learning_rate": 3.441837644970049e-05, "loss": 0.5015, "num_tokens": 2921084750.0, "step": 3821 }, { "epoch": 1.400109920307777, "grad_norm": 0.14914228331448579, "learning_rate": 3.4415283140517266e-05, "loss": 0.5145, "num_tokens": 2921778278.0, "step": 3822 }, { "epoch": 1.4004763213336997, "grad_norm": 0.16309710256141238, "learning_rate": 3.4412189131800595e-05, "loss": 0.4854, "num_tokens": 2922501586.0, "step": 3823 }, { "epoch": 1.4008427223596227, "grad_norm": 0.15192488242830574, "learning_rate": 3.4409094423724816e-05, "loss": 0.5088, "num_tokens": 2923288715.0, "step": 3824 }, { "epoch": 1.4012091233855455, "grad_norm": 0.15200197702311422, "learning_rate": 3.44059990164643e-05, "loss": 0.5123, "num_tokens": 2924112270.0, "step": 3825 }, { "epoch": 1.4015755244114683, "grad_norm": 0.13677345419760215, "learning_rate": 3.440290291019346e-05, "loss": 0.474, "num_tokens": 2924786203.0, "step": 3826 }, { "epoch": 1.4019419254373913, "grad_norm": 0.15273900150932132, "learning_rate": 3.439980610508672e-05, "loss": 0.5002, "num_tokens": 2925657132.0, "step": 3827 }, { "epoch": 1.402308326463314, "grad_norm": 0.15614211657901833, "learning_rate": 3.4396708601318594e-05, "loss": 0.4791, "num_tokens": 2926505325.0, "step": 3828 }, { "epoch": 1.402674727489237, "grad_norm": 0.15399585640700275, "learning_rate": 3.4393610399063604e-05, "loss": 0.481, "num_tokens": 2927172083.0, "step": 3829 }, { "epoch": 1.4030411285151598, "grad_norm": 0.16460909531794496, "learning_rate": 3.439051149849632e-05, "loss": 0.4824, "num_tokens": 2928015542.0, "step": 3830 }, { "epoch": 1.4034075295410826, "grad_norm": 0.13605425990061948, "learning_rate": 3.438741189979133e-05, "loss": 0.487, "num_tokens": 2928823622.0, "step": 3831 }, { "epoch": 1.4037739305670056, "grad_norm": 0.1580490615745539, "learning_rate": 3.43843116031233e-05, "loss": 0.4768, "num_tokens": 2929624743.0, "step": 3832 }, { "epoch": 1.4041403315929284, "grad_norm": 0.14821116504417675, "learning_rate": 3.438121060866689e-05, "loss": 0.5318, "num_tokens": 2930381379.0, "step": 3833 }, { "epoch": 1.4045067326188514, "grad_norm": 0.15337449913579837, "learning_rate": 3.437810891659686e-05, "loss": 0.5004, "num_tokens": 2930992763.0, "step": 3834 }, { "epoch": 1.4048731336447742, "grad_norm": 0.16372710420512848, "learning_rate": 3.437500652708794e-05, "loss": 0.4983, "num_tokens": 2931765169.0, "step": 3835 }, { "epoch": 1.405239534670697, "grad_norm": 0.17086558982219868, "learning_rate": 3.437190344031495e-05, "loss": 0.5138, "num_tokens": 2932600440.0, "step": 3836 }, { "epoch": 1.40560593569662, "grad_norm": 0.14317450658084255, "learning_rate": 3.4368799656452736e-05, "loss": 0.5083, "num_tokens": 2933410736.0, "step": 3837 }, { "epoch": 1.405972336722543, "grad_norm": 0.17310837532810155, "learning_rate": 3.436569517567617e-05, "loss": 0.5016, "num_tokens": 2934120192.0, "step": 3838 }, { "epoch": 1.4063387377484657, "grad_norm": 0.14470972883275943, "learning_rate": 3.436258999816017e-05, "loss": 0.4822, "num_tokens": 2934904362.0, "step": 3839 }, { "epoch": 1.4067051387743885, "grad_norm": 0.16679950322231776, "learning_rate": 3.43594841240797e-05, "loss": 0.4801, "num_tokens": 2935672874.0, "step": 3840 }, { "epoch": 1.4070715398003115, "grad_norm": 0.14256133806138266, "learning_rate": 3.4356377553609756e-05, "loss": 0.4943, "num_tokens": 2936418051.0, "step": 3841 }, { "epoch": 1.4074379408262343, "grad_norm": 0.15520128194084665, "learning_rate": 3.435327028692539e-05, "loss": 0.5022, "num_tokens": 2937154087.0, "step": 3842 }, { "epoch": 1.4078043418521573, "grad_norm": 0.1623587820795941, "learning_rate": 3.435016232420166e-05, "loss": 0.5108, "num_tokens": 2937909685.0, "step": 3843 }, { "epoch": 1.40817074287808, "grad_norm": 0.1498038327853316, "learning_rate": 3.4347053665613696e-05, "loss": 0.5425, "num_tokens": 2938726387.0, "step": 3844 }, { "epoch": 1.4085371439040029, "grad_norm": 0.16811552369432578, "learning_rate": 3.434394431133665e-05, "loss": 0.4986, "num_tokens": 2939502569.0, "step": 3845 }, { "epoch": 1.4089035449299259, "grad_norm": 0.13762273295822344, "learning_rate": 3.434083426154572e-05, "loss": 0.5245, "num_tokens": 2940377461.0, "step": 3846 }, { "epoch": 1.4092699459558486, "grad_norm": 0.17411657168827765, "learning_rate": 3.433772351641613e-05, "loss": 0.568, "num_tokens": 2941149442.0, "step": 3847 }, { "epoch": 1.4096363469817716, "grad_norm": 0.1521458505754665, "learning_rate": 3.433461207612316e-05, "loss": 0.4642, "num_tokens": 2941987384.0, "step": 3848 }, { "epoch": 1.4100027480076944, "grad_norm": 0.13525874054045384, "learning_rate": 3.433149994084213e-05, "loss": 0.4809, "num_tokens": 2942768409.0, "step": 3849 }, { "epoch": 1.4103691490336172, "grad_norm": 0.15596396554615002, "learning_rate": 3.4328387110748385e-05, "loss": 0.5114, "num_tokens": 2943454029.0, "step": 3850 }, { "epoch": 1.4107355500595402, "grad_norm": 0.16093550536803228, "learning_rate": 3.4325273586017306e-05, "loss": 0.497, "num_tokens": 2944329098.0, "step": 3851 }, { "epoch": 1.411101951085463, "grad_norm": 0.15377937339620576, "learning_rate": 3.4322159366824344e-05, "loss": 0.4951, "num_tokens": 2945022165.0, "step": 3852 }, { "epoch": 1.411468352111386, "grad_norm": 0.15821662819120197, "learning_rate": 3.431904445334496e-05, "loss": 0.5392, "num_tokens": 2945599371.0, "step": 3853 }, { "epoch": 1.4118347531373088, "grad_norm": 0.17437311729210278, "learning_rate": 3.431592884575465e-05, "loss": 0.4943, "num_tokens": 2946369972.0, "step": 3854 }, { "epoch": 1.4122011541632316, "grad_norm": 0.1464942542851046, "learning_rate": 3.4312812544228976e-05, "loss": 0.4736, "num_tokens": 2947205635.0, "step": 3855 }, { "epoch": 1.4125675551891546, "grad_norm": 0.1521894602767993, "learning_rate": 3.4309695548943514e-05, "loss": 0.5323, "num_tokens": 2947917787.0, "step": 3856 }, { "epoch": 1.4129339562150773, "grad_norm": 0.17043251258505016, "learning_rate": 3.430657786007391e-05, "loss": 0.475, "num_tokens": 2948627067.0, "step": 3857 }, { "epoch": 1.4133003572410003, "grad_norm": 0.1750427324833459, "learning_rate": 3.43034594777958e-05, "loss": 0.4951, "num_tokens": 2949328039.0, "step": 3858 }, { "epoch": 1.4136667582669231, "grad_norm": 0.13803494546755865, "learning_rate": 3.430034040228491e-05, "loss": 0.4851, "num_tokens": 2950148357.0, "step": 3859 }, { "epoch": 1.414033159292846, "grad_norm": 0.1517196693865047, "learning_rate": 3.429722063371697e-05, "loss": 0.5122, "num_tokens": 2950938470.0, "step": 3860 }, { "epoch": 1.414399560318769, "grad_norm": 0.1687807124994209, "learning_rate": 3.429410017226777e-05, "loss": 0.5289, "num_tokens": 2951634657.0, "step": 3861 }, { "epoch": 1.414765961344692, "grad_norm": 0.1528445488527354, "learning_rate": 3.429097901811313e-05, "loss": 0.5001, "num_tokens": 2952299585.0, "step": 3862 }, { "epoch": 1.4151323623706147, "grad_norm": 0.15641879116377191, "learning_rate": 3.4287857171428914e-05, "loss": 0.5071, "num_tokens": 2953062775.0, "step": 3863 }, { "epoch": 1.4154987633965375, "grad_norm": 0.15463807951057976, "learning_rate": 3.4284734632391004e-05, "loss": 0.5118, "num_tokens": 2953885349.0, "step": 3864 }, { "epoch": 1.4158651644224605, "grad_norm": 0.1594500028051099, "learning_rate": 3.4281611401175353e-05, "loss": 0.5415, "num_tokens": 2954494575.0, "step": 3865 }, { "epoch": 1.4162315654483832, "grad_norm": 0.1423034682947892, "learning_rate": 3.4278487477957936e-05, "loss": 0.5011, "num_tokens": 2955348953.0, "step": 3866 }, { "epoch": 1.4165979664743062, "grad_norm": 0.15464480391240693, "learning_rate": 3.427536286291476e-05, "loss": 0.495, "num_tokens": 2956262311.0, "step": 3867 }, { "epoch": 1.416964367500229, "grad_norm": 0.13683706016369457, "learning_rate": 3.427223755622189e-05, "loss": 0.4858, "num_tokens": 2957113568.0, "step": 3868 }, { "epoch": 1.4173307685261518, "grad_norm": 0.15420551462685275, "learning_rate": 3.426911155805542e-05, "loss": 0.5071, "num_tokens": 2957921731.0, "step": 3869 }, { "epoch": 1.4176971695520748, "grad_norm": 0.1645306717865279, "learning_rate": 3.4265984868591484e-05, "loss": 0.511, "num_tokens": 2958590410.0, "step": 3870 }, { "epoch": 1.4180635705779976, "grad_norm": 0.13987520006213164, "learning_rate": 3.426285748800624e-05, "loss": 0.5294, "num_tokens": 2959221716.0, "step": 3871 }, { "epoch": 1.4184299716039206, "grad_norm": 0.1692728482193319, "learning_rate": 3.425972941647592e-05, "loss": 0.5195, "num_tokens": 2959882289.0, "step": 3872 }, { "epoch": 1.4187963726298434, "grad_norm": 0.2103331569214023, "learning_rate": 3.425660065417676e-05, "loss": 0.5049, "num_tokens": 2960649342.0, "step": 3873 }, { "epoch": 1.4191627736557662, "grad_norm": 0.14376350658850695, "learning_rate": 3.425347120128504e-05, "loss": 0.5153, "num_tokens": 2961415369.0, "step": 3874 }, { "epoch": 1.4195291746816892, "grad_norm": 0.1689823776863002, "learning_rate": 3.4250341057977107e-05, "loss": 0.497, "num_tokens": 2962163552.0, "step": 3875 }, { "epoch": 1.419895575707612, "grad_norm": 0.16218043185225303, "learning_rate": 3.424721022442932e-05, "loss": 0.5021, "num_tokens": 2962864413.0, "step": 3876 }, { "epoch": 1.420261976733535, "grad_norm": 0.14988646638269, "learning_rate": 3.4244078700818085e-05, "loss": 0.4889, "num_tokens": 2963572896.0, "step": 3877 }, { "epoch": 1.4206283777594577, "grad_norm": 0.16838852430241472, "learning_rate": 3.424094648731984e-05, "loss": 0.467, "num_tokens": 2964320436.0, "step": 3878 }, { "epoch": 1.4209947787853805, "grad_norm": 0.16190605858307605, "learning_rate": 3.4237813584111076e-05, "loss": 0.5132, "num_tokens": 2965043654.0, "step": 3879 }, { "epoch": 1.4213611798113035, "grad_norm": 0.1575588752557236, "learning_rate": 3.423467999136831e-05, "loss": 0.5433, "num_tokens": 2965806735.0, "step": 3880 }, { "epoch": 1.4217275808372263, "grad_norm": 0.14399185338798381, "learning_rate": 3.423154570926811e-05, "loss": 0.4899, "num_tokens": 2966600043.0, "step": 3881 }, { "epoch": 1.4220939818631493, "grad_norm": 0.17943043347890655, "learning_rate": 3.4228410737987064e-05, "loss": 0.4975, "num_tokens": 2967285160.0, "step": 3882 }, { "epoch": 1.422460382889072, "grad_norm": 0.16962978079041113, "learning_rate": 3.4225275077701826e-05, "loss": 0.503, "num_tokens": 2968034627.0, "step": 3883 }, { "epoch": 1.4228267839149948, "grad_norm": 0.17643627778851723, "learning_rate": 3.422213872858905e-05, "loss": 0.5404, "num_tokens": 2968790605.0, "step": 3884 }, { "epoch": 1.4231931849409178, "grad_norm": 0.15464325911603624, "learning_rate": 3.421900169082548e-05, "loss": 0.5178, "num_tokens": 2969615857.0, "step": 3885 }, { "epoch": 1.4235595859668408, "grad_norm": 0.14943426570155113, "learning_rate": 3.421586396458785e-05, "loss": 0.5427, "num_tokens": 2970426140.0, "step": 3886 }, { "epoch": 1.4239259869927636, "grad_norm": 0.1688035075603487, "learning_rate": 3.421272555005297e-05, "loss": 0.5212, "num_tokens": 2971242356.0, "step": 3887 }, { "epoch": 1.4242923880186864, "grad_norm": 0.15420650765914792, "learning_rate": 3.420958644739765e-05, "loss": 0.5068, "num_tokens": 2972002404.0, "step": 3888 }, { "epoch": 1.4246587890446092, "grad_norm": 0.14790200530929304, "learning_rate": 3.420644665679879e-05, "loss": 0.512, "num_tokens": 2972819805.0, "step": 3889 }, { "epoch": 1.4250251900705322, "grad_norm": 0.14622657088409882, "learning_rate": 3.420330617843327e-05, "loss": 0.4943, "num_tokens": 2973583153.0, "step": 3890 }, { "epoch": 1.4253915910964552, "grad_norm": 0.14051410416907215, "learning_rate": 3.420016501247807e-05, "loss": 0.474, "num_tokens": 2974359619.0, "step": 3891 }, { "epoch": 1.425757992122378, "grad_norm": 0.14298071476018645, "learning_rate": 3.419702315911015e-05, "loss": 0.5055, "num_tokens": 2975201386.0, "step": 3892 }, { "epoch": 1.4261243931483008, "grad_norm": 0.15818223152224156, "learning_rate": 3.419388061850656e-05, "loss": 0.5579, "num_tokens": 2975970311.0, "step": 3893 }, { "epoch": 1.4264907941742238, "grad_norm": 0.165595578295684, "learning_rate": 3.4190737390844345e-05, "loss": 0.5515, "num_tokens": 2976757471.0, "step": 3894 }, { "epoch": 1.4268571952001465, "grad_norm": 0.1569480030620412, "learning_rate": 3.418759347630061e-05, "loss": 0.5236, "num_tokens": 2977493009.0, "step": 3895 }, { "epoch": 1.4272235962260695, "grad_norm": 0.17143601803546896, "learning_rate": 3.4184448875052515e-05, "loss": 0.5254, "num_tokens": 2978263000.0, "step": 3896 }, { "epoch": 1.4275899972519923, "grad_norm": 0.13712968649361024, "learning_rate": 3.418130358727723e-05, "loss": 0.5072, "num_tokens": 2979060325.0, "step": 3897 }, { "epoch": 1.427956398277915, "grad_norm": 0.16402092938480708, "learning_rate": 3.4178157613151965e-05, "loss": 0.4995, "num_tokens": 2979758600.0, "step": 3898 }, { "epoch": 1.428322799303838, "grad_norm": 0.17013960584722293, "learning_rate": 3.4175010952854e-05, "loss": 0.5062, "num_tokens": 2980470927.0, "step": 3899 }, { "epoch": 1.4286892003297609, "grad_norm": 0.12422890654720198, "learning_rate": 3.417186360656061e-05, "loss": 0.4889, "num_tokens": 2981261652.0, "step": 3900 }, { "epoch": 1.4290556013556839, "grad_norm": 0.17064797153356104, "learning_rate": 3.4168715574449144e-05, "loss": 0.529, "num_tokens": 2982024469.0, "step": 3901 }, { "epoch": 1.4294220023816067, "grad_norm": 0.13662007131935838, "learning_rate": 3.4165566856696974e-05, "loss": 0.4904, "num_tokens": 2982828470.0, "step": 3902 }, { "epoch": 1.4297884034075294, "grad_norm": 0.15394594234717082, "learning_rate": 3.416241745348152e-05, "loss": 0.4757, "num_tokens": 2983618373.0, "step": 3903 }, { "epoch": 1.4301548044334524, "grad_norm": 0.1439010343353492, "learning_rate": 3.415926736498022e-05, "loss": 0.4834, "num_tokens": 2984399186.0, "step": 3904 }, { "epoch": 1.4305212054593752, "grad_norm": 0.13400129277390835, "learning_rate": 3.4156116591370574e-05, "loss": 0.4866, "num_tokens": 2985103799.0, "step": 3905 }, { "epoch": 1.4308876064852982, "grad_norm": 0.1629687667480135, "learning_rate": 3.41529651328301e-05, "loss": 0.5123, "num_tokens": 2985718940.0, "step": 3906 }, { "epoch": 1.431254007511221, "grad_norm": 0.1490250196697062, "learning_rate": 3.4149812989536384e-05, "loss": 0.4899, "num_tokens": 2986384484.0, "step": 3907 }, { "epoch": 1.4316204085371438, "grad_norm": 0.14654395726491468, "learning_rate": 3.414666016166701e-05, "loss": 0.4889, "num_tokens": 2987028161.0, "step": 3908 }, { "epoch": 1.4319868095630668, "grad_norm": 0.15205620629782288, "learning_rate": 3.414350664939965e-05, "loss": 0.4955, "num_tokens": 2987870065.0, "step": 3909 }, { "epoch": 1.4323532105889896, "grad_norm": 0.14049709269992489, "learning_rate": 3.414035245291196e-05, "loss": 0.5163, "num_tokens": 2988620658.0, "step": 3910 }, { "epoch": 1.4327196116149126, "grad_norm": 0.13954635723717632, "learning_rate": 3.413719757238167e-05, "loss": 0.4776, "num_tokens": 2989398568.0, "step": 3911 }, { "epoch": 1.4330860126408353, "grad_norm": 0.1492540547494762, "learning_rate": 3.413404200798654e-05, "loss": 0.5212, "num_tokens": 2990263640.0, "step": 3912 }, { "epoch": 1.4334524136667581, "grad_norm": 0.154817214117615, "learning_rate": 3.413088575990439e-05, "loss": 0.512, "num_tokens": 2991040319.0, "step": 3913 }, { "epoch": 1.4338188146926811, "grad_norm": 0.12221768618061576, "learning_rate": 3.412772882831302e-05, "loss": 0.4816, "num_tokens": 2991971666.0, "step": 3914 }, { "epoch": 1.4341852157186041, "grad_norm": 0.15966998198057641, "learning_rate": 3.4124571213390336e-05, "loss": 0.5144, "num_tokens": 2992665897.0, "step": 3915 }, { "epoch": 1.434551616744527, "grad_norm": 0.1482541287978526, "learning_rate": 3.412141291531424e-05, "loss": 0.5331, "num_tokens": 2993357713.0, "step": 3916 }, { "epoch": 1.4349180177704497, "grad_norm": 0.14440349942358985, "learning_rate": 3.4118253934262683e-05, "loss": 0.5176, "num_tokens": 2994291822.0, "step": 3917 }, { "epoch": 1.4352844187963727, "grad_norm": 0.16719705812983585, "learning_rate": 3.4115094270413674e-05, "loss": 0.5133, "num_tokens": 2995053849.0, "step": 3918 }, { "epoch": 1.4356508198222955, "grad_norm": 0.15733777051340367, "learning_rate": 3.411193392394521e-05, "loss": 0.4987, "num_tokens": 2995745318.0, "step": 3919 }, { "epoch": 1.4360172208482185, "grad_norm": 0.1559083055178147, "learning_rate": 3.4108772895035386e-05, "loss": 0.5066, "num_tokens": 2996484109.0, "step": 3920 }, { "epoch": 1.4363836218741413, "grad_norm": 0.14959392728043014, "learning_rate": 3.41056111838623e-05, "loss": 0.5171, "num_tokens": 2997355183.0, "step": 3921 }, { "epoch": 1.436750022900064, "grad_norm": 0.17403080201963575, "learning_rate": 3.41024487906041e-05, "loss": 0.477, "num_tokens": 2998102192.0, "step": 3922 }, { "epoch": 1.437116423925987, "grad_norm": 0.14276021976813766, "learning_rate": 3.409928571543896e-05, "loss": 0.4862, "num_tokens": 2998905621.0, "step": 3923 }, { "epoch": 1.4374828249519098, "grad_norm": 0.16651011641254676, "learning_rate": 3.409612195854512e-05, "loss": 0.51, "num_tokens": 2999687195.0, "step": 3924 }, { "epoch": 1.4378492259778328, "grad_norm": 0.15137429453453327, "learning_rate": 3.409295752010082e-05, "loss": 0.4867, "num_tokens": 3000324051.0, "step": 3925 }, { "epoch": 1.4382156270037556, "grad_norm": 0.15845293078647207, "learning_rate": 3.408979240028437e-05, "loss": 0.5054, "num_tokens": 3001037894.0, "step": 3926 }, { "epoch": 1.4385820280296784, "grad_norm": 0.17946651057355453, "learning_rate": 3.4086626599274114e-05, "loss": 0.4975, "num_tokens": 3001782140.0, "step": 3927 }, { "epoch": 1.4389484290556014, "grad_norm": 0.17259750905091167, "learning_rate": 3.4083460117248405e-05, "loss": 0.496, "num_tokens": 3002539307.0, "step": 3928 }, { "epoch": 1.4393148300815242, "grad_norm": 0.13815258109319306, "learning_rate": 3.4080292954385684e-05, "loss": 0.4922, "num_tokens": 3003304725.0, "step": 3929 }, { "epoch": 1.4396812311074472, "grad_norm": 0.1760971480262154, "learning_rate": 3.407712511086439e-05, "loss": 0.5348, "num_tokens": 3004010409.0, "step": 3930 }, { "epoch": 1.44004763213337, "grad_norm": 0.1872049657968621, "learning_rate": 3.407395658686301e-05, "loss": 0.5408, "num_tokens": 3004846557.0, "step": 3931 }, { "epoch": 1.4404140331592927, "grad_norm": 0.1391185758674632, "learning_rate": 3.4070787382560076e-05, "loss": 0.495, "num_tokens": 3005587469.0, "step": 3932 }, { "epoch": 1.4407804341852157, "grad_norm": 0.17313838553539682, "learning_rate": 3.406761749813416e-05, "loss": 0.5267, "num_tokens": 3006321885.0, "step": 3933 }, { "epoch": 1.4411468352111385, "grad_norm": 0.22296411298638474, "learning_rate": 3.406444693376387e-05, "loss": 0.5001, "num_tokens": 3007049611.0, "step": 3934 }, { "epoch": 1.4415132362370615, "grad_norm": 0.15209778187870704, "learning_rate": 3.4061275689627836e-05, "loss": 0.4996, "num_tokens": 3007799149.0, "step": 3935 }, { "epoch": 1.4418796372629843, "grad_norm": 0.13771649842867795, "learning_rate": 3.4058103765904755e-05, "loss": 0.4968, "num_tokens": 3008511041.0, "step": 3936 }, { "epoch": 1.442246038288907, "grad_norm": 0.17239212587417788, "learning_rate": 3.4054931162773335e-05, "loss": 0.5096, "num_tokens": 3009215500.0, "step": 3937 }, { "epoch": 1.44261243931483, "grad_norm": 0.15792845508448738, "learning_rate": 3.405175788041235e-05, "loss": 0.5036, "num_tokens": 3009911770.0, "step": 3938 }, { "epoch": 1.442978840340753, "grad_norm": 0.13471897985779155, "learning_rate": 3.404858391900058e-05, "loss": 0.5083, "num_tokens": 3010699638.0, "step": 3939 }, { "epoch": 1.4433452413666759, "grad_norm": 0.14584410253035515, "learning_rate": 3.4045409278716864e-05, "loss": 0.4952, "num_tokens": 3011564710.0, "step": 3940 }, { "epoch": 1.4437116423925986, "grad_norm": 0.12701464720205677, "learning_rate": 3.40422339597401e-05, "loss": 0.4976, "num_tokens": 3012325598.0, "step": 3941 }, { "epoch": 1.4440780434185216, "grad_norm": 0.15930162027194936, "learning_rate": 3.403905796224916e-05, "loss": 0.5275, "num_tokens": 3013085142.0, "step": 3942 }, { "epoch": 1.4444444444444444, "grad_norm": 0.14480647138716282, "learning_rate": 3.403588128642303e-05, "loss": 0.4668, "num_tokens": 3013868102.0, "step": 3943 }, { "epoch": 1.4448108454703674, "grad_norm": 0.15269673270101872, "learning_rate": 3.4032703932440674e-05, "loss": 0.5188, "num_tokens": 3014653171.0, "step": 3944 }, { "epoch": 1.4451772464962902, "grad_norm": 0.14048152551594645, "learning_rate": 3.402952590048113e-05, "loss": 0.4874, "num_tokens": 3015451692.0, "step": 3945 }, { "epoch": 1.445543647522213, "grad_norm": 0.1558257718322135, "learning_rate": 3.4026347190723466e-05, "loss": 0.5109, "num_tokens": 3016203861.0, "step": 3946 }, { "epoch": 1.445910048548136, "grad_norm": 0.1707119438922098, "learning_rate": 3.402316780334677e-05, "loss": 0.5146, "num_tokens": 3016960552.0, "step": 3947 }, { "epoch": 1.4462764495740588, "grad_norm": 0.14051855727316734, "learning_rate": 3.401998773853019e-05, "loss": 0.515, "num_tokens": 3017608969.0, "step": 3948 }, { "epoch": 1.4466428505999818, "grad_norm": 0.1416800919538993, "learning_rate": 3.401680699645291e-05, "loss": 0.5238, "num_tokens": 3018481170.0, "step": 3949 }, { "epoch": 1.4470092516259045, "grad_norm": 0.15625283745170046, "learning_rate": 3.401362557729415e-05, "loss": 0.4845, "num_tokens": 3019232243.0, "step": 3950 }, { "epoch": 1.4473756526518273, "grad_norm": 0.17411672649211454, "learning_rate": 3.401044348123316e-05, "loss": 0.494, "num_tokens": 3020003661.0, "step": 3951 }, { "epoch": 1.4477420536777503, "grad_norm": 0.139244208787759, "learning_rate": 3.4007260708449236e-05, "loss": 0.5021, "num_tokens": 3020900463.0, "step": 3952 }, { "epoch": 1.448108454703673, "grad_norm": 0.14475434041767682, "learning_rate": 3.40040772591217e-05, "loss": 0.5244, "num_tokens": 3021618984.0, "step": 3953 }, { "epoch": 1.4484748557295961, "grad_norm": 0.17508418410965093, "learning_rate": 3.400089313342994e-05, "loss": 0.5112, "num_tokens": 3022443064.0, "step": 3954 }, { "epoch": 1.448841256755519, "grad_norm": 0.16638827895076322, "learning_rate": 3.3997708331553344e-05, "loss": 0.534, "num_tokens": 3023236960.0, "step": 3955 }, { "epoch": 1.4492076577814417, "grad_norm": 0.15669763868207423, "learning_rate": 3.3994522853671376e-05, "loss": 0.5048, "num_tokens": 3024067771.0, "step": 3956 }, { "epoch": 1.4495740588073647, "grad_norm": 0.14759105152107946, "learning_rate": 3.399133669996352e-05, "loss": 0.5131, "num_tokens": 3024957634.0, "step": 3957 }, { "epoch": 1.4499404598332875, "grad_norm": 0.15575081899637525, "learning_rate": 3.398814987060928e-05, "loss": 0.503, "num_tokens": 3025892706.0, "step": 3958 }, { "epoch": 1.4503068608592105, "grad_norm": 0.15340764224375814, "learning_rate": 3.398496236578822e-05, "loss": 0.4983, "num_tokens": 3026556607.0, "step": 3959 }, { "epoch": 1.4506732618851332, "grad_norm": 0.1846319998150774, "learning_rate": 3.398177418567996e-05, "loss": 0.501, "num_tokens": 3027260407.0, "step": 3960 }, { "epoch": 1.451039662911056, "grad_norm": 0.1643002935216169, "learning_rate": 3.397858533046412e-05, "loss": 0.5242, "num_tokens": 3027959772.0, "step": 3961 }, { "epoch": 1.451406063936979, "grad_norm": 0.14440517938436329, "learning_rate": 3.3975395800320374e-05, "loss": 0.5155, "num_tokens": 3028765610.0, "step": 3962 }, { "epoch": 1.451772464962902, "grad_norm": 0.16280366429963378, "learning_rate": 3.3972205595428444e-05, "loss": 0.489, "num_tokens": 3029523063.0, "step": 3963 }, { "epoch": 1.4521388659888248, "grad_norm": 0.16075605584286248, "learning_rate": 3.396901471596807e-05, "loss": 0.5154, "num_tokens": 3030200962.0, "step": 3964 }, { "epoch": 1.4525052670147476, "grad_norm": 0.13886489624609438, "learning_rate": 3.396582316211904e-05, "loss": 0.4811, "num_tokens": 3031006632.0, "step": 3965 }, { "epoch": 1.4528716680406706, "grad_norm": 0.15609340142112782, "learning_rate": 3.396263093406119e-05, "loss": 0.4799, "num_tokens": 3031827028.0, "step": 3966 }, { "epoch": 1.4532380690665934, "grad_norm": 0.14950771939797963, "learning_rate": 3.3959438031974385e-05, "loss": 0.5076, "num_tokens": 3032607768.0, "step": 3967 }, { "epoch": 1.4536044700925164, "grad_norm": 0.14130891863674497, "learning_rate": 3.395624445603852e-05, "loss": 0.5253, "num_tokens": 3033297109.0, "step": 3968 }, { "epoch": 1.4539708711184391, "grad_norm": 0.15402623836781104, "learning_rate": 3.395305020643354e-05, "loss": 0.5141, "num_tokens": 3034123024.0, "step": 3969 }, { "epoch": 1.454337272144362, "grad_norm": 0.16314244989047005, "learning_rate": 3.394985528333942e-05, "loss": 0.5108, "num_tokens": 3034767603.0, "step": 3970 }, { "epoch": 1.454703673170285, "grad_norm": 0.15493381834385508, "learning_rate": 3.394665968693618e-05, "loss": 0.5088, "num_tokens": 3035469024.0, "step": 3971 }, { "epoch": 1.4550700741962077, "grad_norm": 0.13892692270884105, "learning_rate": 3.394346341740387e-05, "loss": 0.4714, "num_tokens": 3036222257.0, "step": 3972 }, { "epoch": 1.4554364752221307, "grad_norm": 0.14985908197756806, "learning_rate": 3.394026647492258e-05, "loss": 0.495, "num_tokens": 3036965302.0, "step": 3973 }, { "epoch": 1.4558028762480535, "grad_norm": 0.1422286280800514, "learning_rate": 3.393706885967246e-05, "loss": 0.5012, "num_tokens": 3037879958.0, "step": 3974 }, { "epoch": 1.4561692772739763, "grad_norm": 0.14342615272758694, "learning_rate": 3.393387057183366e-05, "loss": 0.4865, "num_tokens": 3038662709.0, "step": 3975 }, { "epoch": 1.4565356782998993, "grad_norm": 0.16041196345767428, "learning_rate": 3.393067161158638e-05, "loss": 0.5201, "num_tokens": 3039383312.0, "step": 3976 }, { "epoch": 1.456902079325822, "grad_norm": 0.14448616507401446, "learning_rate": 3.392747197911088e-05, "loss": 0.4978, "num_tokens": 3040186761.0, "step": 3977 }, { "epoch": 1.457268480351745, "grad_norm": 0.17293589430839773, "learning_rate": 3.392427167458744e-05, "loss": 0.5344, "num_tokens": 3040969490.0, "step": 3978 }, { "epoch": 1.4576348813776678, "grad_norm": 0.13303783533652017, "learning_rate": 3.392107069819637e-05, "loss": 0.5042, "num_tokens": 3041745519.0, "step": 3979 }, { "epoch": 1.4580012824035906, "grad_norm": 0.13588137048647364, "learning_rate": 3.3917869050118036e-05, "loss": 0.5181, "num_tokens": 3042701333.0, "step": 3980 }, { "epoch": 1.4583676834295136, "grad_norm": 0.15280384560694718, "learning_rate": 3.3914666730532824e-05, "loss": 0.5097, "num_tokens": 3043456186.0, "step": 3981 }, { "epoch": 1.4587340844554364, "grad_norm": 0.14359407613732236, "learning_rate": 3.391146373962118e-05, "loss": 0.4902, "num_tokens": 3044255852.0, "step": 3982 }, { "epoch": 1.4591004854813594, "grad_norm": 0.14524538883081103, "learning_rate": 3.3908260077563565e-05, "loss": 0.5059, "num_tokens": 3045025667.0, "step": 3983 }, { "epoch": 1.4594668865072822, "grad_norm": 0.13545930009082835, "learning_rate": 3.3905055744540496e-05, "loss": 0.5, "num_tokens": 3045779259.0, "step": 3984 }, { "epoch": 1.459833287533205, "grad_norm": 0.15140745635597863, "learning_rate": 3.390185074073251e-05, "loss": 0.488, "num_tokens": 3046536821.0, "step": 3985 }, { "epoch": 1.460199688559128, "grad_norm": 0.15783895723115263, "learning_rate": 3.3898645066320204e-05, "loss": 0.5162, "num_tokens": 3047291551.0, "step": 3986 }, { "epoch": 1.460566089585051, "grad_norm": 0.14016267693948878, "learning_rate": 3.389543872148419e-05, "loss": 0.5006, "num_tokens": 3048154879.0, "step": 3987 }, { "epoch": 1.4609324906109737, "grad_norm": 0.150008412296606, "learning_rate": 3.389223170640512e-05, "loss": 0.5025, "num_tokens": 3048955750.0, "step": 3988 }, { "epoch": 1.4612988916368965, "grad_norm": 0.15408901214454784, "learning_rate": 3.388902402126372e-05, "loss": 0.4811, "num_tokens": 3049756331.0, "step": 3989 }, { "epoch": 1.4616652926628195, "grad_norm": 0.15818392134309958, "learning_rate": 3.388581566624069e-05, "loss": 0.5114, "num_tokens": 3050551098.0, "step": 3990 }, { "epoch": 1.4620316936887423, "grad_norm": 0.14722924333866938, "learning_rate": 3.388260664151684e-05, "loss": 0.4825, "num_tokens": 3051302834.0, "step": 3991 }, { "epoch": 1.4623980947146653, "grad_norm": 0.14954085667974984, "learning_rate": 3.387939694727294e-05, "loss": 0.4942, "num_tokens": 3051992120.0, "step": 3992 }, { "epoch": 1.462764495740588, "grad_norm": 0.16692705278949277, "learning_rate": 3.387618658368988e-05, "loss": 0.4946, "num_tokens": 3052699095.0, "step": 3993 }, { "epoch": 1.4631308967665109, "grad_norm": 0.1610079997319933, "learning_rate": 3.3872975550948526e-05, "loss": 0.4924, "num_tokens": 3053437189.0, "step": 3994 }, { "epoch": 1.4634972977924339, "grad_norm": 0.1726792037748703, "learning_rate": 3.38697638492298e-05, "loss": 0.5127, "num_tokens": 3054082035.0, "step": 3995 }, { "epoch": 1.4638636988183567, "grad_norm": 0.17825345980707882, "learning_rate": 3.386655147871467e-05, "loss": 0.4942, "num_tokens": 3054832991.0, "step": 3996 }, { "epoch": 1.4642300998442797, "grad_norm": 0.14049341921386257, "learning_rate": 3.386333843958413e-05, "loss": 0.4736, "num_tokens": 3055624677.0, "step": 3997 }, { "epoch": 1.4645965008702024, "grad_norm": 0.1493219627740566, "learning_rate": 3.386012473201922e-05, "loss": 0.5124, "num_tokens": 3056401642.0, "step": 3998 }, { "epoch": 1.4649629018961252, "grad_norm": 0.18828132191005817, "learning_rate": 3.3856910356201015e-05, "loss": 0.5245, "num_tokens": 3057112129.0, "step": 3999 }, { "epoch": 1.4653293029220482, "grad_norm": 0.13873093465251748, "learning_rate": 3.385369531231063e-05, "loss": 0.513, "num_tokens": 3057936577.0, "step": 4000 }, { "epoch": 1.465695703947971, "grad_norm": 0.1535935457594169, "learning_rate": 3.385047960052922e-05, "loss": 0.5213, "num_tokens": 3058735082.0, "step": 4001 }, { "epoch": 1.466062104973894, "grad_norm": 0.1446562498735536, "learning_rate": 3.384726322103795e-05, "loss": 0.5037, "num_tokens": 3059471654.0, "step": 4002 }, { "epoch": 1.4664285059998168, "grad_norm": 0.1631273235588649, "learning_rate": 3.384404617401807e-05, "loss": 0.499, "num_tokens": 3060124410.0, "step": 4003 }, { "epoch": 1.4667949070257396, "grad_norm": 0.14706898247959402, "learning_rate": 3.384082845965084e-05, "loss": 0.5266, "num_tokens": 3060919220.0, "step": 4004 }, { "epoch": 1.4671613080516626, "grad_norm": 0.14687328306056774, "learning_rate": 3.3837610078117556e-05, "loss": 0.5033, "num_tokens": 3061766570.0, "step": 4005 }, { "epoch": 1.4675277090775853, "grad_norm": 0.1594165332707405, "learning_rate": 3.3834391029599544e-05, "loss": 0.5435, "num_tokens": 3062520657.0, "step": 4006 }, { "epoch": 1.4678941101035083, "grad_norm": 0.15661595742614506, "learning_rate": 3.38311713142782e-05, "loss": 0.4939, "num_tokens": 3063243046.0, "step": 4007 }, { "epoch": 1.4682605111294311, "grad_norm": 0.15810824092125259, "learning_rate": 3.3827950932334923e-05, "loss": 0.5096, "num_tokens": 3064024727.0, "step": 4008 }, { "epoch": 1.468626912155354, "grad_norm": 0.16042747392377096, "learning_rate": 3.382472988395117e-05, "loss": 0.5107, "num_tokens": 3064795207.0, "step": 4009 }, { "epoch": 1.468993313181277, "grad_norm": 0.14858533227287782, "learning_rate": 3.3821508169308425e-05, "loss": 0.5011, "num_tokens": 3065592837.0, "step": 4010 }, { "epoch": 1.4693597142072, "grad_norm": 0.1323256581187396, "learning_rate": 3.381828578858822e-05, "loss": 0.5166, "num_tokens": 3066439974.0, "step": 4011 }, { "epoch": 1.4697261152331227, "grad_norm": 0.15939615273394284, "learning_rate": 3.381506274197213e-05, "loss": 0.5109, "num_tokens": 3067217157.0, "step": 4012 }, { "epoch": 1.4700925162590455, "grad_norm": 0.14134612663079227, "learning_rate": 3.381183902964173e-05, "loss": 0.4939, "num_tokens": 3067952285.0, "step": 4013 }, { "epoch": 1.4704589172849685, "grad_norm": 0.15947917402913753, "learning_rate": 3.380861465177867e-05, "loss": 0.5037, "num_tokens": 3068732119.0, "step": 4014 }, { "epoch": 1.4708253183108912, "grad_norm": 0.14202221655858505, "learning_rate": 3.3805389608564634e-05, "loss": 0.4873, "num_tokens": 3069517783.0, "step": 4015 }, { "epoch": 1.4711917193368143, "grad_norm": 0.13058576361805882, "learning_rate": 3.380216390018133e-05, "loss": 0.4885, "num_tokens": 3070285948.0, "step": 4016 }, { "epoch": 1.471558120362737, "grad_norm": 0.15141819733481052, "learning_rate": 3.37989375268105e-05, "loss": 0.5165, "num_tokens": 3070992721.0, "step": 4017 }, { "epoch": 1.4719245213886598, "grad_norm": 0.14351801078215579, "learning_rate": 3.379571048863394e-05, "loss": 0.4898, "num_tokens": 3071725982.0, "step": 4018 }, { "epoch": 1.4722909224145828, "grad_norm": 0.1363827869713064, "learning_rate": 3.3792482785833485e-05, "loss": 0.4921, "num_tokens": 3072518685.0, "step": 4019 }, { "epoch": 1.4726573234405056, "grad_norm": 0.1457193065271274, "learning_rate": 3.378925441859098e-05, "loss": 0.5196, "num_tokens": 3073317418.0, "step": 4020 }, { "epoch": 1.4730237244664286, "grad_norm": 0.15301957730291063, "learning_rate": 3.378602538708835e-05, "loss": 0.5489, "num_tokens": 3073954543.0, "step": 4021 }, { "epoch": 1.4733901254923514, "grad_norm": 0.15719670377948441, "learning_rate": 3.378279569150751e-05, "loss": 0.4923, "num_tokens": 3074848325.0, "step": 4022 }, { "epoch": 1.4737565265182742, "grad_norm": 0.14380516868479037, "learning_rate": 3.377956533203044e-05, "loss": 0.4831, "num_tokens": 3075642596.0, "step": 4023 }, { "epoch": 1.4741229275441972, "grad_norm": 0.16344034220921086, "learning_rate": 3.3776334308839167e-05, "loss": 0.4828, "num_tokens": 3076401084.0, "step": 4024 }, { "epoch": 1.47448932857012, "grad_norm": 0.13924957536184832, "learning_rate": 3.3773102622115724e-05, "loss": 0.5072, "num_tokens": 3077233118.0, "step": 4025 }, { "epoch": 1.474855729596043, "grad_norm": 0.174054152665902, "learning_rate": 3.376987027204221e-05, "loss": 0.5802, "num_tokens": 3077966555.0, "step": 4026 }, { "epoch": 1.4752221306219657, "grad_norm": 0.14592328798801865, "learning_rate": 3.376663725880075e-05, "loss": 0.4716, "num_tokens": 3078582105.0, "step": 4027 }, { "epoch": 1.4755885316478885, "grad_norm": 0.15900880195877792, "learning_rate": 3.3763403582573504e-05, "loss": 0.4883, "num_tokens": 3079326755.0, "step": 4028 }, { "epoch": 1.4759549326738115, "grad_norm": 0.16452109728945605, "learning_rate": 3.376016924354267e-05, "loss": 0.5311, "num_tokens": 3080013222.0, "step": 4029 }, { "epoch": 1.4763213336997343, "grad_norm": 0.1640894126250138, "learning_rate": 3.3756934241890494e-05, "loss": 0.5273, "num_tokens": 3080810872.0, "step": 4030 }, { "epoch": 1.4766877347256573, "grad_norm": 0.16508657879766547, "learning_rate": 3.375369857779923e-05, "loss": 0.5176, "num_tokens": 3081549970.0, "step": 4031 }, { "epoch": 1.47705413575158, "grad_norm": 0.13673454595852527, "learning_rate": 3.375046225145122e-05, "loss": 0.5027, "num_tokens": 3082324923.0, "step": 4032 }, { "epoch": 1.4774205367775028, "grad_norm": 0.18488163444638325, "learning_rate": 3.37472252630288e-05, "loss": 0.5115, "num_tokens": 3083139813.0, "step": 4033 }, { "epoch": 1.4777869378034258, "grad_norm": 0.15235663883202022, "learning_rate": 3.374398761271434e-05, "loss": 0.515, "num_tokens": 3084023970.0, "step": 4034 }, { "epoch": 1.4781533388293489, "grad_norm": 0.14877342914124964, "learning_rate": 3.374074930069028e-05, "loss": 0.5307, "num_tokens": 3084802084.0, "step": 4035 }, { "epoch": 1.4785197398552716, "grad_norm": 0.18521763367698516, "learning_rate": 3.373751032713909e-05, "loss": 0.5186, "num_tokens": 3085516135.0, "step": 4036 }, { "epoch": 1.4788861408811944, "grad_norm": 0.12451913461887024, "learning_rate": 3.3734270692243244e-05, "loss": 0.5012, "num_tokens": 3086317536.0, "step": 4037 }, { "epoch": 1.4792525419071174, "grad_norm": 0.16456042602150045, "learning_rate": 3.37310303961853e-05, "loss": 0.5118, "num_tokens": 3087040627.0, "step": 4038 }, { "epoch": 1.4796189429330402, "grad_norm": 0.15305760051924747, "learning_rate": 3.3727789439147825e-05, "loss": 0.5183, "num_tokens": 3087875969.0, "step": 4039 }, { "epoch": 1.4799853439589632, "grad_norm": 0.1344779714720325, "learning_rate": 3.372454782131342e-05, "loss": 0.4761, "num_tokens": 3088633546.0, "step": 4040 }, { "epoch": 1.480351744984886, "grad_norm": 0.13324221277225295, "learning_rate": 3.372130554286474e-05, "loss": 0.4844, "num_tokens": 3089432774.0, "step": 4041 }, { "epoch": 1.4807181460108088, "grad_norm": 0.14452006256265795, "learning_rate": 3.3718062603984474e-05, "loss": 0.5073, "num_tokens": 3090286205.0, "step": 4042 }, { "epoch": 1.4810845470367318, "grad_norm": 0.14127677222451407, "learning_rate": 3.371481900485534e-05, "loss": 0.4896, "num_tokens": 3091053218.0, "step": 4043 }, { "epoch": 1.4814509480626545, "grad_norm": 0.13542439157395544, "learning_rate": 3.371157474566009e-05, "loss": 0.5173, "num_tokens": 3091854215.0, "step": 4044 }, { "epoch": 1.4818173490885775, "grad_norm": 0.14491085161207426, "learning_rate": 3.3708329826581536e-05, "loss": 0.5161, "num_tokens": 3092595686.0, "step": 4045 }, { "epoch": 1.4821837501145003, "grad_norm": 0.15704800384073891, "learning_rate": 3.3705084247802484e-05, "loss": 0.5069, "num_tokens": 3093375416.0, "step": 4046 }, { "epoch": 1.482550151140423, "grad_norm": 0.15277466412456014, "learning_rate": 3.3701838009505845e-05, "loss": 0.536, "num_tokens": 3094092861.0, "step": 4047 }, { "epoch": 1.482916552166346, "grad_norm": 0.14423845957985099, "learning_rate": 3.369859111187449e-05, "loss": 0.49, "num_tokens": 3094719301.0, "step": 4048 }, { "epoch": 1.4832829531922689, "grad_norm": 0.13961252474227184, "learning_rate": 3.369534355509138e-05, "loss": 0.4807, "num_tokens": 3095509290.0, "step": 4049 }, { "epoch": 1.4836493542181919, "grad_norm": 0.14450751177729695, "learning_rate": 3.369209533933949e-05, "loss": 0.4953, "num_tokens": 3096260219.0, "step": 4050 }, { "epoch": 1.4840157552441147, "grad_norm": 0.14797778468042225, "learning_rate": 3.3688846464801857e-05, "loss": 0.5564, "num_tokens": 3097004440.0, "step": 4051 }, { "epoch": 1.4843821562700374, "grad_norm": 0.1410093189436178, "learning_rate": 3.368559693166152e-05, "loss": 0.4835, "num_tokens": 3097770169.0, "step": 4052 }, { "epoch": 1.4847485572959604, "grad_norm": 0.15950548798773215, "learning_rate": 3.368234674010158e-05, "loss": 0.4881, "num_tokens": 3098571535.0, "step": 4053 }, { "epoch": 1.4851149583218832, "grad_norm": 0.15705551566262263, "learning_rate": 3.367909589030515e-05, "loss": 0.5002, "num_tokens": 3099361428.0, "step": 4054 }, { "epoch": 1.4854813593478062, "grad_norm": 0.1329620003384677, "learning_rate": 3.3675844382455434e-05, "loss": 0.4777, "num_tokens": 3100084485.0, "step": 4055 }, { "epoch": 1.485847760373729, "grad_norm": 0.16604683240653414, "learning_rate": 3.36725922167356e-05, "loss": 0.5115, "num_tokens": 3100896189.0, "step": 4056 }, { "epoch": 1.4862141613996518, "grad_norm": 0.15122412642793312, "learning_rate": 3.366933939332891e-05, "loss": 0.5503, "num_tokens": 3101663885.0, "step": 4057 }, { "epoch": 1.4865805624255748, "grad_norm": 0.13113997230108532, "learning_rate": 3.366608591241864e-05, "loss": 0.4972, "num_tokens": 3102474577.0, "step": 4058 }, { "epoch": 1.4869469634514978, "grad_norm": 0.17016175339973352, "learning_rate": 3.3662831774188104e-05, "loss": 0.5024, "num_tokens": 3103218560.0, "step": 4059 }, { "epoch": 1.4873133644774206, "grad_norm": 0.15739870708188694, "learning_rate": 3.365957697882065e-05, "loss": 0.5049, "num_tokens": 3104018312.0, "step": 4060 }, { "epoch": 1.4876797655033434, "grad_norm": 0.1487917798493332, "learning_rate": 3.365632152649967e-05, "loss": 0.5203, "num_tokens": 3104725625.0, "step": 4061 }, { "epoch": 1.4880461665292664, "grad_norm": 0.13703918930527226, "learning_rate": 3.365306541740861e-05, "loss": 0.4924, "num_tokens": 3105703546.0, "step": 4062 }, { "epoch": 1.4884125675551891, "grad_norm": 0.16114402414118353, "learning_rate": 3.36498086517309e-05, "loss": 0.5151, "num_tokens": 3106455069.0, "step": 4063 }, { "epoch": 1.4887789685811121, "grad_norm": 0.14268519312688094, "learning_rate": 3.364655122965007e-05, "loss": 0.4718, "num_tokens": 3107319794.0, "step": 4064 }, { "epoch": 1.489145369607035, "grad_norm": 0.13340156974989378, "learning_rate": 3.364329315134964e-05, "loss": 0.5178, "num_tokens": 3108232595.0, "step": 4065 }, { "epoch": 1.4895117706329577, "grad_norm": 0.13679758573240566, "learning_rate": 3.3640034417013195e-05, "loss": 0.5072, "num_tokens": 3108919774.0, "step": 4066 }, { "epoch": 1.4898781716588807, "grad_norm": 0.16024538636866742, "learning_rate": 3.3636775026824335e-05, "loss": 0.4895, "num_tokens": 3109623333.0, "step": 4067 }, { "epoch": 1.4902445726848035, "grad_norm": 0.14765364068322917, "learning_rate": 3.3633514980966726e-05, "loss": 0.5068, "num_tokens": 3110558635.0, "step": 4068 }, { "epoch": 1.4906109737107265, "grad_norm": 0.15636888257210324, "learning_rate": 3.3630254279624044e-05, "loss": 0.5179, "num_tokens": 3111367212.0, "step": 4069 }, { "epoch": 1.4909773747366493, "grad_norm": 0.15625304606047352, "learning_rate": 3.3626992922980015e-05, "loss": 0.5154, "num_tokens": 3112047635.0, "step": 4070 }, { "epoch": 1.491343775762572, "grad_norm": 0.14415078246352211, "learning_rate": 3.362373091121839e-05, "loss": 0.5232, "num_tokens": 3112869967.0, "step": 4071 }, { "epoch": 1.491710176788495, "grad_norm": 0.17441273803409252, "learning_rate": 3.3620468244522976e-05, "loss": 0.4635, "num_tokens": 3113603839.0, "step": 4072 }, { "epoch": 1.4920765778144178, "grad_norm": 0.14961258787332685, "learning_rate": 3.36172049230776e-05, "loss": 0.4761, "num_tokens": 3114300177.0, "step": 4073 }, { "epoch": 1.4924429788403408, "grad_norm": 0.14766071052884264, "learning_rate": 3.361394094706613e-05, "loss": 0.5072, "num_tokens": 3115084690.0, "step": 4074 }, { "epoch": 1.4928093798662636, "grad_norm": 0.15938911016075896, "learning_rate": 3.361067631667249e-05, "loss": 0.4872, "num_tokens": 3115890108.0, "step": 4075 }, { "epoch": 1.4931757808921864, "grad_norm": 0.14997329759259131, "learning_rate": 3.36074110320806e-05, "loss": 0.4948, "num_tokens": 3116593402.0, "step": 4076 }, { "epoch": 1.4935421819181094, "grad_norm": 0.15601915991894724, "learning_rate": 3.3604145093474465e-05, "loss": 0.5035, "num_tokens": 3117446361.0, "step": 4077 }, { "epoch": 1.4939085829440322, "grad_norm": 0.16768488942216916, "learning_rate": 3.3600878501038086e-05, "loss": 0.4985, "num_tokens": 3118252864.0, "step": 4078 }, { "epoch": 1.4942749839699552, "grad_norm": 0.1393074417835363, "learning_rate": 3.359761125495552e-05, "loss": 0.5068, "num_tokens": 3118940989.0, "step": 4079 }, { "epoch": 1.494641384995878, "grad_norm": 0.15443064133949863, "learning_rate": 3.359434335541086e-05, "loss": 0.5126, "num_tokens": 3119797923.0, "step": 4080 }, { "epoch": 1.4950077860218007, "grad_norm": 0.15673719004532838, "learning_rate": 3.359107480258824e-05, "loss": 0.533, "num_tokens": 3120623190.0, "step": 4081 }, { "epoch": 1.4953741870477237, "grad_norm": 0.1379987334452534, "learning_rate": 3.358780559667182e-05, "loss": 0.5213, "num_tokens": 3121375583.0, "step": 4082 }, { "epoch": 1.4957405880736467, "grad_norm": 0.14361619679022058, "learning_rate": 3.35845357378458e-05, "loss": 0.5032, "num_tokens": 3122139209.0, "step": 4083 }, { "epoch": 1.4961069890995695, "grad_norm": 0.1703074725094775, "learning_rate": 3.358126522629442e-05, "loss": 0.5126, "num_tokens": 3122862737.0, "step": 4084 }, { "epoch": 1.4964733901254923, "grad_norm": 0.14987838806568762, "learning_rate": 3.3577994062201966e-05, "loss": 0.4957, "num_tokens": 3123659572.0, "step": 4085 }, { "epoch": 1.4968397911514153, "grad_norm": 0.15378007976893265, "learning_rate": 3.357472224575273e-05, "loss": 0.5246, "num_tokens": 3124402346.0, "step": 4086 }, { "epoch": 1.497206192177338, "grad_norm": 0.14875433595563617, "learning_rate": 3.357144977713107e-05, "loss": 0.5012, "num_tokens": 3125170823.0, "step": 4087 }, { "epoch": 1.497572593203261, "grad_norm": 0.12903730134226205, "learning_rate": 3.356817665652138e-05, "loss": 0.4998, "num_tokens": 3125937643.0, "step": 4088 }, { "epoch": 1.4979389942291839, "grad_norm": 0.1448986832091459, "learning_rate": 3.3564902884108074e-05, "loss": 0.4804, "num_tokens": 3126692044.0, "step": 4089 }, { "epoch": 1.4983053952551066, "grad_norm": 0.14741804099655395, "learning_rate": 3.356162846007561e-05, "loss": 0.5206, "num_tokens": 3127353151.0, "step": 4090 }, { "epoch": 1.4986717962810296, "grad_norm": 0.138647222509241, "learning_rate": 3.3558353384608495e-05, "loss": 0.4994, "num_tokens": 3128128938.0, "step": 4091 }, { "epoch": 1.4990381973069524, "grad_norm": 0.13710159030808206, "learning_rate": 3.3555077657891246e-05, "loss": 0.4871, "num_tokens": 3128918334.0, "step": 4092 }, { "epoch": 1.4994045983328754, "grad_norm": 0.14920346267142579, "learning_rate": 3.3551801280108445e-05, "loss": 0.5409, "num_tokens": 3129769666.0, "step": 4093 }, { "epoch": 1.4997709993587982, "grad_norm": 0.15354331464402177, "learning_rate": 3.3548524251444696e-05, "loss": 0.5021, "num_tokens": 3130495849.0, "step": 4094 }, { "epoch": 1.500137400384721, "grad_norm": 0.12460233006365465, "learning_rate": 3.3545246572084634e-05, "loss": 0.4787, "num_tokens": 3131299320.0, "step": 4095 }, { "epoch": 1.500503801410644, "grad_norm": 0.1669677503496433, "learning_rate": 3.354196824221294e-05, "loss": 0.4938, "num_tokens": 3132038145.0, "step": 4096 }, { "epoch": 1.5008702024365668, "grad_norm": 0.14203438882127478, "learning_rate": 3.3538689262014336e-05, "loss": 0.5061, "num_tokens": 3132774435.0, "step": 4097 }, { "epoch": 1.5012366034624898, "grad_norm": 0.1528912952823211, "learning_rate": 3.3535409631673576e-05, "loss": 0.4945, "num_tokens": 3133506359.0, "step": 4098 }, { "epoch": 1.5016030044884126, "grad_norm": 0.1665595673889995, "learning_rate": 3.353212935137543e-05, "loss": 0.4955, "num_tokens": 3134204460.0, "step": 4099 }, { "epoch": 1.5019694055143353, "grad_norm": 0.166667494599487, "learning_rate": 3.352884842130476e-05, "loss": 0.4877, "num_tokens": 3134932779.0, "step": 4100 }, { "epoch": 1.5023358065402583, "grad_norm": 0.16414232421730324, "learning_rate": 3.352556684164639e-05, "loss": 0.5157, "num_tokens": 3135724896.0, "step": 4101 }, { "epoch": 1.5027022075661813, "grad_norm": 0.1426214518091348, "learning_rate": 3.3522284612585244e-05, "loss": 0.497, "num_tokens": 3136589449.0, "step": 4102 }, { "epoch": 1.5030686085921041, "grad_norm": 0.14748083001594656, "learning_rate": 3.351900173430626e-05, "loss": 0.5234, "num_tokens": 3137423624.0, "step": 4103 }, { "epoch": 1.503435009618027, "grad_norm": 0.15323105728842126, "learning_rate": 3.3515718206994386e-05, "loss": 0.525, "num_tokens": 3138165329.0, "step": 4104 }, { "epoch": 1.5038014106439497, "grad_norm": 0.13310502254096188, "learning_rate": 3.351243403083465e-05, "loss": 0.4941, "num_tokens": 3139002148.0, "step": 4105 }, { "epoch": 1.5041678116698727, "grad_norm": 0.16533150922231407, "learning_rate": 3.350914920601209e-05, "loss": 0.5077, "num_tokens": 3139822098.0, "step": 4106 }, { "epoch": 1.5045342126957957, "grad_norm": 0.13793155838513976, "learning_rate": 3.350586373271179e-05, "loss": 0.4967, "num_tokens": 3140654108.0, "step": 4107 }, { "epoch": 1.5049006137217185, "grad_norm": 0.1618959880975068, "learning_rate": 3.350257761111887e-05, "loss": 0.4914, "num_tokens": 3141437672.0, "step": 4108 }, { "epoch": 1.5052670147476412, "grad_norm": 0.14823189263284953, "learning_rate": 3.3499290841418487e-05, "loss": 0.488, "num_tokens": 3142203300.0, "step": 4109 }, { "epoch": 1.505633415773564, "grad_norm": 0.1277096378269188, "learning_rate": 3.3496003423795826e-05, "loss": 0.5069, "num_tokens": 3143030059.0, "step": 4110 }, { "epoch": 1.505999816799487, "grad_norm": 0.1425714767183451, "learning_rate": 3.349271535843612e-05, "loss": 0.5136, "num_tokens": 3143862409.0, "step": 4111 }, { "epoch": 1.50636621782541, "grad_norm": 0.15283787961997308, "learning_rate": 3.348942664552464e-05, "loss": 0.5521, "num_tokens": 3144591476.0, "step": 4112 }, { "epoch": 1.5067326188513328, "grad_norm": 0.16914348793324668, "learning_rate": 3.348613728524667e-05, "loss": 0.5103, "num_tokens": 3145307314.0, "step": 4113 }, { "epoch": 1.5070990198772556, "grad_norm": 0.13561809390540863, "learning_rate": 3.3482847277787565e-05, "loss": 0.4789, "num_tokens": 3146087543.0, "step": 4114 }, { "epoch": 1.5074654209031784, "grad_norm": 0.14123206060597673, "learning_rate": 3.347955662333269e-05, "loss": 0.5026, "num_tokens": 3146812935.0, "step": 4115 }, { "epoch": 1.5078318219291014, "grad_norm": 0.15005221620617012, "learning_rate": 3.347626532206745e-05, "loss": 0.5127, "num_tokens": 3147710522.0, "step": 4116 }, { "epoch": 1.5081982229550244, "grad_norm": 0.1312932949776957, "learning_rate": 3.3472973374177295e-05, "loss": 0.4923, "num_tokens": 3148487607.0, "step": 4117 }, { "epoch": 1.5085646239809472, "grad_norm": 0.1467559767393309, "learning_rate": 3.346968077984772e-05, "loss": 0.4846, "num_tokens": 3149137823.0, "step": 4118 }, { "epoch": 1.50893102500687, "grad_norm": 0.13549657974976798, "learning_rate": 3.3466387539264224e-05, "loss": 0.4704, "num_tokens": 3149981356.0, "step": 4119 }, { "epoch": 1.509297426032793, "grad_norm": 0.15216516383130335, "learning_rate": 3.346309365261239e-05, "loss": 0.4943, "num_tokens": 3150703685.0, "step": 4120 }, { "epoch": 1.5096638270587157, "grad_norm": 0.1462205557453461, "learning_rate": 3.345979912007779e-05, "loss": 0.4917, "num_tokens": 3151358658.0, "step": 4121 }, { "epoch": 1.5100302280846387, "grad_norm": 0.13947115599593662, "learning_rate": 3.345650394184605e-05, "loss": 0.5264, "num_tokens": 3152132948.0, "step": 4122 }, { "epoch": 1.5103966291105615, "grad_norm": 0.14845457300896342, "learning_rate": 3.345320811810286e-05, "loss": 0.52, "num_tokens": 3152924571.0, "step": 4123 }, { "epoch": 1.5107630301364843, "grad_norm": 0.1422756660146661, "learning_rate": 3.3449911649033896e-05, "loss": 0.5114, "num_tokens": 3153729275.0, "step": 4124 }, { "epoch": 1.5111294311624073, "grad_norm": 0.15819748131640635, "learning_rate": 3.344661453482491e-05, "loss": 0.5463, "num_tokens": 3154385197.0, "step": 4125 }, { "epoch": 1.5114958321883303, "grad_norm": 0.13657029699360082, "learning_rate": 3.344331677566166e-05, "loss": 0.5082, "num_tokens": 3155193431.0, "step": 4126 }, { "epoch": 1.511862233214253, "grad_norm": 0.13120404554645726, "learning_rate": 3.3440018371729985e-05, "loss": 0.4944, "num_tokens": 3156055765.0, "step": 4127 }, { "epoch": 1.5122286342401758, "grad_norm": 0.14654630368287333, "learning_rate": 3.343671932321571e-05, "loss": 0.4726, "num_tokens": 3156905677.0, "step": 4128 }, { "epoch": 1.5125950352660986, "grad_norm": 0.12967981925044555, "learning_rate": 3.343341963030472e-05, "loss": 0.5154, "num_tokens": 3157709483.0, "step": 4129 }, { "epoch": 1.5129614362920216, "grad_norm": 0.1563954250880307, "learning_rate": 3.343011929318294e-05, "loss": 0.5394, "num_tokens": 3158464310.0, "step": 4130 }, { "epoch": 1.5133278373179446, "grad_norm": 0.14883374623305645, "learning_rate": 3.342681831203633e-05, "loss": 0.4679, "num_tokens": 3159248950.0, "step": 4131 }, { "epoch": 1.5136942383438674, "grad_norm": 0.1612002288018616, "learning_rate": 3.342351668705087e-05, "loss": 0.5226, "num_tokens": 3160036443.0, "step": 4132 }, { "epoch": 1.5140606393697902, "grad_norm": 0.15273825525020351, "learning_rate": 3.34202144184126e-05, "loss": 0.4793, "num_tokens": 3160644785.0, "step": 4133 }, { "epoch": 1.514427040395713, "grad_norm": 0.13990969864738353, "learning_rate": 3.341691150630758e-05, "loss": 0.5226, "num_tokens": 3161456572.0, "step": 4134 }, { "epoch": 1.514793441421636, "grad_norm": 0.15442698852702044, "learning_rate": 3.341360795092191e-05, "loss": 0.49, "num_tokens": 3162267598.0, "step": 4135 }, { "epoch": 1.515159842447559, "grad_norm": 0.14603721659286756, "learning_rate": 3.341030375244173e-05, "loss": 0.5119, "num_tokens": 3162953402.0, "step": 4136 }, { "epoch": 1.5155262434734817, "grad_norm": 0.15291922811081277, "learning_rate": 3.340699891105321e-05, "loss": 0.5479, "num_tokens": 3163646283.0, "step": 4137 }, { "epoch": 1.5158926444994045, "grad_norm": 0.16282125178233126, "learning_rate": 3.340369342694257e-05, "loss": 0.4868, "num_tokens": 3164469146.0, "step": 4138 }, { "epoch": 1.5162590455253273, "grad_norm": 0.141619108444792, "learning_rate": 3.340038730029605e-05, "loss": 0.4803, "num_tokens": 3165210474.0, "step": 4139 }, { "epoch": 1.5166254465512503, "grad_norm": 0.15195784034328141, "learning_rate": 3.3397080531299916e-05, "loss": 0.4952, "num_tokens": 3166086257.0, "step": 4140 }, { "epoch": 1.5169918475771733, "grad_norm": 0.13312748617187084, "learning_rate": 3.339377312014051e-05, "loss": 0.508, "num_tokens": 3166793478.0, "step": 4141 }, { "epoch": 1.517358248603096, "grad_norm": 0.16709909716712346, "learning_rate": 3.339046506700417e-05, "loss": 0.538, "num_tokens": 3167502967.0, "step": 4142 }, { "epoch": 1.5177246496290189, "grad_norm": 0.14701109071068727, "learning_rate": 3.33871563720773e-05, "loss": 0.5115, "num_tokens": 3168157333.0, "step": 4143 }, { "epoch": 1.5180910506549419, "grad_norm": 0.14641701825432993, "learning_rate": 3.338384703554632e-05, "loss": 0.4811, "num_tokens": 3169026839.0, "step": 4144 }, { "epoch": 1.5184574516808647, "grad_norm": 0.14144043262083514, "learning_rate": 3.3380537057597696e-05, "loss": 0.5157, "num_tokens": 3169833700.0, "step": 4145 }, { "epoch": 1.5188238527067877, "grad_norm": 0.15428475552828336, "learning_rate": 3.3377226438417925e-05, "loss": 0.5067, "num_tokens": 3170537219.0, "step": 4146 }, { "epoch": 1.5191902537327104, "grad_norm": 0.13319232129218037, "learning_rate": 3.337391517819354e-05, "loss": 0.4812, "num_tokens": 3171346362.0, "step": 4147 }, { "epoch": 1.5195566547586332, "grad_norm": 0.14199263232021667, "learning_rate": 3.337060327711112e-05, "loss": 0.4986, "num_tokens": 3172186523.0, "step": 4148 }, { "epoch": 1.5199230557845562, "grad_norm": 0.1305775955736984, "learning_rate": 3.336729073535726e-05, "loss": 0.5015, "num_tokens": 3172907027.0, "step": 4149 }, { "epoch": 1.5202894568104792, "grad_norm": 0.15824920098638823, "learning_rate": 3.336397755311862e-05, "loss": 0.4593, "num_tokens": 3173606407.0, "step": 4150 }, { "epoch": 1.520655857836402, "grad_norm": 0.13712519174087923, "learning_rate": 3.336066373058186e-05, "loss": 0.5042, "num_tokens": 3174482654.0, "step": 4151 }, { "epoch": 1.5210222588623248, "grad_norm": 0.13710348960908686, "learning_rate": 3.335734926793371e-05, "loss": 0.5241, "num_tokens": 3175236830.0, "step": 4152 }, { "epoch": 1.5213886598882476, "grad_norm": 0.15887973069373773, "learning_rate": 3.335403416536092e-05, "loss": 0.52, "num_tokens": 3175968826.0, "step": 4153 }, { "epoch": 1.5217550609141706, "grad_norm": 0.16937748555676735, "learning_rate": 3.335071842305028e-05, "loss": 0.5322, "num_tokens": 3176742399.0, "step": 4154 }, { "epoch": 1.5221214619400936, "grad_norm": 0.15132332375612934, "learning_rate": 3.3347402041188614e-05, "loss": 0.4868, "num_tokens": 3177416443.0, "step": 4155 }, { "epoch": 1.5224878629660163, "grad_norm": 0.14228501291155407, "learning_rate": 3.334408501996277e-05, "loss": 0.5046, "num_tokens": 3178202284.0, "step": 4156 }, { "epoch": 1.5228542639919391, "grad_norm": 0.1715228321532074, "learning_rate": 3.334076735955965e-05, "loss": 0.5063, "num_tokens": 3178993549.0, "step": 4157 }, { "epoch": 1.523220665017862, "grad_norm": 0.1454533281788289, "learning_rate": 3.33374490601662e-05, "loss": 0.4791, "num_tokens": 3179860347.0, "step": 4158 }, { "epoch": 1.523587066043785, "grad_norm": 0.14641594724006812, "learning_rate": 3.333413012196937e-05, "loss": 0.4927, "num_tokens": 3180577340.0, "step": 4159 }, { "epoch": 1.523953467069708, "grad_norm": 0.1436919923858413, "learning_rate": 3.333081054515617e-05, "loss": 0.534, "num_tokens": 3181335346.0, "step": 4160 }, { "epoch": 1.5243198680956307, "grad_norm": 0.15369522117325654, "learning_rate": 3.332749032991364e-05, "loss": 0.5054, "num_tokens": 3182069687.0, "step": 4161 }, { "epoch": 1.5246862691215535, "grad_norm": 0.16421436386224453, "learning_rate": 3.3324169476428864e-05, "loss": 0.5426, "num_tokens": 3182762997.0, "step": 4162 }, { "epoch": 1.5250526701474763, "grad_norm": 0.14124029122103005, "learning_rate": 3.332084798488894e-05, "loss": 0.4748, "num_tokens": 3183648151.0, "step": 4163 }, { "epoch": 1.5254190711733993, "grad_norm": 0.25161347627450814, "learning_rate": 3.3317525855481026e-05, "loss": 0.5133, "num_tokens": 3184248514.0, "step": 4164 }, { "epoch": 1.5257854721993223, "grad_norm": 0.15893387094147807, "learning_rate": 3.33142030883923e-05, "loss": 0.5233, "num_tokens": 3185007152.0, "step": 4165 }, { "epoch": 1.526151873225245, "grad_norm": 0.15054743154184985, "learning_rate": 3.3310879683809985e-05, "loss": 0.5431, "num_tokens": 3185752931.0, "step": 4166 }, { "epoch": 1.5265182742511678, "grad_norm": 0.15343425979503594, "learning_rate": 3.3307555641921334e-05, "loss": 0.5227, "num_tokens": 3186425125.0, "step": 4167 }, { "epoch": 1.5268846752770908, "grad_norm": 0.15114237525790486, "learning_rate": 3.330423096291365e-05, "loss": 0.5102, "num_tokens": 3187216421.0, "step": 4168 }, { "epoch": 1.5272510763030136, "grad_norm": 0.155323932861794, "learning_rate": 3.330090564697424e-05, "loss": 0.5139, "num_tokens": 3187965847.0, "step": 4169 }, { "epoch": 1.5276174773289366, "grad_norm": 0.14271738940031678, "learning_rate": 3.3297579694290485e-05, "loss": 0.4831, "num_tokens": 3188680600.0, "step": 4170 }, { "epoch": 1.5279838783548594, "grad_norm": 0.12850879654239164, "learning_rate": 3.3294253105049774e-05, "loss": 0.5043, "num_tokens": 3189542640.0, "step": 4171 }, { "epoch": 1.5283502793807822, "grad_norm": 0.14976366766050533, "learning_rate": 3.3290925879439545e-05, "loss": 0.4753, "num_tokens": 3190366650.0, "step": 4172 }, { "epoch": 1.5287166804067052, "grad_norm": 0.14447117427512865, "learning_rate": 3.328759801764728e-05, "loss": 0.5091, "num_tokens": 3191093976.0, "step": 4173 }, { "epoch": 1.5290830814326282, "grad_norm": 0.16031906040960553, "learning_rate": 3.328426951986046e-05, "loss": 0.4859, "num_tokens": 3191871864.0, "step": 4174 }, { "epoch": 1.529449482458551, "grad_norm": 0.16654834527372167, "learning_rate": 3.328094038626666e-05, "loss": 0.4916, "num_tokens": 3192778168.0, "step": 4175 }, { "epoch": 1.5298158834844737, "grad_norm": 0.1457609192015339, "learning_rate": 3.327761061705343e-05, "loss": 0.5141, "num_tokens": 3193503359.0, "step": 4176 }, { "epoch": 1.5301822845103965, "grad_norm": 0.1732829284095675, "learning_rate": 3.32742802124084e-05, "loss": 0.4929, "num_tokens": 3194272067.0, "step": 4177 }, { "epoch": 1.5305486855363195, "grad_norm": 0.16780785143219093, "learning_rate": 3.327094917251922e-05, "loss": 0.5053, "num_tokens": 3194923626.0, "step": 4178 }, { "epoch": 1.5309150865622425, "grad_norm": 0.14830789786832185, "learning_rate": 3.326761749757356e-05, "loss": 0.4872, "num_tokens": 3195729002.0, "step": 4179 }, { "epoch": 1.5312814875881653, "grad_norm": 0.14537253976242034, "learning_rate": 3.326428518775917e-05, "loss": 0.4972, "num_tokens": 3196443177.0, "step": 4180 }, { "epoch": 1.531647888614088, "grad_norm": 0.17307978291558068, "learning_rate": 3.3260952243263784e-05, "loss": 0.475, "num_tokens": 3197206364.0, "step": 4181 }, { "epoch": 1.5320142896400109, "grad_norm": 0.14373875598201794, "learning_rate": 3.32576186642752e-05, "loss": 0.4685, "num_tokens": 3197952685.0, "step": 4182 }, { "epoch": 1.5323806906659339, "grad_norm": 0.136715130554832, "learning_rate": 3.325428445098126e-05, "loss": 0.5047, "num_tokens": 3198758211.0, "step": 4183 }, { "epoch": 1.5327470916918569, "grad_norm": 0.16987844087837517, "learning_rate": 3.325094960356981e-05, "loss": 0.5203, "num_tokens": 3199434077.0, "step": 4184 }, { "epoch": 1.5331134927177796, "grad_norm": 0.1474792687647113, "learning_rate": 3.324761412222876e-05, "loss": 0.4845, "num_tokens": 3200229225.0, "step": 4185 }, { "epoch": 1.5334798937437024, "grad_norm": 0.16211665870502956, "learning_rate": 3.324427800714605e-05, "loss": 0.5498, "num_tokens": 3200875124.0, "step": 4186 }, { "epoch": 1.5338462947696252, "grad_norm": 0.1558189840652415, "learning_rate": 3.324094125850964e-05, "loss": 0.4996, "num_tokens": 3201677524.0, "step": 4187 }, { "epoch": 1.5342126957955482, "grad_norm": 0.13547260201041883, "learning_rate": 3.323760387650755e-05, "loss": 0.5092, "num_tokens": 3202392471.0, "step": 4188 }, { "epoch": 1.5345790968214712, "grad_norm": 0.1717903293444168, "learning_rate": 3.323426586132781e-05, "loss": 0.5302, "num_tokens": 3203141916.0, "step": 4189 }, { "epoch": 1.534945497847394, "grad_norm": 0.1425195045025675, "learning_rate": 3.323092721315852e-05, "loss": 0.527, "num_tokens": 3203922037.0, "step": 4190 }, { "epoch": 1.5353118988733168, "grad_norm": 0.14199443790368713, "learning_rate": 3.322758793218777e-05, "loss": 0.4967, "num_tokens": 3204760321.0, "step": 4191 }, { "epoch": 1.5356782998992398, "grad_norm": 0.15826498612776732, "learning_rate": 3.322424801860373e-05, "loss": 0.4906, "num_tokens": 3205503754.0, "step": 4192 }, { "epoch": 1.5360447009251625, "grad_norm": 0.14184899241735813, "learning_rate": 3.3220907472594564e-05, "loss": 0.4735, "num_tokens": 3206200992.0, "step": 4193 }, { "epoch": 1.5364111019510855, "grad_norm": 0.16849834561014243, "learning_rate": 3.321756629434852e-05, "loss": 0.5338, "num_tokens": 3206901299.0, "step": 4194 }, { "epoch": 1.5367775029770083, "grad_norm": 0.1579772681032547, "learning_rate": 3.3214224484053834e-05, "loss": 0.5101, "num_tokens": 3207661802.0, "step": 4195 }, { "epoch": 1.537143904002931, "grad_norm": 0.14177469943457355, "learning_rate": 3.321088204189881e-05, "loss": 0.5171, "num_tokens": 3208568160.0, "step": 4196 }, { "epoch": 1.537510305028854, "grad_norm": 0.14521792917342194, "learning_rate": 3.3207538968071774e-05, "loss": 0.4982, "num_tokens": 3209294082.0, "step": 4197 }, { "epoch": 1.537876706054777, "grad_norm": 0.16371816279461862, "learning_rate": 3.3204195262761095e-05, "loss": 0.5076, "num_tokens": 3210068814.0, "step": 4198 }, { "epoch": 1.5382431070807, "grad_norm": 0.16145711965058335, "learning_rate": 3.3200850926155155e-05, "loss": 0.5206, "num_tokens": 3210842095.0, "step": 4199 }, { "epoch": 1.5386095081066227, "grad_norm": 0.13122877671040678, "learning_rate": 3.3197505958442414e-05, "loss": 0.5002, "num_tokens": 3211612581.0, "step": 4200 }, { "epoch": 1.5389759091325454, "grad_norm": 0.16132334614044547, "learning_rate": 3.319416035981132e-05, "loss": 0.5282, "num_tokens": 3212351747.0, "step": 4201 }, { "epoch": 1.5393423101584685, "grad_norm": 0.15974562143569165, "learning_rate": 3.319081413045039e-05, "loss": 0.4711, "num_tokens": 3213224410.0, "step": 4202 }, { "epoch": 1.5397087111843915, "grad_norm": 0.16294641104085741, "learning_rate": 3.318746727054817e-05, "loss": 0.4826, "num_tokens": 3213916121.0, "step": 4203 }, { "epoch": 1.5400751122103142, "grad_norm": 0.15854852192667548, "learning_rate": 3.318411978029323e-05, "loss": 0.4981, "num_tokens": 3214665559.0, "step": 4204 }, { "epoch": 1.540441513236237, "grad_norm": 0.19252542335978928, "learning_rate": 3.318077165987419e-05, "loss": 0.5458, "num_tokens": 3215356327.0, "step": 4205 }, { "epoch": 1.5408079142621598, "grad_norm": 0.15736439569532323, "learning_rate": 3.3177422909479684e-05, "loss": 0.5257, "num_tokens": 3216066971.0, "step": 4206 }, { "epoch": 1.5411743152880828, "grad_norm": 0.20932374975439225, "learning_rate": 3.31740735292984e-05, "loss": 0.4963, "num_tokens": 3216892900.0, "step": 4207 }, { "epoch": 1.5415407163140058, "grad_norm": 0.15972569217593857, "learning_rate": 3.317072351951907e-05, "loss": 0.5196, "num_tokens": 3217720390.0, "step": 4208 }, { "epoch": 1.5419071173399286, "grad_norm": 0.18998888215551146, "learning_rate": 3.3167372880330436e-05, "loss": 0.5331, "num_tokens": 3218465547.0, "step": 4209 }, { "epoch": 1.5422735183658514, "grad_norm": 0.16000748972324527, "learning_rate": 3.31640216119213e-05, "loss": 0.5091, "num_tokens": 3219161743.0, "step": 4210 }, { "epoch": 1.5426399193917741, "grad_norm": 0.13871378805115667, "learning_rate": 3.316066971448048e-05, "loss": 0.521, "num_tokens": 3219921881.0, "step": 4211 }, { "epoch": 1.5430063204176971, "grad_norm": 0.17618848614511468, "learning_rate": 3.315731718819684e-05, "loss": 0.5012, "num_tokens": 3220711842.0, "step": 4212 }, { "epoch": 1.5433727214436201, "grad_norm": 0.1748648841008551, "learning_rate": 3.3153964033259267e-05, "loss": 0.5161, "num_tokens": 3221531101.0, "step": 4213 }, { "epoch": 1.543739122469543, "grad_norm": 0.14331758385863497, "learning_rate": 3.315061024985671e-05, "loss": 0.4801, "num_tokens": 3222340209.0, "step": 4214 }, { "epoch": 1.5441055234954657, "grad_norm": 0.19160755227895326, "learning_rate": 3.3147255838178116e-05, "loss": 0.5183, "num_tokens": 3223148308.0, "step": 4215 }, { "epoch": 1.5444719245213887, "grad_norm": 0.15983420868499665, "learning_rate": 3.3143900798412503e-05, "loss": 0.47, "num_tokens": 3223901635.0, "step": 4216 }, { "epoch": 1.5448383255473115, "grad_norm": 0.14067334036610107, "learning_rate": 3.31405451307489e-05, "loss": 0.5017, "num_tokens": 3224708657.0, "step": 4217 }, { "epoch": 1.5452047265732345, "grad_norm": 0.17283587913781157, "learning_rate": 3.31371888353764e-05, "loss": 0.4869, "num_tokens": 3225424337.0, "step": 4218 }, { "epoch": 1.5455711275991573, "grad_norm": 0.18125122361942475, "learning_rate": 3.313383191248409e-05, "loss": 0.5039, "num_tokens": 3226210681.0, "step": 4219 }, { "epoch": 1.54593752862508, "grad_norm": 0.15777465927661954, "learning_rate": 3.313047436226111e-05, "loss": 0.5336, "num_tokens": 3226996011.0, "step": 4220 }, { "epoch": 1.546303929651003, "grad_norm": 0.18402824774005905, "learning_rate": 3.312711618489666e-05, "loss": 0.5115, "num_tokens": 3227693797.0, "step": 4221 }, { "epoch": 1.546670330676926, "grad_norm": 0.1390962008186881, "learning_rate": 3.3123757380579945e-05, "loss": 0.5125, "num_tokens": 3228446796.0, "step": 4222 }, { "epoch": 1.5470367317028488, "grad_norm": 0.16627496804283545, "learning_rate": 3.3120397949500224e-05, "loss": 0.4847, "num_tokens": 3229176595.0, "step": 4223 }, { "epoch": 1.5474031327287716, "grad_norm": 0.14204839818691034, "learning_rate": 3.311703789184677e-05, "loss": 0.4936, "num_tokens": 3229969736.0, "step": 4224 }, { "epoch": 1.5477695337546944, "grad_norm": 0.13993124797246578, "learning_rate": 3.3113677207808907e-05, "loss": 0.5157, "num_tokens": 3230680924.0, "step": 4225 }, { "epoch": 1.5481359347806174, "grad_norm": 0.15225180345887782, "learning_rate": 3.3110315897575994e-05, "loss": 0.5014, "num_tokens": 3231432194.0, "step": 4226 }, { "epoch": 1.5485023358065404, "grad_norm": 0.15398443368568682, "learning_rate": 3.3106953961337426e-05, "loss": 0.4962, "num_tokens": 3232179511.0, "step": 4227 }, { "epoch": 1.5488687368324632, "grad_norm": 0.14376381157832932, "learning_rate": 3.3103591399282625e-05, "loss": 0.5158, "num_tokens": 3232910266.0, "step": 4228 }, { "epoch": 1.549235137858386, "grad_norm": 0.15938388285427937, "learning_rate": 3.310022821160105e-05, "loss": 0.4715, "num_tokens": 3233569855.0, "step": 4229 }, { "epoch": 1.5496015388843087, "grad_norm": 0.14481789967624745, "learning_rate": 3.30968643984822e-05, "loss": 0.5466, "num_tokens": 3234302479.0, "step": 4230 }, { "epoch": 1.5499679399102317, "grad_norm": 0.16829406215372944, "learning_rate": 3.309349996011561e-05, "loss": 0.484, "num_tokens": 3234987068.0, "step": 4231 }, { "epoch": 1.5503343409361547, "grad_norm": 0.14731144646792974, "learning_rate": 3.309013489669085e-05, "loss": 0.5209, "num_tokens": 3235828443.0, "step": 4232 }, { "epoch": 1.5507007419620775, "grad_norm": 0.16252420861459638, "learning_rate": 3.308676920839751e-05, "loss": 0.4711, "num_tokens": 3236611070.0, "step": 4233 }, { "epoch": 1.5510671429880003, "grad_norm": 0.15279971003515594, "learning_rate": 3.308340289542525e-05, "loss": 0.4861, "num_tokens": 3237351347.0, "step": 4234 }, { "epoch": 1.551433544013923, "grad_norm": 0.1376344062402992, "learning_rate": 3.3080035957963734e-05, "loss": 0.5114, "num_tokens": 3238271740.0, "step": 4235 }, { "epoch": 1.551799945039846, "grad_norm": 0.14848534653966974, "learning_rate": 3.3076668396202657e-05, "loss": 0.4985, "num_tokens": 3239004536.0, "step": 4236 }, { "epoch": 1.552166346065769, "grad_norm": 0.1719020817415842, "learning_rate": 3.307330021033178e-05, "loss": 0.4887, "num_tokens": 3239881038.0, "step": 4237 }, { "epoch": 1.5525327470916919, "grad_norm": 0.14874176535910627, "learning_rate": 3.306993140054087e-05, "loss": 0.513, "num_tokens": 3240690241.0, "step": 4238 }, { "epoch": 1.5528991481176146, "grad_norm": 0.16756861479880722, "learning_rate": 3.306656196701976e-05, "loss": 0.498, "num_tokens": 3241408893.0, "step": 4239 }, { "epoch": 1.5532655491435376, "grad_norm": 0.1777034961747664, "learning_rate": 3.3063191909958274e-05, "loss": 0.5089, "num_tokens": 3242177900.0, "step": 4240 }, { "epoch": 1.5536319501694604, "grad_norm": 0.17400717787953854, "learning_rate": 3.305982122954631e-05, "loss": 0.4766, "num_tokens": 3242920566.0, "step": 4241 }, { "epoch": 1.5539983511953834, "grad_norm": 0.14797129899683592, "learning_rate": 3.305644992597379e-05, "loss": 0.5093, "num_tokens": 3243647014.0, "step": 4242 }, { "epoch": 1.5543647522213062, "grad_norm": 0.1623384307930772, "learning_rate": 3.3053077999430655e-05, "loss": 0.4974, "num_tokens": 3244421867.0, "step": 4243 }, { "epoch": 1.554731153247229, "grad_norm": 0.1380531180480609, "learning_rate": 3.304970545010691e-05, "loss": 0.4854, "num_tokens": 3245177301.0, "step": 4244 }, { "epoch": 1.555097554273152, "grad_norm": 0.13861652222174606, "learning_rate": 3.304633227819258e-05, "loss": 0.4989, "num_tokens": 3245845919.0, "step": 4245 }, { "epoch": 1.555463955299075, "grad_norm": 0.16429403910449836, "learning_rate": 3.304295848387771e-05, "loss": 0.5056, "num_tokens": 3246592505.0, "step": 4246 }, { "epoch": 1.5558303563249978, "grad_norm": 0.13242991888325528, "learning_rate": 3.30395840673524e-05, "loss": 0.4651, "num_tokens": 3247404847.0, "step": 4247 }, { "epoch": 1.5561967573509206, "grad_norm": 0.139741998656048, "learning_rate": 3.303620902880679e-05, "loss": 0.5002, "num_tokens": 3248185488.0, "step": 4248 }, { "epoch": 1.5565631583768433, "grad_norm": 0.1352233559711742, "learning_rate": 3.303283336843104e-05, "loss": 0.5071, "num_tokens": 3248986279.0, "step": 4249 }, { "epoch": 1.5569295594027663, "grad_norm": 0.1712377466142036, "learning_rate": 3.302945708641535e-05, "loss": 0.5103, "num_tokens": 3249654016.0, "step": 4250 }, { "epoch": 1.5572959604286893, "grad_norm": 0.13939023178486634, "learning_rate": 3.302608018294994e-05, "loss": 0.4835, "num_tokens": 3250380346.0, "step": 4251 }, { "epoch": 1.5576623614546121, "grad_norm": 0.1503739902530832, "learning_rate": 3.30227026582251e-05, "loss": 0.488, "num_tokens": 3251213839.0, "step": 4252 }, { "epoch": 1.558028762480535, "grad_norm": 0.14439542547420628, "learning_rate": 3.301932451243113e-05, "loss": 0.5087, "num_tokens": 3252118218.0, "step": 4253 }, { "epoch": 1.5583951635064577, "grad_norm": 0.14801434138734948, "learning_rate": 3.301594574575837e-05, "loss": 0.4552, "num_tokens": 3252894937.0, "step": 4254 }, { "epoch": 1.5587615645323807, "grad_norm": 0.1650589349401528, "learning_rate": 3.301256635839719e-05, "loss": 0.4942, "num_tokens": 3253654419.0, "step": 4255 }, { "epoch": 1.5591279655583037, "grad_norm": 0.14835419200639655, "learning_rate": 3.300918635053801e-05, "loss": 0.5148, "num_tokens": 3254369872.0, "step": 4256 }, { "epoch": 1.5594943665842265, "grad_norm": 0.14259433527827825, "learning_rate": 3.300580572237126e-05, "loss": 0.5022, "num_tokens": 3255213866.0, "step": 4257 }, { "epoch": 1.5598607676101492, "grad_norm": 0.15784461540639894, "learning_rate": 3.300242447408744e-05, "loss": 0.5103, "num_tokens": 3256019899.0, "step": 4258 }, { "epoch": 1.560227168636072, "grad_norm": 0.1382746529967247, "learning_rate": 3.299904260587704e-05, "loss": 0.4842, "num_tokens": 3256824744.0, "step": 4259 }, { "epoch": 1.560593569661995, "grad_norm": 0.137904234787446, "learning_rate": 3.299566011793063e-05, "loss": 0.5124, "num_tokens": 3257587856.0, "step": 4260 }, { "epoch": 1.560959970687918, "grad_norm": 0.1550201754693506, "learning_rate": 3.2992277010438796e-05, "loss": 0.5088, "num_tokens": 3258442467.0, "step": 4261 }, { "epoch": 1.5613263717138408, "grad_norm": 0.14468172670268123, "learning_rate": 3.298889328359214e-05, "loss": 0.5269, "num_tokens": 3259196359.0, "step": 4262 }, { "epoch": 1.5616927727397636, "grad_norm": 0.17344166293497662, "learning_rate": 3.2985508937581325e-05, "loss": 0.5109, "num_tokens": 3259831321.0, "step": 4263 }, { "epoch": 1.5620591737656866, "grad_norm": 0.1424372575585072, "learning_rate": 3.298212397259704e-05, "loss": 0.5115, "num_tokens": 3260557060.0, "step": 4264 }, { "epoch": 1.5624255747916094, "grad_norm": 0.14025459041211435, "learning_rate": 3.297873838883002e-05, "loss": 0.4815, "num_tokens": 3261326339.0, "step": 4265 }, { "epoch": 1.5627919758175324, "grad_norm": 0.15460897894121134, "learning_rate": 3.297535218647101e-05, "loss": 0.5073, "num_tokens": 3262155149.0, "step": 4266 }, { "epoch": 1.5631583768434552, "grad_norm": 0.13465536062948907, "learning_rate": 3.2971965365710805e-05, "loss": 0.4839, "num_tokens": 3262836320.0, "step": 4267 }, { "epoch": 1.563524777869378, "grad_norm": 0.17677312286204944, "learning_rate": 3.296857792674025e-05, "loss": 0.5129, "num_tokens": 3263643068.0, "step": 4268 }, { "epoch": 1.563891178895301, "grad_norm": 0.1557682992131503, "learning_rate": 3.2965189869750185e-05, "loss": 0.5116, "num_tokens": 3264426054.0, "step": 4269 }, { "epoch": 1.564257579921224, "grad_norm": 0.14443443495288438, "learning_rate": 3.2961801194931524e-05, "loss": 0.5102, "num_tokens": 3265296777.0, "step": 4270 }, { "epoch": 1.5646239809471467, "grad_norm": 0.16729691414877162, "learning_rate": 3.29584119024752e-05, "loss": 0.4963, "num_tokens": 3266103792.0, "step": 4271 }, { "epoch": 1.5649903819730695, "grad_norm": 0.15039519170347274, "learning_rate": 3.295502199257218e-05, "loss": 0.5017, "num_tokens": 3266890915.0, "step": 4272 }, { "epoch": 1.5653567829989923, "grad_norm": 0.14152259347267426, "learning_rate": 3.295163146541346e-05, "loss": 0.486, "num_tokens": 3267700098.0, "step": 4273 }, { "epoch": 1.5657231840249153, "grad_norm": 0.1593491069379987, "learning_rate": 3.2948240321190076e-05, "loss": 0.4921, "num_tokens": 3268539406.0, "step": 4274 }, { "epoch": 1.5660895850508383, "grad_norm": 0.1414685484325683, "learning_rate": 3.294484856009312e-05, "loss": 0.4947, "num_tokens": 3269342917.0, "step": 4275 }, { "epoch": 1.566455986076761, "grad_norm": 0.1454462424817988, "learning_rate": 3.294145618231367e-05, "loss": 0.4825, "num_tokens": 3270141468.0, "step": 4276 }, { "epoch": 1.5668223871026838, "grad_norm": 0.15361814097020254, "learning_rate": 3.29380631880429e-05, "loss": 0.4671, "num_tokens": 3270940932.0, "step": 4277 }, { "epoch": 1.5671887881286066, "grad_norm": 0.14602772112309745, "learning_rate": 3.293466957747196e-05, "loss": 0.5003, "num_tokens": 3271725728.0, "step": 4278 }, { "epoch": 1.5675551891545296, "grad_norm": 0.1844715634838303, "learning_rate": 3.293127535079208e-05, "loss": 0.5264, "num_tokens": 3272407279.0, "step": 4279 }, { "epoch": 1.5679215901804526, "grad_norm": 0.14624807550561197, "learning_rate": 3.2927880508194485e-05, "loss": 0.4678, "num_tokens": 3273154410.0, "step": 4280 }, { "epoch": 1.5682879912063754, "grad_norm": 0.16836294070283042, "learning_rate": 3.2924485049870486e-05, "loss": 0.5384, "num_tokens": 3273951844.0, "step": 4281 }, { "epoch": 1.5686543922322982, "grad_norm": 0.19998214327544372, "learning_rate": 3.292108897601137e-05, "loss": 0.5032, "num_tokens": 3274756617.0, "step": 4282 }, { "epoch": 1.569020793258221, "grad_norm": 0.14535530368039226, "learning_rate": 3.29176922868085e-05, "loss": 0.4798, "num_tokens": 3275425862.0, "step": 4283 }, { "epoch": 1.569387194284144, "grad_norm": 0.15883267597500209, "learning_rate": 3.291429498245326e-05, "loss": 0.5092, "num_tokens": 3276126816.0, "step": 4284 }, { "epoch": 1.569753595310067, "grad_norm": 0.17809773128718243, "learning_rate": 3.291089706313708e-05, "loss": 0.4983, "num_tokens": 3276712647.0, "step": 4285 }, { "epoch": 1.5701199963359898, "grad_norm": 0.1881861217980349, "learning_rate": 3.2907498529051396e-05, "loss": 0.5157, "num_tokens": 3277461863.0, "step": 4286 }, { "epoch": 1.5704863973619125, "grad_norm": 0.13630409002529373, "learning_rate": 3.29040993803877e-05, "loss": 0.4795, "num_tokens": 3278136456.0, "step": 4287 }, { "epoch": 1.5708527983878355, "grad_norm": 0.16422489057526193, "learning_rate": 3.2900699617337526e-05, "loss": 0.4941, "num_tokens": 3278855102.0, "step": 4288 }, { "epoch": 1.5712191994137583, "grad_norm": 0.15969026659768354, "learning_rate": 3.289729924009243e-05, "loss": 0.5007, "num_tokens": 3279517309.0, "step": 4289 }, { "epoch": 1.5715856004396813, "grad_norm": 0.1709384227393181, "learning_rate": 3.2893898248843983e-05, "loss": 0.5355, "num_tokens": 3280308633.0, "step": 4290 }, { "epoch": 1.571952001465604, "grad_norm": 0.15855631232295983, "learning_rate": 3.289049664378384e-05, "loss": 0.5208, "num_tokens": 3281062614.0, "step": 4291 }, { "epoch": 1.5723184024915269, "grad_norm": 0.1749039774942213, "learning_rate": 3.288709442510366e-05, "loss": 0.4934, "num_tokens": 3281937473.0, "step": 4292 }, { "epoch": 1.5726848035174499, "grad_norm": 0.14421563945948992, "learning_rate": 3.288369159299512e-05, "loss": 0.4938, "num_tokens": 3282892349.0, "step": 4293 }, { "epoch": 1.5730512045433729, "grad_norm": 0.1400473627203084, "learning_rate": 3.288028814764997e-05, "loss": 0.5064, "num_tokens": 3283688668.0, "step": 4294 }, { "epoch": 1.5734176055692957, "grad_norm": 0.1514289839945984, "learning_rate": 3.287688408925997e-05, "loss": 0.5695, "num_tokens": 3284302376.0, "step": 4295 }, { "epoch": 1.5737840065952184, "grad_norm": 0.16963640688962045, "learning_rate": 3.287347941801692e-05, "loss": 0.4969, "num_tokens": 3285040280.0, "step": 4296 }, { "epoch": 1.5741504076211412, "grad_norm": 0.15325070503777757, "learning_rate": 3.2870074134112656e-05, "loss": 0.5301, "num_tokens": 3285788982.0, "step": 4297 }, { "epoch": 1.5745168086470642, "grad_norm": 0.15185231862208773, "learning_rate": 3.2866668237739035e-05, "loss": 0.5055, "num_tokens": 3286630985.0, "step": 4298 }, { "epoch": 1.5748832096729872, "grad_norm": 0.14493550045566683, "learning_rate": 3.286326172908798e-05, "loss": 0.502, "num_tokens": 3287303075.0, "step": 4299 }, { "epoch": 1.57524961069891, "grad_norm": 0.16754993647376643, "learning_rate": 3.285985460835141e-05, "loss": 0.4942, "num_tokens": 3288016069.0, "step": 4300 }, { "epoch": 1.5756160117248328, "grad_norm": 0.13272699473174016, "learning_rate": 3.2856446875721315e-05, "loss": 0.4747, "num_tokens": 3288856449.0, "step": 4301 }, { "epoch": 1.5759824127507556, "grad_norm": 0.15577029501854128, "learning_rate": 3.2853038531389686e-05, "loss": 0.5249, "num_tokens": 3289649330.0, "step": 4302 }, { "epoch": 1.5763488137766786, "grad_norm": 0.16334333576023324, "learning_rate": 3.284962957554859e-05, "loss": 0.5242, "num_tokens": 3290312936.0, "step": 4303 }, { "epoch": 1.5767152148026016, "grad_norm": 0.1668520879933911, "learning_rate": 3.284622000839006e-05, "loss": 0.4879, "num_tokens": 3291027133.0, "step": 4304 }, { "epoch": 1.5770816158285244, "grad_norm": 0.13160203956616093, "learning_rate": 3.2842809830106256e-05, "loss": 0.5057, "num_tokens": 3291826125.0, "step": 4305 }, { "epoch": 1.5774480168544471, "grad_norm": 0.1593499622430274, "learning_rate": 3.283939904088929e-05, "loss": 0.4855, "num_tokens": 3292669096.0, "step": 4306 }, { "epoch": 1.57781441788037, "grad_norm": 0.16766374652246097, "learning_rate": 3.283598764093135e-05, "loss": 0.5198, "num_tokens": 3293296617.0, "step": 4307 }, { "epoch": 1.578180818906293, "grad_norm": 0.1607084431008895, "learning_rate": 3.2832575630424654e-05, "loss": 0.5003, "num_tokens": 3294009548.0, "step": 4308 }, { "epoch": 1.578547219932216, "grad_norm": 0.1526744859845339, "learning_rate": 3.282916300956144e-05, "loss": 0.5483, "num_tokens": 3294799411.0, "step": 4309 }, { "epoch": 1.5789136209581387, "grad_norm": 0.14984204536052503, "learning_rate": 3.282574977853401e-05, "loss": 0.4751, "num_tokens": 3295482809.0, "step": 4310 }, { "epoch": 1.5792800219840615, "grad_norm": 0.14932443855408695, "learning_rate": 3.282233593753466e-05, "loss": 0.4903, "num_tokens": 3296207845.0, "step": 4311 }, { "epoch": 1.5796464230099845, "grad_norm": 0.13987993767161253, "learning_rate": 3.281892148675575e-05, "loss": 0.4994, "num_tokens": 3297061397.0, "step": 4312 }, { "epoch": 1.5800128240359073, "grad_norm": 0.15459456070829994, "learning_rate": 3.2815506426389664e-05, "loss": 0.5298, "num_tokens": 3297725607.0, "step": 4313 }, { "epoch": 1.5803792250618303, "grad_norm": 0.1765516028187383, "learning_rate": 3.281209075662883e-05, "loss": 0.5272, "num_tokens": 3298438090.0, "step": 4314 }, { "epoch": 1.580745626087753, "grad_norm": 0.18804292586787064, "learning_rate": 3.280867447766569e-05, "loss": 0.4954, "num_tokens": 3299240231.0, "step": 4315 }, { "epoch": 1.5811120271136758, "grad_norm": 0.14148869105008463, "learning_rate": 3.2805257589692735e-05, "loss": 0.5252, "num_tokens": 3300044614.0, "step": 4316 }, { "epoch": 1.5814784281395988, "grad_norm": 0.16517463919395622, "learning_rate": 3.28018400929025e-05, "loss": 0.5009, "num_tokens": 3300723554.0, "step": 4317 }, { "epoch": 1.5818448291655218, "grad_norm": 0.16972000168245038, "learning_rate": 3.279842198748752e-05, "loss": 0.5161, "num_tokens": 3301503766.0, "step": 4318 }, { "epoch": 1.5822112301914446, "grad_norm": 0.1270241711619597, "learning_rate": 3.279500327364041e-05, "loss": 0.5221, "num_tokens": 3302401202.0, "step": 4319 }, { "epoch": 1.5825776312173674, "grad_norm": 0.16041685500986191, "learning_rate": 3.2791583951553784e-05, "loss": 0.5148, "num_tokens": 3303171181.0, "step": 4320 }, { "epoch": 1.5829440322432902, "grad_norm": 0.14458601164850332, "learning_rate": 3.2788164021420305e-05, "loss": 0.5041, "num_tokens": 3303903756.0, "step": 4321 }, { "epoch": 1.5833104332692132, "grad_norm": 0.1576590915901619, "learning_rate": 3.278474348343267e-05, "loss": 0.5329, "num_tokens": 3304649381.0, "step": 4322 }, { "epoch": 1.5836768342951362, "grad_norm": 0.17288809407657657, "learning_rate": 3.2781322337783604e-05, "loss": 0.5538, "num_tokens": 3305277817.0, "step": 4323 }, { "epoch": 1.584043235321059, "grad_norm": 0.16184646973061012, "learning_rate": 3.2777900584665864e-05, "loss": 0.4835, "num_tokens": 3306086532.0, "step": 4324 }, { "epoch": 1.5844096363469817, "grad_norm": 0.16963200760923797, "learning_rate": 3.277447822427226e-05, "loss": 0.5136, "num_tokens": 3306774720.0, "step": 4325 }, { "epoch": 1.5847760373729045, "grad_norm": 0.1478968209971538, "learning_rate": 3.277105525679561e-05, "loss": 0.4772, "num_tokens": 3307505991.0, "step": 4326 }, { "epoch": 1.5851424383988275, "grad_norm": 0.14992746597779735, "learning_rate": 3.276763168242879e-05, "loss": 0.5096, "num_tokens": 3308185704.0, "step": 4327 }, { "epoch": 1.5855088394247505, "grad_norm": 0.15887797795215738, "learning_rate": 3.276420750136469e-05, "loss": 0.5079, "num_tokens": 3308905880.0, "step": 4328 }, { "epoch": 1.5858752404506733, "grad_norm": 0.16349230413335053, "learning_rate": 3.276078271379626e-05, "loss": 0.5193, "num_tokens": 3309622871.0, "step": 4329 }, { "epoch": 1.586241641476596, "grad_norm": 0.16829776928347376, "learning_rate": 3.275735731991645e-05, "loss": 0.466, "num_tokens": 3310330364.0, "step": 4330 }, { "epoch": 1.5866080425025189, "grad_norm": 0.17812978507016247, "learning_rate": 3.2753931319918276e-05, "loss": 0.5481, "num_tokens": 3311139179.0, "step": 4331 }, { "epoch": 1.5869744435284419, "grad_norm": 0.16186681818573542, "learning_rate": 3.275050471399476e-05, "loss": 0.5119, "num_tokens": 3311891128.0, "step": 4332 }, { "epoch": 1.5873408445543649, "grad_norm": 0.18010032754175173, "learning_rate": 3.274707750233899e-05, "loss": 0.4954, "num_tokens": 3312685783.0, "step": 4333 }, { "epoch": 1.5877072455802876, "grad_norm": 0.2002503747778091, "learning_rate": 3.274364968514405e-05, "loss": 0.493, "num_tokens": 3313495707.0, "step": 4334 }, { "epoch": 1.5880736466062104, "grad_norm": 0.1600004099000899, "learning_rate": 3.27402212626031e-05, "loss": 0.503, "num_tokens": 3314218598.0, "step": 4335 }, { "epoch": 1.5884400476321334, "grad_norm": 0.1904141848221993, "learning_rate": 3.27367922349093e-05, "loss": 0.5128, "num_tokens": 3314888253.0, "step": 4336 }, { "epoch": 1.5888064486580562, "grad_norm": 0.17285401078362547, "learning_rate": 3.2733362602255865e-05, "loss": 0.5273, "num_tokens": 3315708137.0, "step": 4337 }, { "epoch": 1.5891728496839792, "grad_norm": 0.1791817372483196, "learning_rate": 3.272993236483603e-05, "loss": 0.5442, "num_tokens": 3316443567.0, "step": 4338 }, { "epoch": 1.589539250709902, "grad_norm": 0.17150638679154756, "learning_rate": 3.272650152284306e-05, "loss": 0.5256, "num_tokens": 3317135771.0, "step": 4339 }, { "epoch": 1.5899056517358248, "grad_norm": 0.1566352861087932, "learning_rate": 3.272307007647029e-05, "loss": 0.5099, "num_tokens": 3317956808.0, "step": 4340 }, { "epoch": 1.5902720527617478, "grad_norm": 0.16857103420264713, "learning_rate": 3.2719638025911044e-05, "loss": 0.5073, "num_tokens": 3318767747.0, "step": 4341 }, { "epoch": 1.5906384537876708, "grad_norm": 0.16758439633138794, "learning_rate": 3.27162053713587e-05, "loss": 0.5221, "num_tokens": 3319551606.0, "step": 4342 }, { "epoch": 1.5910048548135935, "grad_norm": 0.14488311445783686, "learning_rate": 3.271277211300669e-05, "loss": 0.5065, "num_tokens": 3320374018.0, "step": 4343 }, { "epoch": 1.5913712558395163, "grad_norm": 0.16747528369126888, "learning_rate": 3.270933825104843e-05, "loss": 0.4863, "num_tokens": 3321209471.0, "step": 4344 }, { "epoch": 1.591737656865439, "grad_norm": 0.1316504775512227, "learning_rate": 3.270590378567741e-05, "loss": 0.4888, "num_tokens": 3322105059.0, "step": 4345 }, { "epoch": 1.5921040578913621, "grad_norm": 0.17014274817786218, "learning_rate": 3.270246871708714e-05, "loss": 0.5354, "num_tokens": 3323002533.0, "step": 4346 }, { "epoch": 1.5924704589172851, "grad_norm": 0.1332872961274366, "learning_rate": 3.269903304547119e-05, "loss": 0.5027, "num_tokens": 3323765927.0, "step": 4347 }, { "epoch": 1.592836859943208, "grad_norm": 0.1499293807495197, "learning_rate": 3.2695596771023126e-05, "loss": 0.4972, "num_tokens": 3324503096.0, "step": 4348 }, { "epoch": 1.5932032609691307, "grad_norm": 0.13468318739608884, "learning_rate": 3.269215989393655e-05, "loss": 0.4989, "num_tokens": 3325296752.0, "step": 4349 }, { "epoch": 1.5935696619950535, "grad_norm": 0.15643326860956028, "learning_rate": 3.268872241440512e-05, "loss": 0.5172, "num_tokens": 3326005326.0, "step": 4350 }, { "epoch": 1.5939360630209765, "grad_norm": 0.15940378941911668, "learning_rate": 3.268528433262253e-05, "loss": 0.5022, "num_tokens": 3326844162.0, "step": 4351 }, { "epoch": 1.5943024640468995, "grad_norm": 0.13808969158966403, "learning_rate": 3.2681845648782486e-05, "loss": 0.5055, "num_tokens": 3327628760.0, "step": 4352 }, { "epoch": 1.5946688650728222, "grad_norm": 0.13784901071665034, "learning_rate": 3.2678406363078754e-05, "loss": 0.4868, "num_tokens": 3328479783.0, "step": 4353 }, { "epoch": 1.595035266098745, "grad_norm": 0.18657688231764505, "learning_rate": 3.2674966475705096e-05, "loss": 0.491, "num_tokens": 3329133189.0, "step": 4354 }, { "epoch": 1.5954016671246678, "grad_norm": 0.15472603422542908, "learning_rate": 3.267152598685534e-05, "loss": 0.5577, "num_tokens": 3329819653.0, "step": 4355 }, { "epoch": 1.5957680681505908, "grad_norm": 0.15441483389889513, "learning_rate": 3.266808489672335e-05, "loss": 0.5212, "num_tokens": 3330539022.0, "step": 4356 }, { "epoch": 1.5961344691765138, "grad_norm": 0.15300391791514564, "learning_rate": 3.2664643205503e-05, "loss": 0.522, "num_tokens": 3331325186.0, "step": 4357 }, { "epoch": 1.5965008702024366, "grad_norm": 0.13829754496263824, "learning_rate": 3.266120091338821e-05, "loss": 0.5152, "num_tokens": 3332107703.0, "step": 4358 }, { "epoch": 1.5968672712283594, "grad_norm": 0.1597447109032274, "learning_rate": 3.265775802057295e-05, "loss": 0.5357, "num_tokens": 3332894455.0, "step": 4359 }, { "epoch": 1.5972336722542824, "grad_norm": 0.1451425344095095, "learning_rate": 3.2654314527251184e-05, "loss": 0.5282, "num_tokens": 3333731538.0, "step": 4360 }, { "epoch": 1.5976000732802051, "grad_norm": 0.14649942909723954, "learning_rate": 3.265087043361695e-05, "loss": 0.5032, "num_tokens": 3334554727.0, "step": 4361 }, { "epoch": 1.5979664743061281, "grad_norm": 0.14559827271367165, "learning_rate": 3.264742573986431e-05, "loss": 0.539, "num_tokens": 3335327837.0, "step": 4362 }, { "epoch": 1.598332875332051, "grad_norm": 0.1793226942956068, "learning_rate": 3.264398044618733e-05, "loss": 0.5517, "num_tokens": 3335946790.0, "step": 4363 }, { "epoch": 1.5986992763579737, "grad_norm": 0.15359631909991509, "learning_rate": 3.2640534552780166e-05, "loss": 0.4934, "num_tokens": 3336776675.0, "step": 4364 }, { "epoch": 1.5990656773838967, "grad_norm": 0.133267541725859, "learning_rate": 3.263708805983694e-05, "loss": 0.481, "num_tokens": 3337541052.0, "step": 4365 }, { "epoch": 1.5994320784098197, "grad_norm": 0.15517802488522395, "learning_rate": 3.263364096755187e-05, "loss": 0.5144, "num_tokens": 3338249381.0, "step": 4366 }, { "epoch": 1.5997984794357425, "grad_norm": 0.14306540908665608, "learning_rate": 3.263019327611917e-05, "loss": 0.4846, "num_tokens": 3338955962.0, "step": 4367 }, { "epoch": 1.6001648804616653, "grad_norm": 0.13378755771651635, "learning_rate": 3.26267449857331e-05, "loss": 0.5058, "num_tokens": 3339801930.0, "step": 4368 }, { "epoch": 1.600531281487588, "grad_norm": 0.14247793174086498, "learning_rate": 3.2623296096587945e-05, "loss": 0.5169, "num_tokens": 3340613725.0, "step": 4369 }, { "epoch": 1.600897682513511, "grad_norm": 0.177537013290781, "learning_rate": 3.2619846608878055e-05, "loss": 0.5068, "num_tokens": 3341373021.0, "step": 4370 }, { "epoch": 1.601264083539434, "grad_norm": 0.14978385988778928, "learning_rate": 3.261639652279776e-05, "loss": 0.489, "num_tokens": 3342104322.0, "step": 4371 }, { "epoch": 1.6016304845653568, "grad_norm": 0.16175452626747638, "learning_rate": 3.261294583854147e-05, "loss": 0.5125, "num_tokens": 3342843301.0, "step": 4372 }, { "epoch": 1.6019968855912796, "grad_norm": 0.15554015610710448, "learning_rate": 3.26094945563036e-05, "loss": 0.5332, "num_tokens": 3343632853.0, "step": 4373 }, { "epoch": 1.6023632866172024, "grad_norm": 0.14734275750707934, "learning_rate": 3.2606042676278634e-05, "loss": 0.4729, "num_tokens": 3344462176.0, "step": 4374 }, { "epoch": 1.6027296876431254, "grad_norm": 0.14759745827640916, "learning_rate": 3.2602590198661045e-05, "loss": 0.5057, "num_tokens": 3345153181.0, "step": 4375 }, { "epoch": 1.6030960886690484, "grad_norm": 0.1478804129929553, "learning_rate": 3.2599137123645376e-05, "loss": 0.5395, "num_tokens": 3345793536.0, "step": 4376 }, { "epoch": 1.6034624896949712, "grad_norm": 0.14871271360155447, "learning_rate": 3.2595683451426166e-05, "loss": 0.523, "num_tokens": 3346501865.0, "step": 4377 }, { "epoch": 1.603828890720894, "grad_norm": 0.15513624765570805, "learning_rate": 3.2592229182198036e-05, "loss": 0.4904, "num_tokens": 3347326002.0, "step": 4378 }, { "epoch": 1.6041952917468167, "grad_norm": 0.13707156472114734, "learning_rate": 3.258877431615561e-05, "loss": 0.5072, "num_tokens": 3348031844.0, "step": 4379 }, { "epoch": 1.6045616927727397, "grad_norm": 0.1765014146503345, "learning_rate": 3.2585318853493536e-05, "loss": 0.5511, "num_tokens": 3348763839.0, "step": 4380 }, { "epoch": 1.6049280937986627, "grad_norm": 0.13975343240486757, "learning_rate": 3.258186279440653e-05, "loss": 0.4709, "num_tokens": 3349531510.0, "step": 4381 }, { "epoch": 1.6052944948245855, "grad_norm": 0.1554528423232485, "learning_rate": 3.25784061390893e-05, "loss": 0.4892, "num_tokens": 3350195473.0, "step": 4382 }, { "epoch": 1.6056608958505083, "grad_norm": 0.1569304270217757, "learning_rate": 3.257494888773664e-05, "loss": 0.4867, "num_tokens": 3350969258.0, "step": 4383 }, { "epoch": 1.6060272968764313, "grad_norm": 0.14111485376413746, "learning_rate": 3.2571491040543313e-05, "loss": 0.5064, "num_tokens": 3351807247.0, "step": 4384 }, { "epoch": 1.606393697902354, "grad_norm": 0.15235052492565082, "learning_rate": 3.2568032597704176e-05, "loss": 0.5271, "num_tokens": 3352522932.0, "step": 4385 }, { "epoch": 1.606760098928277, "grad_norm": 0.15062642762211342, "learning_rate": 3.2564573559414077e-05, "loss": 0.5288, "num_tokens": 3353307945.0, "step": 4386 }, { "epoch": 1.6071264999541999, "grad_norm": 0.1398183218399914, "learning_rate": 3.256111392586793e-05, "loss": 0.5192, "num_tokens": 3354081197.0, "step": 4387 }, { "epoch": 1.6074929009801227, "grad_norm": 0.1569273841395495, "learning_rate": 3.255765369726065e-05, "loss": 0.5338, "num_tokens": 3354779720.0, "step": 4388 }, { "epoch": 1.6078593020060457, "grad_norm": 0.14897039817327634, "learning_rate": 3.255419287378721e-05, "loss": 0.5115, "num_tokens": 3355459681.0, "step": 4389 }, { "epoch": 1.6082257030319684, "grad_norm": 0.14907687926314545, "learning_rate": 3.2550731455642615e-05, "loss": 0.514, "num_tokens": 3356307789.0, "step": 4390 }, { "epoch": 1.6085921040578914, "grad_norm": 0.13896229310691277, "learning_rate": 3.254726944302188e-05, "loss": 0.5461, "num_tokens": 3357032597.0, "step": 4391 }, { "epoch": 1.6089585050838142, "grad_norm": 0.1315994297361333, "learning_rate": 3.254380683612008e-05, "loss": 0.4724, "num_tokens": 3357830893.0, "step": 4392 }, { "epoch": 1.609324906109737, "grad_norm": 0.14004576727749884, "learning_rate": 3.254034363513232e-05, "loss": 0.4811, "num_tokens": 3358638839.0, "step": 4393 }, { "epoch": 1.60969130713566, "grad_norm": 0.1369790884425269, "learning_rate": 3.253687984025373e-05, "loss": 0.4768, "num_tokens": 3359460579.0, "step": 4394 }, { "epoch": 1.610057708161583, "grad_norm": 0.15738164175527353, "learning_rate": 3.253341545167946e-05, "loss": 0.4941, "num_tokens": 3360162280.0, "step": 4395 }, { "epoch": 1.6104241091875058, "grad_norm": 0.14262486316746642, "learning_rate": 3.252995046960473e-05, "loss": 0.496, "num_tokens": 3360948477.0, "step": 4396 }, { "epoch": 1.6107905102134286, "grad_norm": 0.14849157713861472, "learning_rate": 3.252648489422476e-05, "loss": 0.5005, "num_tokens": 3361734750.0, "step": 4397 }, { "epoch": 1.6111569112393513, "grad_norm": 0.1460606605200792, "learning_rate": 3.252301872573483e-05, "loss": 0.499, "num_tokens": 3362527934.0, "step": 4398 }, { "epoch": 1.6115233122652743, "grad_norm": 0.13708641251745418, "learning_rate": 3.2519551964330224e-05, "loss": 0.5212, "num_tokens": 3363224383.0, "step": 4399 }, { "epoch": 1.6118897132911973, "grad_norm": 0.15689600904702244, "learning_rate": 3.2516084610206284e-05, "loss": 0.4961, "num_tokens": 3363947662.0, "step": 4400 }, { "epoch": 1.6122561143171201, "grad_norm": 0.1504527493230567, "learning_rate": 3.2512616663558375e-05, "loss": 0.5103, "num_tokens": 3364691014.0, "step": 4401 }, { "epoch": 1.612622515343043, "grad_norm": 0.13662947945058596, "learning_rate": 3.25091481245819e-05, "loss": 0.4805, "num_tokens": 3365410365.0, "step": 4402 }, { "epoch": 1.6129889163689657, "grad_norm": 0.14217409252686308, "learning_rate": 3.250567899347228e-05, "loss": 0.5024, "num_tokens": 3366233325.0, "step": 4403 }, { "epoch": 1.6133553173948887, "grad_norm": 0.1486716460611178, "learning_rate": 3.250220927042499e-05, "loss": 0.4842, "num_tokens": 3367036075.0, "step": 4404 }, { "epoch": 1.6137217184208117, "grad_norm": 0.15046423054085645, "learning_rate": 3.249873895563554e-05, "loss": 0.5132, "num_tokens": 3367718553.0, "step": 4405 }, { "epoch": 1.6140881194467345, "grad_norm": 0.15782357940095723, "learning_rate": 3.249526804929945e-05, "loss": 0.4949, "num_tokens": 3368529741.0, "step": 4406 }, { "epoch": 1.6144545204726573, "grad_norm": 0.14087719588267905, "learning_rate": 3.2491796551612286e-05, "loss": 0.4795, "num_tokens": 3369235305.0, "step": 4407 }, { "epoch": 1.61482092149858, "grad_norm": 0.1615135150049932, "learning_rate": 3.248832446276965e-05, "loss": 0.5028, "num_tokens": 3370082895.0, "step": 4408 }, { "epoch": 1.615187322524503, "grad_norm": 0.13957752216004202, "learning_rate": 3.248485178296718e-05, "loss": 0.4939, "num_tokens": 3370852904.0, "step": 4409 }, { "epoch": 1.615553723550426, "grad_norm": 0.1590264568968384, "learning_rate": 3.2481378512400544e-05, "loss": 0.4904, "num_tokens": 3371519088.0, "step": 4410 }, { "epoch": 1.6159201245763488, "grad_norm": 0.15946934741302157, "learning_rate": 3.247790465126543e-05, "loss": 0.5332, "num_tokens": 3372299244.0, "step": 4411 }, { "epoch": 1.6162865256022716, "grad_norm": 0.15663294375100628, "learning_rate": 3.247443019975758e-05, "loss": 0.5501, "num_tokens": 3373026783.0, "step": 4412 }, { "epoch": 1.6166529266281946, "grad_norm": 0.15435241066061708, "learning_rate": 3.2470955158072765e-05, "loss": 0.5021, "num_tokens": 3373813119.0, "step": 4413 }, { "epoch": 1.6170193276541174, "grad_norm": 0.14259913362632362, "learning_rate": 3.2467479526406775e-05, "loss": 0.4786, "num_tokens": 3374694745.0, "step": 4414 }, { "epoch": 1.6173857286800404, "grad_norm": 0.15825397471556163, "learning_rate": 3.2464003304955444e-05, "loss": 0.5277, "num_tokens": 3375369347.0, "step": 4415 }, { "epoch": 1.6177521297059632, "grad_norm": 0.14982184187290504, "learning_rate": 3.246052649391464e-05, "loss": 0.5191, "num_tokens": 3376136705.0, "step": 4416 }, { "epoch": 1.618118530731886, "grad_norm": 0.15739562362512527, "learning_rate": 3.245704909348027e-05, "loss": 0.4728, "num_tokens": 3376900247.0, "step": 4417 }, { "epoch": 1.618484931757809, "grad_norm": 0.15940566575608522, "learning_rate": 3.2453571103848256e-05, "loss": 0.5243, "num_tokens": 3377654827.0, "step": 4418 }, { "epoch": 1.618851332783732, "grad_norm": 0.1451215277083755, "learning_rate": 3.2450092525214564e-05, "loss": 0.5066, "num_tokens": 3378507734.0, "step": 4419 }, { "epoch": 1.6192177338096547, "grad_norm": 0.14638108643224923, "learning_rate": 3.2446613357775195e-05, "loss": 0.5148, "num_tokens": 3379455508.0, "step": 4420 }, { "epoch": 1.6195841348355775, "grad_norm": 0.1532133833320314, "learning_rate": 3.244313360172619e-05, "loss": 0.5014, "num_tokens": 3380268614.0, "step": 4421 }, { "epoch": 1.6199505358615003, "grad_norm": 0.13132317325586856, "learning_rate": 3.24396532572636e-05, "loss": 0.4545, "num_tokens": 3381128897.0, "step": 4422 }, { "epoch": 1.6203169368874233, "grad_norm": 0.13654162434958364, "learning_rate": 3.243617232458353e-05, "loss": 0.5199, "num_tokens": 3381930638.0, "step": 4423 }, { "epoch": 1.6206833379133463, "grad_norm": 0.12222329014832252, "learning_rate": 3.243269080388211e-05, "loss": 0.5089, "num_tokens": 3382839711.0, "step": 4424 }, { "epoch": 1.621049738939269, "grad_norm": 0.14536163774215716, "learning_rate": 3.2429208695355516e-05, "loss": 0.537, "num_tokens": 3383536942.0, "step": 4425 }, { "epoch": 1.6214161399651918, "grad_norm": 0.1444636582362562, "learning_rate": 3.242572599919993e-05, "loss": 0.4766, "num_tokens": 3384288272.0, "step": 4426 }, { "epoch": 1.6217825409911146, "grad_norm": 0.14928077547828747, "learning_rate": 3.242224271561159e-05, "loss": 0.523, "num_tokens": 3384974647.0, "step": 4427 }, { "epoch": 1.6221489420170376, "grad_norm": 0.15278708818734946, "learning_rate": 3.241875884478676e-05, "loss": 0.4897, "num_tokens": 3385811685.0, "step": 4428 }, { "epoch": 1.6225153430429606, "grad_norm": 0.1418285850300289, "learning_rate": 3.241527438692173e-05, "loss": 0.5142, "num_tokens": 3386620002.0, "step": 4429 }, { "epoch": 1.6228817440688834, "grad_norm": 0.1624913653367331, "learning_rate": 3.241178934221284e-05, "loss": 0.5307, "num_tokens": 3387379370.0, "step": 4430 }, { "epoch": 1.6232481450948062, "grad_norm": 0.14244395355338527, "learning_rate": 3.240830371085645e-05, "loss": 0.5009, "num_tokens": 3388191446.0, "step": 4431 }, { "epoch": 1.623614546120729, "grad_norm": 0.1510108238739513, "learning_rate": 3.240481749304895e-05, "loss": 0.5051, "num_tokens": 3388841511.0, "step": 4432 }, { "epoch": 1.623980947146652, "grad_norm": 0.16346068555162194, "learning_rate": 3.2401330688986784e-05, "loss": 0.4893, "num_tokens": 3389648583.0, "step": 4433 }, { "epoch": 1.624347348172575, "grad_norm": 0.16304135144090035, "learning_rate": 3.23978432988664e-05, "loss": 0.5366, "num_tokens": 3390361676.0, "step": 4434 }, { "epoch": 1.6247137491984978, "grad_norm": 0.14796769848316016, "learning_rate": 3.23943553228843e-05, "loss": 0.5372, "num_tokens": 3391084121.0, "step": 4435 }, { "epoch": 1.6250801502244205, "grad_norm": 0.17412593589025196, "learning_rate": 3.239086676123701e-05, "loss": 0.5189, "num_tokens": 3391846287.0, "step": 4436 }, { "epoch": 1.6254465512503435, "grad_norm": 0.14707693439938982, "learning_rate": 3.23873776141211e-05, "loss": 0.5335, "num_tokens": 3392579641.0, "step": 4437 }, { "epoch": 1.6258129522762663, "grad_norm": 0.15104741370648977, "learning_rate": 3.238388788173315e-05, "loss": 0.5046, "num_tokens": 3393355440.0, "step": 4438 }, { "epoch": 1.6261793533021893, "grad_norm": 0.1628843933353024, "learning_rate": 3.23803975642698e-05, "loss": 0.5203, "num_tokens": 3394088140.0, "step": 4439 }, { "epoch": 1.626545754328112, "grad_norm": 0.157252718448616, "learning_rate": 3.2376906661927695e-05, "loss": 0.5177, "num_tokens": 3394801995.0, "step": 4440 }, { "epoch": 1.6269121553540349, "grad_norm": 0.13790258660634522, "learning_rate": 3.237341517490354e-05, "loss": 0.5009, "num_tokens": 3395502569.0, "step": 4441 }, { "epoch": 1.6272785563799579, "grad_norm": 0.14872619896095024, "learning_rate": 3.2369923103394065e-05, "loss": 0.4944, "num_tokens": 3396305448.0, "step": 4442 }, { "epoch": 1.6276449574058809, "grad_norm": 0.1624517951572123, "learning_rate": 3.236643044759602e-05, "loss": 0.5099, "num_tokens": 3396972637.0, "step": 4443 }, { "epoch": 1.6280113584318037, "grad_norm": 0.15205109214607335, "learning_rate": 3.23629372077062e-05, "loss": 0.4924, "num_tokens": 3397594425.0, "step": 4444 }, { "epoch": 1.6283777594577264, "grad_norm": 0.15787214254976994, "learning_rate": 3.2359443383921436e-05, "loss": 0.5007, "num_tokens": 3398295756.0, "step": 4445 }, { "epoch": 1.6287441604836492, "grad_norm": 0.14356047809360642, "learning_rate": 3.235594897643857e-05, "loss": 0.5237, "num_tokens": 3399041723.0, "step": 4446 }, { "epoch": 1.6291105615095722, "grad_norm": 0.14320012644771363, "learning_rate": 3.235245398545452e-05, "loss": 0.5303, "num_tokens": 3399740237.0, "step": 4447 }, { "epoch": 1.6294769625354952, "grad_norm": 0.14522209892955312, "learning_rate": 3.234895841116618e-05, "loss": 0.5012, "num_tokens": 3400638302.0, "step": 4448 }, { "epoch": 1.629843363561418, "grad_norm": 0.15435125023644194, "learning_rate": 3.234546225377052e-05, "loss": 0.5303, "num_tokens": 3401308655.0, "step": 4449 }, { "epoch": 1.6302097645873408, "grad_norm": 0.16453341763060195, "learning_rate": 3.2341965513464536e-05, "loss": 0.5174, "num_tokens": 3401903896.0, "step": 4450 }, { "epoch": 1.6305761656132636, "grad_norm": 0.12866572149196484, "learning_rate": 3.2338468190445236e-05, "loss": 0.4724, "num_tokens": 3402792353.0, "step": 4451 }, { "epoch": 1.6309425666391866, "grad_norm": 0.13920209303131162, "learning_rate": 3.233497028490969e-05, "loss": 0.5296, "num_tokens": 3403611194.0, "step": 4452 }, { "epoch": 1.6313089676651096, "grad_norm": 0.14184806391746585, "learning_rate": 3.233147179705497e-05, "loss": 0.5096, "num_tokens": 3404394440.0, "step": 4453 }, { "epoch": 1.6316753686910324, "grad_norm": 0.1369229840499842, "learning_rate": 3.2327972727078204e-05, "loss": 0.521, "num_tokens": 3405166875.0, "step": 4454 }, { "epoch": 1.6320417697169551, "grad_norm": 0.154173172219507, "learning_rate": 3.232447307517656e-05, "loss": 0.5002, "num_tokens": 3406011499.0, "step": 4455 }, { "epoch": 1.632408170742878, "grad_norm": 0.15368101607100085, "learning_rate": 3.232097284154719e-05, "loss": 0.4854, "num_tokens": 3406799444.0, "step": 4456 }, { "epoch": 1.632774571768801, "grad_norm": 0.13357342268896352, "learning_rate": 3.2317472026387347e-05, "loss": 0.4719, "num_tokens": 3407519720.0, "step": 4457 }, { "epoch": 1.633140972794724, "grad_norm": 0.12602485484302853, "learning_rate": 3.231397062989426e-05, "loss": 0.5237, "num_tokens": 3408251694.0, "step": 4458 }, { "epoch": 1.6335073738206467, "grad_norm": 0.15495594011688368, "learning_rate": 3.231046865226524e-05, "loss": 0.509, "num_tokens": 3409173904.0, "step": 4459 }, { "epoch": 1.6338737748465695, "grad_norm": 0.13366930985393496, "learning_rate": 3.230696609369757e-05, "loss": 0.5296, "num_tokens": 3409908250.0, "step": 4460 }, { "epoch": 1.6342401758724925, "grad_norm": 0.13899936421123912, "learning_rate": 3.230346295438862e-05, "loss": 0.5062, "num_tokens": 3410637741.0, "step": 4461 }, { "epoch": 1.6346065768984153, "grad_norm": 0.1466357807901087, "learning_rate": 3.229995923453577e-05, "loss": 0.5288, "num_tokens": 3411575625.0, "step": 4462 }, { "epoch": 1.6349729779243383, "grad_norm": 0.14236896920205333, "learning_rate": 3.229645493433644e-05, "loss": 0.5556, "num_tokens": 3412456826.0, "step": 4463 }, { "epoch": 1.635339378950261, "grad_norm": 0.14280948643052735, "learning_rate": 3.2292950053988085e-05, "loss": 0.486, "num_tokens": 3413234590.0, "step": 4464 }, { "epoch": 1.6357057799761838, "grad_norm": 0.1590347587993376, "learning_rate": 3.228944459368816e-05, "loss": 0.5107, "num_tokens": 3413986994.0, "step": 4465 }, { "epoch": 1.6360721810021068, "grad_norm": 0.15001167814593963, "learning_rate": 3.228593855363419e-05, "loss": 0.5192, "num_tokens": 3414754904.0, "step": 4466 }, { "epoch": 1.6364385820280298, "grad_norm": 0.14213369158783393, "learning_rate": 3.228243193402373e-05, "loss": 0.4965, "num_tokens": 3415560667.0, "step": 4467 }, { "epoch": 1.6368049830539526, "grad_norm": 0.14263683763056106, "learning_rate": 3.2278924735054364e-05, "loss": 0.4785, "num_tokens": 3416366063.0, "step": 4468 }, { "epoch": 1.6371713840798754, "grad_norm": 0.13737251067906367, "learning_rate": 3.227541695692368e-05, "loss": 0.5075, "num_tokens": 3417076073.0, "step": 4469 }, { "epoch": 1.6375377851057982, "grad_norm": 0.14480700156509174, "learning_rate": 3.2271908599829336e-05, "loss": 0.4965, "num_tokens": 3417894729.0, "step": 4470 }, { "epoch": 1.6379041861317212, "grad_norm": 0.14609528574817016, "learning_rate": 3.226839966396901e-05, "loss": 0.5118, "num_tokens": 3418615980.0, "step": 4471 }, { "epoch": 1.6382705871576442, "grad_norm": 0.14515365351768486, "learning_rate": 3.226489014954041e-05, "loss": 0.5013, "num_tokens": 3419343243.0, "step": 4472 }, { "epoch": 1.638636988183567, "grad_norm": 0.1445177269995453, "learning_rate": 3.226138005674128e-05, "loss": 0.4926, "num_tokens": 3420107537.0, "step": 4473 }, { "epoch": 1.6390033892094897, "grad_norm": 0.14715615009713076, "learning_rate": 3.225786938576938e-05, "loss": 0.5106, "num_tokens": 3420810656.0, "step": 4474 }, { "epoch": 1.6393697902354125, "grad_norm": 0.15786322275583517, "learning_rate": 3.225435813682254e-05, "loss": 0.5383, "num_tokens": 3421476867.0, "step": 4475 }, { "epoch": 1.6397361912613355, "grad_norm": 0.1332961310840789, "learning_rate": 3.2250846310098585e-05, "loss": 0.4935, "num_tokens": 3422291310.0, "step": 4476 }, { "epoch": 1.6401025922872585, "grad_norm": 0.1430802602151029, "learning_rate": 3.2247333905795386e-05, "loss": 0.4755, "num_tokens": 3423085007.0, "step": 4477 }, { "epoch": 1.6404689933131813, "grad_norm": 0.17164135987788143, "learning_rate": 3.224382092411086e-05, "loss": 0.5194, "num_tokens": 3423859447.0, "step": 4478 }, { "epoch": 1.640835394339104, "grad_norm": 0.15787484931979126, "learning_rate": 3.2240307365242934e-05, "loss": 0.5087, "num_tokens": 3424715393.0, "step": 4479 }, { "epoch": 1.6412017953650269, "grad_norm": 0.14897775394470103, "learning_rate": 3.2236793229389584e-05, "loss": 0.4963, "num_tokens": 3425462380.0, "step": 4480 }, { "epoch": 1.6415681963909499, "grad_norm": 0.15822911272899007, "learning_rate": 3.22332785167488e-05, "loss": 0.5265, "num_tokens": 3426311585.0, "step": 4481 }, { "epoch": 1.6419345974168729, "grad_norm": 0.1727178272142148, "learning_rate": 3.222976322751864e-05, "loss": 0.4908, "num_tokens": 3427075889.0, "step": 4482 }, { "epoch": 1.6423009984427956, "grad_norm": 0.15770124047284093, "learning_rate": 3.222624736189715e-05, "loss": 0.4951, "num_tokens": 3427833672.0, "step": 4483 }, { "epoch": 1.6426673994687184, "grad_norm": 0.1527148968351094, "learning_rate": 3.222273092008243e-05, "loss": 0.5051, "num_tokens": 3428598123.0, "step": 4484 }, { "epoch": 1.6430338004946414, "grad_norm": 0.14837455717896947, "learning_rate": 3.2219213902272635e-05, "loss": 0.5018, "num_tokens": 3429406504.0, "step": 4485 }, { "epoch": 1.6434002015205642, "grad_norm": 0.1288098327415685, "learning_rate": 3.221569630866591e-05, "loss": 0.4763, "num_tokens": 3430145644.0, "step": 4486 }, { "epoch": 1.6437666025464872, "grad_norm": 0.1656275842244558, "learning_rate": 3.221217813946045e-05, "loss": 0.5129, "num_tokens": 3430850802.0, "step": 4487 }, { "epoch": 1.64413300357241, "grad_norm": 0.13430733713624743, "learning_rate": 3.220865939485449e-05, "loss": 0.5362, "num_tokens": 3431596142.0, "step": 4488 }, { "epoch": 1.6444994045983328, "grad_norm": 0.1579016609585184, "learning_rate": 3.2205140075046296e-05, "loss": 0.5154, "num_tokens": 3432353664.0, "step": 4489 }, { "epoch": 1.6448658056242558, "grad_norm": 0.15479233816464824, "learning_rate": 3.2201620180234166e-05, "loss": 0.5372, "num_tokens": 3433089859.0, "step": 4490 }, { "epoch": 1.6452322066501788, "grad_norm": 0.13543779788640792, "learning_rate": 3.219809971061641e-05, "loss": 0.4904, "num_tokens": 3433888926.0, "step": 4491 }, { "epoch": 1.6455986076761016, "grad_norm": 0.15170793082146006, "learning_rate": 3.2194578666391406e-05, "loss": 0.5056, "num_tokens": 3434641761.0, "step": 4492 }, { "epoch": 1.6459650087020243, "grad_norm": 0.14313811697213502, "learning_rate": 3.219105704775754e-05, "loss": 0.5407, "num_tokens": 3435451484.0, "step": 4493 }, { "epoch": 1.6463314097279471, "grad_norm": 0.16417393188975998, "learning_rate": 3.218753485491322e-05, "loss": 0.5212, "num_tokens": 3436208603.0, "step": 4494 }, { "epoch": 1.6466978107538701, "grad_norm": 0.14392777354490716, "learning_rate": 3.218401208805693e-05, "loss": 0.5308, "num_tokens": 3437039222.0, "step": 4495 }, { "epoch": 1.6470642117797931, "grad_norm": 0.16211032051509905, "learning_rate": 3.218048874738714e-05, "loss": 0.5288, "num_tokens": 3437812555.0, "step": 4496 }, { "epoch": 1.647430612805716, "grad_norm": 0.14560199275496702, "learning_rate": 3.2176964833102374e-05, "loss": 0.5235, "num_tokens": 3438597332.0, "step": 4497 }, { "epoch": 1.6477970138316387, "grad_norm": 0.1479416686263375, "learning_rate": 3.2173440345401186e-05, "loss": 0.4872, "num_tokens": 3439382780.0, "step": 4498 }, { "epoch": 1.6481634148575615, "grad_norm": 0.14216076449048343, "learning_rate": 3.216991528448216e-05, "loss": 0.4991, "num_tokens": 3440160102.0, "step": 4499 }, { "epoch": 1.6485298158834845, "grad_norm": 0.1621972583640033, "learning_rate": 3.2166389650543923e-05, "loss": 0.5113, "num_tokens": 3440984540.0, "step": 4500 }, { "epoch": 1.6488962169094075, "grad_norm": 0.16064807634561387, "learning_rate": 3.2162863443785114e-05, "loss": 0.5155, "num_tokens": 3441754976.0, "step": 4501 }, { "epoch": 1.6492626179353302, "grad_norm": 0.17848249333501515, "learning_rate": 3.2159336664404425e-05, "loss": 0.5072, "num_tokens": 3442638571.0, "step": 4502 }, { "epoch": 1.649629018961253, "grad_norm": 0.16374832683325508, "learning_rate": 3.215580931260056e-05, "loss": 0.4991, "num_tokens": 3443438374.0, "step": 4503 }, { "epoch": 1.6499954199871758, "grad_norm": 0.1682343120360036, "learning_rate": 3.215228138857228e-05, "loss": 0.4957, "num_tokens": 3444182155.0, "step": 4504 }, { "epoch": 1.6503618210130988, "grad_norm": 0.17347453235153915, "learning_rate": 3.214875289251835e-05, "loss": 0.542, "num_tokens": 3444860693.0, "step": 4505 }, { "epoch": 1.6507282220390218, "grad_norm": 0.15695695090304823, "learning_rate": 3.214522382463758e-05, "loss": 0.5075, "num_tokens": 3445618682.0, "step": 4506 }, { "epoch": 1.6510946230649446, "grad_norm": 0.13622732545003224, "learning_rate": 3.214169418512883e-05, "loss": 0.4815, "num_tokens": 3446437097.0, "step": 4507 }, { "epoch": 1.6514610240908674, "grad_norm": 0.15567669399046855, "learning_rate": 3.213816397419097e-05, "loss": 0.5108, "num_tokens": 3447313524.0, "step": 4508 }, { "epoch": 1.6518274251167904, "grad_norm": 0.15787015465080312, "learning_rate": 3.2134633192022904e-05, "loss": 0.5401, "num_tokens": 3448106070.0, "step": 4509 }, { "epoch": 1.6521938261427132, "grad_norm": 0.15377274486075612, "learning_rate": 3.2131101838823576e-05, "loss": 0.536, "num_tokens": 3448840542.0, "step": 4510 }, { "epoch": 1.6525602271686362, "grad_norm": 0.15418793051614152, "learning_rate": 3.212756991479195e-05, "loss": 0.4996, "num_tokens": 3449579246.0, "step": 4511 }, { "epoch": 1.652926628194559, "grad_norm": 0.13827011216215104, "learning_rate": 3.212403742012704e-05, "loss": 0.478, "num_tokens": 3450358462.0, "step": 4512 }, { "epoch": 1.6532930292204817, "grad_norm": 0.15268273754013234, "learning_rate": 3.212050435502788e-05, "loss": 0.484, "num_tokens": 3451093107.0, "step": 4513 }, { "epoch": 1.6536594302464047, "grad_norm": 0.15601909903281905, "learning_rate": 3.211697071969353e-05, "loss": 0.5159, "num_tokens": 3451862819.0, "step": 4514 }, { "epoch": 1.6540258312723277, "grad_norm": 0.15587022347701052, "learning_rate": 3.211343651432312e-05, "loss": 0.5046, "num_tokens": 3452591252.0, "step": 4515 }, { "epoch": 1.6543922322982505, "grad_norm": 0.15867808057378657, "learning_rate": 3.2109901739115744e-05, "loss": 0.4912, "num_tokens": 3453483576.0, "step": 4516 }, { "epoch": 1.6547586333241733, "grad_norm": 0.13857258784581053, "learning_rate": 3.210636639427059e-05, "loss": 0.5105, "num_tokens": 3454249134.0, "step": 4517 }, { "epoch": 1.655125034350096, "grad_norm": 0.1696156631389202, "learning_rate": 3.2102830479986855e-05, "loss": 0.4737, "num_tokens": 3455001448.0, "step": 4518 }, { "epoch": 1.655491435376019, "grad_norm": 0.15023980175255525, "learning_rate": 3.209929399646376e-05, "loss": 0.5063, "num_tokens": 3455741449.0, "step": 4519 }, { "epoch": 1.655857836401942, "grad_norm": 0.12620806164927967, "learning_rate": 3.209575694390058e-05, "loss": 0.4949, "num_tokens": 3456526592.0, "step": 4520 }, { "epoch": 1.6562242374278648, "grad_norm": 0.1604698444489229, "learning_rate": 3.2092219322496596e-05, "loss": 0.5381, "num_tokens": 3457265040.0, "step": 4521 }, { "epoch": 1.6565906384537876, "grad_norm": 0.14744139369143575, "learning_rate": 3.2088681132451136e-05, "loss": 0.5208, "num_tokens": 3458002436.0, "step": 4522 }, { "epoch": 1.6569570394797104, "grad_norm": 0.1454679809941734, "learning_rate": 3.208514237396356e-05, "loss": 0.5065, "num_tokens": 3458980802.0, "step": 4523 }, { "epoch": 1.6573234405056334, "grad_norm": 0.13809191879751637, "learning_rate": 3.208160304723326e-05, "loss": 0.4757, "num_tokens": 3459851637.0, "step": 4524 }, { "epoch": 1.6576898415315564, "grad_norm": 0.14720598006358282, "learning_rate": 3.2078063152459655e-05, "loss": 0.4882, "num_tokens": 3460652579.0, "step": 4525 }, { "epoch": 1.6580562425574792, "grad_norm": 0.1394055859025689, "learning_rate": 3.20745226898422e-05, "loss": 0.5301, "num_tokens": 3461534263.0, "step": 4526 }, { "epoch": 1.658422643583402, "grad_norm": 0.1502259105474673, "learning_rate": 3.207098165958037e-05, "loss": 0.4956, "num_tokens": 3462337445.0, "step": 4527 }, { "epoch": 1.6587890446093247, "grad_norm": 0.13492882632304612, "learning_rate": 3.2067440061873704e-05, "loss": 0.5285, "num_tokens": 3463075139.0, "step": 4528 }, { "epoch": 1.6591554456352477, "grad_norm": 0.1448711163846603, "learning_rate": 3.206389789692173e-05, "loss": 0.5094, "num_tokens": 3464002018.0, "step": 4529 }, { "epoch": 1.6595218466611708, "grad_norm": 0.1330009849260518, "learning_rate": 3.206035516492404e-05, "loss": 0.5123, "num_tokens": 3464702366.0, "step": 4530 }, { "epoch": 1.6598882476870935, "grad_norm": 0.14318810471581128, "learning_rate": 3.2056811866080256e-05, "loss": 0.5262, "num_tokens": 3465406910.0, "step": 4531 }, { "epoch": 1.6602546487130163, "grad_norm": 0.1445959336551537, "learning_rate": 3.205326800059001e-05, "loss": 0.5064, "num_tokens": 3466033048.0, "step": 4532 }, { "epoch": 1.6606210497389393, "grad_norm": 0.15836022633457555, "learning_rate": 3.2049723568652976e-05, "loss": 0.5345, "num_tokens": 3466746898.0, "step": 4533 }, { "epoch": 1.660987450764862, "grad_norm": 0.14483438081439318, "learning_rate": 3.204617857046888e-05, "loss": 0.5163, "num_tokens": 3467412580.0, "step": 4534 }, { "epoch": 1.661353851790785, "grad_norm": 0.14197124695598007, "learning_rate": 3.204263300623746e-05, "loss": 0.5008, "num_tokens": 3468214838.0, "step": 4535 }, { "epoch": 1.6617202528167079, "grad_norm": 0.13773920685946967, "learning_rate": 3.203908687615848e-05, "loss": 0.4787, "num_tokens": 3468958088.0, "step": 4536 }, { "epoch": 1.6620866538426307, "grad_norm": 0.14554505981261198, "learning_rate": 3.2035540180431744e-05, "loss": 0.4841, "num_tokens": 3469627823.0, "step": 4537 }, { "epoch": 1.6624530548685537, "grad_norm": 0.13809008813458876, "learning_rate": 3.20319929192571e-05, "loss": 0.5236, "num_tokens": 3470420876.0, "step": 4538 }, { "epoch": 1.6628194558944767, "grad_norm": 0.14948913638736022, "learning_rate": 3.20284450928344e-05, "loss": 0.5308, "num_tokens": 3471138092.0, "step": 4539 }, { "epoch": 1.6631858569203994, "grad_norm": 0.1444349316884449, "learning_rate": 3.202489670136357e-05, "loss": 0.4961, "num_tokens": 3471934747.0, "step": 4540 }, { "epoch": 1.6635522579463222, "grad_norm": 0.1607899159218899, "learning_rate": 3.202134774504452e-05, "loss": 0.4931, "num_tokens": 3472728540.0, "step": 4541 }, { "epoch": 1.663918658972245, "grad_norm": 0.1523417837218036, "learning_rate": 3.2017798224077225e-05, "loss": 0.5286, "num_tokens": 3473444057.0, "step": 4542 }, { "epoch": 1.664285059998168, "grad_norm": 0.13985725588346246, "learning_rate": 3.201424813866168e-05, "loss": 0.4449, "num_tokens": 3474140759.0, "step": 4543 }, { "epoch": 1.664651461024091, "grad_norm": 0.165249024102333, "learning_rate": 3.201069748899791e-05, "loss": 0.522, "num_tokens": 3474809578.0, "step": 4544 }, { "epoch": 1.6650178620500138, "grad_norm": 0.14746664006652793, "learning_rate": 3.200714627528597e-05, "loss": 0.52, "num_tokens": 3475553415.0, "step": 4545 }, { "epoch": 1.6653842630759366, "grad_norm": 0.1678206294617366, "learning_rate": 3.200359449772596e-05, "loss": 0.52, "num_tokens": 3476345710.0, "step": 4546 }, { "epoch": 1.6657506641018593, "grad_norm": 0.1406589281863931, "learning_rate": 3.200004215651801e-05, "loss": 0.5207, "num_tokens": 3477109365.0, "step": 4547 }, { "epoch": 1.6661170651277823, "grad_norm": 0.1615568190157285, "learning_rate": 3.199648925186226e-05, "loss": 0.5268, "num_tokens": 3477904327.0, "step": 4548 }, { "epoch": 1.6664834661537054, "grad_norm": 0.14537807046300846, "learning_rate": 3.19929357839589e-05, "loss": 0.5142, "num_tokens": 3478616184.0, "step": 4549 }, { "epoch": 1.6668498671796281, "grad_norm": 0.14062982507895316, "learning_rate": 3.1989381753008154e-05, "loss": 0.5195, "num_tokens": 3479383922.0, "step": 4550 }, { "epoch": 1.667216268205551, "grad_norm": 0.14359113437114354, "learning_rate": 3.198582715921027e-05, "loss": 0.5058, "num_tokens": 3480109105.0, "step": 4551 }, { "epoch": 1.6675826692314737, "grad_norm": 0.15741009975908102, "learning_rate": 3.1982272002765526e-05, "loss": 0.5214, "num_tokens": 3480958623.0, "step": 4552 }, { "epoch": 1.6679490702573967, "grad_norm": 0.1370372559647334, "learning_rate": 3.1978716283874246e-05, "loss": 0.5026, "num_tokens": 3481789704.0, "step": 4553 }, { "epoch": 1.6683154712833197, "grad_norm": 0.14674541015787398, "learning_rate": 3.1975160002736766e-05, "loss": 0.4949, "num_tokens": 3482445542.0, "step": 4554 }, { "epoch": 1.6686818723092425, "grad_norm": 0.14637935787621903, "learning_rate": 3.197160315955346e-05, "loss": 0.5128, "num_tokens": 3483166474.0, "step": 4555 }, { "epoch": 1.6690482733351653, "grad_norm": 0.13472888920484555, "learning_rate": 3.196804575452475e-05, "loss": 0.5025, "num_tokens": 3483935450.0, "step": 4556 }, { "epoch": 1.6694146743610883, "grad_norm": 0.14902072584552106, "learning_rate": 3.196448778785107e-05, "loss": 0.4963, "num_tokens": 3484650442.0, "step": 4557 }, { "epoch": 1.669781075387011, "grad_norm": 0.1524274537735463, "learning_rate": 3.196092925973288e-05, "loss": 0.497, "num_tokens": 3485349882.0, "step": 4558 }, { "epoch": 1.670147476412934, "grad_norm": 0.13350510046572123, "learning_rate": 3.1957370170370694e-05, "loss": 0.4919, "num_tokens": 3486123284.0, "step": 4559 }, { "epoch": 1.6705138774388568, "grad_norm": 0.1469367308353379, "learning_rate": 3.195381051996506e-05, "loss": 0.4925, "num_tokens": 3486991557.0, "step": 4560 }, { "epoch": 1.6708802784647796, "grad_norm": 0.16032452006710204, "learning_rate": 3.195025030871652e-05, "loss": 0.4744, "num_tokens": 3487830693.0, "step": 4561 }, { "epoch": 1.6712466794907026, "grad_norm": 0.1479074297338473, "learning_rate": 3.19466895368257e-05, "loss": 0.5299, "num_tokens": 3488552372.0, "step": 4562 }, { "epoch": 1.6716130805166256, "grad_norm": 0.1430911448185109, "learning_rate": 3.19431282044932e-05, "loss": 0.513, "num_tokens": 3489308039.0, "step": 4563 }, { "epoch": 1.6719794815425484, "grad_norm": 0.1503061227360775, "learning_rate": 3.19395663119197e-05, "loss": 0.5152, "num_tokens": 3490046399.0, "step": 4564 }, { "epoch": 1.6723458825684712, "grad_norm": 0.13657665028785435, "learning_rate": 3.1936003859305894e-05, "loss": 0.4755, "num_tokens": 3490958648.0, "step": 4565 }, { "epoch": 1.672712283594394, "grad_norm": 0.13336004532436843, "learning_rate": 3.1932440846852504e-05, "loss": 0.5213, "num_tokens": 3491696187.0, "step": 4566 }, { "epoch": 1.673078684620317, "grad_norm": 0.1589416665048111, "learning_rate": 3.192887727476027e-05, "loss": 0.4963, "num_tokens": 3492548922.0, "step": 4567 }, { "epoch": 1.67344508564624, "grad_norm": 0.14349798743918102, "learning_rate": 3.192531314323001e-05, "loss": 0.5136, "num_tokens": 3493300140.0, "step": 4568 }, { "epoch": 1.6738114866721627, "grad_norm": 0.16896433957232573, "learning_rate": 3.1921748452462525e-05, "loss": 0.5312, "num_tokens": 3493994074.0, "step": 4569 }, { "epoch": 1.6741778876980855, "grad_norm": 0.14808962331496026, "learning_rate": 3.191818320265867e-05, "loss": 0.4755, "num_tokens": 3494840546.0, "step": 4570 }, { "epoch": 1.6745442887240083, "grad_norm": 0.1429267452828663, "learning_rate": 3.191461739401932e-05, "loss": 0.5375, "num_tokens": 3495611531.0, "step": 4571 }, { "epoch": 1.6749106897499313, "grad_norm": 0.138609826683324, "learning_rate": 3.1911051026745404e-05, "loss": 0.5347, "num_tokens": 3496432689.0, "step": 4572 }, { "epoch": 1.6752770907758543, "grad_norm": 0.1758869487270184, "learning_rate": 3.190748410103785e-05, "loss": 0.5204, "num_tokens": 3497221673.0, "step": 4573 }, { "epoch": 1.675643491801777, "grad_norm": 0.14960981433150752, "learning_rate": 3.190391661709764e-05, "loss": 0.5433, "num_tokens": 3497993808.0, "step": 4574 }, { "epoch": 1.6760098928276999, "grad_norm": 0.15601483229675286, "learning_rate": 3.19003485751258e-05, "loss": 0.5255, "num_tokens": 3498759358.0, "step": 4575 }, { "epoch": 1.6763762938536226, "grad_norm": 0.14809570504191918, "learning_rate": 3.189677997532334e-05, "loss": 0.4494, "num_tokens": 3499524640.0, "step": 4576 }, { "epoch": 1.6767426948795456, "grad_norm": 0.1464212952855478, "learning_rate": 3.1893210817891364e-05, "loss": 0.4932, "num_tokens": 3500252701.0, "step": 4577 }, { "epoch": 1.6771090959054686, "grad_norm": 0.13745955692636558, "learning_rate": 3.1889641103030946e-05, "loss": 0.4844, "num_tokens": 3501045944.0, "step": 4578 }, { "epoch": 1.6774754969313914, "grad_norm": 0.13716257712349095, "learning_rate": 3.188607083094323e-05, "loss": 0.5203, "num_tokens": 3501808830.0, "step": 4579 }, { "epoch": 1.6778418979573142, "grad_norm": 0.14408322625454875, "learning_rate": 3.188250000182937e-05, "loss": 0.4942, "num_tokens": 3502593344.0, "step": 4580 }, { "epoch": 1.6782082989832372, "grad_norm": 0.14133200048808003, "learning_rate": 3.18789286158906e-05, "loss": 0.4646, "num_tokens": 3503519459.0, "step": 4581 }, { "epoch": 1.67857470000916, "grad_norm": 0.14119031997023712, "learning_rate": 3.187535667332811e-05, "loss": 0.51, "num_tokens": 3504285587.0, "step": 4582 }, { "epoch": 1.678941101035083, "grad_norm": 0.1427632776297187, "learning_rate": 3.187178417434317e-05, "loss": 0.4885, "num_tokens": 3505068218.0, "step": 4583 }, { "epoch": 1.6793075020610058, "grad_norm": 0.15004899459513707, "learning_rate": 3.186821111913708e-05, "loss": 0.528, "num_tokens": 3505763818.0, "step": 4584 }, { "epoch": 1.6796739030869285, "grad_norm": 0.14397207815948895, "learning_rate": 3.1864637507911144e-05, "loss": 0.4902, "num_tokens": 3506445515.0, "step": 4585 }, { "epoch": 1.6800403041128515, "grad_norm": 0.16188998574435207, "learning_rate": 3.186106334086674e-05, "loss": 0.5874, "num_tokens": 3507083839.0, "step": 4586 }, { "epoch": 1.6804067051387745, "grad_norm": 0.13993924861839638, "learning_rate": 3.185748861820523e-05, "loss": 0.494, "num_tokens": 3507833589.0, "step": 4587 }, { "epoch": 1.6807731061646973, "grad_norm": 0.1400460132864952, "learning_rate": 3.1853913340128044e-05, "loss": 0.5186, "num_tokens": 3508584845.0, "step": 4588 }, { "epoch": 1.68113950719062, "grad_norm": 0.14636644825905065, "learning_rate": 3.185033750683663e-05, "loss": 0.5376, "num_tokens": 3509238211.0, "step": 4589 }, { "epoch": 1.6815059082165429, "grad_norm": 0.14735515468939073, "learning_rate": 3.184676111853245e-05, "loss": 0.5008, "num_tokens": 3510115297.0, "step": 4590 }, { "epoch": 1.681872309242466, "grad_norm": 0.1415129097437575, "learning_rate": 3.184318417541704e-05, "loss": 0.4934, "num_tokens": 3510866613.0, "step": 4591 }, { "epoch": 1.682238710268389, "grad_norm": 0.136331304890626, "learning_rate": 3.1839606677691914e-05, "loss": 0.5035, "num_tokens": 3511680135.0, "step": 4592 }, { "epoch": 1.6826051112943117, "grad_norm": 0.14604032931618346, "learning_rate": 3.183602862555867e-05, "loss": 0.5129, "num_tokens": 3512430312.0, "step": 4593 }, { "epoch": 1.6829715123202345, "grad_norm": 0.13649553501909573, "learning_rate": 3.1832450019218893e-05, "loss": 0.4824, "num_tokens": 3513208960.0, "step": 4594 }, { "epoch": 1.6833379133461572, "grad_norm": 0.13929904814167635, "learning_rate": 3.182887085887422e-05, "loss": 0.4929, "num_tokens": 3514059534.0, "step": 4595 }, { "epoch": 1.6837043143720802, "grad_norm": 0.13564568068510877, "learning_rate": 3.182529114472633e-05, "loss": 0.527, "num_tokens": 3514922149.0, "step": 4596 }, { "epoch": 1.6840707153980032, "grad_norm": 0.1334896050944, "learning_rate": 3.18217108769769e-05, "loss": 0.4635, "num_tokens": 3515748315.0, "step": 4597 }, { "epoch": 1.684437116423926, "grad_norm": 0.14101437212086732, "learning_rate": 3.1818130055827686e-05, "loss": 0.5001, "num_tokens": 3516513223.0, "step": 4598 }, { "epoch": 1.6848035174498488, "grad_norm": 0.12512123399690953, "learning_rate": 3.1814548681480415e-05, "loss": 0.4715, "num_tokens": 3517244463.0, "step": 4599 }, { "epoch": 1.6851699184757716, "grad_norm": 0.1538343266705212, "learning_rate": 3.181096675413691e-05, "loss": 0.5072, "num_tokens": 3517935141.0, "step": 4600 }, { "epoch": 1.6855363195016946, "grad_norm": 0.13483718428734226, "learning_rate": 3.180738427399896e-05, "loss": 0.5019, "num_tokens": 3518759128.0, "step": 4601 }, { "epoch": 1.6859027205276176, "grad_norm": 0.14730487217905, "learning_rate": 3.180380124126845e-05, "loss": 0.4857, "num_tokens": 3519660539.0, "step": 4602 }, { "epoch": 1.6862691215535404, "grad_norm": 0.1441308754384079, "learning_rate": 3.180021765614725e-05, "loss": 0.5143, "num_tokens": 3520428280.0, "step": 4603 }, { "epoch": 1.6866355225794631, "grad_norm": 0.14688334978261253, "learning_rate": 3.1796633518837276e-05, "loss": 0.4798, "num_tokens": 3521224312.0, "step": 4604 }, { "epoch": 1.6870019236053861, "grad_norm": 0.1355046935693937, "learning_rate": 3.179304882954047e-05, "loss": 0.5377, "num_tokens": 3522059666.0, "step": 4605 }, { "epoch": 1.687368324631309, "grad_norm": 0.17071555224081372, "learning_rate": 3.178946358845881e-05, "loss": 0.4852, "num_tokens": 3523009789.0, "step": 4606 }, { "epoch": 1.687734725657232, "grad_norm": 0.1363803071752363, "learning_rate": 3.178587779579432e-05, "loss": 0.467, "num_tokens": 3523768626.0, "step": 4607 }, { "epoch": 1.6881011266831547, "grad_norm": 0.18101824436566424, "learning_rate": 3.178229145174903e-05, "loss": 0.5159, "num_tokens": 3524457114.0, "step": 4608 }, { "epoch": 1.6884675277090775, "grad_norm": 0.15336705452241514, "learning_rate": 3.1778704556525e-05, "loss": 0.4962, "num_tokens": 3525337396.0, "step": 4609 }, { "epoch": 1.6888339287350005, "grad_norm": 0.14967748894188979, "learning_rate": 3.177511711032434e-05, "loss": 0.4867, "num_tokens": 3526066609.0, "step": 4610 }, { "epoch": 1.6892003297609235, "grad_norm": 0.145439595799353, "learning_rate": 3.1771529113349195e-05, "loss": 0.4773, "num_tokens": 3526790919.0, "step": 4611 }, { "epoch": 1.6895667307868463, "grad_norm": 0.15724884838934566, "learning_rate": 3.176794056580171e-05, "loss": 0.5158, "num_tokens": 3527587244.0, "step": 4612 }, { "epoch": 1.689933131812769, "grad_norm": 0.13298750974763698, "learning_rate": 3.1764351467884096e-05, "loss": 0.5165, "num_tokens": 3528245108.0, "step": 4613 }, { "epoch": 1.6902995328386918, "grad_norm": 0.1683403402615902, "learning_rate": 3.1760761819798564e-05, "loss": 0.4992, "num_tokens": 3528972879.0, "step": 4614 }, { "epoch": 1.6906659338646148, "grad_norm": 0.1393649047317843, "learning_rate": 3.1757171621747386e-05, "loss": 0.5137, "num_tokens": 3529780494.0, "step": 4615 }, { "epoch": 1.6910323348905378, "grad_norm": 0.1309675436991297, "learning_rate": 3.1753580873932836e-05, "loss": 0.4798, "num_tokens": 3530493508.0, "step": 4616 }, { "epoch": 1.6913987359164606, "grad_norm": 0.15379425872396452, "learning_rate": 3.1749989576557244e-05, "loss": 0.5164, "num_tokens": 3531263386.0, "step": 4617 }, { "epoch": 1.6917651369423834, "grad_norm": 0.12999707630304042, "learning_rate": 3.174639772982296e-05, "loss": 0.5122, "num_tokens": 3532086683.0, "step": 4618 }, { "epoch": 1.6921315379683062, "grad_norm": 0.15110911989638123, "learning_rate": 3.1742805333932356e-05, "loss": 0.5115, "num_tokens": 3532827503.0, "step": 4619 }, { "epoch": 1.6924979389942292, "grad_norm": 0.14699820934852914, "learning_rate": 3.173921238908785e-05, "loss": 0.5101, "num_tokens": 3533607021.0, "step": 4620 }, { "epoch": 1.6928643400201522, "grad_norm": 0.1452089486120075, "learning_rate": 3.173561889549189e-05, "loss": 0.4984, "num_tokens": 3534428467.0, "step": 4621 }, { "epoch": 1.693230741046075, "grad_norm": 0.12652827112064088, "learning_rate": 3.173202485334693e-05, "loss": 0.4913, "num_tokens": 3535234995.0, "step": 4622 }, { "epoch": 1.6935971420719977, "grad_norm": 0.1614232368267506, "learning_rate": 3.1728430262855494e-05, "loss": 0.5508, "num_tokens": 3536002606.0, "step": 4623 }, { "epoch": 1.6939635430979205, "grad_norm": 0.13873490904092126, "learning_rate": 3.172483512422011e-05, "loss": 0.5077, "num_tokens": 3536715577.0, "step": 4624 }, { "epoch": 1.6943299441238435, "grad_norm": 0.15389357259745337, "learning_rate": 3.172123943764335e-05, "loss": 0.4892, "num_tokens": 3537432504.0, "step": 4625 }, { "epoch": 1.6946963451497665, "grad_norm": 0.1337547184584934, "learning_rate": 3.1717643203327806e-05, "loss": 0.5077, "num_tokens": 3538294640.0, "step": 4626 }, { "epoch": 1.6950627461756893, "grad_norm": 0.15082825939097766, "learning_rate": 3.1714046421476114e-05, "loss": 0.498, "num_tokens": 3539061186.0, "step": 4627 }, { "epoch": 1.695429147201612, "grad_norm": 0.15763630155264105, "learning_rate": 3.1710449092290915e-05, "loss": 0.5226, "num_tokens": 3539778057.0, "step": 4628 }, { "epoch": 1.695795548227535, "grad_norm": 0.16570824829867323, "learning_rate": 3.170685121597492e-05, "loss": 0.4992, "num_tokens": 3540509960.0, "step": 4629 }, { "epoch": 1.6961619492534579, "grad_norm": 0.1407263900452975, "learning_rate": 3.170325279273084e-05, "loss": 0.5151, "num_tokens": 3541249501.0, "step": 4630 }, { "epoch": 1.6965283502793809, "grad_norm": 0.16652364580777423, "learning_rate": 3.1699653822761424e-05, "loss": 0.501, "num_tokens": 3542094203.0, "step": 4631 }, { "epoch": 1.6968947513053037, "grad_norm": 0.16002521412565204, "learning_rate": 3.169605430626946e-05, "loss": 0.5319, "num_tokens": 3542854110.0, "step": 4632 }, { "epoch": 1.6972611523312264, "grad_norm": 0.14846140725648574, "learning_rate": 3.169245424345774e-05, "loss": 0.4944, "num_tokens": 3543544673.0, "step": 4633 }, { "epoch": 1.6976275533571494, "grad_norm": 0.14879836150063125, "learning_rate": 3.1688853634529144e-05, "loss": 0.5177, "num_tokens": 3544180061.0, "step": 4634 }, { "epoch": 1.6979939543830724, "grad_norm": 0.151527832126039, "learning_rate": 3.168525247968653e-05, "loss": 0.4829, "num_tokens": 3544947149.0, "step": 4635 }, { "epoch": 1.6983603554089952, "grad_norm": 0.14355013577625186, "learning_rate": 3.1681650779132796e-05, "loss": 0.4864, "num_tokens": 3545651868.0, "step": 4636 }, { "epoch": 1.698726756434918, "grad_norm": 0.14841174914916447, "learning_rate": 3.1678048533070883e-05, "loss": 0.4773, "num_tokens": 3546310444.0, "step": 4637 }, { "epoch": 1.6990931574608408, "grad_norm": 0.15131664556631783, "learning_rate": 3.167444574170376e-05, "loss": 0.5223, "num_tokens": 3547059346.0, "step": 4638 }, { "epoch": 1.6994595584867638, "grad_norm": 0.15856315788778927, "learning_rate": 3.167084240523443e-05, "loss": 0.5095, "num_tokens": 3547963410.0, "step": 4639 }, { "epoch": 1.6998259595126868, "grad_norm": 0.13125210152039762, "learning_rate": 3.166723852386591e-05, "loss": 0.5444, "num_tokens": 3548670018.0, "step": 4640 }, { "epoch": 1.7001923605386096, "grad_norm": 0.13521322145222817, "learning_rate": 3.166363409780126e-05, "loss": 0.4869, "num_tokens": 3549445838.0, "step": 4641 }, { "epoch": 1.7005587615645323, "grad_norm": 0.14590234111845773, "learning_rate": 3.166002912724358e-05, "loss": 0.4928, "num_tokens": 3550198382.0, "step": 4642 }, { "epoch": 1.7009251625904551, "grad_norm": 0.15369191658454945, "learning_rate": 3.1656423612395985e-05, "loss": 0.5366, "num_tokens": 3550955876.0, "step": 4643 }, { "epoch": 1.7012915636163781, "grad_norm": 0.14676788584092063, "learning_rate": 3.1652817553461624e-05, "loss": 0.4885, "num_tokens": 3551820588.0, "step": 4644 }, { "epoch": 1.7016579646423011, "grad_norm": 0.13663390031729034, "learning_rate": 3.164921095064367e-05, "loss": 0.4967, "num_tokens": 3552704206.0, "step": 4645 }, { "epoch": 1.702024365668224, "grad_norm": 0.15084818633819702, "learning_rate": 3.164560380414536e-05, "loss": 0.5298, "num_tokens": 3553429103.0, "step": 4646 }, { "epoch": 1.7023907666941467, "grad_norm": 0.13042827688519054, "learning_rate": 3.1641996114169926e-05, "loss": 0.5036, "num_tokens": 3554208084.0, "step": 4647 }, { "epoch": 1.7027571677200695, "grad_norm": 0.1316399218306498, "learning_rate": 3.1638387880920624e-05, "loss": 0.4787, "num_tokens": 3554983803.0, "step": 4648 }, { "epoch": 1.7031235687459925, "grad_norm": 0.15463882766392104, "learning_rate": 3.1634779104600776e-05, "loss": 0.5392, "num_tokens": 3555692267.0, "step": 4649 }, { "epoch": 1.7034899697719155, "grad_norm": 0.15543754388199743, "learning_rate": 3.163116978541372e-05, "loss": 0.4954, "num_tokens": 3556431460.0, "step": 4650 }, { "epoch": 1.7038563707978382, "grad_norm": 0.15332506677494617, "learning_rate": 3.162755992356281e-05, "loss": 0.5209, "num_tokens": 3557169321.0, "step": 4651 }, { "epoch": 1.704222771823761, "grad_norm": 0.1531373115919666, "learning_rate": 3.1623949519251445e-05, "loss": 0.5123, "num_tokens": 3557928553.0, "step": 4652 }, { "epoch": 1.704589172849684, "grad_norm": 0.20276269151564602, "learning_rate": 3.162033857268306e-05, "loss": 0.5275, "num_tokens": 3558677510.0, "step": 4653 }, { "epoch": 1.7049555738756068, "grad_norm": 0.13909971885230696, "learning_rate": 3.16167270840611e-05, "loss": 0.5127, "num_tokens": 3559476275.0, "step": 4654 }, { "epoch": 1.7053219749015298, "grad_norm": 0.14451200696740016, "learning_rate": 3.1613115053589056e-05, "loss": 0.5328, "num_tokens": 3560278394.0, "step": 4655 }, { "epoch": 1.7056883759274526, "grad_norm": 0.1482628753069946, "learning_rate": 3.160950248147045e-05, "loss": 0.4809, "num_tokens": 3561118652.0, "step": 4656 }, { "epoch": 1.7060547769533754, "grad_norm": 0.14282286333188934, "learning_rate": 3.1605889367908833e-05, "loss": 0.4972, "num_tokens": 3561885144.0, "step": 4657 }, { "epoch": 1.7064211779792984, "grad_norm": 0.14711634534540047, "learning_rate": 3.160227571310778e-05, "loss": 0.5235, "num_tokens": 3562590778.0, "step": 4658 }, { "epoch": 1.7067875790052214, "grad_norm": 0.16384805666514063, "learning_rate": 3.15986615172709e-05, "loss": 0.5245, "num_tokens": 3563411062.0, "step": 4659 }, { "epoch": 1.7071539800311442, "grad_norm": 0.15590497120955626, "learning_rate": 3.159504678060183e-05, "loss": 0.5049, "num_tokens": 3564120555.0, "step": 4660 }, { "epoch": 1.707520381057067, "grad_norm": 0.1459237615515276, "learning_rate": 3.1591431503304255e-05, "loss": 0.4881, "num_tokens": 3564871192.0, "step": 4661 }, { "epoch": 1.7078867820829897, "grad_norm": 0.1769300298287711, "learning_rate": 3.158781568558185e-05, "loss": 0.5078, "num_tokens": 3565625124.0, "step": 4662 }, { "epoch": 1.7082531831089127, "grad_norm": 0.13611422973492843, "learning_rate": 3.1584199327638374e-05, "loss": 0.5112, "num_tokens": 3566481877.0, "step": 4663 }, { "epoch": 1.7086195841348357, "grad_norm": 0.17010775081267376, "learning_rate": 3.158058242967758e-05, "loss": 0.5304, "num_tokens": 3567244588.0, "step": 4664 }, { "epoch": 1.7089859851607585, "grad_norm": 0.1250234537395659, "learning_rate": 3.1576964991903256e-05, "loss": 0.4722, "num_tokens": 3568122737.0, "step": 4665 }, { "epoch": 1.7093523861866813, "grad_norm": 0.14379754917357265, "learning_rate": 3.1573347014519215e-05, "loss": 0.4985, "num_tokens": 3568928066.0, "step": 4666 }, { "epoch": 1.709718787212604, "grad_norm": 0.1496026744836825, "learning_rate": 3.156972849772934e-05, "loss": 0.5478, "num_tokens": 3569662378.0, "step": 4667 }, { "epoch": 1.710085188238527, "grad_norm": 0.14295434206438098, "learning_rate": 3.156610944173748e-05, "loss": 0.4618, "num_tokens": 3570350726.0, "step": 4668 }, { "epoch": 1.71045158926445, "grad_norm": 0.15399027820698827, "learning_rate": 3.156248984674758e-05, "loss": 0.5016, "num_tokens": 3571029976.0, "step": 4669 }, { "epoch": 1.7108179902903728, "grad_norm": 0.1424032643177439, "learning_rate": 3.1558869712963565e-05, "loss": 0.4874, "num_tokens": 3571761107.0, "step": 4670 }, { "epoch": 1.7111843913162956, "grad_norm": 0.17255976123822414, "learning_rate": 3.155524904058941e-05, "loss": 0.5116, "num_tokens": 3572577667.0, "step": 4671 }, { "epoch": 1.7115507923422184, "grad_norm": 0.1525759743647397, "learning_rate": 3.155162782982913e-05, "loss": 0.5157, "num_tokens": 3573391402.0, "step": 4672 }, { "epoch": 1.7119171933681414, "grad_norm": 0.16809488274331685, "learning_rate": 3.154800608088676e-05, "loss": 0.4959, "num_tokens": 3574039558.0, "step": 4673 }, { "epoch": 1.7122835943940644, "grad_norm": 0.18453076775665295, "learning_rate": 3.154438379396636e-05, "loss": 0.5272, "num_tokens": 3574788131.0, "step": 4674 }, { "epoch": 1.7126499954199872, "grad_norm": 0.14222969612420888, "learning_rate": 3.154076096927202e-05, "loss": 0.4833, "num_tokens": 3575535859.0, "step": 4675 }, { "epoch": 1.71301639644591, "grad_norm": 0.1438332274538844, "learning_rate": 3.153713760700788e-05, "loss": 0.4776, "num_tokens": 3576355258.0, "step": 4676 }, { "epoch": 1.713382797471833, "grad_norm": 0.1859090443808492, "learning_rate": 3.153351370737809e-05, "loss": 0.5195, "num_tokens": 3577036579.0, "step": 4677 }, { "epoch": 1.7137491984977558, "grad_norm": 0.15733786959971663, "learning_rate": 3.152988927058684e-05, "loss": 0.5148, "num_tokens": 3577693070.0, "step": 4678 }, { "epoch": 1.7141155995236788, "grad_norm": 0.15002747559884547, "learning_rate": 3.1526264296838344e-05, "loss": 0.4769, "num_tokens": 3578465368.0, "step": 4679 }, { "epoch": 1.7144820005496015, "grad_norm": 0.16719721920534977, "learning_rate": 3.152263878633685e-05, "loss": 0.4405, "num_tokens": 3579271646.0, "step": 4680 }, { "epoch": 1.7148484015755243, "grad_norm": 0.14664780788143056, "learning_rate": 3.1519012739286643e-05, "loss": 0.4798, "num_tokens": 3580082303.0, "step": 4681 }, { "epoch": 1.7152148026014473, "grad_norm": 0.1293110039745844, "learning_rate": 3.151538615589201e-05, "loss": 0.5084, "num_tokens": 3580779127.0, "step": 4682 }, { "epoch": 1.7155812036273703, "grad_norm": 0.1633765882289025, "learning_rate": 3.151175903635731e-05, "loss": 0.4943, "num_tokens": 3581535954.0, "step": 4683 }, { "epoch": 1.715947604653293, "grad_norm": 0.14027860192664282, "learning_rate": 3.150813138088691e-05, "loss": 0.5015, "num_tokens": 3582388738.0, "step": 4684 }, { "epoch": 1.7163140056792159, "grad_norm": 0.13733937364629634, "learning_rate": 3.150450318968519e-05, "loss": 0.5025, "num_tokens": 3583119422.0, "step": 4685 }, { "epoch": 1.7166804067051387, "grad_norm": 0.16046960239660668, "learning_rate": 3.15008744629566e-05, "loss": 0.4698, "num_tokens": 3583866312.0, "step": 4686 }, { "epoch": 1.7170468077310617, "grad_norm": 0.1528169377682997, "learning_rate": 3.1497245200905594e-05, "loss": 0.4987, "num_tokens": 3584627605.0, "step": 4687 }, { "epoch": 1.7174132087569847, "grad_norm": 0.15117528581406373, "learning_rate": 3.1493615403736656e-05, "loss": 0.5094, "num_tokens": 3585370810.0, "step": 4688 }, { "epoch": 1.7177796097829074, "grad_norm": 0.13409586215443606, "learning_rate": 3.14899850716543e-05, "loss": 0.4989, "num_tokens": 3586116154.0, "step": 4689 }, { "epoch": 1.7181460108088302, "grad_norm": 0.15394810284015448, "learning_rate": 3.1486354204863095e-05, "loss": 0.5088, "num_tokens": 3586858632.0, "step": 4690 }, { "epoch": 1.718512411834753, "grad_norm": 0.1654644769990437, "learning_rate": 3.1482722803567604e-05, "loss": 0.4714, "num_tokens": 3587625759.0, "step": 4691 }, { "epoch": 1.718878812860676, "grad_norm": 0.12912951488957078, "learning_rate": 3.1479090867972436e-05, "loss": 0.4849, "num_tokens": 3588453767.0, "step": 4692 }, { "epoch": 1.719245213886599, "grad_norm": 0.14906120231317505, "learning_rate": 3.147545839828224e-05, "loss": 0.4866, "num_tokens": 3589290846.0, "step": 4693 }, { "epoch": 1.7196116149125218, "grad_norm": 0.1836557145942432, "learning_rate": 3.147182539470168e-05, "loss": 0.4946, "num_tokens": 3589951410.0, "step": 4694 }, { "epoch": 1.7199780159384446, "grad_norm": 0.14618379202441978, "learning_rate": 3.146819185743546e-05, "loss": 0.5067, "num_tokens": 3590695934.0, "step": 4695 }, { "epoch": 1.7203444169643674, "grad_norm": 0.16224210258393265, "learning_rate": 3.146455778668831e-05, "loss": 0.4914, "num_tokens": 3591460245.0, "step": 4696 }, { "epoch": 1.7207108179902904, "grad_norm": 0.15669253833762412, "learning_rate": 3.146092318266497e-05, "loss": 0.5101, "num_tokens": 3592283750.0, "step": 4697 }, { "epoch": 1.7210772190162134, "grad_norm": 0.13963230407365643, "learning_rate": 3.1457288045570266e-05, "loss": 0.5291, "num_tokens": 3593007978.0, "step": 4698 }, { "epoch": 1.7214436200421361, "grad_norm": 0.14985417909012125, "learning_rate": 3.1453652375609e-05, "loss": 0.4995, "num_tokens": 3593769786.0, "step": 4699 }, { "epoch": 1.721810021068059, "grad_norm": 0.13501173001570305, "learning_rate": 3.145001617298601e-05, "loss": 0.4925, "num_tokens": 3594558709.0, "step": 4700 }, { "epoch": 1.722176422093982, "grad_norm": 0.14242786710968328, "learning_rate": 3.1446379437906195e-05, "loss": 0.4765, "num_tokens": 3595283987.0, "step": 4701 }, { "epoch": 1.7225428231199047, "grad_norm": 0.1518738419340133, "learning_rate": 3.1442742170574456e-05, "loss": 0.5465, "num_tokens": 3595967415.0, "step": 4702 }, { "epoch": 1.7229092241458277, "grad_norm": 0.12656964976684504, "learning_rate": 3.1439104371195736e-05, "loss": 0.4628, "num_tokens": 3596687229.0, "step": 4703 }, { "epoch": 1.7232756251717505, "grad_norm": 0.1440531529528779, "learning_rate": 3.143546603997501e-05, "loss": 0.5174, "num_tokens": 3597374965.0, "step": 4704 }, { "epoch": 1.7236420261976733, "grad_norm": 0.14512997266598487, "learning_rate": 3.143182717711726e-05, "loss": 0.5071, "num_tokens": 3598163422.0, "step": 4705 }, { "epoch": 1.7240084272235963, "grad_norm": 0.1348759656576402, "learning_rate": 3.142818778282754e-05, "loss": 0.531, "num_tokens": 3598981929.0, "step": 4706 }, { "epoch": 1.7243748282495193, "grad_norm": 0.15344554080877648, "learning_rate": 3.14245478573109e-05, "loss": 0.5102, "num_tokens": 3599791716.0, "step": 4707 }, { "epoch": 1.724741229275442, "grad_norm": 0.13892732349285758, "learning_rate": 3.142090740077242e-05, "loss": 0.4961, "num_tokens": 3600633930.0, "step": 4708 }, { "epoch": 1.7251076303013648, "grad_norm": 0.1368947367962895, "learning_rate": 3.1417266413417234e-05, "loss": 0.5061, "num_tokens": 3601380147.0, "step": 4709 }, { "epoch": 1.7254740313272876, "grad_norm": 0.14511797360838116, "learning_rate": 3.141362489545048e-05, "loss": 0.4952, "num_tokens": 3602136196.0, "step": 4710 }, { "epoch": 1.7258404323532106, "grad_norm": 0.14979337534863946, "learning_rate": 3.140998284707735e-05, "loss": 0.4981, "num_tokens": 3602847470.0, "step": 4711 }, { "epoch": 1.7262068333791336, "grad_norm": 0.12660312789409214, "learning_rate": 3.140634026850305e-05, "loss": 0.4828, "num_tokens": 3603683804.0, "step": 4712 }, { "epoch": 1.7265732344050564, "grad_norm": 0.13776862693811764, "learning_rate": 3.1402697159932806e-05, "loss": 0.4984, "num_tokens": 3604533602.0, "step": 4713 }, { "epoch": 1.7269396354309792, "grad_norm": 0.13122010467406772, "learning_rate": 3.139905352157191e-05, "loss": 0.5016, "num_tokens": 3605377366.0, "step": 4714 }, { "epoch": 1.727306036456902, "grad_norm": 0.1404224101134569, "learning_rate": 3.139540935362564e-05, "loss": 0.4802, "num_tokens": 3606236080.0, "step": 4715 }, { "epoch": 1.727672437482825, "grad_norm": 0.13665308371514198, "learning_rate": 3.139176465629934e-05, "loss": 0.4961, "num_tokens": 3606932142.0, "step": 4716 }, { "epoch": 1.728038838508748, "grad_norm": 0.15802161203148088, "learning_rate": 3.1388119429798365e-05, "loss": 0.5073, "num_tokens": 3607709246.0, "step": 4717 }, { "epoch": 1.7284052395346707, "grad_norm": 0.1466086996083677, "learning_rate": 3.1384473674328096e-05, "loss": 0.5257, "num_tokens": 3608525419.0, "step": 4718 }, { "epoch": 1.7287716405605935, "grad_norm": 0.13875644156952305, "learning_rate": 3.138082739009395e-05, "loss": 0.4937, "num_tokens": 3609316634.0, "step": 4719 }, { "epoch": 1.7291380415865163, "grad_norm": 0.1567036860212625, "learning_rate": 3.13771805773014e-05, "loss": 0.4874, "num_tokens": 3610133349.0, "step": 4720 }, { "epoch": 1.7295044426124393, "grad_norm": 0.13535085767893884, "learning_rate": 3.13735332361559e-05, "loss": 0.4912, "num_tokens": 3610742193.0, "step": 4721 }, { "epoch": 1.7298708436383623, "grad_norm": 0.15242097790140513, "learning_rate": 3.1369885366862965e-05, "loss": 0.5397, "num_tokens": 3611479563.0, "step": 4722 }, { "epoch": 1.730237244664285, "grad_norm": 0.14516651277608839, "learning_rate": 3.136623696962813e-05, "loss": 0.4724, "num_tokens": 3612164303.0, "step": 4723 }, { "epoch": 1.7306036456902079, "grad_norm": 0.15845490240655213, "learning_rate": 3.136258804465696e-05, "loss": 0.4988, "num_tokens": 3612955041.0, "step": 4724 }, { "epoch": 1.7309700467161306, "grad_norm": 0.13356509050621568, "learning_rate": 3.135893859215505e-05, "loss": 0.4984, "num_tokens": 3613610088.0, "step": 4725 }, { "epoch": 1.7313364477420536, "grad_norm": 0.16452080169893052, "learning_rate": 3.135528861232805e-05, "loss": 0.5069, "num_tokens": 3614364797.0, "step": 4726 }, { "epoch": 1.7317028487679766, "grad_norm": 0.14478475782372247, "learning_rate": 3.135163810538159e-05, "loss": 0.4822, "num_tokens": 3615121617.0, "step": 4727 }, { "epoch": 1.7320692497938994, "grad_norm": 0.14126443805507444, "learning_rate": 3.1347987071521365e-05, "loss": 0.4943, "num_tokens": 3615883273.0, "step": 4728 }, { "epoch": 1.7324356508198222, "grad_norm": 0.15761098429795906, "learning_rate": 3.134433551095309e-05, "loss": 0.5321, "num_tokens": 3616662932.0, "step": 4729 }, { "epoch": 1.7328020518457452, "grad_norm": 0.1627981836023156, "learning_rate": 3.134068342388251e-05, "loss": 0.5102, "num_tokens": 3617563691.0, "step": 4730 }, { "epoch": 1.733168452871668, "grad_norm": 0.15307621340757888, "learning_rate": 3.13370308105154e-05, "loss": 0.5092, "num_tokens": 3618350274.0, "step": 4731 }, { "epoch": 1.733534853897591, "grad_norm": 0.1529135367981559, "learning_rate": 3.1333377671057574e-05, "loss": 0.4846, "num_tokens": 3619068280.0, "step": 4732 }, { "epoch": 1.7339012549235138, "grad_norm": 0.15129128907991507, "learning_rate": 3.1329724005714855e-05, "loss": 0.4797, "num_tokens": 3619784618.0, "step": 4733 }, { "epoch": 1.7342676559494365, "grad_norm": 0.16309578358255744, "learning_rate": 3.1326069814693114e-05, "loss": 0.4947, "num_tokens": 3620617936.0, "step": 4734 }, { "epoch": 1.7346340569753596, "grad_norm": 0.1532502170085031, "learning_rate": 3.132241509819824e-05, "loss": 0.5405, "num_tokens": 3621298447.0, "step": 4735 }, { "epoch": 1.7350004580012826, "grad_norm": 0.15609929148452173, "learning_rate": 3.1318759856436155e-05, "loss": 0.473, "num_tokens": 3622148962.0, "step": 4736 }, { "epoch": 1.7353668590272053, "grad_norm": 0.1734640559848913, "learning_rate": 3.131510408961282e-05, "loss": 0.4753, "num_tokens": 3622909121.0, "step": 4737 }, { "epoch": 1.7357332600531281, "grad_norm": 0.14029264261449897, "learning_rate": 3.1311447797934217e-05, "loss": 0.5049, "num_tokens": 3623734758.0, "step": 4738 }, { "epoch": 1.736099661079051, "grad_norm": 0.1575518943767166, "learning_rate": 3.130779098160634e-05, "loss": 0.494, "num_tokens": 3624449066.0, "step": 4739 }, { "epoch": 1.736466062104974, "grad_norm": 0.14198410711525572, "learning_rate": 3.1304133640835246e-05, "loss": 0.4975, "num_tokens": 3625075055.0, "step": 4740 }, { "epoch": 1.736832463130897, "grad_norm": 0.1469240332208555, "learning_rate": 3.130047577582702e-05, "loss": 0.5013, "num_tokens": 3625777840.0, "step": 4741 }, { "epoch": 1.7371988641568197, "grad_norm": 0.13986069935952153, "learning_rate": 3.129681738678773e-05, "loss": 0.4842, "num_tokens": 3626482525.0, "step": 4742 }, { "epoch": 1.7375652651827425, "grad_norm": 0.15397679093750685, "learning_rate": 3.129315847392353e-05, "loss": 0.5022, "num_tokens": 3627260198.0, "step": 4743 }, { "epoch": 1.7379316662086652, "grad_norm": 0.14221292297459476, "learning_rate": 3.1289499037440574e-05, "loss": 0.4833, "num_tokens": 3627999637.0, "step": 4744 }, { "epoch": 1.7382980672345882, "grad_norm": 0.13536785969751056, "learning_rate": 3.128583907754506e-05, "loss": 0.5014, "num_tokens": 3628850322.0, "step": 4745 }, { "epoch": 1.7386644682605112, "grad_norm": 0.13571893479573363, "learning_rate": 3.1282178594443186e-05, "loss": 0.5275, "num_tokens": 3629607175.0, "step": 4746 }, { "epoch": 1.739030869286434, "grad_norm": 0.14032843810194215, "learning_rate": 3.127851758834121e-05, "loss": 0.488, "num_tokens": 3630373064.0, "step": 4747 }, { "epoch": 1.7393972703123568, "grad_norm": 0.1499712300654384, "learning_rate": 3.127485605944542e-05, "loss": 0.5288, "num_tokens": 3630994740.0, "step": 4748 }, { "epoch": 1.7397636713382796, "grad_norm": 0.1315902773921092, "learning_rate": 3.12711940079621e-05, "loss": 0.4701, "num_tokens": 3631749264.0, "step": 4749 }, { "epoch": 1.7401300723642026, "grad_norm": 0.16513064330552035, "learning_rate": 3.126753143409761e-05, "loss": 0.5076, "num_tokens": 3632474830.0, "step": 4750 }, { "epoch": 1.7404964733901256, "grad_norm": 0.15731687021567153, "learning_rate": 3.126386833805831e-05, "loss": 0.5585, "num_tokens": 3633200714.0, "step": 4751 }, { "epoch": 1.7408628744160484, "grad_norm": 0.14374068408417295, "learning_rate": 3.1260204720050586e-05, "loss": 0.5056, "num_tokens": 3633935332.0, "step": 4752 }, { "epoch": 1.7412292754419711, "grad_norm": 0.13836214388957035, "learning_rate": 3.125654058028088e-05, "loss": 0.5086, "num_tokens": 3634614864.0, "step": 4753 }, { "epoch": 1.7415956764678941, "grad_norm": 0.15758649467899347, "learning_rate": 3.1252875918955625e-05, "loss": 0.5364, "num_tokens": 3635308702.0, "step": 4754 }, { "epoch": 1.741962077493817, "grad_norm": 0.12901325544738781, "learning_rate": 3.1249210736281315e-05, "loss": 0.451, "num_tokens": 3636113475.0, "step": 4755 }, { "epoch": 1.74232847851974, "grad_norm": 0.14211792670840265, "learning_rate": 3.124554503246447e-05, "loss": 0.4896, "num_tokens": 3636821491.0, "step": 4756 }, { "epoch": 1.7426948795456627, "grad_norm": 0.14244684673087513, "learning_rate": 3.124187880771163e-05, "loss": 0.5229, "num_tokens": 3637539784.0, "step": 4757 }, { "epoch": 1.7430612805715855, "grad_norm": 0.15924137738392863, "learning_rate": 3.123821206222935e-05, "loss": 0.539, "num_tokens": 3638338829.0, "step": 4758 }, { "epoch": 1.7434276815975085, "grad_norm": 0.13302160515408967, "learning_rate": 3.1234544796224246e-05, "loss": 0.4993, "num_tokens": 3639090636.0, "step": 4759 }, { "epoch": 1.7437940826234315, "grad_norm": 0.131908139356903, "learning_rate": 3.123087700990296e-05, "loss": 0.4983, "num_tokens": 3639873331.0, "step": 4760 }, { "epoch": 1.7441604836493543, "grad_norm": 0.1571542128684899, "learning_rate": 3.1227208703472125e-05, "loss": 0.5246, "num_tokens": 3640580756.0, "step": 4761 }, { "epoch": 1.744526884675277, "grad_norm": 0.14110358301559725, "learning_rate": 3.122353987713843e-05, "loss": 0.5378, "num_tokens": 3641290674.0, "step": 4762 }, { "epoch": 1.7448932857011998, "grad_norm": 0.13844250700973146, "learning_rate": 3.121987053110863e-05, "loss": 0.4906, "num_tokens": 3642091724.0, "step": 4763 }, { "epoch": 1.7452596867271228, "grad_norm": 0.1533936051820095, "learning_rate": 3.121620066558944e-05, "loss": 0.5132, "num_tokens": 3642970724.0, "step": 4764 }, { "epoch": 1.7456260877530458, "grad_norm": 0.14370487472615756, "learning_rate": 3.121253028078764e-05, "loss": 0.5142, "num_tokens": 3643689948.0, "step": 4765 }, { "epoch": 1.7459924887789686, "grad_norm": 0.14143116318497198, "learning_rate": 3.120885937691005e-05, "loss": 0.4966, "num_tokens": 3644437817.0, "step": 4766 }, { "epoch": 1.7463588898048914, "grad_norm": 0.15017599353611935, "learning_rate": 3.1205187954163494e-05, "loss": 0.537, "num_tokens": 3645149678.0, "step": 4767 }, { "epoch": 1.7467252908308142, "grad_norm": 0.1483597886788585, "learning_rate": 3.1201516012754846e-05, "loss": 0.5093, "num_tokens": 3645867032.0, "step": 4768 }, { "epoch": 1.7470916918567372, "grad_norm": 0.15667131540355517, "learning_rate": 3.1197843552890984e-05, "loss": 0.5072, "num_tokens": 3646581900.0, "step": 4769 }, { "epoch": 1.7474580928826602, "grad_norm": 0.13871689959669511, "learning_rate": 3.119417057477884e-05, "loss": 0.4968, "num_tokens": 3647348684.0, "step": 4770 }, { "epoch": 1.747824493908583, "grad_norm": 0.1729574800437309, "learning_rate": 3.119049707862538e-05, "loss": 0.4961, "num_tokens": 3647993414.0, "step": 4771 }, { "epoch": 1.7481908949345057, "grad_norm": 0.1517339414640509, "learning_rate": 3.118682306463756e-05, "loss": 0.4865, "num_tokens": 3648897507.0, "step": 4772 }, { "epoch": 1.7485572959604285, "grad_norm": 0.14974099176449662, "learning_rate": 3.118314853302241e-05, "loss": 0.5364, "num_tokens": 3649663441.0, "step": 4773 }, { "epoch": 1.7489236969863515, "grad_norm": 0.1489177770495297, "learning_rate": 3.117947348398696e-05, "loss": 0.5166, "num_tokens": 3650320878.0, "step": 4774 }, { "epoch": 1.7492900980122745, "grad_norm": 0.17179720906800278, "learning_rate": 3.117579791773828e-05, "loss": 0.554, "num_tokens": 3651115972.0, "step": 4775 }, { "epoch": 1.7496564990381973, "grad_norm": 0.15631507720842153, "learning_rate": 3.117212183448347e-05, "loss": 0.4947, "num_tokens": 3651818689.0, "step": 4776 }, { "epoch": 1.75002290006412, "grad_norm": 0.15675726504634152, "learning_rate": 3.1168445234429664e-05, "loss": 0.5442, "num_tokens": 3652502259.0, "step": 4777 }, { "epoch": 1.750389301090043, "grad_norm": 0.15351126964985123, "learning_rate": 3.1164768117784007e-05, "loss": 0.5153, "num_tokens": 3653275570.0, "step": 4778 }, { "epoch": 1.7507557021159659, "grad_norm": 0.13777871171303352, "learning_rate": 3.116109048475369e-05, "loss": 0.4635, "num_tokens": 3654053760.0, "step": 4779 }, { "epoch": 1.7511221031418889, "grad_norm": 0.15864080822662283, "learning_rate": 3.115741233554593e-05, "loss": 0.4814, "num_tokens": 3654841020.0, "step": 4780 }, { "epoch": 1.7514885041678117, "grad_norm": 0.14239341086245352, "learning_rate": 3.115373367036795e-05, "loss": 0.4906, "num_tokens": 3655637165.0, "step": 4781 }, { "epoch": 1.7518549051937344, "grad_norm": 0.14220661262173248, "learning_rate": 3.115005448942706e-05, "loss": 0.5113, "num_tokens": 3656388134.0, "step": 4782 }, { "epoch": 1.7522213062196574, "grad_norm": 0.15010162581479503, "learning_rate": 3.114637479293054e-05, "loss": 0.5142, "num_tokens": 3657209536.0, "step": 4783 }, { "epoch": 1.7525877072455804, "grad_norm": 0.1380019925032017, "learning_rate": 3.114269458108571e-05, "loss": 0.4745, "num_tokens": 3657974326.0, "step": 4784 }, { "epoch": 1.7529541082715032, "grad_norm": 0.15447506468550898, "learning_rate": 3.113901385409995e-05, "loss": 0.5411, "num_tokens": 3658759288.0, "step": 4785 }, { "epoch": 1.753320509297426, "grad_norm": 0.14875465512213035, "learning_rate": 3.113533261218065e-05, "loss": 0.488, "num_tokens": 3659472786.0, "step": 4786 }, { "epoch": 1.7536869103233488, "grad_norm": 0.13530011304172784, "learning_rate": 3.11316508555352e-05, "loss": 0.5153, "num_tokens": 3660251568.0, "step": 4787 }, { "epoch": 1.7540533113492718, "grad_norm": 0.14804686824486943, "learning_rate": 3.1127968584371076e-05, "loss": 0.4637, "num_tokens": 3661059130.0, "step": 4788 }, { "epoch": 1.7544197123751948, "grad_norm": 0.13784368257816593, "learning_rate": 3.112428579889574e-05, "loss": 0.4976, "num_tokens": 3661767296.0, "step": 4789 }, { "epoch": 1.7547861134011176, "grad_norm": 0.14899388918749915, "learning_rate": 3.1120602499316714e-05, "loss": 0.4872, "num_tokens": 3662709057.0, "step": 4790 }, { "epoch": 1.7551525144270403, "grad_norm": 0.1360705422288686, "learning_rate": 3.111691868584151e-05, "loss": 0.4802, "num_tokens": 3663550189.0, "step": 4791 }, { "epoch": 1.7555189154529631, "grad_norm": 0.13194425381740046, "learning_rate": 3.11132343586777e-05, "loss": 0.4955, "num_tokens": 3664268792.0, "step": 4792 }, { "epoch": 1.7558853164788861, "grad_norm": 0.14685006618296156, "learning_rate": 3.110954951803287e-05, "loss": 0.492, "num_tokens": 3665005040.0, "step": 4793 }, { "epoch": 1.7562517175048091, "grad_norm": 0.15562188907388638, "learning_rate": 3.110586416411465e-05, "loss": 0.4847, "num_tokens": 3665708090.0, "step": 4794 }, { "epoch": 1.756618118530732, "grad_norm": 0.15467827290959843, "learning_rate": 3.110217829713068e-05, "loss": 0.4828, "num_tokens": 3666545482.0, "step": 4795 }, { "epoch": 1.7569845195566547, "grad_norm": 0.13795131467353733, "learning_rate": 3.109849191728865e-05, "loss": 0.4962, "num_tokens": 3667278289.0, "step": 4796 }, { "epoch": 1.7573509205825775, "grad_norm": 0.13614307310811202, "learning_rate": 3.109480502479626e-05, "loss": 0.4807, "num_tokens": 3668067774.0, "step": 4797 }, { "epoch": 1.7577173216085005, "grad_norm": 0.14249208816017905, "learning_rate": 3.109111761986125e-05, "loss": 0.5298, "num_tokens": 3668812074.0, "step": 4798 }, { "epoch": 1.7580837226344235, "grad_norm": 0.13821671384226294, "learning_rate": 3.108742970269137e-05, "loss": 0.4895, "num_tokens": 3669585158.0, "step": 4799 }, { "epoch": 1.7584501236603463, "grad_norm": 0.1412980326231839, "learning_rate": 3.1083741273494444e-05, "loss": 0.4931, "num_tokens": 3670298893.0, "step": 4800 }, { "epoch": 1.758816524686269, "grad_norm": 0.14936071571614856, "learning_rate": 3.108005233247826e-05, "loss": 0.4759, "num_tokens": 3671042906.0, "step": 4801 }, { "epoch": 1.759182925712192, "grad_norm": 0.14681785917908954, "learning_rate": 3.1076362879850706e-05, "loss": 0.5415, "num_tokens": 3671770146.0, "step": 4802 }, { "epoch": 1.7595493267381148, "grad_norm": 0.1432050833142499, "learning_rate": 3.1072672915819634e-05, "loss": 0.481, "num_tokens": 3672553879.0, "step": 4803 }, { "epoch": 1.7599157277640378, "grad_norm": 0.1360957166166381, "learning_rate": 3.106898244059296e-05, "loss": 0.4925, "num_tokens": 3673427749.0, "step": 4804 }, { "epoch": 1.7602821287899606, "grad_norm": 0.15377185108318617, "learning_rate": 3.106529145437863e-05, "loss": 0.5183, "num_tokens": 3674172185.0, "step": 4805 }, { "epoch": 1.7606485298158834, "grad_norm": 0.13246684126861746, "learning_rate": 3.106159995738461e-05, "loss": 0.5001, "num_tokens": 3674803060.0, "step": 4806 }, { "epoch": 1.7610149308418064, "grad_norm": 0.16353559011374189, "learning_rate": 3.1057907949818876e-05, "loss": 0.5065, "num_tokens": 3675539801.0, "step": 4807 }, { "epoch": 1.7613813318677294, "grad_norm": 0.13987675494396976, "learning_rate": 3.105421543188949e-05, "loss": 0.4943, "num_tokens": 3676421852.0, "step": 4808 }, { "epoch": 1.7617477328936522, "grad_norm": 0.14501175700014174, "learning_rate": 3.105052240380447e-05, "loss": 0.4971, "num_tokens": 3677161550.0, "step": 4809 }, { "epoch": 1.762114133919575, "grad_norm": 0.13585058441675443, "learning_rate": 3.104682886577192e-05, "loss": 0.494, "num_tokens": 3677932648.0, "step": 4810 }, { "epoch": 1.7624805349454977, "grad_norm": 0.14338419729744836, "learning_rate": 3.1043134817999925e-05, "loss": 0.495, "num_tokens": 3678817259.0, "step": 4811 }, { "epoch": 1.7628469359714207, "grad_norm": 0.13502046229778356, "learning_rate": 3.103944026069665e-05, "loss": 0.4869, "num_tokens": 3679571710.0, "step": 4812 }, { "epoch": 1.7632133369973437, "grad_norm": 0.14472392151873467, "learning_rate": 3.103574519407026e-05, "loss": 0.4946, "num_tokens": 3680444874.0, "step": 4813 }, { "epoch": 1.7635797380232665, "grad_norm": 0.14699269123963146, "learning_rate": 3.103204961832895e-05, "loss": 0.5237, "num_tokens": 3681225937.0, "step": 4814 }, { "epoch": 1.7639461390491893, "grad_norm": 0.15733276030233714, "learning_rate": 3.102835353368093e-05, "loss": 0.5344, "num_tokens": 3681976739.0, "step": 4815 }, { "epoch": 1.764312540075112, "grad_norm": 0.13619902119235025, "learning_rate": 3.102465694033447e-05, "loss": 0.5087, "num_tokens": 3682834664.0, "step": 4816 }, { "epoch": 1.764678941101035, "grad_norm": 0.14229222580908663, "learning_rate": 3.102095983849785e-05, "loss": 0.4918, "num_tokens": 3683688891.0, "step": 4817 }, { "epoch": 1.765045342126958, "grad_norm": 0.13002868869398207, "learning_rate": 3.101726222837938e-05, "loss": 0.4803, "num_tokens": 3684548827.0, "step": 4818 }, { "epoch": 1.7654117431528809, "grad_norm": 0.14236041467697194, "learning_rate": 3.1013564110187405e-05, "loss": 0.4803, "num_tokens": 3685321590.0, "step": 4819 }, { "epoch": 1.7657781441788036, "grad_norm": 0.14141857510290337, "learning_rate": 3.100986548413029e-05, "loss": 0.5111, "num_tokens": 3686044917.0, "step": 4820 }, { "epoch": 1.7661445452047264, "grad_norm": 0.13270593828750357, "learning_rate": 3.100616635041643e-05, "loss": 0.5029, "num_tokens": 3686751152.0, "step": 4821 }, { "epoch": 1.7665109462306494, "grad_norm": 0.15885839771216848, "learning_rate": 3.100246670925425e-05, "loss": 0.4925, "num_tokens": 3687433123.0, "step": 4822 }, { "epoch": 1.7668773472565724, "grad_norm": 0.1428309563300981, "learning_rate": 3.0998766560852206e-05, "loss": 0.5081, "num_tokens": 3688183338.0, "step": 4823 }, { "epoch": 1.7672437482824952, "grad_norm": 0.1418007057929559, "learning_rate": 3.099506590541878e-05, "loss": 0.4717, "num_tokens": 3688928136.0, "step": 4824 }, { "epoch": 1.767610149308418, "grad_norm": 0.1387863056704805, "learning_rate": 3.09913647431625e-05, "loss": 0.5079, "num_tokens": 3689635149.0, "step": 4825 }, { "epoch": 1.767976550334341, "grad_norm": 0.1464253875784971, "learning_rate": 3.0987663074291884e-05, "loss": 0.5062, "num_tokens": 3690492350.0, "step": 4826 }, { "epoch": 1.7683429513602638, "grad_norm": 0.16552526542809523, "learning_rate": 3.098396089901551e-05, "loss": 0.4952, "num_tokens": 3691121257.0, "step": 4827 }, { "epoch": 1.7687093523861868, "grad_norm": 0.1278741185478235, "learning_rate": 3.098025821754198e-05, "loss": 0.5006, "num_tokens": 3691901905.0, "step": 4828 }, { "epoch": 1.7690757534121095, "grad_norm": 0.13681973344799409, "learning_rate": 3.097655503007991e-05, "loss": 0.5188, "num_tokens": 3692594387.0, "step": 4829 }, { "epoch": 1.7694421544380323, "grad_norm": 0.14982543804468992, "learning_rate": 3.0972851336837954e-05, "loss": 0.4834, "num_tokens": 3693361283.0, "step": 4830 }, { "epoch": 1.7698085554639553, "grad_norm": 0.137572497555026, "learning_rate": 3.096914713802481e-05, "loss": 0.5056, "num_tokens": 3694185343.0, "step": 4831 }, { "epoch": 1.7701749564898783, "grad_norm": 0.15118502996932845, "learning_rate": 3.096544243384917e-05, "loss": 0.5205, "num_tokens": 3694860383.0, "step": 4832 }, { "epoch": 1.770541357515801, "grad_norm": 0.15427003067303816, "learning_rate": 3.096173722451979e-05, "loss": 0.4906, "num_tokens": 3695549902.0, "step": 4833 }, { "epoch": 1.7709077585417239, "grad_norm": 0.12967624441468636, "learning_rate": 3.095803151024543e-05, "loss": 0.492, "num_tokens": 3696306692.0, "step": 4834 }, { "epoch": 1.7712741595676467, "grad_norm": 0.1599743133406735, "learning_rate": 3.0954325291234896e-05, "loss": 0.4934, "num_tokens": 3697111743.0, "step": 4835 }, { "epoch": 1.7716405605935697, "grad_norm": 0.1437678047891786, "learning_rate": 3.0950618567696996e-05, "loss": 0.4846, "num_tokens": 3697821706.0, "step": 4836 }, { "epoch": 1.7720069616194927, "grad_norm": 0.1504739844754974, "learning_rate": 3.09469113398406e-05, "loss": 0.5168, "num_tokens": 3698549170.0, "step": 4837 }, { "epoch": 1.7723733626454155, "grad_norm": 0.16940136353243626, "learning_rate": 3.094320360787457e-05, "loss": 0.4804, "num_tokens": 3699346363.0, "step": 4838 }, { "epoch": 1.7727397636713382, "grad_norm": 0.14208600982043249, "learning_rate": 3.093949537200784e-05, "loss": 0.5394, "num_tokens": 3700046360.0, "step": 4839 }, { "epoch": 1.773106164697261, "grad_norm": 0.15551261886403042, "learning_rate": 3.0935786632449345e-05, "loss": 0.5008, "num_tokens": 3700820956.0, "step": 4840 }, { "epoch": 1.773472565723184, "grad_norm": 0.1585606732216922, "learning_rate": 3.093207738940804e-05, "loss": 0.5111, "num_tokens": 3701615986.0, "step": 4841 }, { "epoch": 1.773838966749107, "grad_norm": 0.1497081401308304, "learning_rate": 3.092836764309292e-05, "loss": 0.5098, "num_tokens": 3702324665.0, "step": 4842 }, { "epoch": 1.7742053677750298, "grad_norm": 0.14644142397221904, "learning_rate": 3.092465739371302e-05, "loss": 0.4973, "num_tokens": 3703018479.0, "step": 4843 }, { "epoch": 1.7745717688009526, "grad_norm": 0.1538193703400561, "learning_rate": 3.0920946641477394e-05, "loss": 0.4759, "num_tokens": 3703800147.0, "step": 4844 }, { "epoch": 1.7749381698268754, "grad_norm": 0.13061775385839713, "learning_rate": 3.091723538659511e-05, "loss": 0.4684, "num_tokens": 3704579663.0, "step": 4845 }, { "epoch": 1.7753045708527984, "grad_norm": 0.16657856845800834, "learning_rate": 3.091352362927528e-05, "loss": 0.5118, "num_tokens": 3705400578.0, "step": 4846 }, { "epoch": 1.7756709718787214, "grad_norm": 0.14189515981084036, "learning_rate": 3.090981136972705e-05, "loss": 0.5134, "num_tokens": 3706100178.0, "step": 4847 }, { "epoch": 1.7760373729046441, "grad_norm": 0.14020977650584546, "learning_rate": 3.090609860815958e-05, "loss": 0.4663, "num_tokens": 3706918449.0, "step": 4848 }, { "epoch": 1.776403773930567, "grad_norm": 0.148201084798379, "learning_rate": 3.090238534478206e-05, "loss": 0.5274, "num_tokens": 3707699800.0, "step": 4849 }, { "epoch": 1.77677017495649, "grad_norm": 0.145926207432606, "learning_rate": 3.0898671579803726e-05, "loss": 0.539, "num_tokens": 3708477815.0, "step": 4850 }, { "epoch": 1.7771365759824127, "grad_norm": 0.14414361169893755, "learning_rate": 3.0894957313433806e-05, "loss": 0.4815, "num_tokens": 3709208506.0, "step": 4851 }, { "epoch": 1.7775029770083357, "grad_norm": 0.14484940657868595, "learning_rate": 3.08912425458816e-05, "loss": 0.4826, "num_tokens": 3710043678.0, "step": 4852 }, { "epoch": 1.7778693780342585, "grad_norm": 0.13608439741964962, "learning_rate": 3.0887527277356404e-05, "loss": 0.5367, "num_tokens": 3710736602.0, "step": 4853 }, { "epoch": 1.7782357790601813, "grad_norm": 0.15495196001960063, "learning_rate": 3.0883811508067554e-05, "loss": 0.4929, "num_tokens": 3711466169.0, "step": 4854 }, { "epoch": 1.7786021800861043, "grad_norm": 0.14542828914405143, "learning_rate": 3.0880095238224414e-05, "loss": 0.5192, "num_tokens": 3712218133.0, "step": 4855 }, { "epoch": 1.7789685811120273, "grad_norm": 0.1397165368900719, "learning_rate": 3.0876378468036384e-05, "loss": 0.5262, "num_tokens": 3712977888.0, "step": 4856 }, { "epoch": 1.77933498213795, "grad_norm": 0.1556923676461154, "learning_rate": 3.0872661197712865e-05, "loss": 0.5385, "num_tokens": 3713632398.0, "step": 4857 }, { "epoch": 1.7797013831638728, "grad_norm": 0.1435171449863426, "learning_rate": 3.086894342746332e-05, "loss": 0.5077, "num_tokens": 3714471713.0, "step": 4858 }, { "epoch": 1.7800677841897956, "grad_norm": 0.14429269641910253, "learning_rate": 3.086522515749722e-05, "loss": 0.5292, "num_tokens": 3715227500.0, "step": 4859 }, { "epoch": 1.7804341852157186, "grad_norm": 0.14037928022668586, "learning_rate": 3.0861506388024064e-05, "loss": 0.5196, "num_tokens": 3716065405.0, "step": 4860 }, { "epoch": 1.7808005862416416, "grad_norm": 0.14468873717505792, "learning_rate": 3.0857787119253395e-05, "loss": 0.5042, "num_tokens": 3716790740.0, "step": 4861 }, { "epoch": 1.7811669872675644, "grad_norm": 0.14690661330636154, "learning_rate": 3.085406735139477e-05, "loss": 0.5379, "num_tokens": 3717605537.0, "step": 4862 }, { "epoch": 1.7815333882934872, "grad_norm": 0.14737193066131296, "learning_rate": 3.085034708465778e-05, "loss": 0.5833, "num_tokens": 3718245073.0, "step": 4863 }, { "epoch": 1.78189978931941, "grad_norm": 0.14666773477250497, "learning_rate": 3.084662631925203e-05, "loss": 0.469, "num_tokens": 3718942260.0, "step": 4864 }, { "epoch": 1.782266190345333, "grad_norm": 0.1388090659936423, "learning_rate": 3.084290505538718e-05, "loss": 0.5083, "num_tokens": 3719684883.0, "step": 4865 }, { "epoch": 1.782632591371256, "grad_norm": 0.14886273928178428, "learning_rate": 3.083918329327289e-05, "loss": 0.5119, "num_tokens": 3720504606.0, "step": 4866 }, { "epoch": 1.7829989923971787, "grad_norm": 0.12324237346656713, "learning_rate": 3.083546103311888e-05, "loss": 0.452, "num_tokens": 3721296547.0, "step": 4867 }, { "epoch": 1.7833653934231015, "grad_norm": 0.14135598429892138, "learning_rate": 3.083173827513485e-05, "loss": 0.4717, "num_tokens": 3722131932.0, "step": 4868 }, { "epoch": 1.7837317944490243, "grad_norm": 0.1520238544874073, "learning_rate": 3.082801501953058e-05, "loss": 0.5286, "num_tokens": 3722895443.0, "step": 4869 }, { "epoch": 1.7840981954749473, "grad_norm": 0.15673471655068225, "learning_rate": 3.0824291266515845e-05, "loss": 0.501, "num_tokens": 3723650627.0, "step": 4870 }, { "epoch": 1.7844645965008703, "grad_norm": 0.14722479337022018, "learning_rate": 3.082056701630046e-05, "loss": 0.5236, "num_tokens": 3724346719.0, "step": 4871 }, { "epoch": 1.784830997526793, "grad_norm": 0.15278193774011423, "learning_rate": 3.081684226909428e-05, "loss": 0.5463, "num_tokens": 3724981636.0, "step": 4872 }, { "epoch": 1.7851973985527159, "grad_norm": 0.14863328431910208, "learning_rate": 3.0813117025107145e-05, "loss": 0.4903, "num_tokens": 3725851888.0, "step": 4873 }, { "epoch": 1.7855637995786389, "grad_norm": 0.13915305465482478, "learning_rate": 3.0809391284548974e-05, "loss": 0.5446, "num_tokens": 3726560694.0, "step": 4874 }, { "epoch": 1.7859302006045616, "grad_norm": 0.13852017755931317, "learning_rate": 3.080566504762969e-05, "loss": 0.5328, "num_tokens": 3727274384.0, "step": 4875 }, { "epoch": 1.7862966016304846, "grad_norm": 0.1622158488953112, "learning_rate": 3.080193831455924e-05, "loss": 0.4757, "num_tokens": 3728073278.0, "step": 4876 }, { "epoch": 1.7866630026564074, "grad_norm": 0.1396204952175888, "learning_rate": 3.0798211085547606e-05, "loss": 0.4849, "num_tokens": 3728783786.0, "step": 4877 }, { "epoch": 1.7870294036823302, "grad_norm": 0.14778322426512805, "learning_rate": 3.079448336080481e-05, "loss": 0.5139, "num_tokens": 3729510541.0, "step": 4878 }, { "epoch": 1.7873958047082532, "grad_norm": 0.1276264096361068, "learning_rate": 3.079075514054087e-05, "loss": 0.5066, "num_tokens": 3730408425.0, "step": 4879 }, { "epoch": 1.7877622057341762, "grad_norm": 0.16249748987677737, "learning_rate": 3.078702642496586e-05, "loss": 0.5197, "num_tokens": 3731188955.0, "step": 4880 }, { "epoch": 1.788128606760099, "grad_norm": 0.14121947319717557, "learning_rate": 3.078329721428987e-05, "loss": 0.487, "num_tokens": 3731997428.0, "step": 4881 }, { "epoch": 1.7884950077860218, "grad_norm": 0.1437516822817607, "learning_rate": 3.077956750872303e-05, "loss": 0.5374, "num_tokens": 3732726006.0, "step": 4882 }, { "epoch": 1.7888614088119446, "grad_norm": 0.15204819415325568, "learning_rate": 3.0775837308475466e-05, "loss": 0.473, "num_tokens": 3733442578.0, "step": 4883 }, { "epoch": 1.7892278098378676, "grad_norm": 0.13815936602670373, "learning_rate": 3.077210661375738e-05, "loss": 0.5203, "num_tokens": 3734251758.0, "step": 4884 }, { "epoch": 1.7895942108637906, "grad_norm": 0.13819258119767602, "learning_rate": 3.0768375424778966e-05, "loss": 0.517, "num_tokens": 3735096805.0, "step": 4885 }, { "epoch": 1.7899606118897133, "grad_norm": 0.13863831037619673, "learning_rate": 3.076464374175045e-05, "loss": 0.4863, "num_tokens": 3735877502.0, "step": 4886 }, { "epoch": 1.7903270129156361, "grad_norm": 0.16166548347178247, "learning_rate": 3.07609115648821e-05, "loss": 0.5025, "num_tokens": 3736645994.0, "step": 4887 }, { "epoch": 1.790693413941559, "grad_norm": 0.15907754867323337, "learning_rate": 3.075717889438421e-05, "loss": 0.5382, "num_tokens": 3737397060.0, "step": 4888 }, { "epoch": 1.791059814967482, "grad_norm": 0.1687233442030441, "learning_rate": 3.075344573046707e-05, "loss": 0.4878, "num_tokens": 3738041035.0, "step": 4889 }, { "epoch": 1.791426215993405, "grad_norm": 0.1589824620273645, "learning_rate": 3.074971207334105e-05, "loss": 0.5244, "num_tokens": 3738750799.0, "step": 4890 }, { "epoch": 1.7917926170193277, "grad_norm": 0.1699350618598411, "learning_rate": 3.074597792321652e-05, "loss": 0.525, "num_tokens": 3739460350.0, "step": 4891 }, { "epoch": 1.7921590180452505, "grad_norm": 0.15380666100776527, "learning_rate": 3.074224328030386e-05, "loss": 0.4871, "num_tokens": 3740218843.0, "step": 4892 }, { "epoch": 1.7925254190711732, "grad_norm": 0.13674736917075134, "learning_rate": 3.07385081448135e-05, "loss": 0.4879, "num_tokens": 3741060212.0, "step": 4893 }, { "epoch": 1.7928918200970962, "grad_norm": 0.15265646268761188, "learning_rate": 3.0734772516955916e-05, "loss": 0.5222, "num_tokens": 3741812303.0, "step": 4894 }, { "epoch": 1.7932582211230192, "grad_norm": 0.13538883277845856, "learning_rate": 3.073103639694157e-05, "loss": 0.5039, "num_tokens": 3742647320.0, "step": 4895 }, { "epoch": 1.793624622148942, "grad_norm": 0.14768178055954934, "learning_rate": 3.0727299784980984e-05, "loss": 0.5114, "num_tokens": 3743545204.0, "step": 4896 }, { "epoch": 1.7939910231748648, "grad_norm": 0.12034901585854553, "learning_rate": 3.072356268128468e-05, "loss": 0.4746, "num_tokens": 3744312414.0, "step": 4897 }, { "epoch": 1.7943574242007878, "grad_norm": 0.16492738980645152, "learning_rate": 3.071982508606324e-05, "loss": 0.5239, "num_tokens": 3745117007.0, "step": 4898 }, { "epoch": 1.7947238252267106, "grad_norm": 0.144794151177674, "learning_rate": 3.0716086999527254e-05, "loss": 0.492, "num_tokens": 3745781482.0, "step": 4899 }, { "epoch": 1.7950902262526336, "grad_norm": 0.1526462377398157, "learning_rate": 3.0712348421887337e-05, "loss": 0.5539, "num_tokens": 3746521284.0, "step": 4900 }, { "epoch": 1.7954566272785564, "grad_norm": 0.1706233595787579, "learning_rate": 3.0708609353354137e-05, "loss": 0.5302, "num_tokens": 3747197310.0, "step": 4901 }, { "epoch": 1.7958230283044792, "grad_norm": 0.15181017440610298, "learning_rate": 3.0704869794138335e-05, "loss": 0.5157, "num_tokens": 3747920142.0, "step": 4902 }, { "epoch": 1.7961894293304022, "grad_norm": 0.14780404980689338, "learning_rate": 3.0701129744450626e-05, "loss": 0.5293, "num_tokens": 3748744150.0, "step": 4903 }, { "epoch": 1.7965558303563252, "grad_norm": 0.1408413005429701, "learning_rate": 3.069738920450175e-05, "loss": 0.5015, "num_tokens": 3749602551.0, "step": 4904 }, { "epoch": 1.796922231382248, "grad_norm": 0.14951531397991547, "learning_rate": 3.069364817450247e-05, "loss": 0.5122, "num_tokens": 3750433351.0, "step": 4905 }, { "epoch": 1.7972886324081707, "grad_norm": 0.13517714052417212, "learning_rate": 3.068990665466356e-05, "loss": 0.4769, "num_tokens": 3751253427.0, "step": 4906 }, { "epoch": 1.7976550334340935, "grad_norm": 0.13897215471602295, "learning_rate": 3.068616464519584e-05, "loss": 0.4865, "num_tokens": 3752076332.0, "step": 4907 }, { "epoch": 1.7980214344600165, "grad_norm": 0.1332427390334486, "learning_rate": 3.068242214631016e-05, "loss": 0.4674, "num_tokens": 3752812753.0, "step": 4908 }, { "epoch": 1.7983878354859395, "grad_norm": 0.15344895823348947, "learning_rate": 3.067867915821737e-05, "loss": 0.4917, "num_tokens": 3753474267.0, "step": 4909 }, { "epoch": 1.7987542365118623, "grad_norm": 0.1262752208211508, "learning_rate": 3.067493568112839e-05, "loss": 0.5243, "num_tokens": 3754190808.0, "step": 4910 }, { "epoch": 1.799120637537785, "grad_norm": 0.13161415439768653, "learning_rate": 3.067119171525413e-05, "loss": 0.4772, "num_tokens": 3754996638.0, "step": 4911 }, { "epoch": 1.7994870385637078, "grad_norm": 0.13377319660616738, "learning_rate": 3.066744726080554e-05, "loss": 0.4651, "num_tokens": 3755688880.0, "step": 4912 }, { "epoch": 1.7998534395896308, "grad_norm": 0.14005023750488893, "learning_rate": 3.066370231799361e-05, "loss": 0.49, "num_tokens": 3756348331.0, "step": 4913 }, { "epoch": 1.8002198406155538, "grad_norm": 0.129987224833885, "learning_rate": 3.065995688702934e-05, "loss": 0.4996, "num_tokens": 3757156837.0, "step": 4914 }, { "epoch": 1.8005862416414766, "grad_norm": 0.15014522139133632, "learning_rate": 3.065621096812377e-05, "loss": 0.4956, "num_tokens": 3758020216.0, "step": 4915 }, { "epoch": 1.8009526426673994, "grad_norm": 0.1344397784034677, "learning_rate": 3.065246456148795e-05, "loss": 0.5072, "num_tokens": 3758829368.0, "step": 4916 }, { "epoch": 1.8013190436933222, "grad_norm": 0.13729254796410484, "learning_rate": 3.064871766733299e-05, "loss": 0.4727, "num_tokens": 3759432707.0, "step": 4917 }, { "epoch": 1.8016854447192452, "grad_norm": 0.13028992080058963, "learning_rate": 3.064497028586999e-05, "loss": 0.5054, "num_tokens": 3760207222.0, "step": 4918 }, { "epoch": 1.8020518457451682, "grad_norm": 0.1800324300626864, "learning_rate": 3.06412224173101e-05, "loss": 0.498, "num_tokens": 3760943498.0, "step": 4919 }, { "epoch": 1.802418246771091, "grad_norm": 0.13856527143237493, "learning_rate": 3.063747406186449e-05, "loss": 0.4962, "num_tokens": 3761748309.0, "step": 4920 }, { "epoch": 1.8027846477970138, "grad_norm": 0.14010887821874823, "learning_rate": 3.063372521974436e-05, "loss": 0.5127, "num_tokens": 3762495685.0, "step": 4921 }, { "epoch": 1.8031510488229368, "grad_norm": 0.1335580181644445, "learning_rate": 3.062997589116094e-05, "loss": 0.5004, "num_tokens": 3763288108.0, "step": 4922 }, { "epoch": 1.8035174498488595, "grad_norm": 0.1606131053731197, "learning_rate": 3.062622607632548e-05, "loss": 0.5388, "num_tokens": 3764015724.0, "step": 4923 }, { "epoch": 1.8038838508747825, "grad_norm": 0.13886032950996596, "learning_rate": 3.062247577544927e-05, "loss": 0.5018, "num_tokens": 3764707794.0, "step": 4924 }, { "epoch": 1.8042502519007053, "grad_norm": 0.14206128200587062, "learning_rate": 3.0618724988743603e-05, "loss": 0.5165, "num_tokens": 3765550971.0, "step": 4925 }, { "epoch": 1.804616652926628, "grad_norm": 0.15232536131252003, "learning_rate": 3.061497371641983e-05, "loss": 0.4886, "num_tokens": 3766314175.0, "step": 4926 }, { "epoch": 1.804983053952551, "grad_norm": 0.13983964323648254, "learning_rate": 3.061122195868931e-05, "loss": 0.5148, "num_tokens": 3767067235.0, "step": 4927 }, { "epoch": 1.805349454978474, "grad_norm": 0.1545463744948008, "learning_rate": 3.0607469715763443e-05, "loss": 0.497, "num_tokens": 3767833351.0, "step": 4928 }, { "epoch": 1.8057158560043969, "grad_norm": 0.13727045342730815, "learning_rate": 3.060371698785362e-05, "loss": 0.4672, "num_tokens": 3768604526.0, "step": 4929 }, { "epoch": 1.8060822570303197, "grad_norm": 0.15890957275309625, "learning_rate": 3.059996377517131e-05, "loss": 0.5132, "num_tokens": 3769368629.0, "step": 4930 }, { "epoch": 1.8064486580562424, "grad_norm": 0.14103577056435204, "learning_rate": 3.059621007792799e-05, "loss": 0.4954, "num_tokens": 3770218043.0, "step": 4931 }, { "epoch": 1.8068150590821654, "grad_norm": 0.1474557815790113, "learning_rate": 3.059245589633514e-05, "loss": 0.5038, "num_tokens": 3770898082.0, "step": 4932 }, { "epoch": 1.8071814601080884, "grad_norm": 0.14448030299890058, "learning_rate": 3.058870123060431e-05, "loss": 0.497, "num_tokens": 3771653002.0, "step": 4933 }, { "epoch": 1.8075478611340112, "grad_norm": 0.14511557776819511, "learning_rate": 3.058494608094704e-05, "loss": 0.4656, "num_tokens": 3772458371.0, "step": 4934 }, { "epoch": 1.807914262159934, "grad_norm": 0.13900454479085658, "learning_rate": 3.058119044757491e-05, "loss": 0.5214, "num_tokens": 3773210721.0, "step": 4935 }, { "epoch": 1.8082806631858568, "grad_norm": 0.13871750889465076, "learning_rate": 3.057743433069955e-05, "loss": 0.5151, "num_tokens": 3773979923.0, "step": 4936 }, { "epoch": 1.8086470642117798, "grad_norm": 0.16268654157884266, "learning_rate": 3.057367773053257e-05, "loss": 0.4955, "num_tokens": 3774750829.0, "step": 4937 }, { "epoch": 1.8090134652377028, "grad_norm": 0.13362031772156713, "learning_rate": 3.056992064728564e-05, "loss": 0.4973, "num_tokens": 3775605929.0, "step": 4938 }, { "epoch": 1.8093798662636256, "grad_norm": 0.14358484831309928, "learning_rate": 3.0566163081170474e-05, "loss": 0.4708, "num_tokens": 3776431504.0, "step": 4939 }, { "epoch": 1.8097462672895483, "grad_norm": 0.14219393106301645, "learning_rate": 3.0562405032398764e-05, "loss": 0.508, "num_tokens": 3777188197.0, "step": 4940 }, { "epoch": 1.8101126683154711, "grad_norm": 0.14268418175400824, "learning_rate": 3.055864650118226e-05, "loss": 0.5079, "num_tokens": 3777935652.0, "step": 4941 }, { "epoch": 1.8104790693413941, "grad_norm": 0.13665894889903343, "learning_rate": 3.055488748773275e-05, "loss": 0.5165, "num_tokens": 3778720405.0, "step": 4942 }, { "epoch": 1.8108454703673171, "grad_norm": 0.15990011643346805, "learning_rate": 3.0551127992262025e-05, "loss": 0.5165, "num_tokens": 3779475323.0, "step": 4943 }, { "epoch": 1.81121187139324, "grad_norm": 0.12926280741584098, "learning_rate": 3.05473680149819e-05, "loss": 0.4751, "num_tokens": 3780198091.0, "step": 4944 }, { "epoch": 1.8115782724191627, "grad_norm": 0.16237073422645182, "learning_rate": 3.0543607556104244e-05, "loss": 0.492, "num_tokens": 3781034052.0, "step": 4945 }, { "epoch": 1.8119446734450857, "grad_norm": 0.13563392181688277, "learning_rate": 3.053984661584094e-05, "loss": 0.4987, "num_tokens": 3781836401.0, "step": 4946 }, { "epoch": 1.8123110744710085, "grad_norm": 0.14842352643242365, "learning_rate": 3.0536085194403887e-05, "loss": 0.4965, "num_tokens": 3782532855.0, "step": 4947 }, { "epoch": 1.8126774754969315, "grad_norm": 0.12890279844528243, "learning_rate": 3.053232329200503e-05, "loss": 0.5015, "num_tokens": 3783222729.0, "step": 4948 }, { "epoch": 1.8130438765228543, "grad_norm": 0.17475458678728392, "learning_rate": 3.052856090885632e-05, "loss": 0.5041, "num_tokens": 3783899532.0, "step": 4949 }, { "epoch": 1.813410277548777, "grad_norm": 0.1310314395587992, "learning_rate": 3.0524798045169744e-05, "loss": 0.4672, "num_tokens": 3784668020.0, "step": 4950 }, { "epoch": 1.8137766785747, "grad_norm": 0.15337715881604985, "learning_rate": 3.052103470115734e-05, "loss": 0.5037, "num_tokens": 3785476059.0, "step": 4951 }, { "epoch": 1.814143079600623, "grad_norm": 0.1444047233426524, "learning_rate": 3.0517270877031135e-05, "loss": 0.495, "num_tokens": 3786220782.0, "step": 4952 }, { "epoch": 1.8145094806265458, "grad_norm": 0.13897920836144, "learning_rate": 3.0513506573003205e-05, "loss": 0.477, "num_tokens": 3787003383.0, "step": 4953 }, { "epoch": 1.8148758816524686, "grad_norm": 0.1484065997094469, "learning_rate": 3.0509741789285652e-05, "loss": 0.484, "num_tokens": 3787785201.0, "step": 4954 }, { "epoch": 1.8152422826783914, "grad_norm": 0.1296324772091692, "learning_rate": 3.050597652609059e-05, "loss": 0.4468, "num_tokens": 3788555839.0, "step": 4955 }, { "epoch": 1.8156086837043144, "grad_norm": 0.131062785229472, "learning_rate": 3.050221078363018e-05, "loss": 0.4817, "num_tokens": 3789238044.0, "step": 4956 }, { "epoch": 1.8159750847302374, "grad_norm": 0.1697066532972826, "learning_rate": 3.04984445621166e-05, "loss": 0.5312, "num_tokens": 3790013201.0, "step": 4957 }, { "epoch": 1.8163414857561602, "grad_norm": 0.1322454062374638, "learning_rate": 3.0494677861762052e-05, "loss": 0.4892, "num_tokens": 3790729137.0, "step": 4958 }, { "epoch": 1.816707886782083, "grad_norm": 0.16113607777949118, "learning_rate": 3.0490910682778775e-05, "loss": 0.4902, "num_tokens": 3791410020.0, "step": 4959 }, { "epoch": 1.8170742878080057, "grad_norm": 0.15147635370903445, "learning_rate": 3.048714302537902e-05, "loss": 0.4881, "num_tokens": 3792174881.0, "step": 4960 }, { "epoch": 1.8174406888339287, "grad_norm": 0.1468786582197908, "learning_rate": 3.0483374889775086e-05, "loss": 0.5068, "num_tokens": 3792964214.0, "step": 4961 }, { "epoch": 1.8178070898598517, "grad_norm": 0.1445195147912188, "learning_rate": 3.047960627617927e-05, "loss": 0.5135, "num_tokens": 3793682889.0, "step": 4962 }, { "epoch": 1.8181734908857745, "grad_norm": 0.16414381143058215, "learning_rate": 3.0475837184803932e-05, "loss": 0.484, "num_tokens": 3794525051.0, "step": 4963 }, { "epoch": 1.8185398919116973, "grad_norm": 0.1382566359616996, "learning_rate": 3.047206761586143e-05, "loss": 0.5001, "num_tokens": 3795227317.0, "step": 4964 }, { "epoch": 1.81890629293762, "grad_norm": 0.17782884875594945, "learning_rate": 3.0468297569564154e-05, "loss": 0.5124, "num_tokens": 3795984815.0, "step": 4965 }, { "epoch": 1.819272693963543, "grad_norm": 0.15459934926246935, "learning_rate": 3.046452704612454e-05, "loss": 0.4623, "num_tokens": 3796757166.0, "step": 4966 }, { "epoch": 1.819639094989466, "grad_norm": 0.15214511081721277, "learning_rate": 3.046075604575502e-05, "loss": 0.5211, "num_tokens": 3797463006.0, "step": 4967 }, { "epoch": 1.8200054960153889, "grad_norm": 0.14361121343935032, "learning_rate": 3.045698456866808e-05, "loss": 0.4952, "num_tokens": 3798198719.0, "step": 4968 }, { "epoch": 1.8203718970413116, "grad_norm": 0.14000174085490927, "learning_rate": 3.0453212615076215e-05, "loss": 0.4912, "num_tokens": 3799006429.0, "step": 4969 }, { "epoch": 1.8207382980672346, "grad_norm": 0.13160284875780032, "learning_rate": 3.0449440185191963e-05, "loss": 0.4784, "num_tokens": 3799894493.0, "step": 4970 }, { "epoch": 1.8211046990931574, "grad_norm": 0.1552813526069805, "learning_rate": 3.0445667279227872e-05, "loss": 0.5055, "num_tokens": 3800549182.0, "step": 4971 }, { "epoch": 1.8214711001190804, "grad_norm": 0.1520072631771174, "learning_rate": 3.0441893897396525e-05, "loss": 0.5077, "num_tokens": 3801278448.0, "step": 4972 }, { "epoch": 1.8218375011450032, "grad_norm": 0.13332709311655075, "learning_rate": 3.043812003991054e-05, "loss": 0.4849, "num_tokens": 3802141779.0, "step": 4973 }, { "epoch": 1.822203902170926, "grad_norm": 0.1343333185093807, "learning_rate": 3.0434345706982542e-05, "loss": 0.505, "num_tokens": 3802931489.0, "step": 4974 }, { "epoch": 1.822570303196849, "grad_norm": 0.14740354306907244, "learning_rate": 3.04305708988252e-05, "loss": 0.5092, "num_tokens": 3803729820.0, "step": 4975 }, { "epoch": 1.822936704222772, "grad_norm": 0.15928793427479682, "learning_rate": 3.0426795615651205e-05, "loss": 0.5011, "num_tokens": 3804537312.0, "step": 4976 }, { "epoch": 1.8233031052486948, "grad_norm": 0.1475805570920496, "learning_rate": 3.042301985767327e-05, "loss": 0.4839, "num_tokens": 3805277857.0, "step": 4977 }, { "epoch": 1.8236695062746175, "grad_norm": 0.1374795519869745, "learning_rate": 3.041924362510414e-05, "loss": 0.4835, "num_tokens": 3806004713.0, "step": 4978 }, { "epoch": 1.8240359073005403, "grad_norm": 0.15195880713929472, "learning_rate": 3.041546691815659e-05, "loss": 0.4689, "num_tokens": 3806839400.0, "step": 4979 }, { "epoch": 1.8244023083264633, "grad_norm": 0.1553383741828884, "learning_rate": 3.04116897370434e-05, "loss": 0.4826, "num_tokens": 3807614844.0, "step": 4980 }, { "epoch": 1.8247687093523863, "grad_norm": 0.15102860713968608, "learning_rate": 3.040791208197742e-05, "loss": 0.501, "num_tokens": 3808437117.0, "step": 4981 }, { "epoch": 1.8251351103783091, "grad_norm": 0.16070324742723704, "learning_rate": 3.0404133953171474e-05, "loss": 0.4674, "num_tokens": 3809238790.0, "step": 4982 }, { "epoch": 1.825501511404232, "grad_norm": 0.15443346321495321, "learning_rate": 3.0400355350838456e-05, "loss": 0.5508, "num_tokens": 3810011023.0, "step": 4983 }, { "epoch": 1.8258679124301547, "grad_norm": 0.18950491595371471, "learning_rate": 3.0396576275191264e-05, "loss": 0.4933, "num_tokens": 3810802037.0, "step": 4984 }, { "epoch": 1.8262343134560777, "grad_norm": 0.1393927808287346, "learning_rate": 3.039279672644283e-05, "loss": 0.5145, "num_tokens": 3811634757.0, "step": 4985 }, { "epoch": 1.8266007144820007, "grad_norm": 0.1582823607336427, "learning_rate": 3.0389016704806104e-05, "loss": 0.499, "num_tokens": 3812446528.0, "step": 4986 }, { "epoch": 1.8269671155079235, "grad_norm": 0.17027388542432417, "learning_rate": 3.0385236210494074e-05, "loss": 0.4751, "num_tokens": 3813080750.0, "step": 4987 }, { "epoch": 1.8273335165338462, "grad_norm": 0.1397601739301124, "learning_rate": 3.0381455243719755e-05, "loss": 0.5113, "num_tokens": 3813902857.0, "step": 4988 }, { "epoch": 1.827699917559769, "grad_norm": 0.1632823713378281, "learning_rate": 3.037767380469618e-05, "loss": 0.5129, "num_tokens": 3814604339.0, "step": 4989 }, { "epoch": 1.828066318585692, "grad_norm": 0.15357589735895905, "learning_rate": 3.037389189363641e-05, "loss": 0.5031, "num_tokens": 3815377436.0, "step": 4990 }, { "epoch": 1.828432719611615, "grad_norm": 0.14728378428901925, "learning_rate": 3.0370109510753546e-05, "loss": 0.5059, "num_tokens": 3816232325.0, "step": 4991 }, { "epoch": 1.8287991206375378, "grad_norm": 0.1384784355651523, "learning_rate": 3.0366326656260686e-05, "loss": 0.4992, "num_tokens": 3817015329.0, "step": 4992 }, { "epoch": 1.8291655216634606, "grad_norm": 0.15941191034297686, "learning_rate": 3.0362543330370995e-05, "loss": 0.5394, "num_tokens": 3817750164.0, "step": 4993 }, { "epoch": 1.8295319226893836, "grad_norm": 0.15712194098850976, "learning_rate": 3.0358759533297618e-05, "loss": 0.4904, "num_tokens": 3818580924.0, "step": 4994 }, { "epoch": 1.8298983237153064, "grad_norm": 0.13184342502090535, "learning_rate": 3.0354975265253773e-05, "loss": 0.4835, "num_tokens": 3819355646.0, "step": 4995 }, { "epoch": 1.8302647247412294, "grad_norm": 0.1408893195501649, "learning_rate": 3.035119052645267e-05, "loss": 0.498, "num_tokens": 3820136296.0, "step": 4996 }, { "epoch": 1.8306311257671521, "grad_norm": 0.14343055135586896, "learning_rate": 3.0347405317107567e-05, "loss": 0.5196, "num_tokens": 3820952018.0, "step": 4997 }, { "epoch": 1.830997526793075, "grad_norm": 0.15020987571078018, "learning_rate": 3.034361963743173e-05, "loss": 0.5142, "num_tokens": 3821707861.0, "step": 4998 }, { "epoch": 1.831363927818998, "grad_norm": 0.14959489168167905, "learning_rate": 3.033983348763847e-05, "loss": 0.5182, "num_tokens": 3822461093.0, "step": 4999 }, { "epoch": 1.831730328844921, "grad_norm": 0.14592624058715106, "learning_rate": 3.033604686794111e-05, "loss": 0.4911, "num_tokens": 3823251049.0, "step": 5000 }, { "epoch": 1.8320967298708437, "grad_norm": 0.16542487820336635, "learning_rate": 3.0332259778553016e-05, "loss": 0.4893, "num_tokens": 3824010998.0, "step": 5001 }, { "epoch": 1.8324631308967665, "grad_norm": 0.13674621113532628, "learning_rate": 3.0328472219687556e-05, "loss": 0.4817, "num_tokens": 3824758959.0, "step": 5002 }, { "epoch": 1.8328295319226893, "grad_norm": 0.15951733497116546, "learning_rate": 3.0324684191558145e-05, "loss": 0.4964, "num_tokens": 3825545794.0, "step": 5003 }, { "epoch": 1.8331959329486123, "grad_norm": 0.1389350961061757, "learning_rate": 3.032089569437822e-05, "loss": 0.4741, "num_tokens": 3826273105.0, "step": 5004 }, { "epoch": 1.8335623339745353, "grad_norm": 0.14235972861996274, "learning_rate": 3.0317106728361234e-05, "loss": 0.4702, "num_tokens": 3827124305.0, "step": 5005 }, { "epoch": 1.833928735000458, "grad_norm": 0.14684934776659525, "learning_rate": 3.0313317293720685e-05, "loss": 0.5142, "num_tokens": 3827819935.0, "step": 5006 }, { "epoch": 1.8342951360263808, "grad_norm": 0.15687384531925577, "learning_rate": 3.030952739067007e-05, "loss": 0.5085, "num_tokens": 3828509171.0, "step": 5007 }, { "epoch": 1.8346615370523036, "grad_norm": 0.1325629728719415, "learning_rate": 3.0305737019422955e-05, "loss": 0.4604, "num_tokens": 3829297919.0, "step": 5008 }, { "epoch": 1.8350279380782266, "grad_norm": 0.16240657290103272, "learning_rate": 3.0301946180192888e-05, "loss": 0.5105, "num_tokens": 3830094146.0, "step": 5009 }, { "epoch": 1.8353943391041496, "grad_norm": 0.13752095455515678, "learning_rate": 3.0298154873193467e-05, "loss": 0.4715, "num_tokens": 3830900956.0, "step": 5010 }, { "epoch": 1.8357607401300724, "grad_norm": 0.14518104876465615, "learning_rate": 3.0294363098638303e-05, "loss": 0.5075, "num_tokens": 3831727122.0, "step": 5011 }, { "epoch": 1.8361271411559952, "grad_norm": 0.15151302050814902, "learning_rate": 3.0290570856741065e-05, "loss": 0.5029, "num_tokens": 3832507417.0, "step": 5012 }, { "epoch": 1.836493542181918, "grad_norm": 0.12464245602006625, "learning_rate": 3.02867781477154e-05, "loss": 0.4968, "num_tokens": 3833406500.0, "step": 5013 }, { "epoch": 1.836859943207841, "grad_norm": 0.13447991538785545, "learning_rate": 3.0282984971775016e-05, "loss": 0.4918, "num_tokens": 3834195764.0, "step": 5014 }, { "epoch": 1.837226344233764, "grad_norm": 0.1703875221598291, "learning_rate": 3.0279191329133644e-05, "loss": 0.5168, "num_tokens": 3835021774.0, "step": 5015 }, { "epoch": 1.8375927452596867, "grad_norm": 0.13661499941790897, "learning_rate": 3.027539722000503e-05, "loss": 0.5056, "num_tokens": 3835853220.0, "step": 5016 }, { "epoch": 1.8379591462856095, "grad_norm": 0.13361832007397031, "learning_rate": 3.0271602644602948e-05, "loss": 0.5211, "num_tokens": 3836656181.0, "step": 5017 }, { "epoch": 1.8383255473115325, "grad_norm": 0.16053036793283681, "learning_rate": 3.0267807603141205e-05, "loss": 0.4733, "num_tokens": 3837373471.0, "step": 5018 }, { "epoch": 1.8386919483374553, "grad_norm": 0.1481972694295328, "learning_rate": 3.026401209583363e-05, "loss": 0.5091, "num_tokens": 3838180233.0, "step": 5019 }, { "epoch": 1.8390583493633783, "grad_norm": 0.13479433697818347, "learning_rate": 3.0260216122894078e-05, "loss": 0.5287, "num_tokens": 3838923288.0, "step": 5020 }, { "epoch": 1.839424750389301, "grad_norm": 0.17216079958561026, "learning_rate": 3.0256419684536435e-05, "loss": 0.5335, "num_tokens": 3839647109.0, "step": 5021 }, { "epoch": 1.8397911514152239, "grad_norm": 0.1525632657468556, "learning_rate": 3.0252622780974605e-05, "loss": 0.4726, "num_tokens": 3840312143.0, "step": 5022 }, { "epoch": 1.8401575524411469, "grad_norm": 0.16036978386150608, "learning_rate": 3.0248825412422533e-05, "loss": 0.5116, "num_tokens": 3841187599.0, "step": 5023 }, { "epoch": 1.8405239534670699, "grad_norm": 0.15394163503826885, "learning_rate": 3.0245027579094156e-05, "loss": 0.5071, "num_tokens": 3841953159.0, "step": 5024 }, { "epoch": 1.8408903544929927, "grad_norm": 0.14038685863721048, "learning_rate": 3.024122928120349e-05, "loss": 0.5143, "num_tokens": 3842756735.0, "step": 5025 }, { "epoch": 1.8412567555189154, "grad_norm": 0.15857319251951354, "learning_rate": 3.0237430518964532e-05, "loss": 0.5174, "num_tokens": 3843494271.0, "step": 5026 }, { "epoch": 1.8416231565448382, "grad_norm": 0.16320406410350063, "learning_rate": 3.023363129259133e-05, "loss": 0.5236, "num_tokens": 3844281801.0, "step": 5027 }, { "epoch": 1.8419895575707612, "grad_norm": 0.17057279990422372, "learning_rate": 3.022983160229794e-05, "loss": 0.4698, "num_tokens": 3845071931.0, "step": 5028 }, { "epoch": 1.8423559585966842, "grad_norm": 0.15457449214116922, "learning_rate": 3.0226031448298455e-05, "loss": 0.4951, "num_tokens": 3845842157.0, "step": 5029 }, { "epoch": 1.842722359622607, "grad_norm": 0.129342830263151, "learning_rate": 3.0222230830807003e-05, "loss": 0.5083, "num_tokens": 3846773496.0, "step": 5030 }, { "epoch": 1.8430887606485298, "grad_norm": 0.1581711191973864, "learning_rate": 3.0218429750037722e-05, "loss": 0.5018, "num_tokens": 3847486576.0, "step": 5031 }, { "epoch": 1.8434551616744526, "grad_norm": 0.1566608774249471, "learning_rate": 3.0214628206204777e-05, "loss": 0.5245, "num_tokens": 3848240986.0, "step": 5032 }, { "epoch": 1.8438215627003756, "grad_norm": 0.13773315320792673, "learning_rate": 3.021082619952237e-05, "loss": 0.503, "num_tokens": 3849067044.0, "step": 5033 }, { "epoch": 1.8441879637262986, "grad_norm": 0.13528284856744754, "learning_rate": 3.0207023730204725e-05, "loss": 0.4933, "num_tokens": 3849882565.0, "step": 5034 }, { "epoch": 1.8445543647522213, "grad_norm": 0.16032632045227077, "learning_rate": 3.0203220798466088e-05, "loss": 0.4829, "num_tokens": 3850648487.0, "step": 5035 }, { "epoch": 1.8449207657781441, "grad_norm": 0.132443703936173, "learning_rate": 3.0199417404520734e-05, "loss": 0.5081, "num_tokens": 3851425982.0, "step": 5036 }, { "epoch": 1.845287166804067, "grad_norm": 0.16415885445032694, "learning_rate": 3.0195613548582953e-05, "loss": 0.5379, "num_tokens": 3852049786.0, "step": 5037 }, { "epoch": 1.84565356782999, "grad_norm": 0.14994713064101742, "learning_rate": 3.0191809230867096e-05, "loss": 0.539, "num_tokens": 3852787337.0, "step": 5038 }, { "epoch": 1.846019968855913, "grad_norm": 0.15365422485153693, "learning_rate": 3.0188004451587495e-05, "loss": 0.4748, "num_tokens": 3853559326.0, "step": 5039 }, { "epoch": 1.8463863698818357, "grad_norm": 0.1476050609272339, "learning_rate": 3.0184199210958528e-05, "loss": 0.5059, "num_tokens": 3854293154.0, "step": 5040 }, { "epoch": 1.8467527709077585, "grad_norm": 0.1525240507130944, "learning_rate": 3.0180393509194616e-05, "loss": 0.4986, "num_tokens": 3854963244.0, "step": 5041 }, { "epoch": 1.8471191719336815, "grad_norm": 0.15486978172967317, "learning_rate": 3.017658734651017e-05, "loss": 0.4958, "num_tokens": 3855714839.0, "step": 5042 }, { "epoch": 1.8474855729596042, "grad_norm": 0.15753768808919666, "learning_rate": 3.0172780723119663e-05, "loss": 0.5081, "num_tokens": 3856433087.0, "step": 5043 }, { "epoch": 1.8478519739855273, "grad_norm": 0.14582064642533105, "learning_rate": 3.0168973639237566e-05, "loss": 0.4883, "num_tokens": 3857215078.0, "step": 5044 }, { "epoch": 1.84821837501145, "grad_norm": 0.16008766742975722, "learning_rate": 3.0165166095078392e-05, "loss": 0.487, "num_tokens": 3858029402.0, "step": 5045 }, { "epoch": 1.8485847760373728, "grad_norm": 0.14341003226191998, "learning_rate": 3.0161358090856678e-05, "loss": 0.4726, "num_tokens": 3858784548.0, "step": 5046 }, { "epoch": 1.8489511770632958, "grad_norm": 0.1505107400757704, "learning_rate": 3.0157549626786977e-05, "loss": 0.5273, "num_tokens": 3859596342.0, "step": 5047 }, { "epoch": 1.8493175780892186, "grad_norm": 0.1815668368643414, "learning_rate": 3.0153740703083887e-05, "loss": 0.4792, "num_tokens": 3860464478.0, "step": 5048 }, { "epoch": 1.8496839791151416, "grad_norm": 0.1332399014996801, "learning_rate": 3.0149931319962006e-05, "loss": 0.5172, "num_tokens": 3861153510.0, "step": 5049 }, { "epoch": 1.8500503801410644, "grad_norm": 0.16404422955188333, "learning_rate": 3.014612147763598e-05, "loss": 0.5257, "num_tokens": 3861913400.0, "step": 5050 }, { "epoch": 1.8504167811669872, "grad_norm": 0.16468134596650805, "learning_rate": 3.0142311176320467e-05, "loss": 0.4989, "num_tokens": 3862681591.0, "step": 5051 }, { "epoch": 1.8507831821929102, "grad_norm": 0.138764410582466, "learning_rate": 3.013850041623017e-05, "loss": 0.4823, "num_tokens": 3863412053.0, "step": 5052 }, { "epoch": 1.8511495832188332, "grad_norm": 0.17382917310715454, "learning_rate": 3.0134689197579784e-05, "loss": 0.5076, "num_tokens": 3864318096.0, "step": 5053 }, { "epoch": 1.851515984244756, "grad_norm": 0.1478612743799432, "learning_rate": 3.0130877520584075e-05, "loss": 0.5042, "num_tokens": 3865084776.0, "step": 5054 }, { "epoch": 1.8518823852706787, "grad_norm": 0.15924058822788398, "learning_rate": 3.012706538545779e-05, "loss": 0.4681, "num_tokens": 3865845382.0, "step": 5055 }, { "epoch": 1.8522487862966015, "grad_norm": 0.157490580995888, "learning_rate": 3.0123252792415733e-05, "loss": 0.5064, "num_tokens": 3866588750.0, "step": 5056 }, { "epoch": 1.8526151873225245, "grad_norm": 0.14025252667734794, "learning_rate": 3.0119439741672713e-05, "loss": 0.5048, "num_tokens": 3867414765.0, "step": 5057 }, { "epoch": 1.8529815883484475, "grad_norm": 0.15193426314063466, "learning_rate": 3.0115626233443586e-05, "loss": 0.5284, "num_tokens": 3868254262.0, "step": 5058 }, { "epoch": 1.8533479893743703, "grad_norm": 0.15410331577342468, "learning_rate": 3.011181226794321e-05, "loss": 0.5024, "num_tokens": 3868930166.0, "step": 5059 }, { "epoch": 1.853714390400293, "grad_norm": 0.14202099435302065, "learning_rate": 3.010799784538649e-05, "loss": 0.5231, "num_tokens": 3869623802.0, "step": 5060 }, { "epoch": 1.8540807914262158, "grad_norm": 0.13286213086804868, "learning_rate": 3.010418296598835e-05, "loss": 0.4676, "num_tokens": 3870458203.0, "step": 5061 }, { "epoch": 1.8544471924521388, "grad_norm": 0.14170799157256891, "learning_rate": 3.0100367629963733e-05, "loss": 0.4775, "num_tokens": 3871217442.0, "step": 5062 }, { "epoch": 1.8548135934780619, "grad_norm": 0.13403502500157286, "learning_rate": 3.0096551837527613e-05, "loss": 0.4821, "num_tokens": 3871948374.0, "step": 5063 }, { "epoch": 1.8551799945039846, "grad_norm": 0.15122416278161596, "learning_rate": 3.0092735588894984e-05, "loss": 0.5449, "num_tokens": 3872686346.0, "step": 5064 }, { "epoch": 1.8555463955299074, "grad_norm": 0.13612558976494707, "learning_rate": 3.0088918884280884e-05, "loss": 0.5086, "num_tokens": 3873447498.0, "step": 5065 }, { "epoch": 1.8559127965558302, "grad_norm": 0.13786033573951123, "learning_rate": 3.0085101723900347e-05, "loss": 0.4781, "num_tokens": 3874205902.0, "step": 5066 }, { "epoch": 1.8562791975817532, "grad_norm": 0.13484671700535802, "learning_rate": 3.008128410796846e-05, "loss": 0.4848, "num_tokens": 3875024133.0, "step": 5067 }, { "epoch": 1.8566455986076762, "grad_norm": 0.14775869457856364, "learning_rate": 3.0077466036700323e-05, "loss": 0.5249, "num_tokens": 3875813134.0, "step": 5068 }, { "epoch": 1.857011999633599, "grad_norm": 0.13592826908655772, "learning_rate": 3.0073647510311064e-05, "loss": 0.4823, "num_tokens": 3876720273.0, "step": 5069 }, { "epoch": 1.8573784006595218, "grad_norm": 0.12740613705623646, "learning_rate": 3.006982852901583e-05, "loss": 0.4681, "num_tokens": 3877482532.0, "step": 5070 }, { "epoch": 1.8577448016854448, "grad_norm": 0.13747081464994715, "learning_rate": 3.006600909302981e-05, "loss": 0.5041, "num_tokens": 3878304237.0, "step": 5071 }, { "epoch": 1.8581112027113675, "grad_norm": 0.14391269382569555, "learning_rate": 3.00621892025682e-05, "loss": 0.519, "num_tokens": 3879054343.0, "step": 5072 }, { "epoch": 1.8584776037372905, "grad_norm": 0.1327850283877827, "learning_rate": 3.0058368857846237e-05, "loss": 0.4915, "num_tokens": 3879800891.0, "step": 5073 }, { "epoch": 1.8588440047632133, "grad_norm": 0.14576716894922429, "learning_rate": 3.005454805907917e-05, "loss": 0.5024, "num_tokens": 3880516157.0, "step": 5074 }, { "epoch": 1.859210405789136, "grad_norm": 0.14361821183740875, "learning_rate": 3.0050726806482276e-05, "loss": 0.5053, "num_tokens": 3881300836.0, "step": 5075 }, { "epoch": 1.859576806815059, "grad_norm": 0.16380077785168656, "learning_rate": 3.004690510027088e-05, "loss": 0.514, "num_tokens": 3882074628.0, "step": 5076 }, { "epoch": 1.859943207840982, "grad_norm": 0.13980220142081806, "learning_rate": 3.0043082940660294e-05, "loss": 0.5301, "num_tokens": 3882827153.0, "step": 5077 }, { "epoch": 1.8603096088669049, "grad_norm": 0.14714158619670023, "learning_rate": 3.0039260327865887e-05, "loss": 0.5292, "num_tokens": 3883541897.0, "step": 5078 }, { "epoch": 1.8606760098928277, "grad_norm": 0.14719001592732936, "learning_rate": 3.0035437262103038e-05, "loss": 0.4898, "num_tokens": 3884471068.0, "step": 5079 }, { "epoch": 1.8610424109187504, "grad_norm": 0.15172385741996386, "learning_rate": 3.0031613743587163e-05, "loss": 0.5113, "num_tokens": 3885197231.0, "step": 5080 }, { "epoch": 1.8614088119446734, "grad_norm": 0.13067565273519896, "learning_rate": 3.0027789772533683e-05, "loss": 0.5072, "num_tokens": 3885983546.0, "step": 5081 }, { "epoch": 1.8617752129705964, "grad_norm": 0.15214721545799803, "learning_rate": 3.0023965349158074e-05, "loss": 0.4663, "num_tokens": 3886666356.0, "step": 5082 }, { "epoch": 1.8621416139965192, "grad_norm": 0.16165989711873424, "learning_rate": 3.0020140473675813e-05, "loss": 0.5192, "num_tokens": 3887389662.0, "step": 5083 }, { "epoch": 1.862508015022442, "grad_norm": 0.11954451039313033, "learning_rate": 3.0016315146302407e-05, "loss": 0.4758, "num_tokens": 3888319579.0, "step": 5084 }, { "epoch": 1.8628744160483648, "grad_norm": 0.1570959879255723, "learning_rate": 3.0012489367253395e-05, "loss": 0.5136, "num_tokens": 3889026173.0, "step": 5085 }, { "epoch": 1.8632408170742878, "grad_norm": 0.14434079741782715, "learning_rate": 3.0008663136744343e-05, "loss": 0.5159, "num_tokens": 3889817697.0, "step": 5086 }, { "epoch": 1.8636072181002108, "grad_norm": 0.14757065518830198, "learning_rate": 3.000483645499084e-05, "loss": 0.4985, "num_tokens": 3890717166.0, "step": 5087 }, { "epoch": 1.8639736191261336, "grad_norm": 0.13762691293676788, "learning_rate": 3.000100932220849e-05, "loss": 0.4971, "num_tokens": 3891414767.0, "step": 5088 }, { "epoch": 1.8643400201520564, "grad_norm": 0.12492021163991597, "learning_rate": 2.9997181738612933e-05, "loss": 0.5188, "num_tokens": 3892239062.0, "step": 5089 }, { "epoch": 1.8647064211779791, "grad_norm": 0.15593955566195344, "learning_rate": 2.9993353704419837e-05, "loss": 0.5313, "num_tokens": 3892965073.0, "step": 5090 }, { "epoch": 1.8650728222039021, "grad_norm": 0.14203061191299712, "learning_rate": 2.9989525219844884e-05, "loss": 0.4836, "num_tokens": 3893792063.0, "step": 5091 }, { "epoch": 1.8654392232298251, "grad_norm": 0.13665287430447828, "learning_rate": 2.99856962851038e-05, "loss": 0.505, "num_tokens": 3894636739.0, "step": 5092 }, { "epoch": 1.865805624255748, "grad_norm": 0.13705550763097746, "learning_rate": 2.998186690041232e-05, "loss": 0.5134, "num_tokens": 3895311039.0, "step": 5093 }, { "epoch": 1.8661720252816707, "grad_norm": 0.15834633134664491, "learning_rate": 2.9978037065986195e-05, "loss": 0.4926, "num_tokens": 3895998436.0, "step": 5094 }, { "epoch": 1.8665384263075937, "grad_norm": 0.13810714044207104, "learning_rate": 2.9974206782041232e-05, "loss": 0.5255, "num_tokens": 3896826092.0, "step": 5095 }, { "epoch": 1.8669048273335165, "grad_norm": 0.15395266915419903, "learning_rate": 2.9970376048793243e-05, "loss": 0.5532, "num_tokens": 3897701046.0, "step": 5096 }, { "epoch": 1.8672712283594395, "grad_norm": 0.13598151151622845, "learning_rate": 2.9966544866458064e-05, "loss": 0.53, "num_tokens": 3898538097.0, "step": 5097 }, { "epoch": 1.8676376293853623, "grad_norm": 0.1488295423295943, "learning_rate": 2.996271323525157e-05, "loss": 0.4934, "num_tokens": 3899464736.0, "step": 5098 }, { "epoch": 1.868004030411285, "grad_norm": 0.13785814334407578, "learning_rate": 2.9958881155389634e-05, "loss": 0.5159, "num_tokens": 3900183210.0, "step": 5099 }, { "epoch": 1.868370431437208, "grad_norm": 0.16117579453921932, "learning_rate": 2.99550486270882e-05, "loss": 0.5262, "num_tokens": 3900907446.0, "step": 5100 }, { "epoch": 1.868736832463131, "grad_norm": 0.13560524062034815, "learning_rate": 2.9951215650563184e-05, "loss": 0.5078, "num_tokens": 3901740053.0, "step": 5101 }, { "epoch": 1.8691032334890538, "grad_norm": 0.13488472119304096, "learning_rate": 2.9947382226030574e-05, "loss": 0.5045, "num_tokens": 3902542175.0, "step": 5102 }, { "epoch": 1.8694696345149766, "grad_norm": 0.13720135995556332, "learning_rate": 2.994354835370635e-05, "loss": 0.4873, "num_tokens": 3903278025.0, "step": 5103 }, { "epoch": 1.8698360355408994, "grad_norm": 0.1459673756955226, "learning_rate": 2.9939714033806534e-05, "loss": 0.4773, "num_tokens": 3904059444.0, "step": 5104 }, { "epoch": 1.8702024365668224, "grad_norm": 0.14061398201336153, "learning_rate": 2.9935879266547165e-05, "loss": 0.4753, "num_tokens": 3904812967.0, "step": 5105 }, { "epoch": 1.8705688375927454, "grad_norm": 0.15298599433689902, "learning_rate": 2.993204405214431e-05, "loss": 0.5012, "num_tokens": 3905480087.0, "step": 5106 }, { "epoch": 1.8709352386186682, "grad_norm": 0.12913908829311288, "learning_rate": 2.9928208390814078e-05, "loss": 0.5042, "num_tokens": 3906395660.0, "step": 5107 }, { "epoch": 1.871301639644591, "grad_norm": 0.13854553125844346, "learning_rate": 2.9924372282772565e-05, "loss": 0.5394, "num_tokens": 3907092454.0, "step": 5108 }, { "epoch": 1.8716680406705137, "grad_norm": 0.14876788730701063, "learning_rate": 2.992053572823594e-05, "loss": 0.4911, "num_tokens": 3907863367.0, "step": 5109 }, { "epoch": 1.8720344416964367, "grad_norm": 0.14390965419063345, "learning_rate": 2.9916698727420347e-05, "loss": 0.4978, "num_tokens": 3908565750.0, "step": 5110 }, { "epoch": 1.8724008427223597, "grad_norm": 0.13284673347337472, "learning_rate": 2.9912861280541997e-05, "loss": 0.4843, "num_tokens": 3909461945.0, "step": 5111 }, { "epoch": 1.8727672437482825, "grad_norm": 0.15230433970783175, "learning_rate": 2.9909023387817102e-05, "loss": 0.4721, "num_tokens": 3910292195.0, "step": 5112 }, { "epoch": 1.8731336447742053, "grad_norm": 0.12480525994960895, "learning_rate": 2.9905185049461908e-05, "loss": 0.5006, "num_tokens": 3911179102.0, "step": 5113 }, { "epoch": 1.873500045800128, "grad_norm": 0.14217924746176636, "learning_rate": 2.9901346265692677e-05, "loss": 0.5142, "num_tokens": 3911973588.0, "step": 5114 }, { "epoch": 1.873866446826051, "grad_norm": 0.15437905025496879, "learning_rate": 2.9897507036725718e-05, "loss": 0.5046, "num_tokens": 3912737223.0, "step": 5115 }, { "epoch": 1.874232847851974, "grad_norm": 0.13991370752892765, "learning_rate": 2.9893667362777338e-05, "loss": 0.4977, "num_tokens": 3913502376.0, "step": 5116 }, { "epoch": 1.8745992488778969, "grad_norm": 0.1444461200790152, "learning_rate": 2.988982724406389e-05, "loss": 0.5325, "num_tokens": 3914275341.0, "step": 5117 }, { "epoch": 1.8749656499038196, "grad_norm": 0.14028806424748133, "learning_rate": 2.988598668080174e-05, "loss": 0.4541, "num_tokens": 3914906467.0, "step": 5118 }, { "epoch": 1.8753320509297426, "grad_norm": 0.1592704465899526, "learning_rate": 2.988214567320728e-05, "loss": 0.4925, "num_tokens": 3915632504.0, "step": 5119 }, { "epoch": 1.8756984519556654, "grad_norm": 0.13132562447738047, "learning_rate": 2.987830422149694e-05, "loss": 0.4942, "num_tokens": 3916485609.0, "step": 5120 }, { "epoch": 1.8760648529815884, "grad_norm": 0.15506757060020898, "learning_rate": 2.987446232588715e-05, "loss": 0.4994, "num_tokens": 3917265504.0, "step": 5121 }, { "epoch": 1.8764312540075112, "grad_norm": 0.1338612615076107, "learning_rate": 2.9870619986594395e-05, "loss": 0.521, "num_tokens": 3918151506.0, "step": 5122 }, { "epoch": 1.876797655033434, "grad_norm": 0.12804993601280926, "learning_rate": 2.9866777203835155e-05, "loss": 0.5127, "num_tokens": 3918900385.0, "step": 5123 }, { "epoch": 1.877164056059357, "grad_norm": 0.16024327881166806, "learning_rate": 2.9862933977825963e-05, "loss": 0.5521, "num_tokens": 3919578302.0, "step": 5124 }, { "epoch": 1.87753045708528, "grad_norm": 0.13722158465685075, "learning_rate": 2.9859090308783357e-05, "loss": 0.4835, "num_tokens": 3920387741.0, "step": 5125 }, { "epoch": 1.8778968581112028, "grad_norm": 0.13839328509206847, "learning_rate": 2.9855246196923913e-05, "loss": 0.53, "num_tokens": 3921126329.0, "step": 5126 }, { "epoch": 1.8782632591371256, "grad_norm": 0.14581272044661625, "learning_rate": 2.985140164246421e-05, "loss": 0.4754, "num_tokens": 3921974521.0, "step": 5127 }, { "epoch": 1.8786296601630483, "grad_norm": 0.12698233437546483, "learning_rate": 2.9847556645620884e-05, "loss": 0.4964, "num_tokens": 3922760636.0, "step": 5128 }, { "epoch": 1.8789960611889713, "grad_norm": 0.14373023509259883, "learning_rate": 2.9843711206610577e-05, "loss": 0.481, "num_tokens": 3923501835.0, "step": 5129 }, { "epoch": 1.8793624622148943, "grad_norm": 0.15766240810323853, "learning_rate": 2.9839865325649958e-05, "loss": 0.4764, "num_tokens": 3924275245.0, "step": 5130 }, { "epoch": 1.8797288632408171, "grad_norm": 0.14558382636561734, "learning_rate": 2.983601900295571e-05, "loss": 0.5479, "num_tokens": 3925061204.0, "step": 5131 }, { "epoch": 1.88009526426674, "grad_norm": 0.1233230407028466, "learning_rate": 2.9832172238744563e-05, "loss": 0.5183, "num_tokens": 3925901243.0, "step": 5132 }, { "epoch": 1.8804616652926627, "grad_norm": 0.1661669089276141, "learning_rate": 2.9828325033233262e-05, "loss": 0.52, "num_tokens": 3926659244.0, "step": 5133 }, { "epoch": 1.8808280663185857, "grad_norm": 0.13574495721214902, "learning_rate": 2.982447738663857e-05, "loss": 0.5174, "num_tokens": 3927383612.0, "step": 5134 }, { "epoch": 1.8811944673445087, "grad_norm": 0.1449100821869467, "learning_rate": 2.9820629299177288e-05, "loss": 0.4884, "num_tokens": 3928341576.0, "step": 5135 }, { "epoch": 1.8815608683704315, "grad_norm": 0.13220905219707343, "learning_rate": 2.9816780771066226e-05, "loss": 0.4716, "num_tokens": 3929047521.0, "step": 5136 }, { "epoch": 1.8819272693963542, "grad_norm": 0.14280345562579272, "learning_rate": 2.9812931802522242e-05, "loss": 0.4964, "num_tokens": 3929801957.0, "step": 5137 }, { "epoch": 1.882293670422277, "grad_norm": 0.14133645573713893, "learning_rate": 2.9809082393762185e-05, "loss": 0.4989, "num_tokens": 3930557599.0, "step": 5138 }, { "epoch": 1.8826600714482, "grad_norm": 0.15321273529863672, "learning_rate": 2.9805232545002965e-05, "loss": 0.5351, "num_tokens": 3931258628.0, "step": 5139 }, { "epoch": 1.883026472474123, "grad_norm": 0.14115503078833544, "learning_rate": 2.9801382256461488e-05, "loss": 0.5163, "num_tokens": 3931986173.0, "step": 5140 }, { "epoch": 1.8833928735000458, "grad_norm": 0.1347017282364553, "learning_rate": 2.97975315283547e-05, "loss": 0.4868, "num_tokens": 3932661054.0, "step": 5141 }, { "epoch": 1.8837592745259686, "grad_norm": 0.14022673589612794, "learning_rate": 2.979368036089958e-05, "loss": 0.5126, "num_tokens": 3933411114.0, "step": 5142 }, { "epoch": 1.8841256755518916, "grad_norm": 0.13039112561554778, "learning_rate": 2.9789828754313103e-05, "loss": 0.5103, "num_tokens": 3934147407.0, "step": 5143 }, { "epoch": 1.8844920765778144, "grad_norm": 0.14558240985626758, "learning_rate": 2.9785976708812295e-05, "loss": 0.5117, "num_tokens": 3934956309.0, "step": 5144 }, { "epoch": 1.8848584776037374, "grad_norm": 0.14809828351021748, "learning_rate": 2.9782124224614194e-05, "loss": 0.5241, "num_tokens": 3935797888.0, "step": 5145 }, { "epoch": 1.8852248786296602, "grad_norm": 0.1290860632929306, "learning_rate": 2.9778271301935878e-05, "loss": 0.4658, "num_tokens": 3936503136.0, "step": 5146 }, { "epoch": 1.885591279655583, "grad_norm": 0.13572680976440513, "learning_rate": 2.977441794099442e-05, "loss": 0.5043, "num_tokens": 3937253112.0, "step": 5147 }, { "epoch": 1.885957680681506, "grad_norm": 0.12952041118742852, "learning_rate": 2.977056414200694e-05, "loss": 0.4983, "num_tokens": 3938096473.0, "step": 5148 }, { "epoch": 1.886324081707429, "grad_norm": 0.15159607764146588, "learning_rate": 2.9766709905190597e-05, "loss": 0.5079, "num_tokens": 3938690133.0, "step": 5149 }, { "epoch": 1.8866904827333517, "grad_norm": 0.14951234366394212, "learning_rate": 2.976285523076254e-05, "loss": 0.5, "num_tokens": 3939475732.0, "step": 5150 }, { "epoch": 1.8870568837592745, "grad_norm": 0.14050292789996224, "learning_rate": 2.975900011893995e-05, "loss": 0.4762, "num_tokens": 3940257990.0, "step": 5151 }, { "epoch": 1.8874232847851973, "grad_norm": 0.144418340557272, "learning_rate": 2.9755144569940064e-05, "loss": 0.4607, "num_tokens": 3941140970.0, "step": 5152 }, { "epoch": 1.8877896858111203, "grad_norm": 0.13092790443052885, "learning_rate": 2.9751288583980104e-05, "loss": 0.4783, "num_tokens": 3941928524.0, "step": 5153 }, { "epoch": 1.8881560868370433, "grad_norm": 0.1494316504191053, "learning_rate": 2.9747432161277343e-05, "loss": 0.5147, "num_tokens": 3942744314.0, "step": 5154 }, { "epoch": 1.888522487862966, "grad_norm": 0.1416119014899711, "learning_rate": 2.9743575302049065e-05, "loss": 0.4783, "num_tokens": 3943514989.0, "step": 5155 }, { "epoch": 1.8888888888888888, "grad_norm": 0.15408573138170797, "learning_rate": 2.9739718006512584e-05, "loss": 0.5083, "num_tokens": 3944201144.0, "step": 5156 }, { "epoch": 1.8892552899148116, "grad_norm": 0.15925068240331555, "learning_rate": 2.9735860274885242e-05, "loss": 0.5389, "num_tokens": 3944955480.0, "step": 5157 }, { "epoch": 1.8896216909407346, "grad_norm": 0.14452234856302, "learning_rate": 2.973200210738439e-05, "loss": 0.5106, "num_tokens": 3945634729.0, "step": 5158 }, { "epoch": 1.8899880919666576, "grad_norm": 0.15559285492369698, "learning_rate": 2.9728143504227423e-05, "loss": 0.5026, "num_tokens": 3946442835.0, "step": 5159 }, { "epoch": 1.8903544929925804, "grad_norm": 0.15534359156585348, "learning_rate": 2.9724284465631754e-05, "loss": 0.4999, "num_tokens": 3947296811.0, "step": 5160 }, { "epoch": 1.8907208940185032, "grad_norm": 0.15166213593756447, "learning_rate": 2.9720424991814815e-05, "loss": 0.5116, "num_tokens": 3948016721.0, "step": 5161 }, { "epoch": 1.891087295044426, "grad_norm": 0.16294546326764167, "learning_rate": 2.9716565082994067e-05, "loss": 0.5298, "num_tokens": 3948690438.0, "step": 5162 }, { "epoch": 1.891453696070349, "grad_norm": 0.17189327687697006, "learning_rate": 2.9712704739386988e-05, "loss": 0.5074, "num_tokens": 3949363321.0, "step": 5163 }, { "epoch": 1.891820097096272, "grad_norm": 0.14185930080854223, "learning_rate": 2.97088439612111e-05, "loss": 0.5089, "num_tokens": 3950188451.0, "step": 5164 }, { "epoch": 1.8921864981221947, "grad_norm": 0.13522844511323326, "learning_rate": 2.970498274868393e-05, "loss": 0.5088, "num_tokens": 3950942562.0, "step": 5165 }, { "epoch": 1.8925528991481175, "grad_norm": 0.15124189960686413, "learning_rate": 2.9701121102023035e-05, "loss": 0.5063, "num_tokens": 3951628183.0, "step": 5166 }, { "epoch": 1.8929193001740405, "grad_norm": 0.13078976922481703, "learning_rate": 2.9697259021446002e-05, "loss": 0.4651, "num_tokens": 3952501148.0, "step": 5167 }, { "epoch": 1.8932857011999633, "grad_norm": 0.14022697514816304, "learning_rate": 2.9693396507170432e-05, "loss": 0.4975, "num_tokens": 3953242143.0, "step": 5168 }, { "epoch": 1.8936521022258863, "grad_norm": 0.14413766261461772, "learning_rate": 2.9689533559413967e-05, "loss": 0.5051, "num_tokens": 3954010495.0, "step": 5169 }, { "epoch": 1.894018503251809, "grad_norm": 0.13181416078006514, "learning_rate": 2.968567017839425e-05, "loss": 0.5096, "num_tokens": 3954800126.0, "step": 5170 }, { "epoch": 1.8943849042777319, "grad_norm": 0.15123134319608642, "learning_rate": 2.9681806364328966e-05, "loss": 0.518, "num_tokens": 3955429132.0, "step": 5171 }, { "epoch": 1.8947513053036549, "grad_norm": 0.15146305174182692, "learning_rate": 2.9677942117435833e-05, "loss": 0.471, "num_tokens": 3956170673.0, "step": 5172 }, { "epoch": 1.8951177063295779, "grad_norm": 0.16183245267739038, "learning_rate": 2.967407743793256e-05, "loss": 0.5563, "num_tokens": 3956905599.0, "step": 5173 }, { "epoch": 1.8954841073555007, "grad_norm": 0.15653888197098662, "learning_rate": 2.9670212326036912e-05, "loss": 0.4859, "num_tokens": 3957567783.0, "step": 5174 }, { "epoch": 1.8958505083814234, "grad_norm": 0.1384527020887284, "learning_rate": 2.966634678196666e-05, "loss": 0.513, "num_tokens": 3958287796.0, "step": 5175 }, { "epoch": 1.8962169094073462, "grad_norm": 0.16804237650456666, "learning_rate": 2.9662480805939623e-05, "loss": 0.5077, "num_tokens": 3959165348.0, "step": 5176 }, { "epoch": 1.8965833104332692, "grad_norm": 0.1444533367800368, "learning_rate": 2.96586143981736e-05, "loss": 0.5248, "num_tokens": 3959849105.0, "step": 5177 }, { "epoch": 1.8969497114591922, "grad_norm": 0.13672162941910082, "learning_rate": 2.965474755888646e-05, "loss": 0.5225, "num_tokens": 3960682274.0, "step": 5178 }, { "epoch": 1.897316112485115, "grad_norm": 0.153406205765428, "learning_rate": 2.9650880288296086e-05, "loss": 0.5199, "num_tokens": 3961447630.0, "step": 5179 }, { "epoch": 1.8976825135110378, "grad_norm": 0.16473244895844824, "learning_rate": 2.9647012586620357e-05, "loss": 0.5215, "num_tokens": 3962133355.0, "step": 5180 }, { "epoch": 1.8980489145369606, "grad_norm": 0.14929199929755754, "learning_rate": 2.9643144454077214e-05, "loss": 0.4962, "num_tokens": 3962916153.0, "step": 5181 }, { "epoch": 1.8984153155628836, "grad_norm": 0.13937012991944706, "learning_rate": 2.9639275890884593e-05, "loss": 0.4963, "num_tokens": 3963702558.0, "step": 5182 }, { "epoch": 1.8987817165888066, "grad_norm": 0.17307746874078161, "learning_rate": 2.9635406897260477e-05, "loss": 0.5536, "num_tokens": 3964311960.0, "step": 5183 }, { "epoch": 1.8991481176147293, "grad_norm": 0.1399900979377212, "learning_rate": 2.9631537473422852e-05, "loss": 0.4651, "num_tokens": 3965060582.0, "step": 5184 }, { "epoch": 1.8995145186406521, "grad_norm": 0.13743632976083728, "learning_rate": 2.9627667619589745e-05, "loss": 0.5231, "num_tokens": 3965957873.0, "step": 5185 }, { "epoch": 1.899880919666575, "grad_norm": 0.1353361158863109, "learning_rate": 2.9623797335979204e-05, "loss": 0.5204, "num_tokens": 3966927389.0, "step": 5186 }, { "epoch": 1.900247320692498, "grad_norm": 0.14693323030104394, "learning_rate": 2.9619926622809294e-05, "loss": 0.4985, "num_tokens": 3967717481.0, "step": 5187 }, { "epoch": 1.900613721718421, "grad_norm": 0.1290515399955103, "learning_rate": 2.9616055480298106e-05, "loss": 0.4799, "num_tokens": 3968567726.0, "step": 5188 }, { "epoch": 1.9009801227443437, "grad_norm": 0.1331659079900497, "learning_rate": 2.961218390866377e-05, "loss": 0.5182, "num_tokens": 3969301931.0, "step": 5189 }, { "epoch": 1.9013465237702665, "grad_norm": 0.13593818858545945, "learning_rate": 2.9608311908124408e-05, "loss": 0.4927, "num_tokens": 3970208824.0, "step": 5190 }, { "epoch": 1.9017129247961895, "grad_norm": 0.13710674197632153, "learning_rate": 2.9604439478898202e-05, "loss": 0.4756, "num_tokens": 3970984647.0, "step": 5191 }, { "epoch": 1.9020793258221123, "grad_norm": 0.15181992527804652, "learning_rate": 2.9600566621203338e-05, "loss": 0.5194, "num_tokens": 3971754532.0, "step": 5192 }, { "epoch": 1.9024457268480353, "grad_norm": 0.14504021671504355, "learning_rate": 2.959669333525802e-05, "loss": 0.4641, "num_tokens": 3972538751.0, "step": 5193 }, { "epoch": 1.902812127873958, "grad_norm": 0.11907490229341608, "learning_rate": 2.9592819621280507e-05, "loss": 0.4831, "num_tokens": 3973471382.0, "step": 5194 }, { "epoch": 1.9031785288998808, "grad_norm": 0.1403534152279594, "learning_rate": 2.9588945479489055e-05, "loss": 0.5171, "num_tokens": 3974118611.0, "step": 5195 }, { "epoch": 1.9035449299258038, "grad_norm": 0.14526319713652755, "learning_rate": 2.958507091010194e-05, "loss": 0.5064, "num_tokens": 3974884830.0, "step": 5196 }, { "epoch": 1.9039113309517268, "grad_norm": 0.13705653795292136, "learning_rate": 2.9581195913337474e-05, "loss": 0.5187, "num_tokens": 3975610709.0, "step": 5197 }, { "epoch": 1.9042777319776496, "grad_norm": 0.14610055745620512, "learning_rate": 2.9577320489414004e-05, "loss": 0.5054, "num_tokens": 3976392971.0, "step": 5198 }, { "epoch": 1.9046441330035724, "grad_norm": 0.1401646008992299, "learning_rate": 2.9573444638549883e-05, "loss": 0.5022, "num_tokens": 3977112359.0, "step": 5199 }, { "epoch": 1.9050105340294952, "grad_norm": 0.1353234113691973, "learning_rate": 2.9569568360963492e-05, "loss": 0.5112, "num_tokens": 3977971725.0, "step": 5200 }, { "epoch": 1.9053769350554182, "grad_norm": 0.15128686419330542, "learning_rate": 2.956569165687324e-05, "loss": 0.5002, "num_tokens": 3978822367.0, "step": 5201 }, { "epoch": 1.9057433360813412, "grad_norm": 0.14276044483633052, "learning_rate": 2.9561814526497557e-05, "loss": 0.5193, "num_tokens": 3979581118.0, "step": 5202 }, { "epoch": 1.906109737107264, "grad_norm": 0.13792189570861033, "learning_rate": 2.95579369700549e-05, "loss": 0.4883, "num_tokens": 3980293683.0, "step": 5203 }, { "epoch": 1.9064761381331867, "grad_norm": 0.1365479571766665, "learning_rate": 2.955405898776375e-05, "loss": 0.4915, "num_tokens": 3981119895.0, "step": 5204 }, { "epoch": 1.9068425391591095, "grad_norm": 0.15430345181718008, "learning_rate": 2.9550180579842597e-05, "loss": 0.5169, "num_tokens": 3981805778.0, "step": 5205 }, { "epoch": 1.9072089401850325, "grad_norm": 0.13626106948261704, "learning_rate": 2.9546301746509986e-05, "loss": 0.4826, "num_tokens": 3982605270.0, "step": 5206 }, { "epoch": 1.9075753412109555, "grad_norm": 0.14961052923951698, "learning_rate": 2.954242248798446e-05, "loss": 0.5075, "num_tokens": 3983393385.0, "step": 5207 }, { "epoch": 1.9079417422368783, "grad_norm": 0.1295326440948166, "learning_rate": 2.953854280448459e-05, "loss": 0.5021, "num_tokens": 3984233013.0, "step": 5208 }, { "epoch": 1.908308143262801, "grad_norm": 0.1454527529446835, "learning_rate": 2.9534662696228983e-05, "loss": 0.4936, "num_tokens": 3985116325.0, "step": 5209 }, { "epoch": 1.9086745442887239, "grad_norm": 0.1541169852895369, "learning_rate": 2.9530782163436257e-05, "loss": 0.5125, "num_tokens": 3985906532.0, "step": 5210 }, { "epoch": 1.9090409453146469, "grad_norm": 0.1473284152077884, "learning_rate": 2.952690120632506e-05, "loss": 0.5056, "num_tokens": 3986643937.0, "step": 5211 }, { "epoch": 1.9094073463405699, "grad_norm": 0.13689873803546593, "learning_rate": 2.9523019825114066e-05, "loss": 0.5244, "num_tokens": 3987472576.0, "step": 5212 }, { "epoch": 1.9097737473664926, "grad_norm": 0.13774823575561929, "learning_rate": 2.951913802002196e-05, "loss": 0.5097, "num_tokens": 3988246361.0, "step": 5213 }, { "epoch": 1.9101401483924154, "grad_norm": 0.16420299059774998, "learning_rate": 2.9515255791267475e-05, "loss": 0.5221, "num_tokens": 3988903707.0, "step": 5214 }, { "epoch": 1.9105065494183384, "grad_norm": 0.14350926249978807, "learning_rate": 2.951137313906933e-05, "loss": 0.5216, "num_tokens": 3989593182.0, "step": 5215 }, { "epoch": 1.9108729504442612, "grad_norm": 0.1533717987787381, "learning_rate": 2.9507490063646328e-05, "loss": 0.4917, "num_tokens": 3990429365.0, "step": 5216 }, { "epoch": 1.9112393514701842, "grad_norm": 0.14500387891521702, "learning_rate": 2.9503606565217225e-05, "loss": 0.4932, "num_tokens": 3991290635.0, "step": 5217 }, { "epoch": 1.911605752496107, "grad_norm": 0.1589996004971223, "learning_rate": 2.949972264400085e-05, "loss": 0.5153, "num_tokens": 3991923782.0, "step": 5218 }, { "epoch": 1.9119721535220298, "grad_norm": 0.17175523824690508, "learning_rate": 2.9495838300216036e-05, "loss": 0.5409, "num_tokens": 3992624238.0, "step": 5219 }, { "epoch": 1.9123385545479528, "grad_norm": 0.15668625697296326, "learning_rate": 2.9491953534081652e-05, "loss": 0.5704, "num_tokens": 3993255336.0, "step": 5220 }, { "epoch": 1.9127049555738758, "grad_norm": 0.15397039227036985, "learning_rate": 2.948806834581657e-05, "loss": 0.4988, "num_tokens": 3994105259.0, "step": 5221 }, { "epoch": 1.9130713565997985, "grad_norm": 0.14057430514701463, "learning_rate": 2.9484182735639714e-05, "loss": 0.4926, "num_tokens": 3994811357.0, "step": 5222 }, { "epoch": 1.9134377576257213, "grad_norm": 0.13313136626721986, "learning_rate": 2.948029670377001e-05, "loss": 0.5241, "num_tokens": 3995589365.0, "step": 5223 }, { "epoch": 1.913804158651644, "grad_norm": 0.15324224991517135, "learning_rate": 2.9476410250426414e-05, "loss": 0.494, "num_tokens": 3996426554.0, "step": 5224 }, { "epoch": 1.914170559677567, "grad_norm": 0.13624816776106918, "learning_rate": 2.947252337582791e-05, "loss": 0.4999, "num_tokens": 3997230540.0, "step": 5225 }, { "epoch": 1.91453696070349, "grad_norm": 0.14216352685177316, "learning_rate": 2.94686360801935e-05, "loss": 0.4796, "num_tokens": 3997945413.0, "step": 5226 }, { "epoch": 1.914903361729413, "grad_norm": 0.12611799415135536, "learning_rate": 2.946474836374221e-05, "loss": 0.4986, "num_tokens": 3998855277.0, "step": 5227 }, { "epoch": 1.9152697627553357, "grad_norm": 0.14537758612675164, "learning_rate": 2.946086022669309e-05, "loss": 0.5024, "num_tokens": 3999652265.0, "step": 5228 }, { "epoch": 1.9156361637812584, "grad_norm": 0.1466192666637313, "learning_rate": 2.9456971669265228e-05, "loss": 0.5121, "num_tokens": 4000446155.0, "step": 5229 }, { "epoch": 1.9160025648071815, "grad_norm": 0.14428545204369944, "learning_rate": 2.9453082691677704e-05, "loss": 0.4879, "num_tokens": 4001129352.0, "step": 5230 }, { "epoch": 1.9163689658331045, "grad_norm": 0.1385375224073259, "learning_rate": 2.9449193294149653e-05, "loss": 0.5264, "num_tokens": 4001946344.0, "step": 5231 }, { "epoch": 1.9167353668590272, "grad_norm": 0.15032873342492972, "learning_rate": 2.9445303476900223e-05, "loss": 0.4989, "num_tokens": 4002764464.0, "step": 5232 }, { "epoch": 1.91710176788495, "grad_norm": 0.1346402958355452, "learning_rate": 2.944141324014857e-05, "loss": 0.5147, "num_tokens": 4003607936.0, "step": 5233 }, { "epoch": 1.9174681689108728, "grad_norm": 0.12210820303433202, "learning_rate": 2.9437522584113908e-05, "loss": 0.4573, "num_tokens": 4004530196.0, "step": 5234 }, { "epoch": 1.9178345699367958, "grad_norm": 0.13140012595813655, "learning_rate": 2.943363150901543e-05, "loss": 0.471, "num_tokens": 4005289146.0, "step": 5235 }, { "epoch": 1.9182009709627188, "grad_norm": 0.142308048079093, "learning_rate": 2.9429740015072402e-05, "loss": 0.5131, "num_tokens": 4006050318.0, "step": 5236 }, { "epoch": 1.9185673719886416, "grad_norm": 0.13173477496989006, "learning_rate": 2.942584810250407e-05, "loss": 0.4854, "num_tokens": 4006826974.0, "step": 5237 }, { "epoch": 1.9189337730145644, "grad_norm": 0.16428871774057116, "learning_rate": 2.9421955771529736e-05, "loss": 0.4954, "num_tokens": 4007696022.0, "step": 5238 }, { "epoch": 1.9193001740404874, "grad_norm": 0.12787559452778283, "learning_rate": 2.9418063022368698e-05, "loss": 0.4864, "num_tokens": 4008480855.0, "step": 5239 }, { "epoch": 1.9196665750664101, "grad_norm": 0.14347880096716453, "learning_rate": 2.9414169855240296e-05, "loss": 0.4988, "num_tokens": 4009190407.0, "step": 5240 }, { "epoch": 1.9200329760923331, "grad_norm": 0.1272547048326165, "learning_rate": 2.9410276270363897e-05, "loss": 0.4731, "num_tokens": 4009995060.0, "step": 5241 }, { "epoch": 1.920399377118256, "grad_norm": 0.14551751721120604, "learning_rate": 2.940638226795887e-05, "loss": 0.4998, "num_tokens": 4010795588.0, "step": 5242 }, { "epoch": 1.9207657781441787, "grad_norm": 0.12794873975616658, "learning_rate": 2.940248784824463e-05, "loss": 0.5292, "num_tokens": 4011635998.0, "step": 5243 }, { "epoch": 1.9211321791701017, "grad_norm": 0.1403079855243473, "learning_rate": 2.9398593011440604e-05, "loss": 0.4932, "num_tokens": 4012421609.0, "step": 5244 }, { "epoch": 1.9214985801960247, "grad_norm": 0.1378923078920144, "learning_rate": 2.9394697757766246e-05, "loss": 0.5147, "num_tokens": 4013244651.0, "step": 5245 }, { "epoch": 1.9218649812219475, "grad_norm": 0.15748735634928338, "learning_rate": 2.9390802087441022e-05, "loss": 0.5205, "num_tokens": 4013877180.0, "step": 5246 }, { "epoch": 1.9222313822478703, "grad_norm": 0.13768028875029717, "learning_rate": 2.9386906000684448e-05, "loss": 0.4999, "num_tokens": 4014703047.0, "step": 5247 }, { "epoch": 1.922597783273793, "grad_norm": 0.15547195487736928, "learning_rate": 2.9383009497716042e-05, "loss": 0.5088, "num_tokens": 4015443906.0, "step": 5248 }, { "epoch": 1.922964184299716, "grad_norm": 0.13987855330207177, "learning_rate": 2.9379112578755343e-05, "loss": 0.4733, "num_tokens": 4016255905.0, "step": 5249 }, { "epoch": 1.923330585325639, "grad_norm": 0.24048635616201047, "learning_rate": 2.9375215244021926e-05, "loss": 0.5362, "num_tokens": 4017049125.0, "step": 5250 }, { "epoch": 1.9236969863515618, "grad_norm": 0.1603717343869077, "learning_rate": 2.9371317493735384e-05, "loss": 0.4862, "num_tokens": 4017834917.0, "step": 5251 }, { "epoch": 1.9240633873774846, "grad_norm": 0.13369796251923544, "learning_rate": 2.9367419328115337e-05, "loss": 0.5281, "num_tokens": 4018645894.0, "step": 5252 }, { "epoch": 1.9244297884034074, "grad_norm": 0.1576072060292261, "learning_rate": 2.9363520747381424e-05, "loss": 0.4938, "num_tokens": 4019329227.0, "step": 5253 }, { "epoch": 1.9247961894293304, "grad_norm": 0.14546145545926606, "learning_rate": 2.9359621751753308e-05, "loss": 0.4787, "num_tokens": 4020024477.0, "step": 5254 }, { "epoch": 1.9251625904552534, "grad_norm": 0.14233185142253799, "learning_rate": 2.935572234145067e-05, "loss": 0.5102, "num_tokens": 4020698404.0, "step": 5255 }, { "epoch": 1.9255289914811762, "grad_norm": 0.14671298187327964, "learning_rate": 2.9351822516693234e-05, "loss": 0.5228, "num_tokens": 4021506191.0, "step": 5256 }, { "epoch": 1.925895392507099, "grad_norm": 0.1481842730504564, "learning_rate": 2.9347922277700725e-05, "loss": 0.4965, "num_tokens": 4022238022.0, "step": 5257 }, { "epoch": 1.9262617935330217, "grad_norm": 0.13923357791137214, "learning_rate": 2.9344021624692904e-05, "loss": 0.4877, "num_tokens": 4023113367.0, "step": 5258 }, { "epoch": 1.9266281945589447, "grad_norm": 0.16362766608873924, "learning_rate": 2.9340120557889542e-05, "loss": 0.4917, "num_tokens": 4023866137.0, "step": 5259 }, { "epoch": 1.9269945955848677, "grad_norm": 0.14899115564039023, "learning_rate": 2.9336219077510458e-05, "loss": 0.5125, "num_tokens": 4024502030.0, "step": 5260 }, { "epoch": 1.9273609966107905, "grad_norm": 0.15842976869223446, "learning_rate": 2.9332317183775463e-05, "loss": 0.4934, "num_tokens": 4025145861.0, "step": 5261 }, { "epoch": 1.9277273976367133, "grad_norm": 0.17139679279073486, "learning_rate": 2.9328414876904415e-05, "loss": 0.5101, "num_tokens": 4025880497.0, "step": 5262 }, { "epoch": 1.9280937986626363, "grad_norm": 0.1670804949340327, "learning_rate": 2.9324512157117195e-05, "loss": 0.5199, "num_tokens": 4026597618.0, "step": 5263 }, { "epoch": 1.928460199688559, "grad_norm": 0.14615428337216088, "learning_rate": 2.9320609024633697e-05, "loss": 0.5112, "num_tokens": 4027393984.0, "step": 5264 }, { "epoch": 1.928826600714482, "grad_norm": 0.15662205843633814, "learning_rate": 2.9316705479673823e-05, "loss": 0.4948, "num_tokens": 4028105286.0, "step": 5265 }, { "epoch": 1.9291930017404049, "grad_norm": 0.14751922652177288, "learning_rate": 2.9312801522457543e-05, "loss": 0.5345, "num_tokens": 4028872132.0, "step": 5266 }, { "epoch": 1.9295594027663276, "grad_norm": 0.15408986435357933, "learning_rate": 2.9308897153204806e-05, "loss": 0.5, "num_tokens": 4029622822.0, "step": 5267 }, { "epoch": 1.9299258037922506, "grad_norm": 0.14829607079797116, "learning_rate": 2.9304992372135604e-05, "loss": 0.5153, "num_tokens": 4030402035.0, "step": 5268 }, { "epoch": 1.9302922048181737, "grad_norm": 0.14494188444687597, "learning_rate": 2.9301087179469955e-05, "loss": 0.4932, "num_tokens": 4031194017.0, "step": 5269 }, { "epoch": 1.9306586058440964, "grad_norm": 0.133863532945556, "learning_rate": 2.92971815754279e-05, "loss": 0.5261, "num_tokens": 4031883051.0, "step": 5270 }, { "epoch": 1.9310250068700192, "grad_norm": 0.15964378107194901, "learning_rate": 2.929327556022949e-05, "loss": 0.4915, "num_tokens": 4032736396.0, "step": 5271 }, { "epoch": 1.931391407895942, "grad_norm": 0.14677920220762516, "learning_rate": 2.9289369134094805e-05, "loss": 0.5084, "num_tokens": 4033499369.0, "step": 5272 }, { "epoch": 1.931757808921865, "grad_norm": 0.1347889691220073, "learning_rate": 2.9285462297243955e-05, "loss": 0.5181, "num_tokens": 4034335582.0, "step": 5273 }, { "epoch": 1.932124209947788, "grad_norm": 0.15758664311856369, "learning_rate": 2.9281555049897076e-05, "loss": 0.514, "num_tokens": 4035119246.0, "step": 5274 }, { "epoch": 1.9324906109737108, "grad_norm": 0.14282708207512154, "learning_rate": 2.9277647392274308e-05, "loss": 0.5041, "num_tokens": 4035908265.0, "step": 5275 }, { "epoch": 1.9328570119996336, "grad_norm": 0.15602609476400314, "learning_rate": 2.9273739324595837e-05, "loss": 0.5339, "num_tokens": 4036708222.0, "step": 5276 }, { "epoch": 1.9332234130255563, "grad_norm": 0.15668021123424236, "learning_rate": 2.926983084708185e-05, "loss": 0.4998, "num_tokens": 4037404700.0, "step": 5277 }, { "epoch": 1.9335898140514793, "grad_norm": 0.14937772653325382, "learning_rate": 2.9265921959952578e-05, "loss": 0.5094, "num_tokens": 4038151414.0, "step": 5278 }, { "epoch": 1.9339562150774023, "grad_norm": 0.138254156912974, "learning_rate": 2.9262012663428264e-05, "loss": 0.5203, "num_tokens": 4038927749.0, "step": 5279 }, { "epoch": 1.9343226161033251, "grad_norm": 0.14288246063467566, "learning_rate": 2.925810295772917e-05, "loss": 0.4929, "num_tokens": 4039633958.0, "step": 5280 }, { "epoch": 1.934689017129248, "grad_norm": 0.15151713790282137, "learning_rate": 2.9254192843075592e-05, "loss": 0.5117, "num_tokens": 4040436336.0, "step": 5281 }, { "epoch": 1.9350554181551707, "grad_norm": 0.1280267335111001, "learning_rate": 2.9250282319687837e-05, "loss": 0.4701, "num_tokens": 4041347958.0, "step": 5282 }, { "epoch": 1.9354218191810937, "grad_norm": 0.15018562236831143, "learning_rate": 2.9246371387786255e-05, "loss": 0.4957, "num_tokens": 4042036431.0, "step": 5283 }, { "epoch": 1.9357882202070167, "grad_norm": 0.15138315686846138, "learning_rate": 2.9242460047591187e-05, "loss": 0.503, "num_tokens": 4042738820.0, "step": 5284 }, { "epoch": 1.9361546212329395, "grad_norm": 0.14991306739277277, "learning_rate": 2.9238548299323035e-05, "loss": 0.4727, "num_tokens": 4043430504.0, "step": 5285 }, { "epoch": 1.9365210222588622, "grad_norm": 0.16907628291849372, "learning_rate": 2.923463614320219e-05, "loss": 0.5278, "num_tokens": 4044093426.0, "step": 5286 }, { "epoch": 1.9368874232847852, "grad_norm": 0.14462151715172358, "learning_rate": 2.9230723579449084e-05, "loss": 0.5232, "num_tokens": 4044826268.0, "step": 5287 }, { "epoch": 1.937253824310708, "grad_norm": 0.1697597629229902, "learning_rate": 2.9226810608284175e-05, "loss": 0.483, "num_tokens": 4045561538.0, "step": 5288 }, { "epoch": 1.937620225336631, "grad_norm": 0.14190488271279647, "learning_rate": 2.922289722992793e-05, "loss": 0.4889, "num_tokens": 4046351685.0, "step": 5289 }, { "epoch": 1.9379866263625538, "grad_norm": 0.17528697305796176, "learning_rate": 2.921898344460085e-05, "loss": 0.5231, "num_tokens": 4047130758.0, "step": 5290 }, { "epoch": 1.9383530273884766, "grad_norm": 0.14915301502546588, "learning_rate": 2.921506925252346e-05, "loss": 0.5022, "num_tokens": 4047844009.0, "step": 5291 }, { "epoch": 1.9387194284143996, "grad_norm": 0.13200131316067437, "learning_rate": 2.921115465391629e-05, "loss": 0.4918, "num_tokens": 4048588643.0, "step": 5292 }, { "epoch": 1.9390858294403226, "grad_norm": 0.1781492572269587, "learning_rate": 2.9207239648999918e-05, "loss": 0.5101, "num_tokens": 4049428518.0, "step": 5293 }, { "epoch": 1.9394522304662454, "grad_norm": 0.14413808983678209, "learning_rate": 2.9203324237994928e-05, "loss": 0.4874, "num_tokens": 4050179488.0, "step": 5294 }, { "epoch": 1.9398186314921682, "grad_norm": 0.14427609376265416, "learning_rate": 2.919940842112194e-05, "loss": 0.4974, "num_tokens": 4050945712.0, "step": 5295 }, { "epoch": 1.940185032518091, "grad_norm": 0.16017416073428634, "learning_rate": 2.919549219860158e-05, "loss": 0.5092, "num_tokens": 4051729079.0, "step": 5296 }, { "epoch": 1.940551433544014, "grad_norm": 0.1625303190360986, "learning_rate": 2.91915755706545e-05, "loss": 0.4813, "num_tokens": 4052535429.0, "step": 5297 }, { "epoch": 1.940917834569937, "grad_norm": 0.14868645945020328, "learning_rate": 2.9187658537501406e-05, "loss": 0.4811, "num_tokens": 4053304847.0, "step": 5298 }, { "epoch": 1.9412842355958597, "grad_norm": 0.147931530209904, "learning_rate": 2.9183741099362974e-05, "loss": 0.5077, "num_tokens": 4054041165.0, "step": 5299 }, { "epoch": 1.9416506366217825, "grad_norm": 0.15724765483695238, "learning_rate": 2.9179823256459937e-05, "loss": 0.5228, "num_tokens": 4054858501.0, "step": 5300 }, { "epoch": 1.9420170376477053, "grad_norm": 0.17330613534792616, "learning_rate": 2.9175905009013057e-05, "loss": 0.5018, "num_tokens": 4055621479.0, "step": 5301 }, { "epoch": 1.9423834386736283, "grad_norm": 0.14476344038767988, "learning_rate": 2.9171986357243094e-05, "loss": 0.4977, "num_tokens": 4056414551.0, "step": 5302 }, { "epoch": 1.9427498396995513, "grad_norm": 0.15017130868486436, "learning_rate": 2.9168067301370842e-05, "loss": 0.5302, "num_tokens": 4057097312.0, "step": 5303 }, { "epoch": 1.943116240725474, "grad_norm": 0.14621661612042952, "learning_rate": 2.9164147841617123e-05, "loss": 0.4961, "num_tokens": 4057902156.0, "step": 5304 }, { "epoch": 1.9434826417513968, "grad_norm": 0.1335232588501526, "learning_rate": 2.916022797820278e-05, "loss": 0.5332, "num_tokens": 4058677503.0, "step": 5305 }, { "epoch": 1.9438490427773196, "grad_norm": 0.14447426756129272, "learning_rate": 2.915630771134867e-05, "loss": 0.4808, "num_tokens": 4059450787.0, "step": 5306 }, { "epoch": 1.9442154438032426, "grad_norm": 0.13931998092668893, "learning_rate": 2.9152387041275675e-05, "loss": 0.489, "num_tokens": 4060270426.0, "step": 5307 }, { "epoch": 1.9445818448291656, "grad_norm": 0.15047655453449274, "learning_rate": 2.914846596820471e-05, "loss": 0.5202, "num_tokens": 4060960527.0, "step": 5308 }, { "epoch": 1.9449482458550884, "grad_norm": 0.14250821855486331, "learning_rate": 2.9144544492356714e-05, "loss": 0.4756, "num_tokens": 4061848741.0, "step": 5309 }, { "epoch": 1.9453146468810112, "grad_norm": 0.14129156083813724, "learning_rate": 2.9140622613952625e-05, "loss": 0.5091, "num_tokens": 4062545498.0, "step": 5310 }, { "epoch": 1.9456810479069342, "grad_norm": 0.15258927604747638, "learning_rate": 2.9136700333213426e-05, "loss": 0.4898, "num_tokens": 4063208569.0, "step": 5311 }, { "epoch": 1.946047448932857, "grad_norm": 0.14099925552991496, "learning_rate": 2.9132777650360117e-05, "loss": 0.51, "num_tokens": 4063960741.0, "step": 5312 }, { "epoch": 1.94641384995878, "grad_norm": 0.15771537601827088, "learning_rate": 2.9128854565613724e-05, "loss": 0.5085, "num_tokens": 4064752985.0, "step": 5313 }, { "epoch": 1.9467802509847028, "grad_norm": 0.14825794452407054, "learning_rate": 2.9124931079195278e-05, "loss": 0.5119, "num_tokens": 4065465041.0, "step": 5314 }, { "epoch": 1.9471466520106255, "grad_norm": 0.16108610147737512, "learning_rate": 2.9121007191325858e-05, "loss": 0.542, "num_tokens": 4066226038.0, "step": 5315 }, { "epoch": 1.9475130530365485, "grad_norm": 0.1903509323714626, "learning_rate": 2.911708290222655e-05, "loss": 0.5245, "num_tokens": 4066993999.0, "step": 5316 }, { "epoch": 1.9478794540624715, "grad_norm": 0.14548235224986325, "learning_rate": 2.9113158212118466e-05, "loss": 0.5179, "num_tokens": 4067779077.0, "step": 5317 }, { "epoch": 1.9482458550883943, "grad_norm": 0.15060994523124271, "learning_rate": 2.910923312122274e-05, "loss": 0.5122, "num_tokens": 4068438613.0, "step": 5318 }, { "epoch": 1.948612256114317, "grad_norm": 0.14896283236589647, "learning_rate": 2.9105307629760533e-05, "loss": 0.4864, "num_tokens": 4069172567.0, "step": 5319 }, { "epoch": 1.9489786571402399, "grad_norm": 0.17396143821648113, "learning_rate": 2.9101381737953024e-05, "loss": 0.5163, "num_tokens": 4069820103.0, "step": 5320 }, { "epoch": 1.9493450581661629, "grad_norm": 0.15997309563705972, "learning_rate": 2.909745544602141e-05, "loss": 0.5009, "num_tokens": 4070631164.0, "step": 5321 }, { "epoch": 1.9497114591920859, "grad_norm": 0.147342795052707, "learning_rate": 2.9093528754186927e-05, "loss": 0.5073, "num_tokens": 4071458702.0, "step": 5322 }, { "epoch": 1.9500778602180087, "grad_norm": 0.14407635135768185, "learning_rate": 2.9089601662670808e-05, "loss": 0.4803, "num_tokens": 4072303519.0, "step": 5323 }, { "epoch": 1.9504442612439314, "grad_norm": 0.1654587088885716, "learning_rate": 2.908567417169433e-05, "loss": 0.5492, "num_tokens": 4073020740.0, "step": 5324 }, { "epoch": 1.9508106622698542, "grad_norm": 0.14984954613153603, "learning_rate": 2.90817462814788e-05, "loss": 0.4876, "num_tokens": 4073938083.0, "step": 5325 }, { "epoch": 1.9511770632957772, "grad_norm": 0.1474358490124892, "learning_rate": 2.9077817992245512e-05, "loss": 0.5145, "num_tokens": 4074685148.0, "step": 5326 }, { "epoch": 1.9515434643217002, "grad_norm": 0.15904805279081632, "learning_rate": 2.907388930421581e-05, "loss": 0.5313, "num_tokens": 4075441632.0, "step": 5327 }, { "epoch": 1.951909865347623, "grad_norm": 0.15047784075964668, "learning_rate": 2.906996021761106e-05, "loss": 0.501, "num_tokens": 4076216847.0, "step": 5328 }, { "epoch": 1.9522762663735458, "grad_norm": 0.14430519795891059, "learning_rate": 2.9066030732652635e-05, "loss": 0.5296, "num_tokens": 4076965266.0, "step": 5329 }, { "epoch": 1.9526426673994686, "grad_norm": 0.14427910614898964, "learning_rate": 2.9062100849561948e-05, "loss": 0.4938, "num_tokens": 4077670460.0, "step": 5330 }, { "epoch": 1.9530090684253916, "grad_norm": 0.15162445863435328, "learning_rate": 2.905817056856043e-05, "loss": 0.4792, "num_tokens": 4078507564.0, "step": 5331 }, { "epoch": 1.9533754694513146, "grad_norm": 0.14149218072621703, "learning_rate": 2.905423988986952e-05, "loss": 0.5624, "num_tokens": 4079205945.0, "step": 5332 }, { "epoch": 1.9537418704772374, "grad_norm": 0.14642396522105583, "learning_rate": 2.9050308813710692e-05, "loss": 0.5064, "num_tokens": 4079977849.0, "step": 5333 }, { "epoch": 1.9541082715031601, "grad_norm": 0.13249462724845038, "learning_rate": 2.904637734030544e-05, "loss": 0.4983, "num_tokens": 4080643067.0, "step": 5334 }, { "epoch": 1.9544746725290831, "grad_norm": 0.1352894431851324, "learning_rate": 2.9042445469875296e-05, "loss": 0.4833, "num_tokens": 4081511550.0, "step": 5335 }, { "epoch": 1.954841073555006, "grad_norm": 0.16102313206471605, "learning_rate": 2.9038513202641786e-05, "loss": 0.491, "num_tokens": 4082309397.0, "step": 5336 }, { "epoch": 1.955207474580929, "grad_norm": 0.1289575286050062, "learning_rate": 2.903458053882647e-05, "loss": 0.4904, "num_tokens": 4083117304.0, "step": 5337 }, { "epoch": 1.9555738756068517, "grad_norm": 0.13267643513649843, "learning_rate": 2.903064747865094e-05, "loss": 0.4608, "num_tokens": 4083908520.0, "step": 5338 }, { "epoch": 1.9559402766327745, "grad_norm": 0.13483417662321387, "learning_rate": 2.9026714022336797e-05, "loss": 0.487, "num_tokens": 4084692508.0, "step": 5339 }, { "epoch": 1.9563066776586975, "grad_norm": 0.13451504505436632, "learning_rate": 2.9022780170105674e-05, "loss": 0.5382, "num_tokens": 4085452000.0, "step": 5340 }, { "epoch": 1.9566730786846205, "grad_norm": 0.13259528591333006, "learning_rate": 2.9018845922179214e-05, "loss": 0.4976, "num_tokens": 4086332510.0, "step": 5341 }, { "epoch": 1.9570394797105433, "grad_norm": 0.1374778130706504, "learning_rate": 2.9014911278779105e-05, "loss": 0.4849, "num_tokens": 4087034072.0, "step": 5342 }, { "epoch": 1.957405880736466, "grad_norm": 0.13820108863847255, "learning_rate": 2.9010976240127022e-05, "loss": 0.4854, "num_tokens": 4087825191.0, "step": 5343 }, { "epoch": 1.9577722817623888, "grad_norm": 0.1298437573171079, "learning_rate": 2.9007040806444702e-05, "loss": 0.4787, "num_tokens": 4088596856.0, "step": 5344 }, { "epoch": 1.9581386827883118, "grad_norm": 0.1314358999626192, "learning_rate": 2.900310497795388e-05, "loss": 0.5368, "num_tokens": 4089418332.0, "step": 5345 }, { "epoch": 1.9585050838142348, "grad_norm": 0.13044682356360746, "learning_rate": 2.8999168754876312e-05, "loss": 0.5211, "num_tokens": 4090254379.0, "step": 5346 }, { "epoch": 1.9588714848401576, "grad_norm": 0.13486120726372092, "learning_rate": 2.8995232137433787e-05, "loss": 0.4723, "num_tokens": 4090891764.0, "step": 5347 }, { "epoch": 1.9592378858660804, "grad_norm": 0.13250339676082598, "learning_rate": 2.8991295125848114e-05, "loss": 0.4806, "num_tokens": 4091662432.0, "step": 5348 }, { "epoch": 1.9596042868920032, "grad_norm": 0.1283013735440302, "learning_rate": 2.8987357720341117e-05, "loss": 0.4976, "num_tokens": 4092446113.0, "step": 5349 }, { "epoch": 1.9599706879179262, "grad_norm": 0.12848465795220024, "learning_rate": 2.8983419921134652e-05, "loss": 0.5228, "num_tokens": 4093267940.0, "step": 5350 }, { "epoch": 1.9603370889438492, "grad_norm": 0.13405061881579666, "learning_rate": 2.8979481728450595e-05, "loss": 0.4689, "num_tokens": 4094103062.0, "step": 5351 }, { "epoch": 1.960703489969772, "grad_norm": 0.14811839436821278, "learning_rate": 2.897554314251083e-05, "loss": 0.4957, "num_tokens": 4094878677.0, "step": 5352 }, { "epoch": 1.9610698909956947, "grad_norm": 0.12069922907512796, "learning_rate": 2.8971604163537285e-05, "loss": 0.4919, "num_tokens": 4095746762.0, "step": 5353 }, { "epoch": 1.9614362920216175, "grad_norm": 0.11872046456571239, "learning_rate": 2.8967664791751896e-05, "loss": 0.4714, "num_tokens": 4096707260.0, "step": 5354 }, { "epoch": 1.9618026930475405, "grad_norm": 0.1430092127441323, "learning_rate": 2.8963725027376634e-05, "loss": 0.5125, "num_tokens": 4097412623.0, "step": 5355 }, { "epoch": 1.9621690940734635, "grad_norm": 0.14974983584240348, "learning_rate": 2.8959784870633467e-05, "loss": 0.5206, "num_tokens": 4098101682.0, "step": 5356 }, { "epoch": 1.9625354950993863, "grad_norm": 0.13951567517395744, "learning_rate": 2.8955844321744413e-05, "loss": 0.5076, "num_tokens": 4098806575.0, "step": 5357 }, { "epoch": 1.962901896125309, "grad_norm": 0.13566141909284554, "learning_rate": 2.8951903380931497e-05, "loss": 0.4958, "num_tokens": 4099576691.0, "step": 5358 }, { "epoch": 1.963268297151232, "grad_norm": 0.13008792579126255, "learning_rate": 2.894796204841677e-05, "loss": 0.4738, "num_tokens": 4100361292.0, "step": 5359 }, { "epoch": 1.9636346981771549, "grad_norm": 0.18603369897547717, "learning_rate": 2.8944020324422307e-05, "loss": 0.4993, "num_tokens": 4101087381.0, "step": 5360 }, { "epoch": 1.9640010992030779, "grad_norm": 0.13476988236370055, "learning_rate": 2.8940078209170196e-05, "loss": 0.5308, "num_tokens": 4101756745.0, "step": 5361 }, { "epoch": 1.9643675002290006, "grad_norm": 0.1513659100473847, "learning_rate": 2.893613570288256e-05, "loss": 0.5026, "num_tokens": 4102522010.0, "step": 5362 }, { "epoch": 1.9647339012549234, "grad_norm": 0.1425490640796869, "learning_rate": 2.8932192805781537e-05, "loss": 0.5078, "num_tokens": 4103236513.0, "step": 5363 }, { "epoch": 1.9651003022808464, "grad_norm": 0.13559872426036895, "learning_rate": 2.892824951808928e-05, "loss": 0.467, "num_tokens": 4103973551.0, "step": 5364 }, { "epoch": 1.9654667033067694, "grad_norm": 0.15281930990317133, "learning_rate": 2.8924305840027985e-05, "loss": 0.4831, "num_tokens": 4104845340.0, "step": 5365 }, { "epoch": 1.9658331043326922, "grad_norm": 0.14312237500632433, "learning_rate": 2.8920361771819845e-05, "loss": 0.512, "num_tokens": 4105651988.0, "step": 5366 }, { "epoch": 1.966199505358615, "grad_norm": 0.13915365435718266, "learning_rate": 2.8916417313687098e-05, "loss": 0.4934, "num_tokens": 4106333491.0, "step": 5367 }, { "epoch": 1.9665659063845378, "grad_norm": 0.15008565910891478, "learning_rate": 2.8912472465851978e-05, "loss": 0.5396, "num_tokens": 4107112009.0, "step": 5368 }, { "epoch": 1.9669323074104608, "grad_norm": 0.14882310870990298, "learning_rate": 2.8908527228536763e-05, "loss": 0.5314, "num_tokens": 4107810155.0, "step": 5369 }, { "epoch": 1.9672987084363838, "grad_norm": 0.13705718863078373, "learning_rate": 2.8904581601963753e-05, "loss": 0.4759, "num_tokens": 4108538290.0, "step": 5370 }, { "epoch": 1.9676651094623065, "grad_norm": 0.13325573517712533, "learning_rate": 2.8900635586355252e-05, "loss": 0.4858, "num_tokens": 4109414614.0, "step": 5371 }, { "epoch": 1.9680315104882293, "grad_norm": 0.1420081933029865, "learning_rate": 2.88966891819336e-05, "loss": 0.5138, "num_tokens": 4110078323.0, "step": 5372 }, { "epoch": 1.968397911514152, "grad_norm": 0.13969035743794422, "learning_rate": 2.889274238892116e-05, "loss": 0.5175, "num_tokens": 4110911187.0, "step": 5373 }, { "epoch": 1.9687643125400751, "grad_norm": 0.12703001342038805, "learning_rate": 2.88887952075403e-05, "loss": 0.5029, "num_tokens": 4111675028.0, "step": 5374 }, { "epoch": 1.9691307135659981, "grad_norm": 0.14219228087152674, "learning_rate": 2.888484763801344e-05, "loss": 0.4575, "num_tokens": 4112459286.0, "step": 5375 }, { "epoch": 1.969497114591921, "grad_norm": 0.14981841255602624, "learning_rate": 2.8880899680562984e-05, "loss": 0.5013, "num_tokens": 4113288532.0, "step": 5376 }, { "epoch": 1.9698635156178437, "grad_norm": 0.13386231217427247, "learning_rate": 2.8876951335411397e-05, "loss": 0.4915, "num_tokens": 4114069204.0, "step": 5377 }, { "epoch": 1.9702299166437665, "grad_norm": 0.14564565821588607, "learning_rate": 2.8873002602781132e-05, "loss": 0.4636, "num_tokens": 4114738237.0, "step": 5378 }, { "epoch": 1.9705963176696895, "grad_norm": 0.14927602246491248, "learning_rate": 2.8869053482894682e-05, "loss": 0.5051, "num_tokens": 4115589619.0, "step": 5379 }, { "epoch": 1.9709627186956125, "grad_norm": 0.13649112321467372, "learning_rate": 2.8865103975974564e-05, "loss": 0.5062, "num_tokens": 4116311487.0, "step": 5380 }, { "epoch": 1.9713291197215352, "grad_norm": 0.145927776687309, "learning_rate": 2.886115408224331e-05, "loss": 0.5035, "num_tokens": 4117014331.0, "step": 5381 }, { "epoch": 1.971695520747458, "grad_norm": 0.154080621591675, "learning_rate": 2.885720380192347e-05, "loss": 0.527, "num_tokens": 4117723300.0, "step": 5382 }, { "epoch": 1.9720619217733808, "grad_norm": 0.1338276913454207, "learning_rate": 2.885325313523762e-05, "loss": 0.5052, "num_tokens": 4118423673.0, "step": 5383 }, { "epoch": 1.9724283227993038, "grad_norm": 0.15375882643517874, "learning_rate": 2.884930208240836e-05, "loss": 0.5094, "num_tokens": 4119158803.0, "step": 5384 }, { "epoch": 1.9727947238252268, "grad_norm": 0.15114368361925196, "learning_rate": 2.8845350643658317e-05, "loss": 0.5195, "num_tokens": 4119862104.0, "step": 5385 }, { "epoch": 1.9731611248511496, "grad_norm": 0.13670965270287547, "learning_rate": 2.884139881921013e-05, "loss": 0.5062, "num_tokens": 4120629147.0, "step": 5386 }, { "epoch": 1.9735275258770724, "grad_norm": 0.16205880512894105, "learning_rate": 2.8837446609286464e-05, "loss": 0.5409, "num_tokens": 4121314799.0, "step": 5387 }, { "epoch": 1.9738939269029954, "grad_norm": 0.1476884892358581, "learning_rate": 2.8833494014109988e-05, "loss": 0.4976, "num_tokens": 4122086575.0, "step": 5388 }, { "epoch": 1.9742603279289181, "grad_norm": 0.12959222593393105, "learning_rate": 2.8829541033903433e-05, "loss": 0.5155, "num_tokens": 4122963847.0, "step": 5389 }, { "epoch": 1.9746267289548411, "grad_norm": 0.14801666747406775, "learning_rate": 2.8825587668889516e-05, "loss": 0.5367, "num_tokens": 4123778315.0, "step": 5390 }, { "epoch": 1.974993129980764, "grad_norm": 0.13419805678792926, "learning_rate": 2.8821633919290986e-05, "loss": 0.4716, "num_tokens": 4124532533.0, "step": 5391 }, { "epoch": 1.9753595310066867, "grad_norm": 0.13739319064332234, "learning_rate": 2.8817679785330615e-05, "loss": 0.5021, "num_tokens": 4125220416.0, "step": 5392 }, { "epoch": 1.9757259320326097, "grad_norm": 0.14767838380584003, "learning_rate": 2.8813725267231207e-05, "loss": 0.4864, "num_tokens": 4125990792.0, "step": 5393 }, { "epoch": 1.9760923330585327, "grad_norm": 0.1466034185567647, "learning_rate": 2.880977036521557e-05, "loss": 0.5062, "num_tokens": 4126724817.0, "step": 5394 }, { "epoch": 1.9764587340844555, "grad_norm": 0.16640882034233936, "learning_rate": 2.8805815079506536e-05, "loss": 0.5223, "num_tokens": 4127367656.0, "step": 5395 }, { "epoch": 1.9768251351103783, "grad_norm": 0.14796751033174946, "learning_rate": 2.880185941032697e-05, "loss": 0.5176, "num_tokens": 4128106723.0, "step": 5396 }, { "epoch": 1.977191536136301, "grad_norm": 0.1387132871265182, "learning_rate": 2.8797903357899754e-05, "loss": 0.4929, "num_tokens": 4128885308.0, "step": 5397 }, { "epoch": 1.977557937162224, "grad_norm": 0.13863876541696216, "learning_rate": 2.8793946922447785e-05, "loss": 0.4956, "num_tokens": 4129595483.0, "step": 5398 }, { "epoch": 1.977924338188147, "grad_norm": 0.12915705869051453, "learning_rate": 2.8789990104193994e-05, "loss": 0.516, "num_tokens": 4130444167.0, "step": 5399 }, { "epoch": 1.9782907392140698, "grad_norm": 0.14784365529938961, "learning_rate": 2.878603290336132e-05, "loss": 0.5238, "num_tokens": 4131209452.0, "step": 5400 }, { "epoch": 1.9786571402399926, "grad_norm": 0.18830808755073158, "learning_rate": 2.878207532017273e-05, "loss": 0.5179, "num_tokens": 4131958150.0, "step": 5401 }, { "epoch": 1.9790235412659154, "grad_norm": 0.13269996402595738, "learning_rate": 2.877811735485121e-05, "loss": 0.4876, "num_tokens": 4132698627.0, "step": 5402 }, { "epoch": 1.9793899422918384, "grad_norm": 0.1804283688302323, "learning_rate": 2.8774159007619774e-05, "loss": 0.5191, "num_tokens": 4133410498.0, "step": 5403 }, { "epoch": 1.9797563433177614, "grad_norm": 0.16186930779342987, "learning_rate": 2.8770200278701454e-05, "loss": 0.4981, "num_tokens": 4134186500.0, "step": 5404 }, { "epoch": 1.9801227443436842, "grad_norm": 0.162233703318881, "learning_rate": 2.8766241168319302e-05, "loss": 0.4912, "num_tokens": 4134882760.0, "step": 5405 }, { "epoch": 1.980489145369607, "grad_norm": 0.19157262441053, "learning_rate": 2.8762281676696386e-05, "loss": 0.5156, "num_tokens": 4135637840.0, "step": 5406 }, { "epoch": 1.9808555463955297, "grad_norm": 0.16441542429105743, "learning_rate": 2.8758321804055813e-05, "loss": 0.4631, "num_tokens": 4136397182.0, "step": 5407 }, { "epoch": 1.9812219474214527, "grad_norm": 0.20067467926630575, "learning_rate": 2.8754361550620693e-05, "loss": 0.5032, "num_tokens": 4137139109.0, "step": 5408 }, { "epoch": 1.9815883484473757, "grad_norm": 0.18956680306399945, "learning_rate": 2.8750400916614168e-05, "loss": 0.5059, "num_tokens": 4137921559.0, "step": 5409 }, { "epoch": 1.9819547494732985, "grad_norm": 0.1421996901471095, "learning_rate": 2.8746439902259387e-05, "loss": 0.4994, "num_tokens": 4138799747.0, "step": 5410 }, { "epoch": 1.9823211504992213, "grad_norm": 0.1956651576237096, "learning_rate": 2.8742478507779546e-05, "loss": 0.479, "num_tokens": 4139764235.0, "step": 5411 }, { "epoch": 1.9826875515251443, "grad_norm": 0.17133012463190175, "learning_rate": 2.8738516733397846e-05, "loss": 0.5202, "num_tokens": 4140500226.0, "step": 5412 }, { "epoch": 1.983053952551067, "grad_norm": 0.13821135204013085, "learning_rate": 2.87345545793375e-05, "loss": 0.4698, "num_tokens": 4141236540.0, "step": 5413 }, { "epoch": 1.98342035357699, "grad_norm": 0.14897654848048933, "learning_rate": 2.873059204582177e-05, "loss": 0.4994, "num_tokens": 4142024411.0, "step": 5414 }, { "epoch": 1.9837867546029129, "grad_norm": 0.20016486997010494, "learning_rate": 2.8726629133073904e-05, "loss": 0.4942, "num_tokens": 4142704162.0, "step": 5415 }, { "epoch": 1.9841531556288357, "grad_norm": 0.13868592418083253, "learning_rate": 2.872266584131721e-05, "loss": 0.5025, "num_tokens": 4143507413.0, "step": 5416 }, { "epoch": 1.9845195566547587, "grad_norm": 0.21566615714473036, "learning_rate": 2.8718702170774987e-05, "loss": 0.5126, "num_tokens": 4144297009.0, "step": 5417 }, { "epoch": 1.9848859576806817, "grad_norm": 0.15539113985707037, "learning_rate": 2.8714738121670566e-05, "loss": 0.5141, "num_tokens": 4144998482.0, "step": 5418 }, { "epoch": 1.9852523587066044, "grad_norm": 0.14925686213833333, "learning_rate": 2.87107736942273e-05, "loss": 0.5341, "num_tokens": 4145783591.0, "step": 5419 }, { "epoch": 1.9856187597325272, "grad_norm": 0.17684877448938507, "learning_rate": 2.8706808888668573e-05, "loss": 0.5082, "num_tokens": 4146644934.0, "step": 5420 }, { "epoch": 1.98598516075845, "grad_norm": 0.13441285214821921, "learning_rate": 2.870284370521776e-05, "loss": 0.4688, "num_tokens": 4147372886.0, "step": 5421 }, { "epoch": 1.986351561784373, "grad_norm": 0.1758662330054803, "learning_rate": 2.86988781440983e-05, "loss": 0.4656, "num_tokens": 4148135825.0, "step": 5422 }, { "epoch": 1.986717962810296, "grad_norm": 0.16313091139427222, "learning_rate": 2.869491220553361e-05, "loss": 0.5098, "num_tokens": 4148861317.0, "step": 5423 }, { "epoch": 1.9870843638362188, "grad_norm": 0.17569183329938756, "learning_rate": 2.869094588974717e-05, "loss": 0.5105, "num_tokens": 4149631937.0, "step": 5424 }, { "epoch": 1.9874507648621416, "grad_norm": 0.17205901513309937, "learning_rate": 2.8686979196962442e-05, "loss": 0.4889, "num_tokens": 4150426880.0, "step": 5425 }, { "epoch": 1.9878171658880643, "grad_norm": 0.14104489937429818, "learning_rate": 2.868301212740294e-05, "loss": 0.4997, "num_tokens": 4151193881.0, "step": 5426 }, { "epoch": 1.9881835669139873, "grad_norm": 0.17672782624173372, "learning_rate": 2.8679044681292176e-05, "loss": 0.4997, "num_tokens": 4152003173.0, "step": 5427 }, { "epoch": 1.9885499679399103, "grad_norm": 0.17665385910980122, "learning_rate": 2.86750768588537e-05, "loss": 0.5306, "num_tokens": 4152706042.0, "step": 5428 }, { "epoch": 1.9889163689658331, "grad_norm": 0.1393664259157166, "learning_rate": 2.867110866031108e-05, "loss": 0.5099, "num_tokens": 4153496367.0, "step": 5429 }, { "epoch": 1.989282769991756, "grad_norm": 0.15718105744917346, "learning_rate": 2.866714008588789e-05, "loss": 0.4826, "num_tokens": 4154189382.0, "step": 5430 }, { "epoch": 1.9896491710176787, "grad_norm": 0.13743854701068225, "learning_rate": 2.8663171135807758e-05, "loss": 0.4857, "num_tokens": 4155016995.0, "step": 5431 }, { "epoch": 1.9900155720436017, "grad_norm": 0.14044968680453032, "learning_rate": 2.8659201810294294e-05, "loss": 0.5299, "num_tokens": 4155790673.0, "step": 5432 }, { "epoch": 1.9903819730695247, "grad_norm": 0.18284053592409508, "learning_rate": 2.8655232109571156e-05, "loss": 0.4965, "num_tokens": 4156577325.0, "step": 5433 }, { "epoch": 1.9907483740954475, "grad_norm": 0.12350843007011839, "learning_rate": 2.865126203386201e-05, "loss": 0.5278, "num_tokens": 4157400194.0, "step": 5434 }, { "epoch": 1.9911147751213703, "grad_norm": 0.181898098755445, "learning_rate": 2.864729158339056e-05, "loss": 0.514, "num_tokens": 4158233110.0, "step": 5435 }, { "epoch": 1.9914811761472933, "grad_norm": 0.1467136301505963, "learning_rate": 2.8643320758380502e-05, "loss": 0.4904, "num_tokens": 4159032738.0, "step": 5436 }, { "epoch": 1.991847577173216, "grad_norm": 0.14934472428972562, "learning_rate": 2.863934955905559e-05, "loss": 0.5202, "num_tokens": 4159854810.0, "step": 5437 }, { "epoch": 1.992213978199139, "grad_norm": 0.1553100267806168, "learning_rate": 2.8635377985639556e-05, "loss": 0.51, "num_tokens": 4160647977.0, "step": 5438 }, { "epoch": 1.9925803792250618, "grad_norm": 0.14322485263314763, "learning_rate": 2.86314060383562e-05, "loss": 0.4913, "num_tokens": 4161446876.0, "step": 5439 }, { "epoch": 1.9929467802509846, "grad_norm": 0.14891541912042328, "learning_rate": 2.8627433717429303e-05, "loss": 0.4621, "num_tokens": 4162165647.0, "step": 5440 }, { "epoch": 1.9933131812769076, "grad_norm": 0.15821293278941656, "learning_rate": 2.8623461023082693e-05, "loss": 0.5283, "num_tokens": 4162957470.0, "step": 5441 }, { "epoch": 1.9936795823028306, "grad_norm": 0.1448075492617991, "learning_rate": 2.8619487955540203e-05, "loss": 0.5271, "num_tokens": 4163661407.0, "step": 5442 }, { "epoch": 1.9940459833287534, "grad_norm": 0.15586814159334625, "learning_rate": 2.8615514515025697e-05, "loss": 0.5386, "num_tokens": 4164409268.0, "step": 5443 }, { "epoch": 1.9944123843546762, "grad_norm": 0.14189970789221606, "learning_rate": 2.8611540701763058e-05, "loss": 0.493, "num_tokens": 4165071927.0, "step": 5444 }, { "epoch": 1.994778785380599, "grad_norm": 0.14604816557190878, "learning_rate": 2.8607566515976185e-05, "loss": 0.4979, "num_tokens": 4165834642.0, "step": 5445 }, { "epoch": 1.995145186406522, "grad_norm": 0.13953133905442594, "learning_rate": 2.8603591957889005e-05, "loss": 0.4961, "num_tokens": 4166579492.0, "step": 5446 }, { "epoch": 1.995511587432445, "grad_norm": 0.14082940039340935, "learning_rate": 2.8599617027725465e-05, "loss": 0.4824, "num_tokens": 4167373474.0, "step": 5447 }, { "epoch": 1.9958779884583677, "grad_norm": 0.14521694427897394, "learning_rate": 2.859564172570952e-05, "loss": 0.5371, "num_tokens": 4168087111.0, "step": 5448 }, { "epoch": 1.9962443894842905, "grad_norm": 0.12647243560265226, "learning_rate": 2.8591666052065167e-05, "loss": 0.5075, "num_tokens": 4168800830.0, "step": 5449 }, { "epoch": 1.9966107905102133, "grad_norm": 0.14395255534148435, "learning_rate": 2.8587690007016408e-05, "loss": 0.5127, "num_tokens": 4169570039.0, "step": 5450 }, { "epoch": 1.9969771915361363, "grad_norm": 0.1388959711223564, "learning_rate": 2.8583713590787277e-05, "loss": 0.4904, "num_tokens": 4170397539.0, "step": 5451 }, { "epoch": 1.9973435925620593, "grad_norm": 0.14907566626993485, "learning_rate": 2.8579736803601826e-05, "loss": 0.5055, "num_tokens": 4171089761.0, "step": 5452 }, { "epoch": 1.997709993587982, "grad_norm": 0.15718522933448364, "learning_rate": 2.8575759645684108e-05, "loss": 0.5138, "num_tokens": 4171804037.0, "step": 5453 }, { "epoch": 1.9980763946139048, "grad_norm": 0.14160556374695063, "learning_rate": 2.857178211725823e-05, "loss": 0.5251, "num_tokens": 4172549497.0, "step": 5454 }, { "epoch": 1.9984427956398276, "grad_norm": 0.13926008961599068, "learning_rate": 2.8567804218548296e-05, "loss": 0.4793, "num_tokens": 4173336819.0, "step": 5455 }, { "epoch": 1.9988091966657506, "grad_norm": 0.1434275505741041, "learning_rate": 2.8563825949778444e-05, "loss": 0.4991, "num_tokens": 4174067833.0, "step": 5456 }, { "epoch": 1.9991755976916736, "grad_norm": 0.14978477855135328, "learning_rate": 2.855984731117283e-05, "loss": 0.4816, "num_tokens": 4174954211.0, "step": 5457 }, { "epoch": 1.9995419987175964, "grad_norm": 0.15135464752331187, "learning_rate": 2.855586830295562e-05, "loss": 0.5086, "num_tokens": 4175797151.0, "step": 5458 }, { "epoch": 1.9999083997435192, "grad_norm": 0.16482966453969367, "learning_rate": 2.8551888925351017e-05, "loss": 0.4832, "num_tokens": 4176505476.0, "step": 5459 }, { "epoch": 2.0, "grad_norm": 0.16482966453969367, "learning_rate": 2.854790917858323e-05, "loss": 0.545, "num_tokens": 4176664451.0, "step": 5460 }, { "epoch": 2.000366401025923, "grad_norm": 0.31947198729447696, "learning_rate": 2.8543929062876498e-05, "loss": 0.4514, "num_tokens": 4177405055.0, "step": 5461 }, { "epoch": 2.0007328020518456, "grad_norm": 0.15231600737549755, "learning_rate": 2.8539948578455092e-05, "loss": 0.4698, "num_tokens": 4178271995.0, "step": 5462 }, { "epoch": 2.001099203077769, "grad_norm": 0.16443396162947194, "learning_rate": 2.853596772554327e-05, "loss": 0.4448, "num_tokens": 4179002021.0, "step": 5463 }, { "epoch": 2.0014656041036916, "grad_norm": 0.1612626039992077, "learning_rate": 2.853198650436534e-05, "loss": 0.4465, "num_tokens": 4179746377.0, "step": 5464 }, { "epoch": 2.0018320051296143, "grad_norm": 0.20286407280701815, "learning_rate": 2.8528004915145627e-05, "loss": 0.4504, "num_tokens": 4180466710.0, "step": 5465 }, { "epoch": 2.002198406155537, "grad_norm": 0.1476733481414703, "learning_rate": 2.8524022958108474e-05, "loss": 0.4411, "num_tokens": 4181259665.0, "step": 5466 }, { "epoch": 2.00256480718146, "grad_norm": 0.2136687265478494, "learning_rate": 2.8520040633478222e-05, "loss": 0.4641, "num_tokens": 4182162757.0, "step": 5467 }, { "epoch": 2.002931208207383, "grad_norm": 0.15792765838079767, "learning_rate": 2.8516057941479274e-05, "loss": 0.4611, "num_tokens": 4182869197.0, "step": 5468 }, { "epoch": 2.003297609233306, "grad_norm": 0.15774072497403374, "learning_rate": 2.851207488233603e-05, "loss": 0.4375, "num_tokens": 4183720805.0, "step": 5469 }, { "epoch": 2.0036640102592287, "grad_norm": 0.16942640263101102, "learning_rate": 2.850809145627291e-05, "loss": 0.4207, "num_tokens": 4184510247.0, "step": 5470 }, { "epoch": 2.0040304112851515, "grad_norm": 0.19565759602390956, "learning_rate": 2.850410766351435e-05, "loss": 0.4777, "num_tokens": 4185209241.0, "step": 5471 }, { "epoch": 2.0043968123110742, "grad_norm": 0.1742970339210354, "learning_rate": 2.8500123504284834e-05, "loss": 0.4657, "num_tokens": 4185988517.0, "step": 5472 }, { "epoch": 2.0047632133369975, "grad_norm": 0.1767651775451955, "learning_rate": 2.8496138978808828e-05, "loss": 0.4432, "num_tokens": 4186836287.0, "step": 5473 }, { "epoch": 2.0051296143629203, "grad_norm": 0.1730735464971617, "learning_rate": 2.8492154087310854e-05, "loss": 0.441, "num_tokens": 4187681411.0, "step": 5474 }, { "epoch": 2.005496015388843, "grad_norm": 0.16711054282793866, "learning_rate": 2.848816883001543e-05, "loss": 0.4125, "num_tokens": 4188518401.0, "step": 5475 }, { "epoch": 2.005862416414766, "grad_norm": 0.16579313109543561, "learning_rate": 2.8484183207147102e-05, "loss": 0.4669, "num_tokens": 4189296095.0, "step": 5476 }, { "epoch": 2.006228817440689, "grad_norm": 0.1644088665755618, "learning_rate": 2.8480197218930448e-05, "loss": 0.456, "num_tokens": 4190067268.0, "step": 5477 }, { "epoch": 2.006595218466612, "grad_norm": 0.16842107122080466, "learning_rate": 2.8476210865590048e-05, "loss": 0.4319, "num_tokens": 4190908073.0, "step": 5478 }, { "epoch": 2.0069616194925346, "grad_norm": 0.15228619719172942, "learning_rate": 2.8472224147350513e-05, "loss": 0.4592, "num_tokens": 4191633388.0, "step": 5479 }, { "epoch": 2.0073280205184574, "grad_norm": 0.1846567042457126, "learning_rate": 2.846823706443647e-05, "loss": 0.4566, "num_tokens": 4192262524.0, "step": 5480 }, { "epoch": 2.00769442154438, "grad_norm": 0.15237220334313803, "learning_rate": 2.846424961707258e-05, "loss": 0.4256, "num_tokens": 4193102422.0, "step": 5481 }, { "epoch": 2.0080608225703034, "grad_norm": 0.16841692740888284, "learning_rate": 2.8460261805483504e-05, "loss": 0.4678, "num_tokens": 4193846136.0, "step": 5482 }, { "epoch": 2.008427223596226, "grad_norm": 0.153025246012336, "learning_rate": 2.8456273629893934e-05, "loss": 0.4618, "num_tokens": 4194705086.0, "step": 5483 }, { "epoch": 2.008793624622149, "grad_norm": 0.15470246650526412, "learning_rate": 2.8452285090528585e-05, "loss": 0.4319, "num_tokens": 4195394275.0, "step": 5484 }, { "epoch": 2.0091600256480717, "grad_norm": 0.14601782738330488, "learning_rate": 2.8448296187612193e-05, "loss": 0.4594, "num_tokens": 4196177430.0, "step": 5485 }, { "epoch": 2.0095264266739945, "grad_norm": 0.1565553291652108, "learning_rate": 2.84443069213695e-05, "loss": 0.4425, "num_tokens": 4196913001.0, "step": 5486 }, { "epoch": 2.0098928276999177, "grad_norm": 0.14680133228714823, "learning_rate": 2.8440317292025287e-05, "loss": 0.4644, "num_tokens": 4197623761.0, "step": 5487 }, { "epoch": 2.0102592287258405, "grad_norm": 0.1643255635708689, "learning_rate": 2.8436327299804345e-05, "loss": 0.4343, "num_tokens": 4198338643.0, "step": 5488 }, { "epoch": 2.0106256297517633, "grad_norm": 0.15361125945399431, "learning_rate": 2.8432336944931495e-05, "loss": 0.4622, "num_tokens": 4199241823.0, "step": 5489 }, { "epoch": 2.010992030777686, "grad_norm": 0.14538593326880075, "learning_rate": 2.8428346227631566e-05, "loss": 0.4356, "num_tokens": 4199983682.0, "step": 5490 }, { "epoch": 2.011358431803609, "grad_norm": 0.1650955336667555, "learning_rate": 2.8424355148129415e-05, "loss": 0.4684, "num_tokens": 4200721934.0, "step": 5491 }, { "epoch": 2.011724832829532, "grad_norm": 0.15248904050358789, "learning_rate": 2.8420363706649913e-05, "loss": 0.435, "num_tokens": 4201441311.0, "step": 5492 }, { "epoch": 2.012091233855455, "grad_norm": 0.13415568059044994, "learning_rate": 2.8416371903417964e-05, "loss": 0.474, "num_tokens": 4202191222.0, "step": 5493 }, { "epoch": 2.0124576348813776, "grad_norm": 0.14090899547819322, "learning_rate": 2.841237973865847e-05, "loss": 0.4291, "num_tokens": 4203066020.0, "step": 5494 }, { "epoch": 2.0128240359073004, "grad_norm": 0.14899551452562212, "learning_rate": 2.840838721259638e-05, "loss": 0.4692, "num_tokens": 4203889351.0, "step": 5495 }, { "epoch": 2.013190436933223, "grad_norm": 0.1334863262893362, "learning_rate": 2.840439432545666e-05, "loss": 0.4247, "num_tokens": 4204667570.0, "step": 5496 }, { "epoch": 2.0135568379591464, "grad_norm": 0.14049488866677912, "learning_rate": 2.840040107746427e-05, "loss": 0.4269, "num_tokens": 4205550340.0, "step": 5497 }, { "epoch": 2.013923238985069, "grad_norm": 0.1472958804032202, "learning_rate": 2.8396407468844206e-05, "loss": 0.4588, "num_tokens": 4206307560.0, "step": 5498 }, { "epoch": 2.014289640010992, "grad_norm": 0.14138190412082674, "learning_rate": 2.8392413499821496e-05, "loss": 0.4519, "num_tokens": 4206974147.0, "step": 5499 }, { "epoch": 2.0146560410369148, "grad_norm": 0.1598478413452472, "learning_rate": 2.838841917062118e-05, "loss": 0.4729, "num_tokens": 4207664245.0, "step": 5500 }, { "epoch": 2.015022442062838, "grad_norm": 0.1528552950563616, "learning_rate": 2.838442448146831e-05, "loss": 0.4453, "num_tokens": 4208519914.0, "step": 5501 }, { "epoch": 2.0153888430887608, "grad_norm": 0.13901150902354886, "learning_rate": 2.838042943258797e-05, "loss": 0.4483, "num_tokens": 4209378480.0, "step": 5502 }, { "epoch": 2.0157552441146835, "grad_norm": 0.1430228164748254, "learning_rate": 2.8376434024205252e-05, "loss": 0.4516, "num_tokens": 4210135734.0, "step": 5503 }, { "epoch": 2.0161216451406063, "grad_norm": 0.14927184849006278, "learning_rate": 2.8372438256545284e-05, "loss": 0.4564, "num_tokens": 4210920750.0, "step": 5504 }, { "epoch": 2.016488046166529, "grad_norm": 0.14449713273955403, "learning_rate": 2.8368442129833203e-05, "loss": 0.4588, "num_tokens": 4211726236.0, "step": 5505 }, { "epoch": 2.0168544471924523, "grad_norm": 0.12885106435347096, "learning_rate": 2.836444564429416e-05, "loss": 0.4087, "num_tokens": 4212510394.0, "step": 5506 }, { "epoch": 2.017220848218375, "grad_norm": 0.1384666807648335, "learning_rate": 2.8360448800153348e-05, "loss": 0.4874, "num_tokens": 4213255974.0, "step": 5507 }, { "epoch": 2.017587249244298, "grad_norm": 0.14994185931700185, "learning_rate": 2.835645159763597e-05, "loss": 0.4681, "num_tokens": 4214211317.0, "step": 5508 }, { "epoch": 2.0179536502702207, "grad_norm": 0.12739997946825424, "learning_rate": 2.835245403696723e-05, "loss": 0.4394, "num_tokens": 4214998147.0, "step": 5509 }, { "epoch": 2.0183200512961434, "grad_norm": 0.14536931664868455, "learning_rate": 2.8348456118372378e-05, "loss": 0.4274, "num_tokens": 4215725060.0, "step": 5510 }, { "epoch": 2.0186864523220667, "grad_norm": 0.1462769847587341, "learning_rate": 2.8344457842076675e-05, "loss": 0.4401, "num_tokens": 4216456131.0, "step": 5511 }, { "epoch": 2.0190528533479895, "grad_norm": 0.14332247260034664, "learning_rate": 2.8340459208305397e-05, "loss": 0.4451, "num_tokens": 4217203971.0, "step": 5512 }, { "epoch": 2.0194192543739122, "grad_norm": 0.13449687234657415, "learning_rate": 2.833646021728386e-05, "loss": 0.4498, "num_tokens": 4218031915.0, "step": 5513 }, { "epoch": 2.019785655399835, "grad_norm": 0.1532818550474698, "learning_rate": 2.8332460869237368e-05, "loss": 0.4524, "num_tokens": 4218810207.0, "step": 5514 }, { "epoch": 2.020152056425758, "grad_norm": 0.13727165654412338, "learning_rate": 2.8328461164391268e-05, "loss": 0.4388, "num_tokens": 4219518444.0, "step": 5515 }, { "epoch": 2.020518457451681, "grad_norm": 0.15731442939176896, "learning_rate": 2.8324461102970932e-05, "loss": 0.4575, "num_tokens": 4220317307.0, "step": 5516 }, { "epoch": 2.020884858477604, "grad_norm": 0.13540304053927896, "learning_rate": 2.832046068520172e-05, "loss": 0.4524, "num_tokens": 4221073513.0, "step": 5517 }, { "epoch": 2.0212512595035266, "grad_norm": 0.1833243348518608, "learning_rate": 2.831645991130905e-05, "loss": 0.4713, "num_tokens": 4221773800.0, "step": 5518 }, { "epoch": 2.0216176605294494, "grad_norm": 0.13532070132061316, "learning_rate": 2.831245878151835e-05, "loss": 0.4343, "num_tokens": 4222648877.0, "step": 5519 }, { "epoch": 2.021984061555372, "grad_norm": 0.14935196439359738, "learning_rate": 2.8308457296055045e-05, "loss": 0.4739, "num_tokens": 4223481057.0, "step": 5520 }, { "epoch": 2.0223504625812954, "grad_norm": 0.13703540648035176, "learning_rate": 2.8304455455144603e-05, "loss": 0.4598, "num_tokens": 4224227903.0, "step": 5521 }, { "epoch": 2.022716863607218, "grad_norm": 0.14483934484337674, "learning_rate": 2.8300453259012507e-05, "loss": 0.4732, "num_tokens": 4224956215.0, "step": 5522 }, { "epoch": 2.023083264633141, "grad_norm": 0.1628635319123389, "learning_rate": 2.8296450707884258e-05, "loss": 0.4884, "num_tokens": 4225752670.0, "step": 5523 }, { "epoch": 2.0234496656590637, "grad_norm": 0.14430719270247896, "learning_rate": 2.8292447801985383e-05, "loss": 0.4192, "num_tokens": 4226583705.0, "step": 5524 }, { "epoch": 2.023816066684987, "grad_norm": 0.13260234647077107, "learning_rate": 2.828844454154141e-05, "loss": 0.4543, "num_tokens": 4227320246.0, "step": 5525 }, { "epoch": 2.0241824677109097, "grad_norm": 0.14662919348912198, "learning_rate": 2.828444092677791e-05, "loss": 0.4473, "num_tokens": 4227990479.0, "step": 5526 }, { "epoch": 2.0245488687368325, "grad_norm": 0.1405953738937335, "learning_rate": 2.8280436957920473e-05, "loss": 0.4788, "num_tokens": 4228867403.0, "step": 5527 }, { "epoch": 2.0249152697627553, "grad_norm": 0.13583866152576962, "learning_rate": 2.8276432635194693e-05, "loss": 0.4408, "num_tokens": 4229630863.0, "step": 5528 }, { "epoch": 2.025281670788678, "grad_norm": 0.14705734313741176, "learning_rate": 2.8272427958826186e-05, "loss": 0.4641, "num_tokens": 4230406093.0, "step": 5529 }, { "epoch": 2.0256480718146013, "grad_norm": 0.1583963814330898, "learning_rate": 2.826842292904061e-05, "loss": 0.4572, "num_tokens": 4231103484.0, "step": 5530 }, { "epoch": 2.026014472840524, "grad_norm": 0.13843742884013793, "learning_rate": 2.8264417546063604e-05, "loss": 0.4736, "num_tokens": 4231828795.0, "step": 5531 }, { "epoch": 2.026380873866447, "grad_norm": 0.1498142952137125, "learning_rate": 2.826041181012086e-05, "loss": 0.4562, "num_tokens": 4232624748.0, "step": 5532 }, { "epoch": 2.0267472748923696, "grad_norm": 0.14426432406190182, "learning_rate": 2.825640572143809e-05, "loss": 0.4505, "num_tokens": 4233426306.0, "step": 5533 }, { "epoch": 2.0271136759182924, "grad_norm": 0.1374729938531646, "learning_rate": 2.8252399280241004e-05, "loss": 0.4634, "num_tokens": 4234204571.0, "step": 5534 }, { "epoch": 2.0274800769442156, "grad_norm": 0.16191893506976215, "learning_rate": 2.824839248675534e-05, "loss": 0.4695, "num_tokens": 4234926602.0, "step": 5535 }, { "epoch": 2.0278464779701384, "grad_norm": 0.13292773395564375, "learning_rate": 2.8244385341206863e-05, "loss": 0.4763, "num_tokens": 4235761424.0, "step": 5536 }, { "epoch": 2.028212878996061, "grad_norm": 0.1472975942313336, "learning_rate": 2.8240377843821356e-05, "loss": 0.4299, "num_tokens": 4236630147.0, "step": 5537 }, { "epoch": 2.028579280021984, "grad_norm": 0.1399287496650169, "learning_rate": 2.823636999482463e-05, "loss": 0.4662, "num_tokens": 4237438136.0, "step": 5538 }, { "epoch": 2.0289456810479067, "grad_norm": 0.14212237882257778, "learning_rate": 2.823236179444248e-05, "loss": 0.4444, "num_tokens": 4238240020.0, "step": 5539 }, { "epoch": 2.02931208207383, "grad_norm": 0.1662403835852016, "learning_rate": 2.8228353242900762e-05, "loss": 0.4527, "num_tokens": 4238950103.0, "step": 5540 }, { "epoch": 2.0296784830997527, "grad_norm": 0.14297353572767946, "learning_rate": 2.8224344340425337e-05, "loss": 0.4543, "num_tokens": 4239737279.0, "step": 5541 }, { "epoch": 2.0300448841256755, "grad_norm": 0.16024322059699198, "learning_rate": 2.822033508724208e-05, "loss": 0.439, "num_tokens": 4240479360.0, "step": 5542 }, { "epoch": 2.0304112851515983, "grad_norm": 0.14213344773302514, "learning_rate": 2.821632548357689e-05, "loss": 0.4284, "num_tokens": 4241084833.0, "step": 5543 }, { "epoch": 2.030777686177521, "grad_norm": 0.1430936309970831, "learning_rate": 2.8212315529655697e-05, "loss": 0.4594, "num_tokens": 4241783395.0, "step": 5544 }, { "epoch": 2.0311440872034443, "grad_norm": 0.16296323162780832, "learning_rate": 2.8208305225704417e-05, "loss": 0.4254, "num_tokens": 4242483426.0, "step": 5545 }, { "epoch": 2.031510488229367, "grad_norm": 0.13923943208500417, "learning_rate": 2.8204294571949037e-05, "loss": 0.4635, "num_tokens": 4243206968.0, "step": 5546 }, { "epoch": 2.03187688925529, "grad_norm": 0.14522952610601156, "learning_rate": 2.820028356861552e-05, "loss": 0.4525, "num_tokens": 4243955928.0, "step": 5547 }, { "epoch": 2.0322432902812126, "grad_norm": 0.14103933857740406, "learning_rate": 2.819627221592986e-05, "loss": 0.4675, "num_tokens": 4244715158.0, "step": 5548 }, { "epoch": 2.032609691307136, "grad_norm": 0.15543811853512995, "learning_rate": 2.819226051411808e-05, "loss": 0.4597, "num_tokens": 4245405580.0, "step": 5549 }, { "epoch": 2.0329760923330586, "grad_norm": 0.137383692891671, "learning_rate": 2.8188248463406223e-05, "loss": 0.4387, "num_tokens": 4246202433.0, "step": 5550 }, { "epoch": 2.0333424933589814, "grad_norm": 0.14979574990734149, "learning_rate": 2.818423606402034e-05, "loss": 0.4569, "num_tokens": 4246949220.0, "step": 5551 }, { "epoch": 2.033708894384904, "grad_norm": 0.16015952630165894, "learning_rate": 2.8180223316186504e-05, "loss": 0.4471, "num_tokens": 4247688662.0, "step": 5552 }, { "epoch": 2.034075295410827, "grad_norm": 0.14744299002249142, "learning_rate": 2.817621022013083e-05, "loss": 0.4797, "num_tokens": 4248347144.0, "step": 5553 }, { "epoch": 2.03444169643675, "grad_norm": 0.13645401789665698, "learning_rate": 2.8172196776079406e-05, "loss": 0.4706, "num_tokens": 4249131895.0, "step": 5554 }, { "epoch": 2.034808097462673, "grad_norm": 0.15625515718485564, "learning_rate": 2.816818298425839e-05, "loss": 0.4346, "num_tokens": 4249897211.0, "step": 5555 }, { "epoch": 2.0351744984885958, "grad_norm": 0.131933017719567, "learning_rate": 2.8164168844893928e-05, "loss": 0.4226, "num_tokens": 4250672349.0, "step": 5556 }, { "epoch": 2.0355408995145186, "grad_norm": 0.17035221271960416, "learning_rate": 2.8160154358212198e-05, "loss": 0.4791, "num_tokens": 4251437442.0, "step": 5557 }, { "epoch": 2.0359073005404413, "grad_norm": 0.13867600047327086, "learning_rate": 2.815613952443939e-05, "loss": 0.4159, "num_tokens": 4252193753.0, "step": 5558 }, { "epoch": 2.0362737015663646, "grad_norm": 0.13622422090561, "learning_rate": 2.8152124343801722e-05, "loss": 0.4647, "num_tokens": 4253009757.0, "step": 5559 }, { "epoch": 2.0366401025922873, "grad_norm": 0.19393460062008958, "learning_rate": 2.8148108816525424e-05, "loss": 0.4878, "num_tokens": 4253704868.0, "step": 5560 }, { "epoch": 2.03700650361821, "grad_norm": 0.1499189682432175, "learning_rate": 2.814409294283676e-05, "loss": 0.4867, "num_tokens": 4254415041.0, "step": 5561 }, { "epoch": 2.037372904644133, "grad_norm": 0.1607349901653313, "learning_rate": 2.8140076722961983e-05, "loss": 0.4614, "num_tokens": 4255163574.0, "step": 5562 }, { "epoch": 2.0377393056700557, "grad_norm": 0.17230430091061696, "learning_rate": 2.8136060157127397e-05, "loss": 0.4652, "num_tokens": 4255914694.0, "step": 5563 }, { "epoch": 2.038105706695979, "grad_norm": 0.15381115696703082, "learning_rate": 2.813204324555932e-05, "loss": 0.4467, "num_tokens": 4256614833.0, "step": 5564 }, { "epoch": 2.0384721077219017, "grad_norm": 0.16832718730178048, "learning_rate": 2.8128025988484072e-05, "loss": 0.4736, "num_tokens": 4257409456.0, "step": 5565 }, { "epoch": 2.0388385087478245, "grad_norm": 0.1717576015704431, "learning_rate": 2.812400838612801e-05, "loss": 0.472, "num_tokens": 4258229318.0, "step": 5566 }, { "epoch": 2.0392049097737472, "grad_norm": 0.15489673356062333, "learning_rate": 2.8119990438717494e-05, "loss": 0.4699, "num_tokens": 4259123834.0, "step": 5567 }, { "epoch": 2.03957131079967, "grad_norm": 0.1823689119420981, "learning_rate": 2.8115972146478925e-05, "loss": 0.443, "num_tokens": 4259966082.0, "step": 5568 }, { "epoch": 2.0399377118255932, "grad_norm": 0.1341113563445595, "learning_rate": 2.8111953509638707e-05, "loss": 0.4487, "num_tokens": 4260784838.0, "step": 5569 }, { "epoch": 2.040304112851516, "grad_norm": 0.14937146385100444, "learning_rate": 2.8107934528423265e-05, "loss": 0.4434, "num_tokens": 4261553179.0, "step": 5570 }, { "epoch": 2.040670513877439, "grad_norm": 0.15769558347295606, "learning_rate": 2.8103915203059057e-05, "loss": 0.4973, "num_tokens": 4262227751.0, "step": 5571 }, { "epoch": 2.0410369149033616, "grad_norm": 0.14304587451879244, "learning_rate": 2.8099895533772538e-05, "loss": 0.4371, "num_tokens": 4262953653.0, "step": 5572 }, { "epoch": 2.041403315929285, "grad_norm": 0.14791545829245997, "learning_rate": 2.80958755207902e-05, "loss": 0.4415, "num_tokens": 4263680447.0, "step": 5573 }, { "epoch": 2.0417697169552076, "grad_norm": 0.13966652802671128, "learning_rate": 2.809185516433855e-05, "loss": 0.4347, "num_tokens": 4264390111.0, "step": 5574 }, { "epoch": 2.0421361179811304, "grad_norm": 0.1487553421195633, "learning_rate": 2.808783446464411e-05, "loss": 0.4466, "num_tokens": 4265165804.0, "step": 5575 }, { "epoch": 2.042502519007053, "grad_norm": 0.16104992552264172, "learning_rate": 2.808381342193343e-05, "loss": 0.4969, "num_tokens": 4265803886.0, "step": 5576 }, { "epoch": 2.042868920032976, "grad_norm": 0.1483609389562406, "learning_rate": 2.8079792036433065e-05, "loss": 0.4698, "num_tokens": 4266586900.0, "step": 5577 }, { "epoch": 2.043235321058899, "grad_norm": 0.1455633466651938, "learning_rate": 2.8075770308369606e-05, "loss": 0.4651, "num_tokens": 4267370680.0, "step": 5578 }, { "epoch": 2.043601722084822, "grad_norm": 0.14502265596070382, "learning_rate": 2.8071748237969653e-05, "loss": 0.4468, "num_tokens": 4268167024.0, "step": 5579 }, { "epoch": 2.0439681231107447, "grad_norm": 0.14230729172003587, "learning_rate": 2.806772582545983e-05, "loss": 0.469, "num_tokens": 4268871771.0, "step": 5580 }, { "epoch": 2.0443345241366675, "grad_norm": 0.13891354711343626, "learning_rate": 2.8063703071066764e-05, "loss": 0.4549, "num_tokens": 4269659938.0, "step": 5581 }, { "epoch": 2.0447009251625903, "grad_norm": 0.13354979858214364, "learning_rate": 2.8059679975017133e-05, "loss": 0.4407, "num_tokens": 4270488799.0, "step": 5582 }, { "epoch": 2.0450673261885135, "grad_norm": 0.13680499885774317, "learning_rate": 2.805565653753761e-05, "loss": 0.4597, "num_tokens": 4271366691.0, "step": 5583 }, { "epoch": 2.0454337272144363, "grad_norm": 0.1314268299945926, "learning_rate": 2.8051632758854895e-05, "loss": 0.429, "num_tokens": 4272295268.0, "step": 5584 }, { "epoch": 2.045800128240359, "grad_norm": 0.12861679661146178, "learning_rate": 2.8047608639195708e-05, "loss": 0.4703, "num_tokens": 4273016961.0, "step": 5585 }, { "epoch": 2.046166529266282, "grad_norm": 0.15295039408512218, "learning_rate": 2.8043584178786774e-05, "loss": 0.4376, "num_tokens": 4273809481.0, "step": 5586 }, { "epoch": 2.0465329302922046, "grad_norm": 0.12463913586382165, "learning_rate": 2.803955937785486e-05, "loss": 0.4165, "num_tokens": 4274634215.0, "step": 5587 }, { "epoch": 2.046899331318128, "grad_norm": 0.1357973788748213, "learning_rate": 2.803553423662675e-05, "loss": 0.4275, "num_tokens": 4275412584.0, "step": 5588 }, { "epoch": 2.0472657323440506, "grad_norm": 0.14436361664494976, "learning_rate": 2.8031508755329216e-05, "loss": 0.4699, "num_tokens": 4276133123.0, "step": 5589 }, { "epoch": 2.0476321333699734, "grad_norm": 0.14442918292473367, "learning_rate": 2.8027482934189086e-05, "loss": 0.4164, "num_tokens": 4276918441.0, "step": 5590 }, { "epoch": 2.047998534395896, "grad_norm": 0.12859589010776204, "learning_rate": 2.80234567734332e-05, "loss": 0.4524, "num_tokens": 4277675810.0, "step": 5591 }, { "epoch": 2.048364935421819, "grad_norm": 0.14561919205660717, "learning_rate": 2.8019430273288394e-05, "loss": 0.4517, "num_tokens": 4278415938.0, "step": 5592 }, { "epoch": 2.048731336447742, "grad_norm": 0.1429224000266053, "learning_rate": 2.8015403433981546e-05, "loss": 0.4559, "num_tokens": 4279155967.0, "step": 5593 }, { "epoch": 2.049097737473665, "grad_norm": 0.1347761636999767, "learning_rate": 2.8011376255739554e-05, "loss": 0.4419, "num_tokens": 4279851934.0, "step": 5594 }, { "epoch": 2.0494641384995878, "grad_norm": 0.1342613490113224, "learning_rate": 2.800734873878932e-05, "loss": 0.4281, "num_tokens": 4280550402.0, "step": 5595 }, { "epoch": 2.0498305395255105, "grad_norm": 0.15692354532423416, "learning_rate": 2.8003320883357767e-05, "loss": 0.4648, "num_tokens": 4281252942.0, "step": 5596 }, { "epoch": 2.0501969405514338, "grad_norm": 0.14819670840094726, "learning_rate": 2.7999292689671854e-05, "loss": 0.4474, "num_tokens": 4282112621.0, "step": 5597 }, { "epoch": 2.0505633415773565, "grad_norm": 0.1376138738149632, "learning_rate": 2.799526415795854e-05, "loss": 0.4469, "num_tokens": 4282826975.0, "step": 5598 }, { "epoch": 2.0509297426032793, "grad_norm": 0.14029746468182894, "learning_rate": 2.7991235288444815e-05, "loss": 0.4604, "num_tokens": 4283545770.0, "step": 5599 }, { "epoch": 2.051296143629202, "grad_norm": 0.1747152586886685, "learning_rate": 2.7987206081357686e-05, "loss": 0.4334, "num_tokens": 4284446207.0, "step": 5600 }, { "epoch": 2.051662544655125, "grad_norm": 0.12450843242544801, "learning_rate": 2.798317653692417e-05, "loss": 0.4542, "num_tokens": 4285191253.0, "step": 5601 }, { "epoch": 2.052028945681048, "grad_norm": 0.17484532172998013, "learning_rate": 2.797914665537131e-05, "loss": 0.4527, "num_tokens": 4285883331.0, "step": 5602 }, { "epoch": 2.052395346706971, "grad_norm": 0.12828781694295788, "learning_rate": 2.7975116436926177e-05, "loss": 0.4317, "num_tokens": 4286637236.0, "step": 5603 }, { "epoch": 2.0527617477328937, "grad_norm": 0.13540056040434137, "learning_rate": 2.7971085881815843e-05, "loss": 0.4254, "num_tokens": 4287418232.0, "step": 5604 }, { "epoch": 2.0531281487588164, "grad_norm": 0.14488635591277277, "learning_rate": 2.7967054990267407e-05, "loss": 0.4765, "num_tokens": 4288220093.0, "step": 5605 }, { "epoch": 2.053494549784739, "grad_norm": 0.1697932189119635, "learning_rate": 2.7963023762507996e-05, "loss": 0.4461, "num_tokens": 4288968103.0, "step": 5606 }, { "epoch": 2.0538609508106624, "grad_norm": 0.15363070432008927, "learning_rate": 2.795899219876474e-05, "loss": 0.4798, "num_tokens": 4289609797.0, "step": 5607 }, { "epoch": 2.0542273518365852, "grad_norm": 0.14629588726780515, "learning_rate": 2.79549602992648e-05, "loss": 0.4851, "num_tokens": 4290353500.0, "step": 5608 }, { "epoch": 2.054593752862508, "grad_norm": 0.1414040920071753, "learning_rate": 2.7950928064235347e-05, "loss": 0.4488, "num_tokens": 4291080023.0, "step": 5609 }, { "epoch": 2.054960153888431, "grad_norm": 0.1325964067834625, "learning_rate": 2.794689549390358e-05, "loss": 0.4796, "num_tokens": 4291810332.0, "step": 5610 }, { "epoch": 2.0553265549143536, "grad_norm": 0.1413973106764728, "learning_rate": 2.794286258849672e-05, "loss": 0.4595, "num_tokens": 4292626245.0, "step": 5611 }, { "epoch": 2.055692955940277, "grad_norm": 0.13985144245544817, "learning_rate": 2.7938829348241977e-05, "loss": 0.4386, "num_tokens": 4293361804.0, "step": 5612 }, { "epoch": 2.0560593569661996, "grad_norm": 0.17869733446207886, "learning_rate": 2.7934795773366623e-05, "loss": 0.4763, "num_tokens": 4294130763.0, "step": 5613 }, { "epoch": 2.0564257579921223, "grad_norm": 0.13985138177582537, "learning_rate": 2.793076186409792e-05, "loss": 0.4744, "num_tokens": 4294853988.0, "step": 5614 }, { "epoch": 2.056792159018045, "grad_norm": 0.14605823835975568, "learning_rate": 2.7926727620663154e-05, "loss": 0.4506, "num_tokens": 4295690135.0, "step": 5615 }, { "epoch": 2.057158560043968, "grad_norm": 0.15469550258749926, "learning_rate": 2.792269304328963e-05, "loss": 0.4632, "num_tokens": 4296339013.0, "step": 5616 }, { "epoch": 2.057524961069891, "grad_norm": 0.1437885182054225, "learning_rate": 2.791865813220469e-05, "loss": 0.4455, "num_tokens": 4297054910.0, "step": 5617 }, { "epoch": 2.057891362095814, "grad_norm": 0.14047677730142746, "learning_rate": 2.7914622887635667e-05, "loss": 0.4592, "num_tokens": 4297845281.0, "step": 5618 }, { "epoch": 2.0582577631217367, "grad_norm": 0.1299810427210579, "learning_rate": 2.791058730980992e-05, "loss": 0.4287, "num_tokens": 4298852488.0, "step": 5619 }, { "epoch": 2.0586241641476595, "grad_norm": 0.1275096386584484, "learning_rate": 2.7906551398954843e-05, "loss": 0.4668, "num_tokens": 4299701079.0, "step": 5620 }, { "epoch": 2.0589905651735827, "grad_norm": 0.14229165534907715, "learning_rate": 2.7902515155297836e-05, "loss": 0.4671, "num_tokens": 4300339950.0, "step": 5621 }, { "epoch": 2.0593569661995055, "grad_norm": 0.1308404317979522, "learning_rate": 2.7898478579066313e-05, "loss": 0.4447, "num_tokens": 4301118290.0, "step": 5622 }, { "epoch": 2.0597233672254283, "grad_norm": 0.1398073413905838, "learning_rate": 2.789444167048772e-05, "loss": 0.436, "num_tokens": 4301872386.0, "step": 5623 }, { "epoch": 2.060089768251351, "grad_norm": 0.13012096735278797, "learning_rate": 2.7890404429789506e-05, "loss": 0.4826, "num_tokens": 4302582888.0, "step": 5624 }, { "epoch": 2.060456169277274, "grad_norm": 0.1433111690842427, "learning_rate": 2.7886366857199157e-05, "loss": 0.4361, "num_tokens": 4303307694.0, "step": 5625 }, { "epoch": 2.060822570303197, "grad_norm": 0.14271463062634668, "learning_rate": 2.7882328952944164e-05, "loss": 0.4526, "num_tokens": 4304117880.0, "step": 5626 }, { "epoch": 2.06118897132912, "grad_norm": 0.14060663954330843, "learning_rate": 2.7878290717252046e-05, "loss": 0.468, "num_tokens": 4304854907.0, "step": 5627 }, { "epoch": 2.0615553723550426, "grad_norm": 0.1310190082982974, "learning_rate": 2.787425215035032e-05, "loss": 0.4564, "num_tokens": 4305563726.0, "step": 5628 }, { "epoch": 2.0619217733809654, "grad_norm": 0.13473120226262075, "learning_rate": 2.787021325246655e-05, "loss": 0.4416, "num_tokens": 4306346671.0, "step": 5629 }, { "epoch": 2.062288174406888, "grad_norm": 0.15374189913329436, "learning_rate": 2.7866174023828314e-05, "loss": 0.426, "num_tokens": 4307119466.0, "step": 5630 }, { "epoch": 2.0626545754328114, "grad_norm": 0.14335985199759302, "learning_rate": 2.7862134464663174e-05, "loss": 0.4713, "num_tokens": 4307837297.0, "step": 5631 }, { "epoch": 2.063020976458734, "grad_norm": 0.14001773559566125, "learning_rate": 2.7858094575198764e-05, "loss": 0.4521, "num_tokens": 4308665725.0, "step": 5632 }, { "epoch": 2.063387377484657, "grad_norm": 0.14232054291898774, "learning_rate": 2.7854054355662703e-05, "loss": 0.4634, "num_tokens": 4309352675.0, "step": 5633 }, { "epoch": 2.0637537785105797, "grad_norm": 0.14548661917648634, "learning_rate": 2.7850013806282624e-05, "loss": 0.4645, "num_tokens": 4310105499.0, "step": 5634 }, { "epoch": 2.0641201795365025, "grad_norm": 0.13916134640458433, "learning_rate": 2.7845972927286196e-05, "loss": 0.4446, "num_tokens": 4310996779.0, "step": 5635 }, { "epoch": 2.0644865805624257, "grad_norm": 0.15613717278265882, "learning_rate": 2.7841931718901104e-05, "loss": 0.4436, "num_tokens": 4311655227.0, "step": 5636 }, { "epoch": 2.0648529815883485, "grad_norm": 0.15505781565199903, "learning_rate": 2.7837890181355045e-05, "loss": 0.4433, "num_tokens": 4312376092.0, "step": 5637 }, { "epoch": 2.0652193826142713, "grad_norm": 0.1346807915069273, "learning_rate": 2.7833848314875746e-05, "loss": 0.4463, "num_tokens": 4313096605.0, "step": 5638 }, { "epoch": 2.065585783640194, "grad_norm": 0.14799863032360824, "learning_rate": 2.7829806119690925e-05, "loss": 0.4357, "num_tokens": 4313803899.0, "step": 5639 }, { "epoch": 2.065952184666117, "grad_norm": 0.14571170698578856, "learning_rate": 2.782576359602835e-05, "loss": 0.4772, "num_tokens": 4314532112.0, "step": 5640 }, { "epoch": 2.06631858569204, "grad_norm": 0.16083928578240111, "learning_rate": 2.7821720744115798e-05, "loss": 0.4205, "num_tokens": 4315201636.0, "step": 5641 }, { "epoch": 2.066684986717963, "grad_norm": 0.15070716804168752, "learning_rate": 2.7817677564181057e-05, "loss": 0.4777, "num_tokens": 4315858473.0, "step": 5642 }, { "epoch": 2.0670513877438856, "grad_norm": 0.15408522464470065, "learning_rate": 2.7813634056451938e-05, "loss": 0.4683, "num_tokens": 4316538289.0, "step": 5643 }, { "epoch": 2.0674177887698084, "grad_norm": 0.1669299763014294, "learning_rate": 2.7809590221156272e-05, "loss": 0.4837, "num_tokens": 4317261224.0, "step": 5644 }, { "epoch": 2.0677841897957316, "grad_norm": 0.15913576668609164, "learning_rate": 2.780554605852191e-05, "loss": 0.4458, "num_tokens": 4318049978.0, "step": 5645 }, { "epoch": 2.0681505908216544, "grad_norm": 0.13445790348095665, "learning_rate": 2.780150156877671e-05, "loss": 0.4401, "num_tokens": 4318829842.0, "step": 5646 }, { "epoch": 2.068516991847577, "grad_norm": 0.1465943745377943, "learning_rate": 2.779745675214856e-05, "loss": 0.4463, "num_tokens": 4319637987.0, "step": 5647 }, { "epoch": 2.0688833928735, "grad_norm": 0.14314441043201695, "learning_rate": 2.7793411608865372e-05, "loss": 0.4543, "num_tokens": 4320322164.0, "step": 5648 }, { "epoch": 2.0692497938994228, "grad_norm": 0.15852504521318475, "learning_rate": 2.7789366139155057e-05, "loss": 0.4468, "num_tokens": 4321134893.0, "step": 5649 }, { "epoch": 2.069616194925346, "grad_norm": 0.1453811594677019, "learning_rate": 2.7785320343245558e-05, "loss": 0.4394, "num_tokens": 4321944608.0, "step": 5650 }, { "epoch": 2.0699825959512688, "grad_norm": 0.15033154040525915, "learning_rate": 2.7781274221364837e-05, "loss": 0.4675, "num_tokens": 4322608354.0, "step": 5651 }, { "epoch": 2.0703489969771915, "grad_norm": 0.15350059884071443, "learning_rate": 2.777722777374087e-05, "loss": 0.4428, "num_tokens": 4323439568.0, "step": 5652 }, { "epoch": 2.0707153980031143, "grad_norm": 0.12830371514130373, "learning_rate": 2.777318100060165e-05, "loss": 0.4418, "num_tokens": 4324198276.0, "step": 5653 }, { "epoch": 2.071081799029037, "grad_norm": 0.1318587883530395, "learning_rate": 2.7769133902175184e-05, "loss": 0.4307, "num_tokens": 4324970419.0, "step": 5654 }, { "epoch": 2.0714482000549603, "grad_norm": 0.1793952758585539, "learning_rate": 2.776508647868952e-05, "loss": 0.4557, "num_tokens": 4325819022.0, "step": 5655 }, { "epoch": 2.071814601080883, "grad_norm": 0.13725779212031966, "learning_rate": 2.7761038730372697e-05, "loss": 0.4258, "num_tokens": 4326636621.0, "step": 5656 }, { "epoch": 2.072181002106806, "grad_norm": 0.13051304709898626, "learning_rate": 2.7756990657452786e-05, "loss": 0.4621, "num_tokens": 4327389796.0, "step": 5657 }, { "epoch": 2.0725474031327287, "grad_norm": 0.1708740911491523, "learning_rate": 2.7752942260157877e-05, "loss": 0.4302, "num_tokens": 4328208058.0, "step": 5658 }, { "epoch": 2.0729138041586515, "grad_norm": 0.13058130758849296, "learning_rate": 2.7748893538716067e-05, "loss": 0.4392, "num_tokens": 4328912418.0, "step": 5659 }, { "epoch": 2.0732802051845747, "grad_norm": 0.13628645899117647, "learning_rate": 2.774484449335549e-05, "loss": 0.4385, "num_tokens": 4329673204.0, "step": 5660 }, { "epoch": 2.0736466062104975, "grad_norm": 0.1367372696841388, "learning_rate": 2.7740795124304277e-05, "loss": 0.4501, "num_tokens": 4330417937.0, "step": 5661 }, { "epoch": 2.0740130072364202, "grad_norm": 0.14270226265879565, "learning_rate": 2.773674543179059e-05, "loss": 0.447, "num_tokens": 4331256958.0, "step": 5662 }, { "epoch": 2.074379408262343, "grad_norm": 0.13732796751341259, "learning_rate": 2.7732695416042617e-05, "loss": 0.4585, "num_tokens": 4332121167.0, "step": 5663 }, { "epoch": 2.074745809288266, "grad_norm": 0.13442533126180153, "learning_rate": 2.7728645077288545e-05, "loss": 0.4383, "num_tokens": 4332823889.0, "step": 5664 }, { "epoch": 2.075112210314189, "grad_norm": 0.14347350154668964, "learning_rate": 2.772459441575659e-05, "loss": 0.4492, "num_tokens": 4333542748.0, "step": 5665 }, { "epoch": 2.075478611340112, "grad_norm": 0.15190211409973983, "learning_rate": 2.772054343167498e-05, "loss": 0.4543, "num_tokens": 4334168654.0, "step": 5666 }, { "epoch": 2.0758450123660346, "grad_norm": 0.14490900194297254, "learning_rate": 2.7716492125271974e-05, "loss": 0.4456, "num_tokens": 4335018739.0, "step": 5667 }, { "epoch": 2.0762114133919574, "grad_norm": 0.12787066247157158, "learning_rate": 2.7712440496775844e-05, "loss": 0.4755, "num_tokens": 4335765183.0, "step": 5668 }, { "epoch": 2.07657781441788, "grad_norm": 0.14691778898353378, "learning_rate": 2.7708388546414867e-05, "loss": 0.4421, "num_tokens": 4336470791.0, "step": 5669 }, { "epoch": 2.0769442154438034, "grad_norm": 0.15618606374231828, "learning_rate": 2.7704336274417354e-05, "loss": 0.4809, "num_tokens": 4337226321.0, "step": 5670 }, { "epoch": 2.077310616469726, "grad_norm": 0.14947283024397232, "learning_rate": 2.7700283681011616e-05, "loss": 0.4711, "num_tokens": 4337940177.0, "step": 5671 }, { "epoch": 2.077677017495649, "grad_norm": 0.13478573995946025, "learning_rate": 2.7696230766426025e-05, "loss": 0.4666, "num_tokens": 4338677958.0, "step": 5672 }, { "epoch": 2.0780434185215717, "grad_norm": 0.14192126602538302, "learning_rate": 2.7692177530888914e-05, "loss": 0.4535, "num_tokens": 4339474305.0, "step": 5673 }, { "epoch": 2.078409819547495, "grad_norm": 0.14173919131765492, "learning_rate": 2.768812397462866e-05, "loss": 0.4311, "num_tokens": 4340204911.0, "step": 5674 }, { "epoch": 2.0787762205734177, "grad_norm": 0.13716608313692352, "learning_rate": 2.768407009787367e-05, "loss": 0.4548, "num_tokens": 4340948356.0, "step": 5675 }, { "epoch": 2.0791426215993405, "grad_norm": 0.14074605393828743, "learning_rate": 2.7680015900852354e-05, "loss": 0.4674, "num_tokens": 4341716896.0, "step": 5676 }, { "epoch": 2.0795090226252633, "grad_norm": 0.13473138705417995, "learning_rate": 2.7675961383793146e-05, "loss": 0.4578, "num_tokens": 4342462946.0, "step": 5677 }, { "epoch": 2.079875423651186, "grad_norm": 0.1295281920850718, "learning_rate": 2.7671906546924497e-05, "loss": 0.4396, "num_tokens": 4343467526.0, "step": 5678 }, { "epoch": 2.0802418246771093, "grad_norm": 0.12930830411251057, "learning_rate": 2.7667851390474873e-05, "loss": 0.4365, "num_tokens": 4344337119.0, "step": 5679 }, { "epoch": 2.080608225703032, "grad_norm": 0.14838618442597162, "learning_rate": 2.7663795914672747e-05, "loss": 0.4608, "num_tokens": 4344954230.0, "step": 5680 }, { "epoch": 2.080974626728955, "grad_norm": 0.13731680738341548, "learning_rate": 2.7659740119746642e-05, "loss": 0.4523, "num_tokens": 4345687163.0, "step": 5681 }, { "epoch": 2.0813410277548776, "grad_norm": 0.1480482489384198, "learning_rate": 2.7655684005925073e-05, "loss": 0.438, "num_tokens": 4346384697.0, "step": 5682 }, { "epoch": 2.0817074287808004, "grad_norm": 0.12776072545285047, "learning_rate": 2.7651627573436584e-05, "loss": 0.4839, "num_tokens": 4347138008.0, "step": 5683 }, { "epoch": 2.0820738298067236, "grad_norm": 0.14249917902584452, "learning_rate": 2.7647570822509728e-05, "loss": 0.4522, "num_tokens": 4347936646.0, "step": 5684 }, { "epoch": 2.0824402308326464, "grad_norm": 0.141706896463121, "learning_rate": 2.7643513753373078e-05, "loss": 0.4465, "num_tokens": 4348752351.0, "step": 5685 }, { "epoch": 2.082806631858569, "grad_norm": 0.14094973756822737, "learning_rate": 2.7639456366255225e-05, "loss": 0.4672, "num_tokens": 4349511079.0, "step": 5686 }, { "epoch": 2.083173032884492, "grad_norm": 0.15182446937936717, "learning_rate": 2.7635398661384797e-05, "loss": 0.4599, "num_tokens": 4350195824.0, "step": 5687 }, { "epoch": 2.0835394339104147, "grad_norm": 0.14028163318927783, "learning_rate": 2.763134063899041e-05, "loss": 0.4353, "num_tokens": 4350913045.0, "step": 5688 }, { "epoch": 2.083905834936338, "grad_norm": 0.14063192325444354, "learning_rate": 2.7627282299300714e-05, "loss": 0.4277, "num_tokens": 4351772496.0, "step": 5689 }, { "epoch": 2.0842722359622607, "grad_norm": 0.13877688685010375, "learning_rate": 2.7623223642544374e-05, "loss": 0.4827, "num_tokens": 4352483778.0, "step": 5690 }, { "epoch": 2.0846386369881835, "grad_norm": 0.15244847021673058, "learning_rate": 2.7619164668950077e-05, "loss": 0.4463, "num_tokens": 4353303757.0, "step": 5691 }, { "epoch": 2.0850050380141063, "grad_norm": 0.1313021881115878, "learning_rate": 2.7615105378746513e-05, "loss": 0.4379, "num_tokens": 4354115536.0, "step": 5692 }, { "epoch": 2.0853714390400295, "grad_norm": 0.13640846324627837, "learning_rate": 2.761104577216241e-05, "loss": 0.4357, "num_tokens": 4354841509.0, "step": 5693 }, { "epoch": 2.0857378400659523, "grad_norm": 0.1684501787689619, "learning_rate": 2.7606985849426514e-05, "loss": 0.4561, "num_tokens": 4355528526.0, "step": 5694 }, { "epoch": 2.086104241091875, "grad_norm": 0.1543876590994306, "learning_rate": 2.7602925610767565e-05, "loss": 0.4576, "num_tokens": 4356232845.0, "step": 5695 }, { "epoch": 2.086470642117798, "grad_norm": 0.1426167318193687, "learning_rate": 2.759886505641433e-05, "loss": 0.4223, "num_tokens": 4356975338.0, "step": 5696 }, { "epoch": 2.0868370431437206, "grad_norm": 0.14824633497731246, "learning_rate": 2.759480418659562e-05, "loss": 0.4635, "num_tokens": 4357750292.0, "step": 5697 }, { "epoch": 2.087203444169644, "grad_norm": 0.14828518021143655, "learning_rate": 2.759074300154023e-05, "loss": 0.4354, "num_tokens": 4358532544.0, "step": 5698 }, { "epoch": 2.0875698451955667, "grad_norm": 0.14212395236360184, "learning_rate": 2.7586681501476982e-05, "loss": 0.4448, "num_tokens": 4359283585.0, "step": 5699 }, { "epoch": 2.0879362462214894, "grad_norm": 0.1374773370910671, "learning_rate": 2.7582619686634727e-05, "loss": 0.4571, "num_tokens": 4360087199.0, "step": 5700 }, { "epoch": 2.088302647247412, "grad_norm": 0.14956661289856873, "learning_rate": 2.7578557557242315e-05, "loss": 0.4399, "num_tokens": 4360813917.0, "step": 5701 }, { "epoch": 2.088669048273335, "grad_norm": 0.13461749174363805, "learning_rate": 2.7574495113528644e-05, "loss": 0.4543, "num_tokens": 4361702105.0, "step": 5702 }, { "epoch": 2.089035449299258, "grad_norm": 0.14143334471976093, "learning_rate": 2.7570432355722596e-05, "loss": 0.4444, "num_tokens": 4362500556.0, "step": 5703 }, { "epoch": 2.089401850325181, "grad_norm": 0.14651110974505563, "learning_rate": 2.7566369284053088e-05, "loss": 0.4055, "num_tokens": 4363254811.0, "step": 5704 }, { "epoch": 2.089768251351104, "grad_norm": 0.13332010182676632, "learning_rate": 2.7562305898749054e-05, "loss": 0.4512, "num_tokens": 4364060317.0, "step": 5705 }, { "epoch": 2.0901346523770266, "grad_norm": 0.14138977901776978, "learning_rate": 2.7558242200039443e-05, "loss": 0.4488, "num_tokens": 4364725736.0, "step": 5706 }, { "epoch": 2.0905010534029493, "grad_norm": 0.14779789761737047, "learning_rate": 2.7554178188153225e-05, "loss": 0.4312, "num_tokens": 4365534391.0, "step": 5707 }, { "epoch": 2.0908674544288726, "grad_norm": 0.1439099526545853, "learning_rate": 2.755011386331937e-05, "loss": 0.4589, "num_tokens": 4366242440.0, "step": 5708 }, { "epoch": 2.0912338554547953, "grad_norm": 0.14337442862872107, "learning_rate": 2.7546049225766906e-05, "loss": 0.4787, "num_tokens": 4367075739.0, "step": 5709 }, { "epoch": 2.091600256480718, "grad_norm": 0.1327537502801228, "learning_rate": 2.754198427572483e-05, "loss": 0.4647, "num_tokens": 4367888233.0, "step": 5710 }, { "epoch": 2.091966657506641, "grad_norm": 0.14803466243522398, "learning_rate": 2.753791901342219e-05, "loss": 0.4335, "num_tokens": 4368607076.0, "step": 5711 }, { "epoch": 2.0923330585325637, "grad_norm": 0.15431886088295027, "learning_rate": 2.753385343908804e-05, "loss": 0.4632, "num_tokens": 4369409691.0, "step": 5712 }, { "epoch": 2.092699459558487, "grad_norm": 0.14250269523239395, "learning_rate": 2.7529787552951456e-05, "loss": 0.4413, "num_tokens": 4370260182.0, "step": 5713 }, { "epoch": 2.0930658605844097, "grad_norm": 0.12521470766805073, "learning_rate": 2.752572135524153e-05, "loss": 0.4374, "num_tokens": 4371089771.0, "step": 5714 }, { "epoch": 2.0934322616103325, "grad_norm": 0.15875341612919777, "learning_rate": 2.7521654846187356e-05, "loss": 0.415, "num_tokens": 4371870563.0, "step": 5715 }, { "epoch": 2.0937986626362552, "grad_norm": 0.14307943810685422, "learning_rate": 2.7517588026018073e-05, "loss": 0.4613, "num_tokens": 4372659761.0, "step": 5716 }, { "epoch": 2.094165063662178, "grad_norm": 0.15123038014401463, "learning_rate": 2.7513520894962823e-05, "loss": 0.4312, "num_tokens": 4373318140.0, "step": 5717 }, { "epoch": 2.0945314646881013, "grad_norm": 0.1326295411635131, "learning_rate": 2.7509453453250755e-05, "loss": 0.4596, "num_tokens": 4374066495.0, "step": 5718 }, { "epoch": 2.094897865714024, "grad_norm": 0.1799512777073439, "learning_rate": 2.7505385701111066e-05, "loss": 0.4853, "num_tokens": 4374851190.0, "step": 5719 }, { "epoch": 2.095264266739947, "grad_norm": 0.14955972557822872, "learning_rate": 2.7501317638772943e-05, "loss": 0.464, "num_tokens": 4375593070.0, "step": 5720 }, { "epoch": 2.0956306677658696, "grad_norm": 0.14354579284301897, "learning_rate": 2.7497249266465593e-05, "loss": 0.4578, "num_tokens": 4376374090.0, "step": 5721 }, { "epoch": 2.095997068791793, "grad_norm": 0.16088492323404238, "learning_rate": 2.749318058441825e-05, "loss": 0.4528, "num_tokens": 4377222201.0, "step": 5722 }, { "epoch": 2.0963634698177156, "grad_norm": 0.13108394154748054, "learning_rate": 2.7489111592860167e-05, "loss": 0.4197, "num_tokens": 4377967909.0, "step": 5723 }, { "epoch": 2.0967298708436384, "grad_norm": 0.14006785828195467, "learning_rate": 2.7485042292020606e-05, "loss": 0.4398, "num_tokens": 4378849235.0, "step": 5724 }, { "epoch": 2.097096271869561, "grad_norm": 0.15163673094062505, "learning_rate": 2.748097268212885e-05, "loss": 0.452, "num_tokens": 4379412845.0, "step": 5725 }, { "epoch": 2.097462672895484, "grad_norm": 0.15057687775793693, "learning_rate": 2.7476902763414197e-05, "loss": 0.4157, "num_tokens": 4380205764.0, "step": 5726 }, { "epoch": 2.097829073921407, "grad_norm": 0.1342894369737949, "learning_rate": 2.747283253610597e-05, "loss": 0.4583, "num_tokens": 4380978936.0, "step": 5727 }, { "epoch": 2.09819547494733, "grad_norm": 0.14011059497208564, "learning_rate": 2.746876200043349e-05, "loss": 0.4476, "num_tokens": 4381752819.0, "step": 5728 }, { "epoch": 2.0985618759732527, "grad_norm": 0.13894805079189446, "learning_rate": 2.7464691156626136e-05, "loss": 0.4651, "num_tokens": 4382431859.0, "step": 5729 }, { "epoch": 2.0989282769991755, "grad_norm": 0.1388810049005939, "learning_rate": 2.7460620004913256e-05, "loss": 0.4506, "num_tokens": 4383253279.0, "step": 5730 }, { "epoch": 2.0992946780250983, "grad_norm": 0.13229700614691672, "learning_rate": 2.7456548545524243e-05, "loss": 0.4874, "num_tokens": 4383966668.0, "step": 5731 }, { "epoch": 2.0996610790510215, "grad_norm": 0.14243152952457136, "learning_rate": 2.74524767786885e-05, "loss": 0.4299, "num_tokens": 4384746564.0, "step": 5732 }, { "epoch": 2.1000274800769443, "grad_norm": 0.1275478742042638, "learning_rate": 2.744840470463545e-05, "loss": 0.4363, "num_tokens": 4385577820.0, "step": 5733 }, { "epoch": 2.100393881102867, "grad_norm": 0.1395047268775088, "learning_rate": 2.744433232359454e-05, "loss": 0.463, "num_tokens": 4386157709.0, "step": 5734 }, { "epoch": 2.10076028212879, "grad_norm": 0.15735872306461052, "learning_rate": 2.744025963579522e-05, "loss": 0.4296, "num_tokens": 4386832790.0, "step": 5735 }, { "epoch": 2.1011266831547126, "grad_norm": 0.16058936638299443, "learning_rate": 2.7436186641466957e-05, "loss": 0.4292, "num_tokens": 4387636976.0, "step": 5736 }, { "epoch": 2.101493084180636, "grad_norm": 0.12856052388974062, "learning_rate": 2.743211334083925e-05, "loss": 0.4683, "num_tokens": 4388490932.0, "step": 5737 }, { "epoch": 2.1018594852065586, "grad_norm": 0.14269547209069944, "learning_rate": 2.7428039734141605e-05, "loss": 0.4222, "num_tokens": 4389271396.0, "step": 5738 }, { "epoch": 2.1022258862324814, "grad_norm": 0.1585300904711593, "learning_rate": 2.7423965821603546e-05, "loss": 0.4742, "num_tokens": 4390100206.0, "step": 5739 }, { "epoch": 2.102592287258404, "grad_norm": 0.14105445033831807, "learning_rate": 2.7419891603454624e-05, "loss": 0.4503, "num_tokens": 4390821530.0, "step": 5740 }, { "epoch": 2.1029586882843274, "grad_norm": 0.15282706094785886, "learning_rate": 2.7415817079924388e-05, "loss": 0.4514, "num_tokens": 4391517782.0, "step": 5741 }, { "epoch": 2.10332508931025, "grad_norm": 0.1246260514690737, "learning_rate": 2.7411742251242422e-05, "loss": 0.4083, "num_tokens": 4392288299.0, "step": 5742 }, { "epoch": 2.103691490336173, "grad_norm": 0.150576316698523, "learning_rate": 2.740766711763831e-05, "loss": 0.4521, "num_tokens": 4392983049.0, "step": 5743 }, { "epoch": 2.1040578913620958, "grad_norm": 0.14300225425175433, "learning_rate": 2.7403591679341683e-05, "loss": 0.4472, "num_tokens": 4393776206.0, "step": 5744 }, { "epoch": 2.1044242923880185, "grad_norm": 0.14424557364706378, "learning_rate": 2.739951593658215e-05, "loss": 0.4724, "num_tokens": 4394532754.0, "step": 5745 }, { "epoch": 2.1047906934139418, "grad_norm": 0.15745447231488338, "learning_rate": 2.7395439889589372e-05, "loss": 0.5035, "num_tokens": 4395206393.0, "step": 5746 }, { "epoch": 2.1051570944398645, "grad_norm": 0.14200466728024425, "learning_rate": 2.7391363538592996e-05, "loss": 0.4428, "num_tokens": 4396015040.0, "step": 5747 }, { "epoch": 2.1055234954657873, "grad_norm": 0.1673117962266979, "learning_rate": 2.738728688382272e-05, "loss": 0.4392, "num_tokens": 4396613808.0, "step": 5748 }, { "epoch": 2.10588989649171, "grad_norm": 0.1401722459216146, "learning_rate": 2.7383209925508233e-05, "loss": 0.5056, "num_tokens": 4397363098.0, "step": 5749 }, { "epoch": 2.106256297517633, "grad_norm": 0.1527513119366057, "learning_rate": 2.7379132663879238e-05, "loss": 0.4792, "num_tokens": 4398122823.0, "step": 5750 }, { "epoch": 2.106622698543556, "grad_norm": 0.15931899785675335, "learning_rate": 2.7375055099165483e-05, "loss": 0.4576, "num_tokens": 4398767083.0, "step": 5751 }, { "epoch": 2.106989099569479, "grad_norm": 0.15336503170885682, "learning_rate": 2.7370977231596724e-05, "loss": 0.4381, "num_tokens": 4399549902.0, "step": 5752 }, { "epoch": 2.1073555005954017, "grad_norm": 0.13534203476277096, "learning_rate": 2.7366899061402693e-05, "loss": 0.4381, "num_tokens": 4400363420.0, "step": 5753 }, { "epoch": 2.1077219016213244, "grad_norm": 0.15878634039897113, "learning_rate": 2.7362820588813205e-05, "loss": 0.4589, "num_tokens": 4401116343.0, "step": 5754 }, { "epoch": 2.1080883026472472, "grad_norm": 0.14804581634688183, "learning_rate": 2.7358741814058048e-05, "loss": 0.4449, "num_tokens": 4401873477.0, "step": 5755 }, { "epoch": 2.1084547036731704, "grad_norm": 0.1417389663781892, "learning_rate": 2.7354662737367036e-05, "loss": 0.4724, "num_tokens": 4402692172.0, "step": 5756 }, { "epoch": 2.1088211046990932, "grad_norm": 0.16072430587593986, "learning_rate": 2.7350583358970007e-05, "loss": 0.4685, "num_tokens": 4403463429.0, "step": 5757 }, { "epoch": 2.109187505725016, "grad_norm": 0.14097139572953854, "learning_rate": 2.7346503679096805e-05, "loss": 0.467, "num_tokens": 4404317499.0, "step": 5758 }, { "epoch": 2.109553906750939, "grad_norm": 0.14400516997562102, "learning_rate": 2.7342423697977304e-05, "loss": 0.4604, "num_tokens": 4405151788.0, "step": 5759 }, { "epoch": 2.1099203077768616, "grad_norm": 0.14398915926334024, "learning_rate": 2.733834341584139e-05, "loss": 0.4576, "num_tokens": 4405824190.0, "step": 5760 }, { "epoch": 2.110286708802785, "grad_norm": 0.1199446274567107, "learning_rate": 2.7334262832918962e-05, "loss": 0.44, "num_tokens": 4406511639.0, "step": 5761 }, { "epoch": 2.1106531098287076, "grad_norm": 0.1550710603712974, "learning_rate": 2.7330181949439935e-05, "loss": 0.4754, "num_tokens": 4407312825.0, "step": 5762 }, { "epoch": 2.1110195108546304, "grad_norm": 0.14577419249685206, "learning_rate": 2.732610076563425e-05, "loss": 0.467, "num_tokens": 4408111766.0, "step": 5763 }, { "epoch": 2.111385911880553, "grad_norm": 0.12978851038227338, "learning_rate": 2.732201928173186e-05, "loss": 0.4577, "num_tokens": 4408952808.0, "step": 5764 }, { "epoch": 2.111752312906476, "grad_norm": 0.15300870405941905, "learning_rate": 2.7317937497962724e-05, "loss": 0.4338, "num_tokens": 4409675401.0, "step": 5765 }, { "epoch": 2.112118713932399, "grad_norm": 0.13112551749927182, "learning_rate": 2.731385541455684e-05, "loss": 0.4699, "num_tokens": 4410361620.0, "step": 5766 }, { "epoch": 2.112485114958322, "grad_norm": 0.17036943259525344, "learning_rate": 2.730977303174421e-05, "loss": 0.4672, "num_tokens": 4411076354.0, "step": 5767 }, { "epoch": 2.1128515159842447, "grad_norm": 0.147164062156352, "learning_rate": 2.730569034975485e-05, "loss": 0.4463, "num_tokens": 4411729148.0, "step": 5768 }, { "epoch": 2.1132179170101675, "grad_norm": 0.1422134893258528, "learning_rate": 2.7301607368818798e-05, "loss": 0.4584, "num_tokens": 4412536984.0, "step": 5769 }, { "epoch": 2.1135843180360907, "grad_norm": 0.1354703053602096, "learning_rate": 2.72975240891661e-05, "loss": 0.4544, "num_tokens": 4413278214.0, "step": 5770 }, { "epoch": 2.1139507190620135, "grad_norm": 0.14632454426423216, "learning_rate": 2.7293440511026844e-05, "loss": 0.4633, "num_tokens": 4414006477.0, "step": 5771 }, { "epoch": 2.1143171200879363, "grad_norm": 0.13948690696066088, "learning_rate": 2.7289356634631107e-05, "loss": 0.4697, "num_tokens": 4414763760.0, "step": 5772 }, { "epoch": 2.114683521113859, "grad_norm": 0.15151616232531573, "learning_rate": 2.7285272460208987e-05, "loss": 0.4474, "num_tokens": 4415611375.0, "step": 5773 }, { "epoch": 2.115049922139782, "grad_norm": 0.13055849321821522, "learning_rate": 2.7281187987990617e-05, "loss": 0.4897, "num_tokens": 4416451828.0, "step": 5774 }, { "epoch": 2.115416323165705, "grad_norm": 0.16218624409334712, "learning_rate": 2.7277103218206128e-05, "loss": 0.4924, "num_tokens": 4417137250.0, "step": 5775 }, { "epoch": 2.115782724191628, "grad_norm": 0.16399562075335555, "learning_rate": 2.7273018151085674e-05, "loss": 0.4455, "num_tokens": 4417852485.0, "step": 5776 }, { "epoch": 2.1161491252175506, "grad_norm": 0.12610973586612353, "learning_rate": 2.726893278685943e-05, "loss": 0.4377, "num_tokens": 4418589173.0, "step": 5777 }, { "epoch": 2.1165155262434734, "grad_norm": 0.14907558901917026, "learning_rate": 2.7264847125757585e-05, "loss": 0.4833, "num_tokens": 4419381021.0, "step": 5778 }, { "epoch": 2.116881927269396, "grad_norm": 0.14782285374484144, "learning_rate": 2.7260761168010335e-05, "loss": 0.44, "num_tokens": 4420102292.0, "step": 5779 }, { "epoch": 2.1172483282953194, "grad_norm": 0.13873938145316853, "learning_rate": 2.725667491384791e-05, "loss": 0.4511, "num_tokens": 4420846473.0, "step": 5780 }, { "epoch": 2.117614729321242, "grad_norm": 0.14384751543694396, "learning_rate": 2.725258836350054e-05, "loss": 0.4733, "num_tokens": 4421562431.0, "step": 5781 }, { "epoch": 2.117981130347165, "grad_norm": 0.14117503940945236, "learning_rate": 2.7248501517198495e-05, "loss": 0.4183, "num_tokens": 4422299337.0, "step": 5782 }, { "epoch": 2.1183475313730877, "grad_norm": 0.14100018855906118, "learning_rate": 2.724441437517203e-05, "loss": 0.4935, "num_tokens": 4423178704.0, "step": 5783 }, { "epoch": 2.1187139323990105, "grad_norm": 0.14523421362190966, "learning_rate": 2.7240326937651434e-05, "loss": 0.4519, "num_tokens": 4423871070.0, "step": 5784 }, { "epoch": 2.1190803334249337, "grad_norm": 0.14197933742008884, "learning_rate": 2.7236239204867026e-05, "loss": 0.4568, "num_tokens": 4424761094.0, "step": 5785 }, { "epoch": 2.1194467344508565, "grad_norm": 0.15215139300432087, "learning_rate": 2.723215117704912e-05, "loss": 0.4531, "num_tokens": 4425545274.0, "step": 5786 }, { "epoch": 2.1198131354767793, "grad_norm": 0.12369858876193025, "learning_rate": 2.7228062854428047e-05, "loss": 0.4381, "num_tokens": 4426287815.0, "step": 5787 }, { "epoch": 2.120179536502702, "grad_norm": 0.15526720676408495, "learning_rate": 2.7223974237234173e-05, "loss": 0.4727, "num_tokens": 4427198941.0, "step": 5788 }, { "epoch": 2.1205459375286253, "grad_norm": 0.12877525386596664, "learning_rate": 2.721988532569786e-05, "loss": 0.4258, "num_tokens": 4427948768.0, "step": 5789 }, { "epoch": 2.120912338554548, "grad_norm": 0.13916116434600553, "learning_rate": 2.7215796120049502e-05, "loss": 0.4479, "num_tokens": 4428726074.0, "step": 5790 }, { "epoch": 2.121278739580471, "grad_norm": 0.14143029825335798, "learning_rate": 2.7211706620519497e-05, "loss": 0.4592, "num_tokens": 4429478076.0, "step": 5791 }, { "epoch": 2.1216451406063936, "grad_norm": 0.13823353235933103, "learning_rate": 2.7207616827338273e-05, "loss": 0.4777, "num_tokens": 4430176905.0, "step": 5792 }, { "epoch": 2.1220115416323164, "grad_norm": 0.1596415306995083, "learning_rate": 2.7203526740736264e-05, "loss": 0.4616, "num_tokens": 4430829097.0, "step": 5793 }, { "epoch": 2.1223779426582396, "grad_norm": 0.1488016113991348, "learning_rate": 2.719943636094393e-05, "loss": 0.4624, "num_tokens": 4431629718.0, "step": 5794 }, { "epoch": 2.1227443436841624, "grad_norm": 0.14048010916221348, "learning_rate": 2.719534568819173e-05, "loss": 0.472, "num_tokens": 4432371467.0, "step": 5795 }, { "epoch": 2.123110744710085, "grad_norm": 0.14651868050603756, "learning_rate": 2.719125472271016e-05, "loss": 0.4419, "num_tokens": 4433152591.0, "step": 5796 }, { "epoch": 2.123477145736008, "grad_norm": 0.14035548206427173, "learning_rate": 2.7187163464729717e-05, "loss": 0.469, "num_tokens": 4433949317.0, "step": 5797 }, { "epoch": 2.1238435467619308, "grad_norm": 0.16593385814357928, "learning_rate": 2.718307191448093e-05, "loss": 0.4561, "num_tokens": 4434714134.0, "step": 5798 }, { "epoch": 2.124209947787854, "grad_norm": 0.1391682140921459, "learning_rate": 2.7178980072194326e-05, "loss": 0.4548, "num_tokens": 4435439074.0, "step": 5799 }, { "epoch": 2.1245763488137768, "grad_norm": 0.14384100895982313, "learning_rate": 2.717488793810046e-05, "loss": 0.433, "num_tokens": 4436212025.0, "step": 5800 }, { "epoch": 2.1249427498396996, "grad_norm": 0.14964614338767088, "learning_rate": 2.7170795512429916e-05, "loss": 0.4443, "num_tokens": 4436972794.0, "step": 5801 }, { "epoch": 2.1253091508656223, "grad_norm": 0.1507471706854164, "learning_rate": 2.716670279541326e-05, "loss": 0.4279, "num_tokens": 4437688083.0, "step": 5802 }, { "epoch": 2.125675551891545, "grad_norm": 0.1343206482643634, "learning_rate": 2.71626097872811e-05, "loss": 0.4829, "num_tokens": 4438554131.0, "step": 5803 }, { "epoch": 2.1260419529174683, "grad_norm": 0.1395380280011528, "learning_rate": 2.715851648826406e-05, "loss": 0.4636, "num_tokens": 4439330455.0, "step": 5804 }, { "epoch": 2.126408353943391, "grad_norm": 0.15307104137880934, "learning_rate": 2.715442289859277e-05, "loss": 0.4606, "num_tokens": 4440087034.0, "step": 5805 }, { "epoch": 2.126774754969314, "grad_norm": 0.1264710531326442, "learning_rate": 2.7150329018497884e-05, "loss": 0.4388, "num_tokens": 4441000243.0, "step": 5806 }, { "epoch": 2.1271411559952367, "grad_norm": 0.13527464594952138, "learning_rate": 2.7146234848210063e-05, "loss": 0.4535, "num_tokens": 4441725547.0, "step": 5807 }, { "epoch": 2.1275075570211595, "grad_norm": 0.13845600562755309, "learning_rate": 2.7142140387960004e-05, "loss": 0.4353, "num_tokens": 4442471921.0, "step": 5808 }, { "epoch": 2.1278739580470827, "grad_norm": 0.138275234950689, "learning_rate": 2.713804563797841e-05, "loss": 0.45, "num_tokens": 4443274716.0, "step": 5809 }, { "epoch": 2.1282403590730055, "grad_norm": 0.14322558428079749, "learning_rate": 2.7133950598495972e-05, "loss": 0.4758, "num_tokens": 4444117246.0, "step": 5810 }, { "epoch": 2.1286067600989282, "grad_norm": 0.1207776307499461, "learning_rate": 2.7129855269743445e-05, "loss": 0.4333, "num_tokens": 4445008043.0, "step": 5811 }, { "epoch": 2.128973161124851, "grad_norm": 0.13459167927876672, "learning_rate": 2.712575965195157e-05, "loss": 0.4495, "num_tokens": 4445747283.0, "step": 5812 }, { "epoch": 2.129339562150774, "grad_norm": 0.15416515057210214, "learning_rate": 2.7121663745351125e-05, "loss": 0.4641, "num_tokens": 4446485789.0, "step": 5813 }, { "epoch": 2.129705963176697, "grad_norm": 0.13160651113704822, "learning_rate": 2.7117567550172876e-05, "loss": 0.4408, "num_tokens": 4447294860.0, "step": 5814 }, { "epoch": 2.13007236420262, "grad_norm": 0.15685655274763335, "learning_rate": 2.711347106664762e-05, "loss": 0.438, "num_tokens": 4448140994.0, "step": 5815 }, { "epoch": 2.1304387652285426, "grad_norm": 0.124559199757391, "learning_rate": 2.7109374295006197e-05, "loss": 0.4941, "num_tokens": 4448845188.0, "step": 5816 }, { "epoch": 2.1308051662544654, "grad_norm": 0.1577734937256051, "learning_rate": 2.7105277235479408e-05, "loss": 0.4537, "num_tokens": 4449539529.0, "step": 5817 }, { "epoch": 2.1311715672803886, "grad_norm": 0.15161542361502067, "learning_rate": 2.710117988829811e-05, "loss": 0.4438, "num_tokens": 4450296971.0, "step": 5818 }, { "epoch": 2.1315379683063114, "grad_norm": 0.13009502116590166, "learning_rate": 2.7097082253693177e-05, "loss": 0.4832, "num_tokens": 4450971498.0, "step": 5819 }, { "epoch": 2.131904369332234, "grad_norm": 0.14830292335077291, "learning_rate": 2.7092984331895477e-05, "loss": 0.4551, "num_tokens": 4451711632.0, "step": 5820 }, { "epoch": 2.132270770358157, "grad_norm": 0.14174272832850218, "learning_rate": 2.7088886123135913e-05, "loss": 0.4304, "num_tokens": 4452531705.0, "step": 5821 }, { "epoch": 2.1326371713840797, "grad_norm": 0.13396412767421248, "learning_rate": 2.7084787627645376e-05, "loss": 0.4342, "num_tokens": 4453290350.0, "step": 5822 }, { "epoch": 2.133003572410003, "grad_norm": 0.15039375011872647, "learning_rate": 2.7080688845654818e-05, "loss": 0.4553, "num_tokens": 4453916123.0, "step": 5823 }, { "epoch": 2.1333699734359257, "grad_norm": 0.15580735360905498, "learning_rate": 2.7076589777395178e-05, "loss": 0.4909, "num_tokens": 4454557309.0, "step": 5824 }, { "epoch": 2.1337363744618485, "grad_norm": 0.1598756784143912, "learning_rate": 2.7072490423097414e-05, "loss": 0.4587, "num_tokens": 4455255830.0, "step": 5825 }, { "epoch": 2.1341027754877713, "grad_norm": 0.12424111569877798, "learning_rate": 2.7068390782992495e-05, "loss": 0.447, "num_tokens": 4456010314.0, "step": 5826 }, { "epoch": 2.134469176513694, "grad_norm": 0.16284592641850235, "learning_rate": 2.7064290857311418e-05, "loss": 0.4514, "num_tokens": 4456849392.0, "step": 5827 }, { "epoch": 2.1348355775396173, "grad_norm": 0.14733908536652818, "learning_rate": 2.7060190646285202e-05, "loss": 0.4387, "num_tokens": 4457617288.0, "step": 5828 }, { "epoch": 2.13520197856554, "grad_norm": 0.14911606941436775, "learning_rate": 2.705609015014486e-05, "loss": 0.4689, "num_tokens": 4458443103.0, "step": 5829 }, { "epoch": 2.135568379591463, "grad_norm": 0.1317315698787807, "learning_rate": 2.7051989369121425e-05, "loss": 0.4496, "num_tokens": 4459169572.0, "step": 5830 }, { "epoch": 2.1359347806173856, "grad_norm": 0.144925159123278, "learning_rate": 2.704788830344597e-05, "loss": 0.4802, "num_tokens": 4459862106.0, "step": 5831 }, { "epoch": 2.1363011816433084, "grad_norm": 0.16885043420917628, "learning_rate": 2.7043786953349557e-05, "loss": 0.5087, "num_tokens": 4460483732.0, "step": 5832 }, { "epoch": 2.1366675826692316, "grad_norm": 0.15663373838382716, "learning_rate": 2.703968531906329e-05, "loss": 0.4679, "num_tokens": 4461274479.0, "step": 5833 }, { "epoch": 2.1370339836951544, "grad_norm": 0.14778618964388637, "learning_rate": 2.7035583400818253e-05, "loss": 0.4642, "num_tokens": 4461997270.0, "step": 5834 }, { "epoch": 2.137400384721077, "grad_norm": 0.16383314227187334, "learning_rate": 2.7031481198845577e-05, "loss": 0.4713, "num_tokens": 4462673687.0, "step": 5835 }, { "epoch": 2.137766785747, "grad_norm": 0.1514205243233693, "learning_rate": 2.7027378713376404e-05, "loss": 0.4618, "num_tokens": 4463404966.0, "step": 5836 }, { "epoch": 2.138133186772923, "grad_norm": 0.14128815681970303, "learning_rate": 2.7023275944641874e-05, "loss": 0.4351, "num_tokens": 4464151910.0, "step": 5837 }, { "epoch": 2.138499587798846, "grad_norm": 0.13598926926911295, "learning_rate": 2.7019172892873165e-05, "loss": 0.4503, "num_tokens": 4464902638.0, "step": 5838 }, { "epoch": 2.1388659888247687, "grad_norm": 0.1492856265009779, "learning_rate": 2.7015069558301455e-05, "loss": 0.4624, "num_tokens": 4465716779.0, "step": 5839 }, { "epoch": 2.1392323898506915, "grad_norm": 0.1448193893898694, "learning_rate": 2.7010965941157952e-05, "loss": 0.4646, "num_tokens": 4466515391.0, "step": 5840 }, { "epoch": 2.1395987908766143, "grad_norm": 0.1252619668222743, "learning_rate": 2.700686204167387e-05, "loss": 0.4654, "num_tokens": 4467254056.0, "step": 5841 }, { "epoch": 2.1399651919025375, "grad_norm": 0.15100365189627205, "learning_rate": 2.7002757860080433e-05, "loss": 0.4824, "num_tokens": 4467912001.0, "step": 5842 }, { "epoch": 2.1403315929284603, "grad_norm": 0.13134465624724884, "learning_rate": 2.6998653396608908e-05, "loss": 0.4633, "num_tokens": 4468681778.0, "step": 5843 }, { "epoch": 2.140697993954383, "grad_norm": 0.14103715614408538, "learning_rate": 2.6994548651490538e-05, "loss": 0.4471, "num_tokens": 4469441200.0, "step": 5844 }, { "epoch": 2.141064394980306, "grad_norm": 0.1300558472107553, "learning_rate": 2.6990443624956612e-05, "loss": 0.4385, "num_tokens": 4470159431.0, "step": 5845 }, { "epoch": 2.1414307960062287, "grad_norm": 0.13541830724365547, "learning_rate": 2.6986338317238433e-05, "loss": 0.4817, "num_tokens": 4471030913.0, "step": 5846 }, { "epoch": 2.141797197032152, "grad_norm": 0.14020703766239656, "learning_rate": 2.69822327285673e-05, "loss": 0.4486, "num_tokens": 4471833880.0, "step": 5847 }, { "epoch": 2.1421635980580747, "grad_norm": 0.12999804535959522, "learning_rate": 2.697812685917455e-05, "loss": 0.4491, "num_tokens": 4472644493.0, "step": 5848 }, { "epoch": 2.1425299990839974, "grad_norm": 0.13950838425204426, "learning_rate": 2.6974020709291523e-05, "loss": 0.467, "num_tokens": 4473373714.0, "step": 5849 }, { "epoch": 2.14289640010992, "grad_norm": 0.14938305181774658, "learning_rate": 2.6969914279149573e-05, "loss": 0.4849, "num_tokens": 4474128262.0, "step": 5850 }, { "epoch": 2.143262801135843, "grad_norm": 0.14264410215952833, "learning_rate": 2.6965807568980085e-05, "loss": 0.4348, "num_tokens": 4474966387.0, "step": 5851 }, { "epoch": 2.1436292021617662, "grad_norm": 0.11563403789987779, "learning_rate": 2.6961700579014443e-05, "loss": 0.4236, "num_tokens": 4475715836.0, "step": 5852 }, { "epoch": 2.143995603187689, "grad_norm": 0.14809461374173005, "learning_rate": 2.695759330948405e-05, "loss": 0.4578, "num_tokens": 4476621120.0, "step": 5853 }, { "epoch": 2.144362004213612, "grad_norm": 0.1513589598081424, "learning_rate": 2.6953485760620338e-05, "loss": 0.4384, "num_tokens": 4477329803.0, "step": 5854 }, { "epoch": 2.1447284052395346, "grad_norm": 0.1380467162430036, "learning_rate": 2.6949377932654744e-05, "loss": 0.4495, "num_tokens": 4478115598.0, "step": 5855 }, { "epoch": 2.1450948062654573, "grad_norm": 0.15293555553855082, "learning_rate": 2.6945269825818715e-05, "loss": 0.4323, "num_tokens": 4478764802.0, "step": 5856 }, { "epoch": 2.1454612072913806, "grad_norm": 0.15280915799231845, "learning_rate": 2.6941161440343714e-05, "loss": 0.4185, "num_tokens": 4479489670.0, "step": 5857 }, { "epoch": 2.1458276083173033, "grad_norm": 0.13844842493659992, "learning_rate": 2.693705277646125e-05, "loss": 0.4069, "num_tokens": 4480296508.0, "step": 5858 }, { "epoch": 2.146194009343226, "grad_norm": 0.15001472383557052, "learning_rate": 2.69329438344028e-05, "loss": 0.4482, "num_tokens": 4481177698.0, "step": 5859 }, { "epoch": 2.146560410369149, "grad_norm": 0.15908480529431926, "learning_rate": 2.6928834614399894e-05, "loss": 0.4995, "num_tokens": 4481812635.0, "step": 5860 }, { "epoch": 2.1469268113950717, "grad_norm": 0.14053047385206424, "learning_rate": 2.6924725116684055e-05, "loss": 0.4571, "num_tokens": 4482587971.0, "step": 5861 }, { "epoch": 2.147293212420995, "grad_norm": 0.13978804377195111, "learning_rate": 2.692061534148684e-05, "loss": 0.4522, "num_tokens": 4483492252.0, "step": 5862 }, { "epoch": 2.1476596134469177, "grad_norm": 0.14426826261630857, "learning_rate": 2.6916505289039804e-05, "loss": 0.4436, "num_tokens": 4484262612.0, "step": 5863 }, { "epoch": 2.1480260144728405, "grad_norm": 0.1291261997858813, "learning_rate": 2.691239495957453e-05, "loss": 0.4384, "num_tokens": 4485062746.0, "step": 5864 }, { "epoch": 2.1483924154987633, "grad_norm": 0.13897853338845095, "learning_rate": 2.6908284353322613e-05, "loss": 0.463, "num_tokens": 4485859998.0, "step": 5865 }, { "epoch": 2.1487588165246865, "grad_norm": 0.14527631872481983, "learning_rate": 2.6904173470515667e-05, "loss": 0.4735, "num_tokens": 4486681761.0, "step": 5866 }, { "epoch": 2.1491252175506093, "grad_norm": 0.13451673930526606, "learning_rate": 2.6900062311385306e-05, "loss": 0.4641, "num_tokens": 4487435926.0, "step": 5867 }, { "epoch": 2.149491618576532, "grad_norm": 0.13522991239401652, "learning_rate": 2.6895950876163187e-05, "loss": 0.443, "num_tokens": 4488100271.0, "step": 5868 }, { "epoch": 2.149858019602455, "grad_norm": 0.1414321552295666, "learning_rate": 2.6891839165080957e-05, "loss": 0.451, "num_tokens": 4488818876.0, "step": 5869 }, { "epoch": 2.1502244206283776, "grad_norm": 0.12649392129119216, "learning_rate": 2.6887727178370286e-05, "loss": 0.4513, "num_tokens": 4489678494.0, "step": 5870 }, { "epoch": 2.150590821654301, "grad_norm": 0.1458613512926564, "learning_rate": 2.688361491626287e-05, "loss": 0.462, "num_tokens": 4490436958.0, "step": 5871 }, { "epoch": 2.1509572226802236, "grad_norm": 0.1379790221507782, "learning_rate": 2.6879502378990406e-05, "loss": 0.5073, "num_tokens": 4491174075.0, "step": 5872 }, { "epoch": 2.1513236237061464, "grad_norm": 0.15757082356018223, "learning_rate": 2.6875389566784616e-05, "loss": 0.4607, "num_tokens": 4492014446.0, "step": 5873 }, { "epoch": 2.151690024732069, "grad_norm": 0.12996997160260906, "learning_rate": 2.6871276479877235e-05, "loss": 0.4497, "num_tokens": 4492835582.0, "step": 5874 }, { "epoch": 2.152056425757992, "grad_norm": 0.13244436006516438, "learning_rate": 2.6867163118500016e-05, "loss": 0.4748, "num_tokens": 4493551704.0, "step": 5875 }, { "epoch": 2.152422826783915, "grad_norm": 0.1616632325553454, "learning_rate": 2.686304948288472e-05, "loss": 0.4943, "num_tokens": 4494268009.0, "step": 5876 }, { "epoch": 2.152789227809838, "grad_norm": 0.13713167375468663, "learning_rate": 2.6858935573263124e-05, "loss": 0.4474, "num_tokens": 4495071684.0, "step": 5877 }, { "epoch": 2.1531556288357607, "grad_norm": 0.14176855999951565, "learning_rate": 2.6854821389867036e-05, "loss": 0.4597, "num_tokens": 4495789378.0, "step": 5878 }, { "epoch": 2.1535220298616835, "grad_norm": 0.14548296755238252, "learning_rate": 2.685070693292825e-05, "loss": 0.4709, "num_tokens": 4496578340.0, "step": 5879 }, { "epoch": 2.1538884308876063, "grad_norm": 0.1504563819790175, "learning_rate": 2.6846592202678616e-05, "loss": 0.4208, "num_tokens": 4497320684.0, "step": 5880 }, { "epoch": 2.1542548319135295, "grad_norm": 0.15139448053394858, "learning_rate": 2.684247719934996e-05, "loss": 0.4429, "num_tokens": 4498048723.0, "step": 5881 }, { "epoch": 2.1546212329394523, "grad_norm": 0.14828655200062735, "learning_rate": 2.6838361923174145e-05, "loss": 0.4536, "num_tokens": 4498665676.0, "step": 5882 }, { "epoch": 2.154987633965375, "grad_norm": 0.14919103094041286, "learning_rate": 2.683424637438304e-05, "loss": 0.4619, "num_tokens": 4499455400.0, "step": 5883 }, { "epoch": 2.155354034991298, "grad_norm": 0.16277488939643034, "learning_rate": 2.6830130553208542e-05, "loss": 0.4559, "num_tokens": 4500307926.0, "step": 5884 }, { "epoch": 2.155720436017221, "grad_norm": 0.1363064482436458, "learning_rate": 2.6826014459882554e-05, "loss": 0.4564, "num_tokens": 4501078837.0, "step": 5885 }, { "epoch": 2.156086837043144, "grad_norm": 0.15380136262826602, "learning_rate": 2.682189809463699e-05, "loss": 0.4516, "num_tokens": 4501805340.0, "step": 5886 }, { "epoch": 2.1564532380690666, "grad_norm": 0.17305651690854332, "learning_rate": 2.6817781457703778e-05, "loss": 0.5043, "num_tokens": 4502591238.0, "step": 5887 }, { "epoch": 2.1568196390949894, "grad_norm": 0.13384008563023714, "learning_rate": 2.681366454931489e-05, "loss": 0.4592, "num_tokens": 4503343355.0, "step": 5888 }, { "epoch": 2.157186040120912, "grad_norm": 0.14762015676146034, "learning_rate": 2.680954736970227e-05, "loss": 0.4561, "num_tokens": 4504074006.0, "step": 5889 }, { "epoch": 2.1575524411468354, "grad_norm": 0.15358158820071618, "learning_rate": 2.680542991909791e-05, "loss": 0.4344, "num_tokens": 4504832578.0, "step": 5890 }, { "epoch": 2.157918842172758, "grad_norm": 0.13884212815450875, "learning_rate": 2.68013121977338e-05, "loss": 0.4578, "num_tokens": 4505663212.0, "step": 5891 }, { "epoch": 2.158285243198681, "grad_norm": 0.13871925569885046, "learning_rate": 2.679719420584195e-05, "loss": 0.4654, "num_tokens": 4506345612.0, "step": 5892 }, { "epoch": 2.1586516442246038, "grad_norm": 0.1395774966211208, "learning_rate": 2.6793075943654403e-05, "loss": 0.459, "num_tokens": 4507035340.0, "step": 5893 }, { "epoch": 2.1590180452505265, "grad_norm": 0.15563783418382965, "learning_rate": 2.678895741140318e-05, "loss": 0.4355, "num_tokens": 4507764057.0, "step": 5894 }, { "epoch": 2.1593844462764498, "grad_norm": 0.12190674068191042, "learning_rate": 2.6784838609320346e-05, "loss": 0.434, "num_tokens": 4508613679.0, "step": 5895 }, { "epoch": 2.1597508473023725, "grad_norm": 0.13034587248051982, "learning_rate": 2.6780719537637976e-05, "loss": 0.4711, "num_tokens": 4509338593.0, "step": 5896 }, { "epoch": 2.1601172483282953, "grad_norm": 0.1573791340463801, "learning_rate": 2.6776600196588148e-05, "loss": 0.4385, "num_tokens": 4510097358.0, "step": 5897 }, { "epoch": 2.160483649354218, "grad_norm": 0.11326960200877709, "learning_rate": 2.6772480586402973e-05, "loss": 0.4325, "num_tokens": 4510775403.0, "step": 5898 }, { "epoch": 2.160850050380141, "grad_norm": 0.14669392396206982, "learning_rate": 2.676836070731456e-05, "loss": 0.4308, "num_tokens": 4511530987.0, "step": 5899 }, { "epoch": 2.161216451406064, "grad_norm": 0.12752727863620764, "learning_rate": 2.676424055955506e-05, "loss": 0.4539, "num_tokens": 4512264625.0, "step": 5900 }, { "epoch": 2.161582852431987, "grad_norm": 0.14033188860603227, "learning_rate": 2.6760120143356596e-05, "loss": 0.4177, "num_tokens": 4512976843.0, "step": 5901 }, { "epoch": 2.1619492534579097, "grad_norm": 0.138513637488408, "learning_rate": 2.6755999458951348e-05, "loss": 0.4616, "num_tokens": 4513678405.0, "step": 5902 }, { "epoch": 2.1623156544838324, "grad_norm": 0.13832279637322287, "learning_rate": 2.6751878506571497e-05, "loss": 0.435, "num_tokens": 4514510016.0, "step": 5903 }, { "epoch": 2.1626820555097552, "grad_norm": 0.1297432039897496, "learning_rate": 2.674775728644922e-05, "loss": 0.4992, "num_tokens": 4515282564.0, "step": 5904 }, { "epoch": 2.1630484565356785, "grad_norm": 0.13685833689805865, "learning_rate": 2.6743635798816737e-05, "loss": 0.4428, "num_tokens": 4516055162.0, "step": 5905 }, { "epoch": 2.1634148575616012, "grad_norm": 0.13915381323496448, "learning_rate": 2.6739514043906267e-05, "loss": 0.4486, "num_tokens": 4516799628.0, "step": 5906 }, { "epoch": 2.163781258587524, "grad_norm": 0.1385792178737731, "learning_rate": 2.6735392021950045e-05, "loss": 0.5068, "num_tokens": 4517593211.0, "step": 5907 }, { "epoch": 2.164147659613447, "grad_norm": 0.14938948778522995, "learning_rate": 2.6731269733180333e-05, "loss": 0.4312, "num_tokens": 4518319755.0, "step": 5908 }, { "epoch": 2.1645140606393696, "grad_norm": 0.13179232695434245, "learning_rate": 2.6727147177829396e-05, "loss": 0.4492, "num_tokens": 4519000723.0, "step": 5909 }, { "epoch": 2.164880461665293, "grad_norm": 0.15075486459141654, "learning_rate": 2.6723024356129518e-05, "loss": 0.4587, "num_tokens": 4519908953.0, "step": 5910 }, { "epoch": 2.1652468626912156, "grad_norm": 0.12513650527693404, "learning_rate": 2.6718901268312992e-05, "loss": 0.4067, "num_tokens": 4520658987.0, "step": 5911 }, { "epoch": 2.1656132637171384, "grad_norm": 0.14563955498454545, "learning_rate": 2.671477791461214e-05, "loss": 0.4268, "num_tokens": 4521451313.0, "step": 5912 }, { "epoch": 2.165979664743061, "grad_norm": 0.13041309989723818, "learning_rate": 2.671065429525929e-05, "loss": 0.5007, "num_tokens": 4522187147.0, "step": 5913 }, { "epoch": 2.166346065768984, "grad_norm": 0.145746308985762, "learning_rate": 2.6706530410486767e-05, "loss": 0.4713, "num_tokens": 4523004643.0, "step": 5914 }, { "epoch": 2.166712466794907, "grad_norm": 0.13456781381729685, "learning_rate": 2.6702406260526955e-05, "loss": 0.4592, "num_tokens": 4523773080.0, "step": 5915 }, { "epoch": 2.16707886782083, "grad_norm": 0.14290037313480933, "learning_rate": 2.6698281845612215e-05, "loss": 0.4714, "num_tokens": 4524548020.0, "step": 5916 }, { "epoch": 2.1674452688467527, "grad_norm": 0.13154320514007314, "learning_rate": 2.669415716597494e-05, "loss": 0.4497, "num_tokens": 4525326905.0, "step": 5917 }, { "epoch": 2.1678116698726755, "grad_norm": 0.14838872504206663, "learning_rate": 2.6690032221847525e-05, "loss": 0.4967, "num_tokens": 4525999420.0, "step": 5918 }, { "epoch": 2.1681780708985987, "grad_norm": 0.15184213122851223, "learning_rate": 2.66859070134624e-05, "loss": 0.451, "num_tokens": 4526743559.0, "step": 5919 }, { "epoch": 2.1685444719245215, "grad_norm": 0.16102359970878263, "learning_rate": 2.6681781541051982e-05, "loss": 0.4532, "num_tokens": 4527548934.0, "step": 5920 }, { "epoch": 2.1689108729504443, "grad_norm": 0.13729023204620064, "learning_rate": 2.6677655804848735e-05, "loss": 0.4516, "num_tokens": 4528342972.0, "step": 5921 }, { "epoch": 2.169277273976367, "grad_norm": 0.1413387052038263, "learning_rate": 2.6673529805085108e-05, "loss": 0.4728, "num_tokens": 4529027592.0, "step": 5922 }, { "epoch": 2.16964367500229, "grad_norm": 0.15648124131087462, "learning_rate": 2.66694035419936e-05, "loss": 0.4531, "num_tokens": 4529751824.0, "step": 5923 }, { "epoch": 2.170010076028213, "grad_norm": 0.15435895723318688, "learning_rate": 2.666527701580667e-05, "loss": 0.4417, "num_tokens": 4530533266.0, "step": 5924 }, { "epoch": 2.170376477054136, "grad_norm": 0.14270526682002344, "learning_rate": 2.666115022675685e-05, "loss": 0.4425, "num_tokens": 4531241582.0, "step": 5925 }, { "epoch": 2.1707428780800586, "grad_norm": 0.12910628844846872, "learning_rate": 2.6657023175076657e-05, "loss": 0.4397, "num_tokens": 4532003534.0, "step": 5926 }, { "epoch": 2.1711092791059814, "grad_norm": 0.15698571722881427, "learning_rate": 2.665289586099863e-05, "loss": 0.4531, "num_tokens": 4532687567.0, "step": 5927 }, { "epoch": 2.171475680131904, "grad_norm": 0.15256290277880044, "learning_rate": 2.6648768284755316e-05, "loss": 0.464, "num_tokens": 4533576306.0, "step": 5928 }, { "epoch": 2.1718420811578274, "grad_norm": 0.11649654283327732, "learning_rate": 2.6644640446579275e-05, "loss": 0.4416, "num_tokens": 4534450355.0, "step": 5929 }, { "epoch": 2.17220848218375, "grad_norm": 0.1481263620887106, "learning_rate": 2.66405123467031e-05, "loss": 0.4777, "num_tokens": 4535256161.0, "step": 5930 }, { "epoch": 2.172574883209673, "grad_norm": 0.13490020702921185, "learning_rate": 2.663638398535939e-05, "loss": 0.4609, "num_tokens": 4536075851.0, "step": 5931 }, { "epoch": 2.1729412842355957, "grad_norm": 0.11829090412041364, "learning_rate": 2.6632255362780745e-05, "loss": 0.4398, "num_tokens": 4536894576.0, "step": 5932 }, { "epoch": 2.173307685261519, "grad_norm": 0.15404147781412114, "learning_rate": 2.662812647919979e-05, "loss": 0.4659, "num_tokens": 4537762906.0, "step": 5933 }, { "epoch": 2.1736740862874417, "grad_norm": 0.14000835696935154, "learning_rate": 2.6623997334849166e-05, "loss": 0.4398, "num_tokens": 4538442394.0, "step": 5934 }, { "epoch": 2.1740404873133645, "grad_norm": 0.13805991842554008, "learning_rate": 2.6619867929961544e-05, "loss": 0.4643, "num_tokens": 4539179128.0, "step": 5935 }, { "epoch": 2.1744068883392873, "grad_norm": 0.15541588888809127, "learning_rate": 2.6615738264769567e-05, "loss": 0.4726, "num_tokens": 4540009655.0, "step": 5936 }, { "epoch": 2.17477328936521, "grad_norm": 0.14275790052416704, "learning_rate": 2.6611608339505933e-05, "loss": 0.4685, "num_tokens": 4540684366.0, "step": 5937 }, { "epoch": 2.1751396903911333, "grad_norm": 0.15439438384150972, "learning_rate": 2.6607478154403348e-05, "loss": 0.4567, "num_tokens": 4541408853.0, "step": 5938 }, { "epoch": 2.175506091417056, "grad_norm": 0.15783433853932738, "learning_rate": 2.660334770969451e-05, "loss": 0.4263, "num_tokens": 4542181133.0, "step": 5939 }, { "epoch": 2.175872492442979, "grad_norm": 0.13671492070732164, "learning_rate": 2.6599217005612157e-05, "loss": 0.4959, "num_tokens": 4543005223.0, "step": 5940 }, { "epoch": 2.1762388934689016, "grad_norm": 0.13813218572616928, "learning_rate": 2.6595086042389026e-05, "loss": 0.4471, "num_tokens": 4543782587.0, "step": 5941 }, { "epoch": 2.1766052944948244, "grad_norm": 0.14056783496834035, "learning_rate": 2.659095482025788e-05, "loss": 0.4676, "num_tokens": 4544545169.0, "step": 5942 }, { "epoch": 2.1769716955207477, "grad_norm": 0.15178783788240846, "learning_rate": 2.6586823339451493e-05, "loss": 0.4758, "num_tokens": 4545221760.0, "step": 5943 }, { "epoch": 2.1773380965466704, "grad_norm": 0.1794448459862802, "learning_rate": 2.6582691600202638e-05, "loss": 0.4583, "num_tokens": 4545922799.0, "step": 5944 }, { "epoch": 2.177704497572593, "grad_norm": 0.1423120621371003, "learning_rate": 2.6578559602744123e-05, "loss": 0.4499, "num_tokens": 4546661134.0, "step": 5945 }, { "epoch": 2.178070898598516, "grad_norm": 0.14354975802366063, "learning_rate": 2.6574427347308772e-05, "loss": 0.4709, "num_tokens": 4547468590.0, "step": 5946 }, { "epoch": 2.1784372996244388, "grad_norm": 0.150700900330128, "learning_rate": 2.6570294834129402e-05, "loss": 0.4553, "num_tokens": 4548154722.0, "step": 5947 }, { "epoch": 2.178803700650362, "grad_norm": 0.12014952673502376, "learning_rate": 2.656616206343887e-05, "loss": 0.4292, "num_tokens": 4548973890.0, "step": 5948 }, { "epoch": 2.1791701016762848, "grad_norm": 0.15743221906878893, "learning_rate": 2.6562029035470023e-05, "loss": 0.4546, "num_tokens": 4549631967.0, "step": 5949 }, { "epoch": 2.1795365027022076, "grad_norm": 0.15743250643831974, "learning_rate": 2.6557895750455743e-05, "loss": 0.436, "num_tokens": 4550441992.0, "step": 5950 }, { "epoch": 2.1799029037281303, "grad_norm": 0.13880035631991897, "learning_rate": 2.6553762208628907e-05, "loss": 0.4584, "num_tokens": 4551131642.0, "step": 5951 }, { "epoch": 2.180269304754053, "grad_norm": 0.13699628850901485, "learning_rate": 2.6549628410222434e-05, "loss": 0.4591, "num_tokens": 4551970130.0, "step": 5952 }, { "epoch": 2.1806357057799763, "grad_norm": 0.13753462755843102, "learning_rate": 2.6545494355469234e-05, "loss": 0.4405, "num_tokens": 4552705427.0, "step": 5953 }, { "epoch": 2.181002106805899, "grad_norm": 0.1492912780643155, "learning_rate": 2.6541360044602228e-05, "loss": 0.4323, "num_tokens": 4553510976.0, "step": 5954 }, { "epoch": 2.181368507831822, "grad_norm": 0.1405027644208479, "learning_rate": 2.6537225477854378e-05, "loss": 0.4416, "num_tokens": 4554151389.0, "step": 5955 }, { "epoch": 2.1817349088577447, "grad_norm": 0.14594482998881667, "learning_rate": 2.6533090655458633e-05, "loss": 0.4754, "num_tokens": 4554921448.0, "step": 5956 }, { "epoch": 2.1821013098836675, "grad_norm": 0.15263476073150672, "learning_rate": 2.6528955577647976e-05, "loss": 0.4753, "num_tokens": 4555634829.0, "step": 5957 }, { "epoch": 2.1824677109095907, "grad_norm": 0.13062173660366203, "learning_rate": 2.6524820244655386e-05, "loss": 0.4784, "num_tokens": 4556323319.0, "step": 5958 }, { "epoch": 2.1828341119355135, "grad_norm": 0.14862914243099337, "learning_rate": 2.652068465671388e-05, "loss": 0.4564, "num_tokens": 4557012464.0, "step": 5959 }, { "epoch": 2.1832005129614362, "grad_norm": 0.14716942581788134, "learning_rate": 2.6516548814056468e-05, "loss": 0.4522, "num_tokens": 4557770303.0, "step": 5960 }, { "epoch": 2.183566913987359, "grad_norm": 0.13234486945816817, "learning_rate": 2.6512412716916178e-05, "loss": 0.4469, "num_tokens": 4558505181.0, "step": 5961 }, { "epoch": 2.183933315013282, "grad_norm": 0.15798295784513544, "learning_rate": 2.650827636552606e-05, "loss": 0.511, "num_tokens": 4559089286.0, "step": 5962 }, { "epoch": 2.184299716039205, "grad_norm": 0.16670927465484378, "learning_rate": 2.650413976011917e-05, "loss": 0.4432, "num_tokens": 4559813855.0, "step": 5963 }, { "epoch": 2.184666117065128, "grad_norm": 0.13948903426931827, "learning_rate": 2.6500002900928596e-05, "loss": 0.5039, "num_tokens": 4560448466.0, "step": 5964 }, { "epoch": 2.1850325180910506, "grad_norm": 0.15028784430486886, "learning_rate": 2.6495865788187423e-05, "loss": 0.4591, "num_tokens": 4561267306.0, "step": 5965 }, { "epoch": 2.1853989191169734, "grad_norm": 0.1381126386154254, "learning_rate": 2.6491728422128745e-05, "loss": 0.4334, "num_tokens": 4562017014.0, "step": 5966 }, { "epoch": 2.1857653201428966, "grad_norm": 0.1443750286705702, "learning_rate": 2.6487590802985695e-05, "loss": 0.4259, "num_tokens": 4562877247.0, "step": 5967 }, { "epoch": 2.1861317211688194, "grad_norm": 0.13279810328806382, "learning_rate": 2.648345293099139e-05, "loss": 0.4766, "num_tokens": 4563568805.0, "step": 5968 }, { "epoch": 2.186498122194742, "grad_norm": 0.1695311966429347, "learning_rate": 2.6479314806378985e-05, "loss": 0.4822, "num_tokens": 4564291538.0, "step": 5969 }, { "epoch": 2.186864523220665, "grad_norm": 0.1343972511728543, "learning_rate": 2.6475176429381643e-05, "loss": 0.4554, "num_tokens": 4565119860.0, "step": 5970 }, { "epoch": 2.1872309242465877, "grad_norm": 0.14943103822781229, "learning_rate": 2.647103780023253e-05, "loss": 0.4616, "num_tokens": 4565951227.0, "step": 5971 }, { "epoch": 2.187597325272511, "grad_norm": 0.16024184060372704, "learning_rate": 2.646689891916484e-05, "loss": 0.4346, "num_tokens": 4566747422.0, "step": 5972 }, { "epoch": 2.1879637262984337, "grad_norm": 0.11995518894311878, "learning_rate": 2.6462759786411783e-05, "loss": 0.4722, "num_tokens": 4567470585.0, "step": 5973 }, { "epoch": 2.1883301273243565, "grad_norm": 0.15459701001657847, "learning_rate": 2.6458620402206564e-05, "loss": 0.4461, "num_tokens": 4568275092.0, "step": 5974 }, { "epoch": 2.1886965283502793, "grad_norm": 0.1304345851227648, "learning_rate": 2.6454480766782428e-05, "loss": 0.4541, "num_tokens": 4569053854.0, "step": 5975 }, { "epoch": 2.189062929376202, "grad_norm": 0.14375685579171518, "learning_rate": 2.6450340880372604e-05, "loss": 0.4321, "num_tokens": 4569789775.0, "step": 5976 }, { "epoch": 2.1894293304021253, "grad_norm": 0.13080166908929874, "learning_rate": 2.644620074321037e-05, "loss": 0.4545, "num_tokens": 4570651390.0, "step": 5977 }, { "epoch": 2.189795731428048, "grad_norm": 0.1394684679291188, "learning_rate": 2.6442060355528987e-05, "loss": 0.4756, "num_tokens": 4571487243.0, "step": 5978 }, { "epoch": 2.190162132453971, "grad_norm": 0.1385211416169393, "learning_rate": 2.6437919717561754e-05, "loss": 0.4578, "num_tokens": 4572122254.0, "step": 5979 }, { "epoch": 2.1905285334798936, "grad_norm": 0.14989690405993933, "learning_rate": 2.643377882954197e-05, "loss": 0.4841, "num_tokens": 4572913108.0, "step": 5980 }, { "epoch": 2.190894934505817, "grad_norm": 0.1610511928059301, "learning_rate": 2.6429637691702937e-05, "loss": 0.4951, "num_tokens": 4573699375.0, "step": 5981 }, { "epoch": 2.1912613355317396, "grad_norm": 0.14043107171614297, "learning_rate": 2.642549630427801e-05, "loss": 0.4549, "num_tokens": 4574542124.0, "step": 5982 }, { "epoch": 2.1916277365576624, "grad_norm": 0.14947868289991803, "learning_rate": 2.642135466750051e-05, "loss": 0.4683, "num_tokens": 4575184072.0, "step": 5983 }, { "epoch": 2.191994137583585, "grad_norm": 0.15502291155848105, "learning_rate": 2.641721278160382e-05, "loss": 0.4539, "num_tokens": 4575946825.0, "step": 5984 }, { "epoch": 2.192360538609508, "grad_norm": 0.14152817703953285, "learning_rate": 2.641307064682129e-05, "loss": 0.418, "num_tokens": 4576817269.0, "step": 5985 }, { "epoch": 2.192726939635431, "grad_norm": 0.14638668641629898, "learning_rate": 2.640892826338631e-05, "loss": 0.5132, "num_tokens": 4577630982.0, "step": 5986 }, { "epoch": 2.193093340661354, "grad_norm": 0.1512649486539466, "learning_rate": 2.64047856315323e-05, "loss": 0.4392, "num_tokens": 4578310038.0, "step": 5987 }, { "epoch": 2.1934597416872768, "grad_norm": 0.1647266259923956, "learning_rate": 2.6400642751492656e-05, "loss": 0.447, "num_tokens": 4579059241.0, "step": 5988 }, { "epoch": 2.1938261427131995, "grad_norm": 0.1368862157901119, "learning_rate": 2.639649962350081e-05, "loss": 0.4829, "num_tokens": 4579828318.0, "step": 5989 }, { "epoch": 2.1941925437391223, "grad_norm": 0.137857389331123, "learning_rate": 2.6392356247790213e-05, "loss": 0.465, "num_tokens": 4580529174.0, "step": 5990 }, { "epoch": 2.1945589447650455, "grad_norm": 0.14922725651972246, "learning_rate": 2.6388212624594308e-05, "loss": 0.4495, "num_tokens": 4581359896.0, "step": 5991 }, { "epoch": 2.1949253457909683, "grad_norm": 0.13563950522115834, "learning_rate": 2.6384068754146582e-05, "loss": 0.4623, "num_tokens": 4582042447.0, "step": 5992 }, { "epoch": 2.195291746816891, "grad_norm": 0.1307460359734335, "learning_rate": 2.63799246366805e-05, "loss": 0.4332, "num_tokens": 4582895310.0, "step": 5993 }, { "epoch": 2.195658147842814, "grad_norm": 0.1617463693081791, "learning_rate": 2.6375780272429578e-05, "loss": 0.4887, "num_tokens": 4583562872.0, "step": 5994 }, { "epoch": 2.1960245488687367, "grad_norm": 0.13753420884846307, "learning_rate": 2.637163566162732e-05, "loss": 0.4379, "num_tokens": 4584315946.0, "step": 5995 }, { "epoch": 2.19639094989466, "grad_norm": 0.1442762856088673, "learning_rate": 2.6367490804507255e-05, "loss": 0.4807, "num_tokens": 4585103103.0, "step": 5996 }, { "epoch": 2.1967573509205827, "grad_norm": 0.12858958520339211, "learning_rate": 2.636334570130292e-05, "loss": 0.4646, "num_tokens": 4585854354.0, "step": 5997 }, { "epoch": 2.1971237519465054, "grad_norm": 0.14861020747050036, "learning_rate": 2.6359200352247863e-05, "loss": 0.4765, "num_tokens": 4586601151.0, "step": 5998 }, { "epoch": 2.1974901529724282, "grad_norm": 0.1365563787808127, "learning_rate": 2.6355054757575672e-05, "loss": 0.4574, "num_tokens": 4587447178.0, "step": 5999 }, { "epoch": 2.197856553998351, "grad_norm": 0.1322401666893104, "learning_rate": 2.635090891751991e-05, "loss": 0.4725, "num_tokens": 4588177940.0, "step": 6000 }, { "epoch": 2.1982229550242742, "grad_norm": 0.14906826680415236, "learning_rate": 2.6346762832314174e-05, "loss": 0.4557, "num_tokens": 4588830703.0, "step": 6001 }, { "epoch": 2.198589356050197, "grad_norm": 0.14868889503197566, "learning_rate": 2.634261650219209e-05, "loss": 0.4591, "num_tokens": 4589504841.0, "step": 6002 }, { "epoch": 2.19895575707612, "grad_norm": 0.13809372715358745, "learning_rate": 2.6338469927387253e-05, "loss": 0.4531, "num_tokens": 4590299612.0, "step": 6003 }, { "epoch": 2.1993221581020426, "grad_norm": 0.13807943038453624, "learning_rate": 2.6334323108133326e-05, "loss": 0.4272, "num_tokens": 4591159493.0, "step": 6004 }, { "epoch": 2.1996885591279653, "grad_norm": 0.12170272417598695, "learning_rate": 2.633017604466395e-05, "loss": 0.4468, "num_tokens": 4591956227.0, "step": 6005 }, { "epoch": 2.2000549601538886, "grad_norm": 0.135263783030747, "learning_rate": 2.6326028737212776e-05, "loss": 0.4829, "num_tokens": 4592721961.0, "step": 6006 }, { "epoch": 2.2004213611798114, "grad_norm": 0.1398914093042481, "learning_rate": 2.6321881186013515e-05, "loss": 0.4794, "num_tokens": 4593383027.0, "step": 6007 }, { "epoch": 2.200787762205734, "grad_norm": 0.15523971513445997, "learning_rate": 2.631773339129982e-05, "loss": 0.4492, "num_tokens": 4594208972.0, "step": 6008 }, { "epoch": 2.201154163231657, "grad_norm": 0.14091411618743596, "learning_rate": 2.6313585353305425e-05, "loss": 0.4295, "num_tokens": 4594919214.0, "step": 6009 }, { "epoch": 2.2015205642575797, "grad_norm": 0.14365914108637204, "learning_rate": 2.6309437072264037e-05, "loss": 0.4515, "num_tokens": 4595678667.0, "step": 6010 }, { "epoch": 2.201886965283503, "grad_norm": 0.12740121511907357, "learning_rate": 2.6305288548409392e-05, "loss": 0.4414, "num_tokens": 4596408747.0, "step": 6011 }, { "epoch": 2.2022533663094257, "grad_norm": 0.14241442883318417, "learning_rate": 2.6301139781975235e-05, "loss": 0.464, "num_tokens": 4597300945.0, "step": 6012 }, { "epoch": 2.2026197673353485, "grad_norm": 0.1343678963531498, "learning_rate": 2.6296990773195322e-05, "loss": 0.4853, "num_tokens": 4598094611.0, "step": 6013 }, { "epoch": 2.2029861683612713, "grad_norm": 0.16442673600957777, "learning_rate": 2.6292841522303438e-05, "loss": 0.5111, "num_tokens": 4598765426.0, "step": 6014 }, { "epoch": 2.2033525693871945, "grad_norm": 0.13928826059616226, "learning_rate": 2.6288692029533365e-05, "loss": 0.5087, "num_tokens": 4599528876.0, "step": 6015 }, { "epoch": 2.2037189704131173, "grad_norm": 0.14545671165669713, "learning_rate": 2.6284542295118907e-05, "loss": 0.452, "num_tokens": 4600236425.0, "step": 6016 }, { "epoch": 2.20408537143904, "grad_norm": 0.14443242283939864, "learning_rate": 2.628039231929388e-05, "loss": 0.4794, "num_tokens": 4601099288.0, "step": 6017 }, { "epoch": 2.204451772464963, "grad_norm": 0.1375752143441506, "learning_rate": 2.6276242102292092e-05, "loss": 0.4725, "num_tokens": 4601787148.0, "step": 6018 }, { "epoch": 2.2048181734908856, "grad_norm": 0.11979694245274426, "learning_rate": 2.6272091644347417e-05, "loss": 0.4684, "num_tokens": 4602692432.0, "step": 6019 }, { "epoch": 2.205184574516809, "grad_norm": 0.1432785107857501, "learning_rate": 2.6267940945693687e-05, "loss": 0.4941, "num_tokens": 4603344423.0, "step": 6020 }, { "epoch": 2.2055509755427316, "grad_norm": 0.13647879814102168, "learning_rate": 2.6263790006564783e-05, "loss": 0.4629, "num_tokens": 4604073479.0, "step": 6021 }, { "epoch": 2.2059173765686544, "grad_norm": 0.15360368894522508, "learning_rate": 2.625963882719459e-05, "loss": 0.484, "num_tokens": 4604848512.0, "step": 6022 }, { "epoch": 2.206283777594577, "grad_norm": 0.14157449644225453, "learning_rate": 2.625548740781699e-05, "loss": 0.4459, "num_tokens": 4605629920.0, "step": 6023 }, { "epoch": 2.2066501786205, "grad_norm": 0.13425207834930578, "learning_rate": 2.6251335748665906e-05, "loss": 0.4676, "num_tokens": 4606313829.0, "step": 6024 }, { "epoch": 2.207016579646423, "grad_norm": 0.1306024177753984, "learning_rate": 2.6247183849975254e-05, "loss": 0.4291, "num_tokens": 4607061296.0, "step": 6025 }, { "epoch": 2.207382980672346, "grad_norm": 0.13836054308375195, "learning_rate": 2.6243031711978983e-05, "loss": 0.4525, "num_tokens": 4607853519.0, "step": 6026 }, { "epoch": 2.2077493816982687, "grad_norm": 0.12139400419812162, "learning_rate": 2.6238879334911023e-05, "loss": 0.466, "num_tokens": 4608686617.0, "step": 6027 }, { "epoch": 2.2081157827241915, "grad_norm": 0.16204925084216137, "learning_rate": 2.623472671900535e-05, "loss": 0.5097, "num_tokens": 4609477745.0, "step": 6028 }, { "epoch": 2.2084821837501147, "grad_norm": 0.1429555125398714, "learning_rate": 2.623057386449594e-05, "loss": 0.4536, "num_tokens": 4610200504.0, "step": 6029 }, { "epoch": 2.2088485847760375, "grad_norm": 0.13431264677053317, "learning_rate": 2.6226420771616784e-05, "loss": 0.4511, "num_tokens": 4610925763.0, "step": 6030 }, { "epoch": 2.2092149858019603, "grad_norm": 0.15478382343412114, "learning_rate": 2.622226744060189e-05, "loss": 0.4548, "num_tokens": 4611648720.0, "step": 6031 }, { "epoch": 2.209581386827883, "grad_norm": 0.12906221797783687, "learning_rate": 2.6218113871685273e-05, "loss": 0.4374, "num_tokens": 4612470269.0, "step": 6032 }, { "epoch": 2.209947787853806, "grad_norm": 0.12532039215330446, "learning_rate": 2.6213960065100957e-05, "loss": 0.4615, "num_tokens": 4613209291.0, "step": 6033 }, { "epoch": 2.210314188879729, "grad_norm": 0.1445034672670066, "learning_rate": 2.6209806021082996e-05, "loss": 0.4706, "num_tokens": 4613983955.0, "step": 6034 }, { "epoch": 2.210680589905652, "grad_norm": 0.14029321657394841, "learning_rate": 2.6205651739865442e-05, "loss": 0.4384, "num_tokens": 4614769725.0, "step": 6035 }, { "epoch": 2.2110469909315746, "grad_norm": 0.14065074090118043, "learning_rate": 2.6201497221682364e-05, "loss": 0.4669, "num_tokens": 4615523628.0, "step": 6036 }, { "epoch": 2.2114133919574974, "grad_norm": 0.13974423325314228, "learning_rate": 2.6197342466767852e-05, "loss": 0.437, "num_tokens": 4616376293.0, "step": 6037 }, { "epoch": 2.21177979298342, "grad_norm": 0.13117191855518773, "learning_rate": 2.6193187475356004e-05, "loss": 0.471, "num_tokens": 4617170686.0, "step": 6038 }, { "epoch": 2.2121461940093434, "grad_norm": 0.13124573401206083, "learning_rate": 2.6189032247680925e-05, "loss": 0.4447, "num_tokens": 4618052374.0, "step": 6039 }, { "epoch": 2.212512595035266, "grad_norm": 0.1454277677849758, "learning_rate": 2.6184876783976743e-05, "loss": 0.4826, "num_tokens": 4618884911.0, "step": 6040 }, { "epoch": 2.212878996061189, "grad_norm": 0.13720729462126166, "learning_rate": 2.61807210844776e-05, "loss": 0.4521, "num_tokens": 4619665940.0, "step": 6041 }, { "epoch": 2.2132453970871118, "grad_norm": 0.14639760172084754, "learning_rate": 2.6176565149417642e-05, "loss": 0.4815, "num_tokens": 4620531224.0, "step": 6042 }, { "epoch": 2.2136117981130345, "grad_norm": 0.1317405833693058, "learning_rate": 2.617240897903103e-05, "loss": 0.4558, "num_tokens": 4621233644.0, "step": 6043 }, { "epoch": 2.2139781991389578, "grad_norm": 0.1580592104149295, "learning_rate": 2.6168252573551945e-05, "loss": 0.4736, "num_tokens": 4621995695.0, "step": 6044 }, { "epoch": 2.2143446001648805, "grad_norm": 0.15844108130047538, "learning_rate": 2.6164095933214584e-05, "loss": 0.4604, "num_tokens": 4622817366.0, "step": 6045 }, { "epoch": 2.2147110011908033, "grad_norm": 0.13450335455053064, "learning_rate": 2.6159939058253146e-05, "loss": 0.4518, "num_tokens": 4623584132.0, "step": 6046 }, { "epoch": 2.215077402216726, "grad_norm": 0.1533980039528584, "learning_rate": 2.6155781948901838e-05, "loss": 0.4516, "num_tokens": 4624321483.0, "step": 6047 }, { "epoch": 2.215443803242649, "grad_norm": 0.13429235992002705, "learning_rate": 2.6151624605394906e-05, "loss": 0.422, "num_tokens": 4625065009.0, "step": 6048 }, { "epoch": 2.215810204268572, "grad_norm": 0.14101206423656645, "learning_rate": 2.614746702796659e-05, "loss": 0.4067, "num_tokens": 4625843751.0, "step": 6049 }, { "epoch": 2.216176605294495, "grad_norm": 0.1445750698790652, "learning_rate": 2.614330921685114e-05, "loss": 0.4405, "num_tokens": 4626590540.0, "step": 6050 }, { "epoch": 2.2165430063204177, "grad_norm": 0.14962094579902505, "learning_rate": 2.6139151172282833e-05, "loss": 0.4407, "num_tokens": 4627321534.0, "step": 6051 }, { "epoch": 2.2169094073463405, "grad_norm": 0.1384469241464016, "learning_rate": 2.613499289449595e-05, "loss": 0.4329, "num_tokens": 4628211051.0, "step": 6052 }, { "epoch": 2.2172758083722632, "grad_norm": 0.1427927325308684, "learning_rate": 2.613083438372479e-05, "loss": 0.4311, "num_tokens": 4628946628.0, "step": 6053 }, { "epoch": 2.2176422093981865, "grad_norm": 0.13045891126737932, "learning_rate": 2.6126675640203653e-05, "loss": 0.4323, "num_tokens": 4629661715.0, "step": 6054 }, { "epoch": 2.2180086104241092, "grad_norm": 0.14593178436115004, "learning_rate": 2.6122516664166865e-05, "loss": 0.45, "num_tokens": 4630497986.0, "step": 6055 }, { "epoch": 2.218375011450032, "grad_norm": 0.13772789083349934, "learning_rate": 2.6118357455848768e-05, "loss": 0.4159, "num_tokens": 4631295295.0, "step": 6056 }, { "epoch": 2.218741412475955, "grad_norm": 0.12333801240673446, "learning_rate": 2.6114198015483707e-05, "loss": 0.4645, "num_tokens": 4632183508.0, "step": 6057 }, { "epoch": 2.2191078135018776, "grad_norm": 0.14882620268858474, "learning_rate": 2.6110038343306045e-05, "loss": 0.4382, "num_tokens": 4633062831.0, "step": 6058 }, { "epoch": 2.219474214527801, "grad_norm": 0.12635303386018606, "learning_rate": 2.6105878439550157e-05, "loss": 0.462, "num_tokens": 4633868781.0, "step": 6059 }, { "epoch": 2.2198406155537236, "grad_norm": 0.138703861950567, "learning_rate": 2.6101718304450425e-05, "loss": 0.4823, "num_tokens": 4634617786.0, "step": 6060 }, { "epoch": 2.2202070165796464, "grad_norm": 0.14921615274787867, "learning_rate": 2.609755793824126e-05, "loss": 0.4448, "num_tokens": 4635318518.0, "step": 6061 }, { "epoch": 2.220573417605569, "grad_norm": 0.15429688141224607, "learning_rate": 2.6093397341157066e-05, "loss": 0.4612, "num_tokens": 4636080721.0, "step": 6062 }, { "epoch": 2.2209398186314924, "grad_norm": 0.1199714327157657, "learning_rate": 2.608923651343227e-05, "loss": 0.4016, "num_tokens": 4636854694.0, "step": 6063 }, { "epoch": 2.221306219657415, "grad_norm": 0.1288950100773675, "learning_rate": 2.6085075455301324e-05, "loss": 0.4024, "num_tokens": 4637634486.0, "step": 6064 }, { "epoch": 2.221672620683338, "grad_norm": 0.14628043698223403, "learning_rate": 2.608091416699867e-05, "loss": 0.4541, "num_tokens": 4638390781.0, "step": 6065 }, { "epoch": 2.2220390217092607, "grad_norm": 0.1360566770255872, "learning_rate": 2.607675264875877e-05, "loss": 0.4391, "num_tokens": 4639123890.0, "step": 6066 }, { "epoch": 2.2224054227351835, "grad_norm": 0.13868780145505358, "learning_rate": 2.607259090081612e-05, "loss": 0.4664, "num_tokens": 4639777106.0, "step": 6067 }, { "epoch": 2.2227718237611067, "grad_norm": 0.17849512056824057, "learning_rate": 2.6068428923405194e-05, "loss": 0.452, "num_tokens": 4640437942.0, "step": 6068 }, { "epoch": 2.2231382247870295, "grad_norm": 0.13361712698366376, "learning_rate": 2.606426671676051e-05, "loss": 0.4475, "num_tokens": 4641172565.0, "step": 6069 }, { "epoch": 2.2235046258129523, "grad_norm": 0.1391168450316144, "learning_rate": 2.6060104281116573e-05, "loss": 0.4549, "num_tokens": 4641984330.0, "step": 6070 }, { "epoch": 2.223871026838875, "grad_norm": 0.1463182997207648, "learning_rate": 2.6055941616707926e-05, "loss": 0.472, "num_tokens": 4642643334.0, "step": 6071 }, { "epoch": 2.224237427864798, "grad_norm": 0.1336827954243321, "learning_rate": 2.6051778723769108e-05, "loss": 0.4583, "num_tokens": 4643522597.0, "step": 6072 }, { "epoch": 2.224603828890721, "grad_norm": 0.12872978702839336, "learning_rate": 2.6047615602534667e-05, "loss": 0.4387, "num_tokens": 4644304472.0, "step": 6073 }, { "epoch": 2.224970229916644, "grad_norm": 0.16021992984107938, "learning_rate": 2.6043452253239186e-05, "loss": 0.4523, "num_tokens": 4644969034.0, "step": 6074 }, { "epoch": 2.2253366309425666, "grad_norm": 0.1529208760907879, "learning_rate": 2.6039288676117233e-05, "loss": 0.447, "num_tokens": 4645714483.0, "step": 6075 }, { "epoch": 2.2257030319684894, "grad_norm": 0.12179303951910454, "learning_rate": 2.6035124871403423e-05, "loss": 0.4318, "num_tokens": 4646497258.0, "step": 6076 }, { "epoch": 2.226069432994412, "grad_norm": 0.15729398163713668, "learning_rate": 2.6030960839332336e-05, "loss": 0.4271, "num_tokens": 4647346526.0, "step": 6077 }, { "epoch": 2.2264358340203354, "grad_norm": 0.12558248407607625, "learning_rate": 2.6026796580138617e-05, "loss": 0.4645, "num_tokens": 4648147937.0, "step": 6078 }, { "epoch": 2.226802235046258, "grad_norm": 0.13636518446357473, "learning_rate": 2.6022632094056884e-05, "loss": 0.4555, "num_tokens": 4649009939.0, "step": 6079 }, { "epoch": 2.227168636072181, "grad_norm": 0.12987194667595797, "learning_rate": 2.6018467381321793e-05, "loss": 0.4333, "num_tokens": 4649775398.0, "step": 6080 }, { "epoch": 2.2275350370981037, "grad_norm": 0.14770269461290575, "learning_rate": 2.6014302442167995e-05, "loss": 0.4232, "num_tokens": 4650473303.0, "step": 6081 }, { "epoch": 2.227901438124027, "grad_norm": 0.13082601493903886, "learning_rate": 2.601013727683017e-05, "loss": 0.4379, "num_tokens": 4651149123.0, "step": 6082 }, { "epoch": 2.2282678391499497, "grad_norm": 0.13420015952950026, "learning_rate": 2.600597188554299e-05, "loss": 0.4418, "num_tokens": 4652032974.0, "step": 6083 }, { "epoch": 2.2286342401758725, "grad_norm": 0.1358457307822947, "learning_rate": 2.6001806268541166e-05, "loss": 0.4615, "num_tokens": 4652644989.0, "step": 6084 }, { "epoch": 2.2290006412017953, "grad_norm": 0.14470856832137774, "learning_rate": 2.5997640426059395e-05, "loss": 0.4531, "num_tokens": 4653463329.0, "step": 6085 }, { "epoch": 2.229367042227718, "grad_norm": 0.13480280162581534, "learning_rate": 2.5993474358332406e-05, "loss": 0.4597, "num_tokens": 4654241256.0, "step": 6086 }, { "epoch": 2.2297334432536413, "grad_norm": 0.13787541188685595, "learning_rate": 2.5989308065594937e-05, "loss": 0.4734, "num_tokens": 4655032767.0, "step": 6087 }, { "epoch": 2.230099844279564, "grad_norm": 0.14528837228410943, "learning_rate": 2.5985141548081734e-05, "loss": 0.4303, "num_tokens": 4655858603.0, "step": 6088 }, { "epoch": 2.230466245305487, "grad_norm": 0.11935197669786686, "learning_rate": 2.5980974806027557e-05, "loss": 0.4473, "num_tokens": 4656646917.0, "step": 6089 }, { "epoch": 2.2308326463314097, "grad_norm": 0.13518017565944154, "learning_rate": 2.5976807839667167e-05, "loss": 0.4516, "num_tokens": 4657442415.0, "step": 6090 }, { "epoch": 2.2311990473573324, "grad_norm": 0.11368791032440845, "learning_rate": 2.5972640649235377e-05, "loss": 0.4368, "num_tokens": 4658256698.0, "step": 6091 }, { "epoch": 2.2315654483832557, "grad_norm": 0.14016523642862186, "learning_rate": 2.5968473234966956e-05, "loss": 0.4298, "num_tokens": 4658990885.0, "step": 6092 }, { "epoch": 2.2319318494091784, "grad_norm": 0.1374392882264916, "learning_rate": 2.596430559709674e-05, "loss": 0.4788, "num_tokens": 4659791688.0, "step": 6093 }, { "epoch": 2.232298250435101, "grad_norm": 0.15213731567026423, "learning_rate": 2.596013773585953e-05, "loss": 0.4433, "num_tokens": 4660601220.0, "step": 6094 }, { "epoch": 2.232664651461024, "grad_norm": 0.12441365533673647, "learning_rate": 2.5955969651490183e-05, "loss": 0.452, "num_tokens": 4661358205.0, "step": 6095 }, { "epoch": 2.2330310524869468, "grad_norm": 0.13445764797827803, "learning_rate": 2.5951801344223535e-05, "loss": 0.4557, "num_tokens": 4662108780.0, "step": 6096 }, { "epoch": 2.23339745351287, "grad_norm": 0.14099665530418973, "learning_rate": 2.594763281429445e-05, "loss": 0.4617, "num_tokens": 4662905956.0, "step": 6097 }, { "epoch": 2.233763854538793, "grad_norm": 0.12563835597672687, "learning_rate": 2.5943464061937805e-05, "loss": 0.4104, "num_tokens": 4663705426.0, "step": 6098 }, { "epoch": 2.2341302555647156, "grad_norm": 0.12475182905316716, "learning_rate": 2.5939295087388485e-05, "loss": 0.4568, "num_tokens": 4664475164.0, "step": 6099 }, { "epoch": 2.2344966565906383, "grad_norm": 0.1490476905225881, "learning_rate": 2.593512589088138e-05, "loss": 0.4331, "num_tokens": 4665382455.0, "step": 6100 }, { "epoch": 2.234863057616561, "grad_norm": 0.12299859601139226, "learning_rate": 2.593095647265141e-05, "loss": 0.454, "num_tokens": 4666222937.0, "step": 6101 }, { "epoch": 2.2352294586424843, "grad_norm": 0.12951709806275472, "learning_rate": 2.5926786832933514e-05, "loss": 0.4369, "num_tokens": 4667052615.0, "step": 6102 }, { "epoch": 2.235595859668407, "grad_norm": 0.1486667931908319, "learning_rate": 2.5922616971962598e-05, "loss": 0.4714, "num_tokens": 4667863780.0, "step": 6103 }, { "epoch": 2.23596226069433, "grad_norm": 0.12923228202335943, "learning_rate": 2.591844688997363e-05, "loss": 0.4592, "num_tokens": 4668530364.0, "step": 6104 }, { "epoch": 2.2363286617202527, "grad_norm": 0.14687211769181702, "learning_rate": 2.5914276587201563e-05, "loss": 0.4356, "num_tokens": 4669383055.0, "step": 6105 }, { "epoch": 2.2366950627461755, "grad_norm": 0.13484332473479319, "learning_rate": 2.5910106063881383e-05, "loss": 0.4374, "num_tokens": 4670324022.0, "step": 6106 }, { "epoch": 2.2370614637720987, "grad_norm": 0.13192048616436985, "learning_rate": 2.5905935320248058e-05, "loss": 0.457, "num_tokens": 4671087079.0, "step": 6107 }, { "epoch": 2.2374278647980215, "grad_norm": 0.15345063081035232, "learning_rate": 2.5901764356536606e-05, "loss": 0.4565, "num_tokens": 4671891371.0, "step": 6108 }, { "epoch": 2.2377942658239443, "grad_norm": 0.1262450347324237, "learning_rate": 2.5897593172982026e-05, "loss": 0.4586, "num_tokens": 4672713601.0, "step": 6109 }, { "epoch": 2.238160666849867, "grad_norm": 0.15403566575001584, "learning_rate": 2.5893421769819344e-05, "loss": 0.4427, "num_tokens": 4673551961.0, "step": 6110 }, { "epoch": 2.2385270678757903, "grad_norm": 0.14151620409376328, "learning_rate": 2.5889250147283593e-05, "loss": 0.4336, "num_tokens": 4674255845.0, "step": 6111 }, { "epoch": 2.238893468901713, "grad_norm": 0.14516365252596097, "learning_rate": 2.5885078305609827e-05, "loss": 0.4533, "num_tokens": 4675002648.0, "step": 6112 }, { "epoch": 2.239259869927636, "grad_norm": 0.1301924215352769, "learning_rate": 2.5880906245033104e-05, "loss": 0.4545, "num_tokens": 4675862933.0, "step": 6113 }, { "epoch": 2.2396262709535586, "grad_norm": 0.1563960146424879, "learning_rate": 2.5876733965788497e-05, "loss": 0.4667, "num_tokens": 4676586321.0, "step": 6114 }, { "epoch": 2.2399926719794814, "grad_norm": 0.13339261642484102, "learning_rate": 2.587256146811109e-05, "loss": 0.4866, "num_tokens": 4677397122.0, "step": 6115 }, { "epoch": 2.2403590730054046, "grad_norm": 0.16103201138657847, "learning_rate": 2.5868388752235983e-05, "loss": 0.48, "num_tokens": 4678122256.0, "step": 6116 }, { "epoch": 2.2407254740313274, "grad_norm": 0.14292483596551905, "learning_rate": 2.586421581839828e-05, "loss": 0.4503, "num_tokens": 4678820985.0, "step": 6117 }, { "epoch": 2.24109187505725, "grad_norm": 0.14532870375283674, "learning_rate": 2.5860042666833113e-05, "loss": 0.4689, "num_tokens": 4679555862.0, "step": 6118 }, { "epoch": 2.241458276083173, "grad_norm": 0.1515079333760115, "learning_rate": 2.5855869297775608e-05, "loss": 0.4474, "num_tokens": 4680488457.0, "step": 6119 }, { "epoch": 2.2418246771090957, "grad_norm": 0.11479976388313722, "learning_rate": 2.5851695711460907e-05, "loss": 0.4217, "num_tokens": 4681217741.0, "step": 6120 }, { "epoch": 2.242191078135019, "grad_norm": 0.14894842835627048, "learning_rate": 2.5847521908124186e-05, "loss": 0.4621, "num_tokens": 4681986810.0, "step": 6121 }, { "epoch": 2.2425574791609417, "grad_norm": 0.13942159547277927, "learning_rate": 2.58433478880006e-05, "loss": 0.4714, "num_tokens": 4682803267.0, "step": 6122 }, { "epoch": 2.2429238801868645, "grad_norm": 0.13608226925238118, "learning_rate": 2.583917365132534e-05, "loss": 0.4618, "num_tokens": 4683462638.0, "step": 6123 }, { "epoch": 2.2432902812127873, "grad_norm": 0.15673202679039636, "learning_rate": 2.5834999198333598e-05, "loss": 0.4457, "num_tokens": 4684172478.0, "step": 6124 }, { "epoch": 2.24365668223871, "grad_norm": 0.1290056494233765, "learning_rate": 2.5830824529260584e-05, "loss": 0.4844, "num_tokens": 4684933947.0, "step": 6125 }, { "epoch": 2.2440230832646333, "grad_norm": 0.1505831543911111, "learning_rate": 2.5826649644341515e-05, "loss": 0.4668, "num_tokens": 4685681329.0, "step": 6126 }, { "epoch": 2.244389484290556, "grad_norm": 0.16730744227577746, "learning_rate": 2.5822474543811623e-05, "loss": 0.4655, "num_tokens": 4686489548.0, "step": 6127 }, { "epoch": 2.244755885316479, "grad_norm": 0.13922294216820932, "learning_rate": 2.581829922790616e-05, "loss": 0.4114, "num_tokens": 4687254988.0, "step": 6128 }, { "epoch": 2.2451222863424016, "grad_norm": 0.1283829235506372, "learning_rate": 2.581412369686037e-05, "loss": 0.4412, "num_tokens": 4687977862.0, "step": 6129 }, { "epoch": 2.245488687368325, "grad_norm": 0.13746350520037395, "learning_rate": 2.5809947950909538e-05, "loss": 0.4631, "num_tokens": 4688754688.0, "step": 6130 }, { "epoch": 2.2458550883942476, "grad_norm": 0.13423068540746513, "learning_rate": 2.5805771990288926e-05, "loss": 0.4371, "num_tokens": 4689559863.0, "step": 6131 }, { "epoch": 2.2462214894201704, "grad_norm": 0.12895721050669057, "learning_rate": 2.580159581523383e-05, "loss": 0.4314, "num_tokens": 4690313748.0, "step": 6132 }, { "epoch": 2.246587890446093, "grad_norm": 0.14642306005853895, "learning_rate": 2.579741942597957e-05, "loss": 0.487, "num_tokens": 4691062821.0, "step": 6133 }, { "epoch": 2.246954291472016, "grad_norm": 0.13348152817851572, "learning_rate": 2.5793242822761446e-05, "loss": 0.4621, "num_tokens": 4691909097.0, "step": 6134 }, { "epoch": 2.247320692497939, "grad_norm": 0.1364245909657535, "learning_rate": 2.5789066005814794e-05, "loss": 0.4544, "num_tokens": 4692559118.0, "step": 6135 }, { "epoch": 2.247687093523862, "grad_norm": 0.14818223087294233, "learning_rate": 2.5784888975374958e-05, "loss": 0.4356, "num_tokens": 4693377230.0, "step": 6136 }, { "epoch": 2.2480534945497848, "grad_norm": 0.1331849476700943, "learning_rate": 2.578071173167728e-05, "loss": 0.4585, "num_tokens": 4694140588.0, "step": 6137 }, { "epoch": 2.2484198955757075, "grad_norm": 0.1591098109830475, "learning_rate": 2.5776534274957133e-05, "loss": 0.4376, "num_tokens": 4694822461.0, "step": 6138 }, { "epoch": 2.2487862966016303, "grad_norm": 0.14197711961222678, "learning_rate": 2.5772356605449898e-05, "loss": 0.473, "num_tokens": 4695586953.0, "step": 6139 }, { "epoch": 2.2491526976275535, "grad_norm": 0.12600426389781058, "learning_rate": 2.576817872339095e-05, "loss": 0.4478, "num_tokens": 4696412873.0, "step": 6140 }, { "epoch": 2.2495190986534763, "grad_norm": 0.14419031199204962, "learning_rate": 2.576400062901571e-05, "loss": 0.4518, "num_tokens": 4697184244.0, "step": 6141 }, { "epoch": 2.249885499679399, "grad_norm": 0.13270292872367717, "learning_rate": 2.5759822322559566e-05, "loss": 0.4515, "num_tokens": 4697961911.0, "step": 6142 }, { "epoch": 2.250251900705322, "grad_norm": 0.14220959841580574, "learning_rate": 2.575564380425796e-05, "loss": 0.4549, "num_tokens": 4698694003.0, "step": 6143 }, { "epoch": 2.2506183017312447, "grad_norm": 0.13876518135695814, "learning_rate": 2.5751465074346325e-05, "loss": 0.4605, "num_tokens": 4699352148.0, "step": 6144 }, { "epoch": 2.250984702757168, "grad_norm": 0.15370490838190468, "learning_rate": 2.574728613306011e-05, "loss": 0.4583, "num_tokens": 4700205741.0, "step": 6145 }, { "epoch": 2.2513511037830907, "grad_norm": 0.12099229748709811, "learning_rate": 2.5743106980634777e-05, "loss": 0.4657, "num_tokens": 4700937867.0, "step": 6146 }, { "epoch": 2.2517175048090134, "grad_norm": 0.14766853375112654, "learning_rate": 2.573892761730579e-05, "loss": 0.4699, "num_tokens": 4701612184.0, "step": 6147 }, { "epoch": 2.2520839058349362, "grad_norm": 0.14589379391147067, "learning_rate": 2.5734748043308643e-05, "loss": 0.4478, "num_tokens": 4702339528.0, "step": 6148 }, { "epoch": 2.252450306860859, "grad_norm": 0.15189479705315867, "learning_rate": 2.5730568258878825e-05, "loss": 0.4446, "num_tokens": 4703151775.0, "step": 6149 }, { "epoch": 2.2528167078867822, "grad_norm": 0.1179988647476624, "learning_rate": 2.5726388264251852e-05, "loss": 0.436, "num_tokens": 4704009124.0, "step": 6150 }, { "epoch": 2.253183108912705, "grad_norm": 0.14560267486597225, "learning_rate": 2.572220805966324e-05, "loss": 0.453, "num_tokens": 4704863843.0, "step": 6151 }, { "epoch": 2.253549509938628, "grad_norm": 0.1372191888529048, "learning_rate": 2.5718027645348515e-05, "loss": 0.4406, "num_tokens": 4705622161.0, "step": 6152 }, { "epoch": 2.2539159109645506, "grad_norm": 0.13355916241857146, "learning_rate": 2.571384702154323e-05, "loss": 0.4552, "num_tokens": 4706302631.0, "step": 6153 }, { "epoch": 2.2542823119904734, "grad_norm": 0.15549857229300912, "learning_rate": 2.5709666188482933e-05, "loss": 0.4558, "num_tokens": 4706961173.0, "step": 6154 }, { "epoch": 2.2546487130163966, "grad_norm": 0.14381680031781482, "learning_rate": 2.5705485146403196e-05, "loss": 0.4714, "num_tokens": 4707812690.0, "step": 6155 }, { "epoch": 2.2550151140423194, "grad_norm": 0.13185761834097645, "learning_rate": 2.5701303895539602e-05, "loss": 0.4479, "num_tokens": 4708736122.0, "step": 6156 }, { "epoch": 2.255381515068242, "grad_norm": 0.14344906175903896, "learning_rate": 2.569712243612772e-05, "loss": 0.4302, "num_tokens": 4709396432.0, "step": 6157 }, { "epoch": 2.255747916094165, "grad_norm": 0.13207002185773478, "learning_rate": 2.569294076840318e-05, "loss": 0.466, "num_tokens": 4710160210.0, "step": 6158 }, { "epoch": 2.2561143171200877, "grad_norm": 0.14246162253309744, "learning_rate": 2.568875889260158e-05, "loss": 0.446, "num_tokens": 4710881636.0, "step": 6159 }, { "epoch": 2.256480718146011, "grad_norm": 0.1537274069262157, "learning_rate": 2.5684576808958554e-05, "loss": 0.4656, "num_tokens": 4711593513.0, "step": 6160 }, { "epoch": 2.2568471191719337, "grad_norm": 0.14995498504404114, "learning_rate": 2.5680394517709735e-05, "loss": 0.4737, "num_tokens": 4712291063.0, "step": 6161 }, { "epoch": 2.2572135201978565, "grad_norm": 0.14025612920409844, "learning_rate": 2.5676212019090767e-05, "loss": 0.4821, "num_tokens": 4713150079.0, "step": 6162 }, { "epoch": 2.2575799212237793, "grad_norm": 0.12515880922442402, "learning_rate": 2.5672029313337325e-05, "loss": 0.4356, "num_tokens": 4713911635.0, "step": 6163 }, { "epoch": 2.2579463222497025, "grad_norm": 0.1607216919493358, "learning_rate": 2.5667846400685067e-05, "loss": 0.478, "num_tokens": 4714677593.0, "step": 6164 }, { "epoch": 2.2583127232756253, "grad_norm": 0.1294401963012804, "learning_rate": 2.566366328136969e-05, "loss": 0.4687, "num_tokens": 4715595426.0, "step": 6165 }, { "epoch": 2.258679124301548, "grad_norm": 0.1375316209944707, "learning_rate": 2.565947995562688e-05, "loss": 0.5065, "num_tokens": 4716384246.0, "step": 6166 }, { "epoch": 2.259045525327471, "grad_norm": 0.12947733589546478, "learning_rate": 2.5655296423692352e-05, "loss": 0.4224, "num_tokens": 4717135514.0, "step": 6167 }, { "epoch": 2.2594119263533936, "grad_norm": 0.14448685600756378, "learning_rate": 2.5651112685801817e-05, "loss": 0.477, "num_tokens": 4717882807.0, "step": 6168 }, { "epoch": 2.259778327379317, "grad_norm": 0.13913958042589675, "learning_rate": 2.5646928742191007e-05, "loss": 0.4909, "num_tokens": 4718581952.0, "step": 6169 }, { "epoch": 2.2601447284052396, "grad_norm": 0.14235197909933733, "learning_rate": 2.5642744593095673e-05, "loss": 0.4533, "num_tokens": 4719296037.0, "step": 6170 }, { "epoch": 2.2605111294311624, "grad_norm": 0.1344927375383907, "learning_rate": 2.5638560238751567e-05, "loss": 0.4416, "num_tokens": 4720072219.0, "step": 6171 }, { "epoch": 2.260877530457085, "grad_norm": 0.13238021947138603, "learning_rate": 2.5634375679394446e-05, "loss": 0.4429, "num_tokens": 4720821601.0, "step": 6172 }, { "epoch": 2.2612439314830084, "grad_norm": 0.1399436914566274, "learning_rate": 2.5630190915260093e-05, "loss": 0.4596, "num_tokens": 4721515803.0, "step": 6173 }, { "epoch": 2.261610332508931, "grad_norm": 0.13957806611535006, "learning_rate": 2.5626005946584294e-05, "loss": 0.4262, "num_tokens": 4722220282.0, "step": 6174 }, { "epoch": 2.261976733534854, "grad_norm": 0.14481337197319533, "learning_rate": 2.5621820773602862e-05, "loss": 0.4665, "num_tokens": 4722926012.0, "step": 6175 }, { "epoch": 2.2623431345607767, "grad_norm": 0.12605730650655061, "learning_rate": 2.561763539655159e-05, "loss": 0.4652, "num_tokens": 4723705807.0, "step": 6176 }, { "epoch": 2.2627095355866995, "grad_norm": 0.13649201201789185, "learning_rate": 2.5613449815666306e-05, "loss": 0.473, "num_tokens": 4724496468.0, "step": 6177 }, { "epoch": 2.2630759366126227, "grad_norm": 0.15350848588298313, "learning_rate": 2.560926403118285e-05, "loss": 0.4468, "num_tokens": 4725174377.0, "step": 6178 }, { "epoch": 2.2634423376385455, "grad_norm": 0.13420353695295556, "learning_rate": 2.5605078043337068e-05, "loss": 0.4662, "num_tokens": 4726023943.0, "step": 6179 }, { "epoch": 2.2638087386644683, "grad_norm": 0.13809397243298013, "learning_rate": 2.5600891852364812e-05, "loss": 0.4462, "num_tokens": 4726768703.0, "step": 6180 }, { "epoch": 2.264175139690391, "grad_norm": 0.15134763561853826, "learning_rate": 2.5596705458501955e-05, "loss": 0.5052, "num_tokens": 4727547125.0, "step": 6181 }, { "epoch": 2.264541540716314, "grad_norm": 0.12666927484898574, "learning_rate": 2.559251886198438e-05, "loss": 0.4245, "num_tokens": 4728309401.0, "step": 6182 }, { "epoch": 2.264907941742237, "grad_norm": 0.1327933697696903, "learning_rate": 2.5588332063047974e-05, "loss": 0.4672, "num_tokens": 4729017630.0, "step": 6183 }, { "epoch": 2.26527434276816, "grad_norm": 0.14669573455759782, "learning_rate": 2.5584145061928636e-05, "loss": 0.4471, "num_tokens": 4729744240.0, "step": 6184 }, { "epoch": 2.2656407437940826, "grad_norm": 0.1448583935213938, "learning_rate": 2.5579957858862296e-05, "loss": 0.4799, "num_tokens": 4730411606.0, "step": 6185 }, { "epoch": 2.2660071448200054, "grad_norm": 0.15410081893964073, "learning_rate": 2.5575770454084864e-05, "loss": 0.4398, "num_tokens": 4731236837.0, "step": 6186 }, { "epoch": 2.266373545845928, "grad_norm": 0.13622370041646217, "learning_rate": 2.5571582847832287e-05, "loss": 0.4343, "num_tokens": 4732017681.0, "step": 6187 }, { "epoch": 2.2667399468718514, "grad_norm": 0.12921238040727906, "learning_rate": 2.556739504034051e-05, "loss": 0.4635, "num_tokens": 4732827995.0, "step": 6188 }, { "epoch": 2.267106347897774, "grad_norm": 0.12688957805143564, "learning_rate": 2.5563207031845486e-05, "loss": 0.4519, "num_tokens": 4733607961.0, "step": 6189 }, { "epoch": 2.267472748923697, "grad_norm": 0.1397842888538062, "learning_rate": 2.555901882258321e-05, "loss": 0.483, "num_tokens": 4734316293.0, "step": 6190 }, { "epoch": 2.2678391499496198, "grad_norm": 0.18210160437883288, "learning_rate": 2.5554830412789638e-05, "loss": 0.5043, "num_tokens": 4734966959.0, "step": 6191 }, { "epoch": 2.2682055509755426, "grad_norm": 0.1353375157864905, "learning_rate": 2.5550641802700775e-05, "loss": 0.459, "num_tokens": 4735770865.0, "step": 6192 }, { "epoch": 2.2685719520014658, "grad_norm": 0.16039011245020882, "learning_rate": 2.5546452992552627e-05, "loss": 0.4655, "num_tokens": 4736423599.0, "step": 6193 }, { "epoch": 2.2689383530273886, "grad_norm": 0.13256099297779525, "learning_rate": 2.5542263982581214e-05, "loss": 0.427, "num_tokens": 4737228183.0, "step": 6194 }, { "epoch": 2.2693047540533113, "grad_norm": 0.123160571434823, "learning_rate": 2.5538074773022557e-05, "loss": 0.4538, "num_tokens": 4738042248.0, "step": 6195 }, { "epoch": 2.269671155079234, "grad_norm": 0.1466602627182291, "learning_rate": 2.5533885364112693e-05, "loss": 0.4425, "num_tokens": 4738862807.0, "step": 6196 }, { "epoch": 2.270037556105157, "grad_norm": 0.14607288117269204, "learning_rate": 2.5529695756087684e-05, "loss": 0.4802, "num_tokens": 4739703118.0, "step": 6197 }, { "epoch": 2.27040395713108, "grad_norm": 0.1405334295024535, "learning_rate": 2.5525505949183584e-05, "loss": 0.4547, "num_tokens": 4740466302.0, "step": 6198 }, { "epoch": 2.270770358157003, "grad_norm": 0.12872753859739863, "learning_rate": 2.5521315943636462e-05, "loss": 0.4266, "num_tokens": 4741234095.0, "step": 6199 }, { "epoch": 2.2711367591829257, "grad_norm": 0.1325311450542422, "learning_rate": 2.551712573968241e-05, "loss": 0.4592, "num_tokens": 4741922314.0, "step": 6200 }, { "epoch": 2.2715031602088485, "grad_norm": 0.14732901619947436, "learning_rate": 2.551293533755752e-05, "loss": 0.4632, "num_tokens": 4742679190.0, "step": 6201 }, { "epoch": 2.2718695612347712, "grad_norm": 0.14140627106128195, "learning_rate": 2.5508744737497898e-05, "loss": 0.4282, "num_tokens": 4743539775.0, "step": 6202 }, { "epoch": 2.2722359622606945, "grad_norm": 0.1289878382307159, "learning_rate": 2.550455393973966e-05, "loss": 0.4404, "num_tokens": 4744305918.0, "step": 6203 }, { "epoch": 2.2726023632866172, "grad_norm": 0.13967731181822118, "learning_rate": 2.550036294451893e-05, "loss": 0.4226, "num_tokens": 4745181544.0, "step": 6204 }, { "epoch": 2.27296876431254, "grad_norm": 0.13438557590994224, "learning_rate": 2.549617175207187e-05, "loss": 0.4587, "num_tokens": 4745929884.0, "step": 6205 }, { "epoch": 2.273335165338463, "grad_norm": 0.1556479336064076, "learning_rate": 2.5491980362634596e-05, "loss": 0.4649, "num_tokens": 4746780624.0, "step": 6206 }, { "epoch": 2.2737015663643856, "grad_norm": 0.14131428923711278, "learning_rate": 2.54877887764433e-05, "loss": 0.4712, "num_tokens": 4747435521.0, "step": 6207 }, { "epoch": 2.274067967390309, "grad_norm": 0.1505638426475109, "learning_rate": 2.5483596993734138e-05, "loss": 0.442, "num_tokens": 4748265391.0, "step": 6208 }, { "epoch": 2.2744343684162316, "grad_norm": 0.16236747887154376, "learning_rate": 2.54794050147433e-05, "loss": 0.4755, "num_tokens": 4748956005.0, "step": 6209 }, { "epoch": 2.2748007694421544, "grad_norm": 0.15292272067984916, "learning_rate": 2.547521283970698e-05, "loss": 0.4342, "num_tokens": 4749674090.0, "step": 6210 }, { "epoch": 2.275167170468077, "grad_norm": 0.13102158422713817, "learning_rate": 2.547102046886138e-05, "loss": 0.4348, "num_tokens": 4750429999.0, "step": 6211 }, { "epoch": 2.2755335714940004, "grad_norm": 0.15026874999149556, "learning_rate": 2.546682790244273e-05, "loss": 0.4859, "num_tokens": 4751173386.0, "step": 6212 }, { "epoch": 2.275899972519923, "grad_norm": 0.14002145733962176, "learning_rate": 2.5462635140687255e-05, "loss": 0.4395, "num_tokens": 4751906738.0, "step": 6213 }, { "epoch": 2.276266373545846, "grad_norm": 0.13080406730490943, "learning_rate": 2.5458442183831173e-05, "loss": 0.4469, "num_tokens": 4752702581.0, "step": 6214 }, { "epoch": 2.2766327745717687, "grad_norm": 0.16387308730437142, "learning_rate": 2.545424903211076e-05, "loss": 0.497, "num_tokens": 4753460442.0, "step": 6215 }, { "epoch": 2.2769991755976915, "grad_norm": 0.16412294533915284, "learning_rate": 2.5450055685762264e-05, "loss": 0.4369, "num_tokens": 4754280561.0, "step": 6216 }, { "epoch": 2.2773655766236147, "grad_norm": 0.12577859972595856, "learning_rate": 2.544586214502197e-05, "loss": 0.444, "num_tokens": 4755074036.0, "step": 6217 }, { "epoch": 2.2777319776495375, "grad_norm": 0.1465559473411172, "learning_rate": 2.5441668410126146e-05, "loss": 0.4427, "num_tokens": 4755719143.0, "step": 6218 }, { "epoch": 2.2780983786754603, "grad_norm": 0.15404804713237036, "learning_rate": 2.543747448131109e-05, "loss": 0.4672, "num_tokens": 4756578097.0, "step": 6219 }, { "epoch": 2.278464779701383, "grad_norm": 0.133546118848216, "learning_rate": 2.5433280358813112e-05, "loss": 0.4735, "num_tokens": 4757356638.0, "step": 6220 }, { "epoch": 2.2788311807273063, "grad_norm": 0.14240968725572656, "learning_rate": 2.542908604286852e-05, "loss": 0.4368, "num_tokens": 4758102739.0, "step": 6221 }, { "epoch": 2.279197581753229, "grad_norm": 0.13540765368326, "learning_rate": 2.5424891533713654e-05, "loss": 0.4281, "num_tokens": 4758878757.0, "step": 6222 }, { "epoch": 2.279563982779152, "grad_norm": 0.1472628093783944, "learning_rate": 2.542069683158484e-05, "loss": 0.4692, "num_tokens": 4759654470.0, "step": 6223 }, { "epoch": 2.2799303838050746, "grad_norm": 0.14178534002765686, "learning_rate": 2.541650193671843e-05, "loss": 0.4848, "num_tokens": 4760322150.0, "step": 6224 }, { "epoch": 2.2802967848309974, "grad_norm": 0.1489786325374826, "learning_rate": 2.5412306849350782e-05, "loss": 0.454, "num_tokens": 4761057343.0, "step": 6225 }, { "epoch": 2.2806631858569206, "grad_norm": 0.12912798313674897, "learning_rate": 2.5408111569718265e-05, "loss": 0.4579, "num_tokens": 4761897899.0, "step": 6226 }, { "epoch": 2.2810295868828434, "grad_norm": 0.12901264816645197, "learning_rate": 2.540391609805727e-05, "loss": 0.4717, "num_tokens": 4762576748.0, "step": 6227 }, { "epoch": 2.281395987908766, "grad_norm": 0.13427485264744252, "learning_rate": 2.5399720434604184e-05, "loss": 0.4289, "num_tokens": 4763451621.0, "step": 6228 }, { "epoch": 2.281762388934689, "grad_norm": 0.12887374058184417, "learning_rate": 2.53955245795954e-05, "loss": 0.4532, "num_tokens": 4764254139.0, "step": 6229 }, { "epoch": 2.2821287899606117, "grad_norm": 0.13332547381524554, "learning_rate": 2.5391328533267344e-05, "loss": 0.4417, "num_tokens": 4764988995.0, "step": 6230 }, { "epoch": 2.282495190986535, "grad_norm": 0.1273213586301094, "learning_rate": 2.5387132295856432e-05, "loss": 0.4643, "num_tokens": 4765840688.0, "step": 6231 }, { "epoch": 2.2828615920124578, "grad_norm": 0.14522138107292093, "learning_rate": 2.5382935867599107e-05, "loss": 0.4432, "num_tokens": 4766588613.0, "step": 6232 }, { "epoch": 2.2832279930383805, "grad_norm": 0.12785641286661256, "learning_rate": 2.5378739248731802e-05, "loss": 0.4373, "num_tokens": 4767348614.0, "step": 6233 }, { "epoch": 2.2835943940643033, "grad_norm": 0.12867758260852774, "learning_rate": 2.5374542439490987e-05, "loss": 0.45, "num_tokens": 4768291611.0, "step": 6234 }, { "epoch": 2.283960795090226, "grad_norm": 0.13088035353617988, "learning_rate": 2.5370345440113125e-05, "loss": 0.4771, "num_tokens": 4769063676.0, "step": 6235 }, { "epoch": 2.2843271961161493, "grad_norm": 0.12177104795930564, "learning_rate": 2.5366148250834695e-05, "loss": 0.4388, "num_tokens": 4769883260.0, "step": 6236 }, { "epoch": 2.284693597142072, "grad_norm": 0.1267484846332012, "learning_rate": 2.536195087189218e-05, "loss": 0.4296, "num_tokens": 4770658381.0, "step": 6237 }, { "epoch": 2.285059998167995, "grad_norm": 0.1288086374718481, "learning_rate": 2.5357753303522084e-05, "loss": 0.4643, "num_tokens": 4771377316.0, "step": 6238 }, { "epoch": 2.2854263991939177, "grad_norm": 0.15716345212228627, "learning_rate": 2.5353555545960915e-05, "loss": 0.4506, "num_tokens": 4772175494.0, "step": 6239 }, { "epoch": 2.2857928002198404, "grad_norm": 0.12535911215701956, "learning_rate": 2.5349357599445204e-05, "loss": 0.4546, "num_tokens": 4772979057.0, "step": 6240 }, { "epoch": 2.2861592012457637, "grad_norm": 0.12269732648089895, "learning_rate": 2.534515946421146e-05, "loss": 0.4404, "num_tokens": 4773804271.0, "step": 6241 }, { "epoch": 2.2865256022716864, "grad_norm": 0.15687504473881936, "learning_rate": 2.534096114049625e-05, "loss": 0.4586, "num_tokens": 4774583781.0, "step": 6242 }, { "epoch": 2.286892003297609, "grad_norm": 0.12642080580409085, "learning_rate": 2.533676262853611e-05, "loss": 0.4333, "num_tokens": 4775327529.0, "step": 6243 }, { "epoch": 2.287258404323532, "grad_norm": 0.13847810731685262, "learning_rate": 2.5332563928567608e-05, "loss": 0.423, "num_tokens": 4776085319.0, "step": 6244 }, { "epoch": 2.287624805349455, "grad_norm": 0.1467174001529488, "learning_rate": 2.5328365040827317e-05, "loss": 0.484, "num_tokens": 4776715918.0, "step": 6245 }, { "epoch": 2.287991206375378, "grad_norm": 0.17044246233549942, "learning_rate": 2.5324165965551825e-05, "loss": 0.4562, "num_tokens": 4777457371.0, "step": 6246 }, { "epoch": 2.288357607401301, "grad_norm": 0.13113820949366545, "learning_rate": 2.5319966702977724e-05, "loss": 0.4911, "num_tokens": 4778144480.0, "step": 6247 }, { "epoch": 2.2887240084272236, "grad_norm": 0.13538515879672527, "learning_rate": 2.531576725334162e-05, "loss": 0.4799, "num_tokens": 4778977355.0, "step": 6248 }, { "epoch": 2.2890904094531463, "grad_norm": 0.15661993397093274, "learning_rate": 2.531156761688013e-05, "loss": 0.4695, "num_tokens": 4779752247.0, "step": 6249 }, { "epoch": 2.289456810479069, "grad_norm": 0.14146778338778804, "learning_rate": 2.5307367793829877e-05, "loss": 0.4387, "num_tokens": 4780612073.0, "step": 6250 }, { "epoch": 2.2898232115049924, "grad_norm": 0.1400879788243512, "learning_rate": 2.5303167784427504e-05, "loss": 0.4602, "num_tokens": 4781339198.0, "step": 6251 }, { "epoch": 2.290189612530915, "grad_norm": 0.16111393743764857, "learning_rate": 2.5298967588909658e-05, "loss": 0.4181, "num_tokens": 4782002988.0, "step": 6252 }, { "epoch": 2.290556013556838, "grad_norm": 0.16309137166724127, "learning_rate": 2.5294767207512986e-05, "loss": 0.4505, "num_tokens": 4782722729.0, "step": 6253 }, { "epoch": 2.2909224145827607, "grad_norm": 0.13925078306474023, "learning_rate": 2.529056664047417e-05, "loss": 0.4589, "num_tokens": 4783489640.0, "step": 6254 }, { "epoch": 2.2912888156086835, "grad_norm": 0.1654500883676468, "learning_rate": 2.5286365888029887e-05, "loss": 0.4657, "num_tokens": 4784120871.0, "step": 6255 }, { "epoch": 2.2916552166346067, "grad_norm": 0.1574044287306769, "learning_rate": 2.528216495041682e-05, "loss": 0.4609, "num_tokens": 4784880835.0, "step": 6256 }, { "epoch": 2.2920216176605295, "grad_norm": 0.14841021191189765, "learning_rate": 2.5277963827871676e-05, "loss": 0.4632, "num_tokens": 4785786523.0, "step": 6257 }, { "epoch": 2.2923880186864523, "grad_norm": 0.13601364402691218, "learning_rate": 2.5273762520631157e-05, "loss": 0.4438, "num_tokens": 4786550559.0, "step": 6258 }, { "epoch": 2.292754419712375, "grad_norm": 0.14278151440143658, "learning_rate": 2.526956102893199e-05, "loss": 0.478, "num_tokens": 4787315637.0, "step": 6259 }, { "epoch": 2.2931208207382983, "grad_norm": 0.14454241488486616, "learning_rate": 2.526535935301091e-05, "loss": 0.4495, "num_tokens": 4788050925.0, "step": 6260 }, { "epoch": 2.293487221764221, "grad_norm": 0.14300690711359734, "learning_rate": 2.5261157493104645e-05, "loss": 0.4277, "num_tokens": 4788795921.0, "step": 6261 }, { "epoch": 2.293853622790144, "grad_norm": 0.14311783985136006, "learning_rate": 2.5256955449449955e-05, "loss": 0.4602, "num_tokens": 4789561602.0, "step": 6262 }, { "epoch": 2.2942200238160666, "grad_norm": 0.14626668284349836, "learning_rate": 2.525275322228361e-05, "loss": 0.4766, "num_tokens": 4790180180.0, "step": 6263 }, { "epoch": 2.2945864248419894, "grad_norm": 0.14342094099972075, "learning_rate": 2.5248550811842368e-05, "loss": 0.4689, "num_tokens": 4790949213.0, "step": 6264 }, { "epoch": 2.2949528258679126, "grad_norm": 0.14413870571637025, "learning_rate": 2.5244348218363023e-05, "loss": 0.4535, "num_tokens": 4791638574.0, "step": 6265 }, { "epoch": 2.2953192268938354, "grad_norm": 0.13227164493896607, "learning_rate": 2.5240145442082367e-05, "loss": 0.4204, "num_tokens": 4792518968.0, "step": 6266 }, { "epoch": 2.295685627919758, "grad_norm": 0.13240517350418107, "learning_rate": 2.5235942483237194e-05, "loss": 0.4555, "num_tokens": 4793286093.0, "step": 6267 }, { "epoch": 2.296052028945681, "grad_norm": 0.13750974192674859, "learning_rate": 2.5231739342064323e-05, "loss": 0.4629, "num_tokens": 4794000964.0, "step": 6268 }, { "epoch": 2.296418429971604, "grad_norm": 0.14187654025133647, "learning_rate": 2.5227536018800588e-05, "loss": 0.4876, "num_tokens": 4794736099.0, "step": 6269 }, { "epoch": 2.296784830997527, "grad_norm": 0.13387975737690522, "learning_rate": 2.522333251368281e-05, "loss": 0.4199, "num_tokens": 4795487077.0, "step": 6270 }, { "epoch": 2.2971512320234497, "grad_norm": 0.13525263079837732, "learning_rate": 2.521912882694783e-05, "loss": 0.4585, "num_tokens": 4796311649.0, "step": 6271 }, { "epoch": 2.2975176330493725, "grad_norm": 0.13096446442654472, "learning_rate": 2.521492495883252e-05, "loss": 0.407, "num_tokens": 4797016391.0, "step": 6272 }, { "epoch": 2.2978840340752953, "grad_norm": 0.1478304717597124, "learning_rate": 2.521072090957373e-05, "loss": 0.4505, "num_tokens": 4797678355.0, "step": 6273 }, { "epoch": 2.2982504351012185, "grad_norm": 0.1273480723419128, "learning_rate": 2.5206516679408353e-05, "loss": 0.4418, "num_tokens": 4798489593.0, "step": 6274 }, { "epoch": 2.2986168361271413, "grad_norm": 0.14360349839541378, "learning_rate": 2.520231226857326e-05, "loss": 0.4605, "num_tokens": 4799238941.0, "step": 6275 }, { "epoch": 2.298983237153064, "grad_norm": 0.13752474498221764, "learning_rate": 2.5198107677305337e-05, "loss": 0.4359, "num_tokens": 4800026843.0, "step": 6276 }, { "epoch": 2.299349638178987, "grad_norm": 0.13037894169928144, "learning_rate": 2.519390290584151e-05, "loss": 0.473, "num_tokens": 4800811340.0, "step": 6277 }, { "epoch": 2.2997160392049096, "grad_norm": 0.1412873174920439, "learning_rate": 2.5189697954418692e-05, "loss": 0.5012, "num_tokens": 4801514971.0, "step": 6278 }, { "epoch": 2.300082440230833, "grad_norm": 0.14565731978831264, "learning_rate": 2.5185492823273795e-05, "loss": 0.4706, "num_tokens": 4802366324.0, "step": 6279 }, { "epoch": 2.3004488412567556, "grad_norm": 0.13391370669378747, "learning_rate": 2.518128751264377e-05, "loss": 0.4375, "num_tokens": 4803225893.0, "step": 6280 }, { "epoch": 2.3008152422826784, "grad_norm": 0.14765363577073432, "learning_rate": 2.517708202276555e-05, "loss": 0.4622, "num_tokens": 4804010723.0, "step": 6281 }, { "epoch": 2.301181643308601, "grad_norm": 0.1375624905105774, "learning_rate": 2.5172876353876107e-05, "loss": 0.4579, "num_tokens": 4804810999.0, "step": 6282 }, { "epoch": 2.301548044334524, "grad_norm": 0.13524467828884973, "learning_rate": 2.5168670506212398e-05, "loss": 0.4709, "num_tokens": 4805510538.0, "step": 6283 }, { "epoch": 2.301914445360447, "grad_norm": 0.14734585449367782, "learning_rate": 2.51644644800114e-05, "loss": 0.4416, "num_tokens": 4806247422.0, "step": 6284 }, { "epoch": 2.30228084638637, "grad_norm": 0.12609156394341653, "learning_rate": 2.5160258275510103e-05, "loss": 0.4778, "num_tokens": 4807036328.0, "step": 6285 }, { "epoch": 2.3026472474122928, "grad_norm": 0.1425779643570002, "learning_rate": 2.5156051892945498e-05, "loss": 0.4375, "num_tokens": 4807805607.0, "step": 6286 }, { "epoch": 2.3030136484382155, "grad_norm": 0.13573597070947793, "learning_rate": 2.5151845332554597e-05, "loss": 0.4688, "num_tokens": 4808729197.0, "step": 6287 }, { "epoch": 2.3033800494641383, "grad_norm": 0.1459254530808361, "learning_rate": 2.5147638594574408e-05, "loss": 0.4909, "num_tokens": 4809466169.0, "step": 6288 }, { "epoch": 2.3037464504900615, "grad_norm": 0.12724675369759816, "learning_rate": 2.5143431679241977e-05, "loss": 0.4343, "num_tokens": 4810304301.0, "step": 6289 }, { "epoch": 2.3041128515159843, "grad_norm": 0.1378883776762423, "learning_rate": 2.5139224586794316e-05, "loss": 0.4502, "num_tokens": 4810989944.0, "step": 6290 }, { "epoch": 2.304479252541907, "grad_norm": 0.14352744820538266, "learning_rate": 2.513501731746849e-05, "loss": 0.4489, "num_tokens": 4811733877.0, "step": 6291 }, { "epoch": 2.30484565356783, "grad_norm": 0.13516988395529825, "learning_rate": 2.513080987150155e-05, "loss": 0.4619, "num_tokens": 4812544314.0, "step": 6292 }, { "epoch": 2.3052120545937527, "grad_norm": 0.15566830920511257, "learning_rate": 2.5126602249130557e-05, "loss": 0.5042, "num_tokens": 4813293504.0, "step": 6293 }, { "epoch": 2.305578455619676, "grad_norm": 0.14360504387288028, "learning_rate": 2.51223944505926e-05, "loss": 0.4625, "num_tokens": 4814056490.0, "step": 6294 }, { "epoch": 2.3059448566455987, "grad_norm": 0.12670912244362817, "learning_rate": 2.5118186476124753e-05, "loss": 0.4662, "num_tokens": 4814927599.0, "step": 6295 }, { "epoch": 2.3063112576715215, "grad_norm": 0.14299603627708907, "learning_rate": 2.5113978325964116e-05, "loss": 0.4509, "num_tokens": 4815668419.0, "step": 6296 }, { "epoch": 2.3066776586974442, "grad_norm": 0.12241667131009673, "learning_rate": 2.5109770000347805e-05, "loss": 0.4544, "num_tokens": 4816601420.0, "step": 6297 }, { "epoch": 2.307044059723367, "grad_norm": 0.1444538837278471, "learning_rate": 2.510556149951292e-05, "loss": 0.5001, "num_tokens": 4817271632.0, "step": 6298 }, { "epoch": 2.3074104607492902, "grad_norm": 0.1440375117424645, "learning_rate": 2.5101352823696595e-05, "loss": 0.4436, "num_tokens": 4818052513.0, "step": 6299 }, { "epoch": 2.307776861775213, "grad_norm": 0.14392324648254404, "learning_rate": 2.509714397313597e-05, "loss": 0.4657, "num_tokens": 4818725775.0, "step": 6300 }, { "epoch": 2.308143262801136, "grad_norm": 0.1643234095465639, "learning_rate": 2.5092934948068187e-05, "loss": 0.4606, "num_tokens": 4819526689.0, "step": 6301 }, { "epoch": 2.3085096638270586, "grad_norm": 0.14399597455342963, "learning_rate": 2.5088725748730404e-05, "loss": 0.4776, "num_tokens": 4820304148.0, "step": 6302 }, { "epoch": 2.3088760648529814, "grad_norm": 0.13421939826535864, "learning_rate": 2.5084516375359775e-05, "loss": 0.4243, "num_tokens": 4821045105.0, "step": 6303 }, { "epoch": 2.3092424658789046, "grad_norm": 0.16850550946554987, "learning_rate": 2.5080306828193485e-05, "loss": 0.4493, "num_tokens": 4821874415.0, "step": 6304 }, { "epoch": 2.3096088669048274, "grad_norm": 0.13262880118932027, "learning_rate": 2.5076097107468722e-05, "loss": 0.4796, "num_tokens": 4822530994.0, "step": 6305 }, { "epoch": 2.30997526793075, "grad_norm": 0.17766892311082197, "learning_rate": 2.5071887213422678e-05, "loss": 0.4622, "num_tokens": 4823333196.0, "step": 6306 }, { "epoch": 2.310341668956673, "grad_norm": 0.1520073017949851, "learning_rate": 2.5067677146292552e-05, "loss": 0.4765, "num_tokens": 4824073980.0, "step": 6307 }, { "epoch": 2.3107080699825957, "grad_norm": 0.15365224030308153, "learning_rate": 2.506346690631557e-05, "loss": 0.4551, "num_tokens": 4824833848.0, "step": 6308 }, { "epoch": 2.311074471008519, "grad_norm": 0.16271909933566783, "learning_rate": 2.5059256493728937e-05, "loss": 0.4325, "num_tokens": 4825615992.0, "step": 6309 }, { "epoch": 2.3114408720344417, "grad_norm": 0.1281636341901832, "learning_rate": 2.5055045908769905e-05, "loss": 0.4318, "num_tokens": 4826415144.0, "step": 6310 }, { "epoch": 2.3118072730603645, "grad_norm": 0.14892020043818568, "learning_rate": 2.505083515167571e-05, "loss": 0.4779, "num_tokens": 4827201715.0, "step": 6311 }, { "epoch": 2.3121736740862873, "grad_norm": 0.1381595568752998, "learning_rate": 2.504662422268361e-05, "loss": 0.4462, "num_tokens": 4828111559.0, "step": 6312 }, { "epoch": 2.3125400751122105, "grad_norm": 0.13394055751796546, "learning_rate": 2.5042413122030856e-05, "loss": 0.4434, "num_tokens": 4828903048.0, "step": 6313 }, { "epoch": 2.3129064761381333, "grad_norm": 0.15236419987801866, "learning_rate": 2.5038201849954736e-05, "loss": 0.4235, "num_tokens": 4829673790.0, "step": 6314 }, { "epoch": 2.313272877164056, "grad_norm": 0.12761356787314085, "learning_rate": 2.503399040669252e-05, "loss": 0.4426, "num_tokens": 4830450895.0, "step": 6315 }, { "epoch": 2.313639278189979, "grad_norm": 0.15172213528389233, "learning_rate": 2.502977879248151e-05, "loss": 0.4579, "num_tokens": 4831217889.0, "step": 6316 }, { "epoch": 2.3140056792159016, "grad_norm": 0.14814175971504598, "learning_rate": 2.5025567007559e-05, "loss": 0.4586, "num_tokens": 4831842714.0, "step": 6317 }, { "epoch": 2.314372080241825, "grad_norm": 0.15043789586907128, "learning_rate": 2.5021355052162296e-05, "loss": 0.4606, "num_tokens": 4832587671.0, "step": 6318 }, { "epoch": 2.3147384812677476, "grad_norm": 0.1492986805819486, "learning_rate": 2.501714292652873e-05, "loss": 0.4794, "num_tokens": 4833300759.0, "step": 6319 }, { "epoch": 2.3151048822936704, "grad_norm": 0.1403331021986898, "learning_rate": 2.5012930630895633e-05, "loss": 0.4212, "num_tokens": 4834126807.0, "step": 6320 }, { "epoch": 2.315471283319593, "grad_norm": 0.1435945837664108, "learning_rate": 2.5008718165500337e-05, "loss": 0.5098, "num_tokens": 4834850397.0, "step": 6321 }, { "epoch": 2.3158376843455164, "grad_norm": 0.14553236697372376, "learning_rate": 2.500450553058019e-05, "loss": 0.461, "num_tokens": 4835749546.0, "step": 6322 }, { "epoch": 2.316204085371439, "grad_norm": 0.12787426753082431, "learning_rate": 2.500029272637256e-05, "loss": 0.4556, "num_tokens": 4836513301.0, "step": 6323 }, { "epoch": 2.316570486397362, "grad_norm": 0.15182045577906672, "learning_rate": 2.4996079753114807e-05, "loss": 0.488, "num_tokens": 4837245328.0, "step": 6324 }, { "epoch": 2.3169368874232847, "grad_norm": 0.1400026544688773, "learning_rate": 2.4991866611044314e-05, "loss": 0.4631, "num_tokens": 4837994589.0, "step": 6325 }, { "epoch": 2.3173032884492075, "grad_norm": 0.14574267052569528, "learning_rate": 2.498765330039847e-05, "loss": 0.4554, "num_tokens": 4838717684.0, "step": 6326 }, { "epoch": 2.3176696894751307, "grad_norm": 0.14897210605993316, "learning_rate": 2.4983439821414668e-05, "loss": 0.4637, "num_tokens": 4839392155.0, "step": 6327 }, { "epoch": 2.3180360905010535, "grad_norm": 0.14466174223216421, "learning_rate": 2.497922617433032e-05, "loss": 0.4605, "num_tokens": 4840280733.0, "step": 6328 }, { "epoch": 2.3184024915269763, "grad_norm": 0.13195335973546202, "learning_rate": 2.4975012359382832e-05, "loss": 0.4611, "num_tokens": 4841177824.0, "step": 6329 }, { "epoch": 2.318768892552899, "grad_norm": 0.15474779588053747, "learning_rate": 2.4970798376809633e-05, "loss": 0.4483, "num_tokens": 4841760836.0, "step": 6330 }, { "epoch": 2.319135293578822, "grad_norm": 0.1394377395077133, "learning_rate": 2.496658422684817e-05, "loss": 0.4615, "num_tokens": 4842413490.0, "step": 6331 }, { "epoch": 2.319501694604745, "grad_norm": 0.15087662365715618, "learning_rate": 2.4962369909735874e-05, "loss": 0.445, "num_tokens": 4843067558.0, "step": 6332 }, { "epoch": 2.319868095630668, "grad_norm": 0.1495495252011466, "learning_rate": 2.4958155425710202e-05, "loss": 0.4554, "num_tokens": 4843703118.0, "step": 6333 }, { "epoch": 2.3202344966565907, "grad_norm": 0.15785021219834816, "learning_rate": 2.495394077500862e-05, "loss": 0.4285, "num_tokens": 4844530500.0, "step": 6334 }, { "epoch": 2.3206008976825134, "grad_norm": 0.1272030501360318, "learning_rate": 2.4949725957868592e-05, "loss": 0.4124, "num_tokens": 4845338350.0, "step": 6335 }, { "epoch": 2.320967298708436, "grad_norm": 0.13289880232366533, "learning_rate": 2.4945510974527612e-05, "loss": 0.4594, "num_tokens": 4846122408.0, "step": 6336 }, { "epoch": 2.3213336997343594, "grad_norm": 0.15108010982387912, "learning_rate": 2.4941295825223164e-05, "loss": 0.4786, "num_tokens": 4846819188.0, "step": 6337 }, { "epoch": 2.321700100760282, "grad_norm": 0.15141441653919832, "learning_rate": 2.4937080510192746e-05, "loss": 0.4593, "num_tokens": 4847582733.0, "step": 6338 }, { "epoch": 2.322066501786205, "grad_norm": 0.13694850600786, "learning_rate": 2.493286502967388e-05, "loss": 0.4556, "num_tokens": 4848403902.0, "step": 6339 }, { "epoch": 2.3224329028121278, "grad_norm": 0.14626743522240887, "learning_rate": 2.4928649383904074e-05, "loss": 0.437, "num_tokens": 4849118287.0, "step": 6340 }, { "epoch": 2.3227993038380506, "grad_norm": 0.14064548081966, "learning_rate": 2.492443357312086e-05, "loss": 0.4632, "num_tokens": 4849867308.0, "step": 6341 }, { "epoch": 2.323165704863974, "grad_norm": 0.14132875388329708, "learning_rate": 2.492021759756177e-05, "loss": 0.4608, "num_tokens": 4850599367.0, "step": 6342 }, { "epoch": 2.3235321058898966, "grad_norm": 0.13410946875682345, "learning_rate": 2.4916001457464364e-05, "loss": 0.4456, "num_tokens": 4851384750.0, "step": 6343 }, { "epoch": 2.3238985069158193, "grad_norm": 0.130123201761074, "learning_rate": 2.4911785153066196e-05, "loss": 0.4484, "num_tokens": 4852130090.0, "step": 6344 }, { "epoch": 2.324264907941742, "grad_norm": 0.14833506964685525, "learning_rate": 2.4907568684604816e-05, "loss": 0.4961, "num_tokens": 4852869740.0, "step": 6345 }, { "epoch": 2.324631308967665, "grad_norm": 0.15095885577097892, "learning_rate": 2.4903352052317818e-05, "loss": 0.466, "num_tokens": 4853606037.0, "step": 6346 }, { "epoch": 2.324997709993588, "grad_norm": 0.1600069381593941, "learning_rate": 2.4899135256442778e-05, "loss": 0.4715, "num_tokens": 4854150288.0, "step": 6347 }, { "epoch": 2.325364111019511, "grad_norm": 0.1363883789493149, "learning_rate": 2.489491829721729e-05, "loss": 0.4563, "num_tokens": 4855030365.0, "step": 6348 }, { "epoch": 2.3257305120454337, "grad_norm": 0.15034799862865028, "learning_rate": 2.4890701174878956e-05, "loss": 0.4895, "num_tokens": 4855864158.0, "step": 6349 }, { "epoch": 2.3260969130713565, "grad_norm": 0.1379119459680004, "learning_rate": 2.4886483889665393e-05, "loss": 0.4625, "num_tokens": 4856588262.0, "step": 6350 }, { "epoch": 2.3264633140972792, "grad_norm": 0.15078778627835276, "learning_rate": 2.4882266441814212e-05, "loss": 0.4624, "num_tokens": 4857324398.0, "step": 6351 }, { "epoch": 2.3268297151232025, "grad_norm": 0.1371922998134357, "learning_rate": 2.4878048831563052e-05, "loss": 0.4204, "num_tokens": 4858179027.0, "step": 6352 }, { "epoch": 2.3271961161491252, "grad_norm": 0.13565649881084588, "learning_rate": 2.487383105914954e-05, "loss": 0.4561, "num_tokens": 4858936143.0, "step": 6353 }, { "epoch": 2.327562517175048, "grad_norm": 0.13075296492584515, "learning_rate": 2.486961312481135e-05, "loss": 0.4588, "num_tokens": 4859698907.0, "step": 6354 }, { "epoch": 2.327928918200971, "grad_norm": 0.1532359812616064, "learning_rate": 2.486539502878611e-05, "loss": 0.4818, "num_tokens": 4860468171.0, "step": 6355 }, { "epoch": 2.3282953192268936, "grad_norm": 0.15316493406842094, "learning_rate": 2.486117677131151e-05, "loss": 0.4604, "num_tokens": 4861316886.0, "step": 6356 }, { "epoch": 2.328661720252817, "grad_norm": 0.13021681110828232, "learning_rate": 2.4856958352625208e-05, "loss": 0.4333, "num_tokens": 4862080642.0, "step": 6357 }, { "epoch": 2.3290281212787396, "grad_norm": 0.1367486797747263, "learning_rate": 2.48527397729649e-05, "loss": 0.4741, "num_tokens": 4862860245.0, "step": 6358 }, { "epoch": 2.3293945223046624, "grad_norm": 0.1411325762620283, "learning_rate": 2.4848521032568273e-05, "loss": 0.4613, "num_tokens": 4863629571.0, "step": 6359 }, { "epoch": 2.329760923330585, "grad_norm": 0.12645945322011065, "learning_rate": 2.4844302131673034e-05, "loss": 0.4595, "num_tokens": 4864499621.0, "step": 6360 }, { "epoch": 2.3301273243565084, "grad_norm": 0.13041051655343458, "learning_rate": 2.4840083070516895e-05, "loss": 0.433, "num_tokens": 4865222227.0, "step": 6361 }, { "epoch": 2.330493725382431, "grad_norm": 0.14338693466042668, "learning_rate": 2.4835863849337574e-05, "loss": 0.4555, "num_tokens": 4866008820.0, "step": 6362 }, { "epoch": 2.330860126408354, "grad_norm": 0.13749730592932474, "learning_rate": 2.4831644468372806e-05, "loss": 0.459, "num_tokens": 4866784662.0, "step": 6363 }, { "epoch": 2.3312265274342767, "grad_norm": 0.12554453554509673, "learning_rate": 2.4827424927860332e-05, "loss": 0.452, "num_tokens": 4867503091.0, "step": 6364 }, { "epoch": 2.3315929284601995, "grad_norm": 0.13997225639854624, "learning_rate": 2.482320522803789e-05, "loss": 0.4465, "num_tokens": 4868337701.0, "step": 6365 }, { "epoch": 2.3319593294861227, "grad_norm": 0.12263200942745543, "learning_rate": 2.4818985369143243e-05, "loss": 0.4489, "num_tokens": 4869038913.0, "step": 6366 }, { "epoch": 2.3323257305120455, "grad_norm": 0.13111230953006744, "learning_rate": 2.4814765351414162e-05, "loss": 0.432, "num_tokens": 4869845348.0, "step": 6367 }, { "epoch": 2.3326921315379683, "grad_norm": 0.13674582650175268, "learning_rate": 2.481054517508841e-05, "loss": 0.4372, "num_tokens": 4870729216.0, "step": 6368 }, { "epoch": 2.333058532563891, "grad_norm": 0.12408115576393196, "learning_rate": 2.4806324840403785e-05, "loss": 0.4214, "num_tokens": 4871411829.0, "step": 6369 }, { "epoch": 2.3334249335898143, "grad_norm": 0.12677710003313442, "learning_rate": 2.4802104347598064e-05, "loss": 0.439, "num_tokens": 4872332896.0, "step": 6370 }, { "epoch": 2.333791334615737, "grad_norm": 0.1322155376667244, "learning_rate": 2.4797883696909065e-05, "loss": 0.4322, "num_tokens": 4873075546.0, "step": 6371 }, { "epoch": 2.33415773564166, "grad_norm": 0.13562979695122113, "learning_rate": 2.479366288857459e-05, "loss": 0.4602, "num_tokens": 4873933248.0, "step": 6372 }, { "epoch": 2.3345241366675826, "grad_norm": 0.14255755954186466, "learning_rate": 2.4789441922832458e-05, "loss": 0.4778, "num_tokens": 4874577482.0, "step": 6373 }, { "epoch": 2.3348905376935054, "grad_norm": 0.13711106535864398, "learning_rate": 2.4785220799920496e-05, "loss": 0.4494, "num_tokens": 4875538958.0, "step": 6374 }, { "epoch": 2.3352569387194286, "grad_norm": 0.13381043819527447, "learning_rate": 2.4780999520076545e-05, "loss": 0.4718, "num_tokens": 4876291153.0, "step": 6375 }, { "epoch": 2.3356233397453514, "grad_norm": 0.13566735016451134, "learning_rate": 2.4776778083538448e-05, "loss": 0.4246, "num_tokens": 4877071141.0, "step": 6376 }, { "epoch": 2.335989740771274, "grad_norm": 0.14084579412244075, "learning_rate": 2.4772556490544068e-05, "loss": 0.4512, "num_tokens": 4877808457.0, "step": 6377 }, { "epoch": 2.336356141797197, "grad_norm": 0.13349399712716709, "learning_rate": 2.476833474133126e-05, "loss": 0.4448, "num_tokens": 4878689232.0, "step": 6378 }, { "epoch": 2.3367225428231198, "grad_norm": 0.13701848563686728, "learning_rate": 2.47641128361379e-05, "loss": 0.481, "num_tokens": 4879409180.0, "step": 6379 }, { "epoch": 2.337088943849043, "grad_norm": 0.12626501023999281, "learning_rate": 2.4759890775201862e-05, "loss": 0.4744, "num_tokens": 4880153763.0, "step": 6380 }, { "epoch": 2.3374553448749658, "grad_norm": 0.16171853732494448, "learning_rate": 2.475566855876106e-05, "loss": 0.4517, "num_tokens": 4880929058.0, "step": 6381 }, { "epoch": 2.3378217459008885, "grad_norm": 0.12261772763489043, "learning_rate": 2.4751446187053353e-05, "loss": 0.4381, "num_tokens": 4881729513.0, "step": 6382 }, { "epoch": 2.3381881469268113, "grad_norm": 0.1396882024142635, "learning_rate": 2.4747223660316686e-05, "loss": 0.4242, "num_tokens": 4882440073.0, "step": 6383 }, { "epoch": 2.338554547952734, "grad_norm": 0.1429296668268503, "learning_rate": 2.474300097878896e-05, "loss": 0.4921, "num_tokens": 4883089779.0, "step": 6384 }, { "epoch": 2.3389209489786573, "grad_norm": 0.14562564633253672, "learning_rate": 2.4738778142708095e-05, "loss": 0.4474, "num_tokens": 4883740561.0, "step": 6385 }, { "epoch": 2.33928735000458, "grad_norm": 0.14381775171490646, "learning_rate": 2.4734555152312034e-05, "loss": 0.469, "num_tokens": 4884429653.0, "step": 6386 }, { "epoch": 2.339653751030503, "grad_norm": 0.131980004974734, "learning_rate": 2.4730332007838707e-05, "loss": 0.4403, "num_tokens": 4885270609.0, "step": 6387 }, { "epoch": 2.3400201520564257, "grad_norm": 0.11943158015545416, "learning_rate": 2.472610870952609e-05, "loss": 0.4403, "num_tokens": 4885987586.0, "step": 6388 }, { "epoch": 2.3403865530823484, "grad_norm": 0.14887173306352433, "learning_rate": 2.472188525761211e-05, "loss": 0.4852, "num_tokens": 4886713003.0, "step": 6389 }, { "epoch": 2.3407529541082717, "grad_norm": 0.14603925722417235, "learning_rate": 2.4717661652334763e-05, "loss": 0.4893, "num_tokens": 4887397610.0, "step": 6390 }, { "epoch": 2.3411193551341944, "grad_norm": 0.14571124475021396, "learning_rate": 2.4713437893932007e-05, "loss": 0.4525, "num_tokens": 4888235849.0, "step": 6391 }, { "epoch": 2.3414857561601172, "grad_norm": 0.1299438905167106, "learning_rate": 2.470921398264184e-05, "loss": 0.4263, "num_tokens": 4888963727.0, "step": 6392 }, { "epoch": 2.34185215718604, "grad_norm": 0.1430702143563766, "learning_rate": 2.470498991870226e-05, "loss": 0.47, "num_tokens": 4889782799.0, "step": 6393 }, { "epoch": 2.342218558211963, "grad_norm": 0.13622814792837856, "learning_rate": 2.4700765702351253e-05, "loss": 0.4518, "num_tokens": 4890477697.0, "step": 6394 }, { "epoch": 2.342584959237886, "grad_norm": 0.1368194924314777, "learning_rate": 2.469654133382684e-05, "loss": 0.4202, "num_tokens": 4891219253.0, "step": 6395 }, { "epoch": 2.342951360263809, "grad_norm": 0.14213773371305233, "learning_rate": 2.4692316813367045e-05, "loss": 0.45, "num_tokens": 4892024425.0, "step": 6396 }, { "epoch": 2.3433177612897316, "grad_norm": 0.1314857054812741, "learning_rate": 2.468809214120989e-05, "loss": 0.4526, "num_tokens": 4892935201.0, "step": 6397 }, { "epoch": 2.3436841623156544, "grad_norm": 0.13255781679492032, "learning_rate": 2.4683867317593414e-05, "loss": 0.4929, "num_tokens": 4893634496.0, "step": 6398 }, { "epoch": 2.344050563341577, "grad_norm": 0.13847964254577047, "learning_rate": 2.4679642342755665e-05, "loss": 0.445, "num_tokens": 4894345409.0, "step": 6399 }, { "epoch": 2.3444169643675004, "grad_norm": 0.1383487775427429, "learning_rate": 2.46754172169347e-05, "loss": 0.4757, "num_tokens": 4895082026.0, "step": 6400 }, { "epoch": 2.344783365393423, "grad_norm": 0.14372411607355695, "learning_rate": 2.467119194036857e-05, "loss": 0.4675, "num_tokens": 4895797673.0, "step": 6401 }, { "epoch": 2.345149766419346, "grad_norm": 0.1694854250637446, "learning_rate": 2.4666966513295356e-05, "loss": 0.4632, "num_tokens": 4896474789.0, "step": 6402 }, { "epoch": 2.3455161674452687, "grad_norm": 0.13397775452414223, "learning_rate": 2.4662740935953134e-05, "loss": 0.4505, "num_tokens": 4897235650.0, "step": 6403 }, { "epoch": 2.3458825684711915, "grad_norm": 0.13239275524463975, "learning_rate": 2.4658515208579995e-05, "loss": 0.4455, "num_tokens": 4898107554.0, "step": 6404 }, { "epoch": 2.3462489694971147, "grad_norm": 0.1360450355650924, "learning_rate": 2.4654289331414036e-05, "loss": 0.4178, "num_tokens": 4898849676.0, "step": 6405 }, { "epoch": 2.3466153705230375, "grad_norm": 0.13974496537795733, "learning_rate": 2.4650063304693362e-05, "loss": 0.4514, "num_tokens": 4899552700.0, "step": 6406 }, { "epoch": 2.3469817715489603, "grad_norm": 0.13948132095857535, "learning_rate": 2.4645837128656087e-05, "loss": 0.4362, "num_tokens": 4900305762.0, "step": 6407 }, { "epoch": 2.347348172574883, "grad_norm": 0.13654624092458195, "learning_rate": 2.4641610803540328e-05, "loss": 0.4452, "num_tokens": 4901103981.0, "step": 6408 }, { "epoch": 2.3477145736008063, "grad_norm": 0.1340102061968953, "learning_rate": 2.4637384329584215e-05, "loss": 0.4623, "num_tokens": 4901856064.0, "step": 6409 }, { "epoch": 2.348080974626729, "grad_norm": 0.13018260489159367, "learning_rate": 2.4633157707025898e-05, "loss": 0.4545, "num_tokens": 4902547134.0, "step": 6410 }, { "epoch": 2.348447375652652, "grad_norm": 0.15407248514945032, "learning_rate": 2.4628930936103516e-05, "loss": 0.4631, "num_tokens": 4903246436.0, "step": 6411 }, { "epoch": 2.3488137766785746, "grad_norm": 0.14058374362686718, "learning_rate": 2.4624704017055218e-05, "loss": 0.4691, "num_tokens": 4903945727.0, "step": 6412 }, { "epoch": 2.3491801777044974, "grad_norm": 0.14144907463007694, "learning_rate": 2.4620476950119178e-05, "loss": 0.4373, "num_tokens": 4904717819.0, "step": 6413 }, { "epoch": 2.3495465787304206, "grad_norm": 0.12477929181725078, "learning_rate": 2.461624973553356e-05, "loss": 0.4442, "num_tokens": 4905562325.0, "step": 6414 }, { "epoch": 2.3499129797563434, "grad_norm": 0.13253134161077001, "learning_rate": 2.4612022373536554e-05, "loss": 0.4472, "num_tokens": 4906298786.0, "step": 6415 }, { "epoch": 2.350279380782266, "grad_norm": 0.14235622782542012, "learning_rate": 2.4607794864366344e-05, "loss": 0.4943, "num_tokens": 4907071801.0, "step": 6416 }, { "epoch": 2.350645781808189, "grad_norm": 0.13668041227768954, "learning_rate": 2.4603567208261125e-05, "loss": 0.4376, "num_tokens": 4907878706.0, "step": 6417 }, { "epoch": 2.351012182834112, "grad_norm": 0.13322721935846998, "learning_rate": 2.45993394054591e-05, "loss": 0.4545, "num_tokens": 4908681115.0, "step": 6418 }, { "epoch": 2.351378583860035, "grad_norm": 0.12679627241240957, "learning_rate": 2.4595111456198496e-05, "loss": 0.4485, "num_tokens": 4909545724.0, "step": 6419 }, { "epoch": 2.3517449848859577, "grad_norm": 0.12738211851141568, "learning_rate": 2.4590883360717524e-05, "loss": 0.4533, "num_tokens": 4910379576.0, "step": 6420 }, { "epoch": 2.3521113859118805, "grad_norm": 0.13866306165498887, "learning_rate": 2.4586655119254416e-05, "loss": 0.4075, "num_tokens": 4911089402.0, "step": 6421 }, { "epoch": 2.3524777869378033, "grad_norm": 0.13786463789328154, "learning_rate": 2.4582426732047403e-05, "loss": 0.4908, "num_tokens": 4911888761.0, "step": 6422 }, { "epoch": 2.3528441879637265, "grad_norm": 0.126442316638359, "learning_rate": 2.4578198199334753e-05, "loss": 0.4578, "num_tokens": 4912655777.0, "step": 6423 }, { "epoch": 2.3532105889896493, "grad_norm": 0.14634832138823045, "learning_rate": 2.4573969521354695e-05, "loss": 0.4699, "num_tokens": 4913408272.0, "step": 6424 }, { "epoch": 2.353576990015572, "grad_norm": 0.13497537124986383, "learning_rate": 2.4569740698345504e-05, "loss": 0.4303, "num_tokens": 4914047837.0, "step": 6425 }, { "epoch": 2.353943391041495, "grad_norm": 0.14080777914945575, "learning_rate": 2.4565511730545457e-05, "loss": 0.476, "num_tokens": 4914816626.0, "step": 6426 }, { "epoch": 2.3543097920674176, "grad_norm": 0.13003180731867237, "learning_rate": 2.4561282618192826e-05, "loss": 0.4458, "num_tokens": 4915624205.0, "step": 6427 }, { "epoch": 2.354676193093341, "grad_norm": 0.14714866986312886, "learning_rate": 2.4557053361525897e-05, "loss": 0.4919, "num_tokens": 4916386522.0, "step": 6428 }, { "epoch": 2.3550425941192636, "grad_norm": 0.14275755702933057, "learning_rate": 2.455282396078296e-05, "loss": 0.4309, "num_tokens": 4917167736.0, "step": 6429 }, { "epoch": 2.3554089951451864, "grad_norm": 0.13343386804981044, "learning_rate": 2.454859441620234e-05, "loss": 0.4018, "num_tokens": 4917893652.0, "step": 6430 }, { "epoch": 2.355775396171109, "grad_norm": 0.13220589171303138, "learning_rate": 2.454436472802233e-05, "loss": 0.4355, "num_tokens": 4918721557.0, "step": 6431 }, { "epoch": 2.356141797197032, "grad_norm": 0.15307678215138884, "learning_rate": 2.4540134896481252e-05, "loss": 0.4778, "num_tokens": 4919601155.0, "step": 6432 }, { "epoch": 2.356508198222955, "grad_norm": 0.13457313990892023, "learning_rate": 2.4535904921817436e-05, "loss": 0.4599, "num_tokens": 4920340135.0, "step": 6433 }, { "epoch": 2.356874599248878, "grad_norm": 0.15566655589751352, "learning_rate": 2.4531674804269225e-05, "loss": 0.4615, "num_tokens": 4921118435.0, "step": 6434 }, { "epoch": 2.3572410002748008, "grad_norm": 0.13870176117185076, "learning_rate": 2.452744454407496e-05, "loss": 0.4734, "num_tokens": 4921822145.0, "step": 6435 }, { "epoch": 2.3576074013007235, "grad_norm": 0.1402849555645169, "learning_rate": 2.452321414147298e-05, "loss": 0.4831, "num_tokens": 4922641969.0, "step": 6436 }, { "epoch": 2.3579738023266463, "grad_norm": 0.14501234293227086, "learning_rate": 2.4518983596701654e-05, "loss": 0.4831, "num_tokens": 4923543264.0, "step": 6437 }, { "epoch": 2.3583402033525696, "grad_norm": 0.15488678538489808, "learning_rate": 2.451475290999936e-05, "loss": 0.4433, "num_tokens": 4924310051.0, "step": 6438 }, { "epoch": 2.3587066043784923, "grad_norm": 0.12130294299006908, "learning_rate": 2.451052208160446e-05, "loss": 0.4353, "num_tokens": 4925171779.0, "step": 6439 }, { "epoch": 2.359073005404415, "grad_norm": 0.1512157109548103, "learning_rate": 2.4506291111755343e-05, "loss": 0.4516, "num_tokens": 4925960252.0, "step": 6440 }, { "epoch": 2.359439406430338, "grad_norm": 0.1467178102157809, "learning_rate": 2.450206000069041e-05, "loss": 0.456, "num_tokens": 4926645635.0, "step": 6441 }, { "epoch": 2.3598058074562607, "grad_norm": 0.13043008881344745, "learning_rate": 2.4497828748648042e-05, "loss": 0.4263, "num_tokens": 4927483323.0, "step": 6442 }, { "epoch": 2.360172208482184, "grad_norm": 0.13472031609417653, "learning_rate": 2.4493597355866667e-05, "loss": 0.4342, "num_tokens": 4928237665.0, "step": 6443 }, { "epoch": 2.3605386095081067, "grad_norm": 0.15126197558730997, "learning_rate": 2.448936582258468e-05, "loss": 0.4314, "num_tokens": 4929120200.0, "step": 6444 }, { "epoch": 2.3609050105340295, "grad_norm": 0.12176144978643352, "learning_rate": 2.448513414904053e-05, "loss": 0.4495, "num_tokens": 4929876600.0, "step": 6445 }, { "epoch": 2.3612714115599522, "grad_norm": 0.13505851374655764, "learning_rate": 2.4480902335472627e-05, "loss": 0.4629, "num_tokens": 4930676851.0, "step": 6446 }, { "epoch": 2.361637812585875, "grad_norm": 0.12400594757393951, "learning_rate": 2.4476670382119416e-05, "loss": 0.4328, "num_tokens": 4931439572.0, "step": 6447 }, { "epoch": 2.3620042136117982, "grad_norm": 0.1449871243825573, "learning_rate": 2.4472438289219353e-05, "loss": 0.4521, "num_tokens": 4932212682.0, "step": 6448 }, { "epoch": 2.362370614637721, "grad_norm": 0.12607515055872198, "learning_rate": 2.4468206057010878e-05, "loss": 0.4329, "num_tokens": 4933054912.0, "step": 6449 }, { "epoch": 2.362737015663644, "grad_norm": 0.13008566778804742, "learning_rate": 2.4463973685732474e-05, "loss": 0.442, "num_tokens": 4933907108.0, "step": 6450 }, { "epoch": 2.3631034166895666, "grad_norm": 0.13302128363809, "learning_rate": 2.4459741175622596e-05, "loss": 0.4754, "num_tokens": 4934650300.0, "step": 6451 }, { "epoch": 2.3634698177154894, "grad_norm": 0.13765094934726768, "learning_rate": 2.4455508526919727e-05, "loss": 0.4475, "num_tokens": 4935430649.0, "step": 6452 }, { "epoch": 2.3638362187414126, "grad_norm": 0.12996351654958932, "learning_rate": 2.4451275739862364e-05, "loss": 0.4892, "num_tokens": 4936122018.0, "step": 6453 }, { "epoch": 2.3642026197673354, "grad_norm": 0.1424625845712295, "learning_rate": 2.444704281468898e-05, "loss": 0.4539, "num_tokens": 4936873196.0, "step": 6454 }, { "epoch": 2.364569020793258, "grad_norm": 0.14169944222911113, "learning_rate": 2.4442809751638095e-05, "loss": 0.4949, "num_tokens": 4937592492.0, "step": 6455 }, { "epoch": 2.364935421819181, "grad_norm": 0.13689280842835394, "learning_rate": 2.443857655094821e-05, "loss": 0.4728, "num_tokens": 4938420181.0, "step": 6456 }, { "epoch": 2.365301822845104, "grad_norm": 0.13163032621672688, "learning_rate": 2.4434343212857857e-05, "loss": 0.452, "num_tokens": 4939177456.0, "step": 6457 }, { "epoch": 2.365668223871027, "grad_norm": 0.14105548486898317, "learning_rate": 2.4430109737605543e-05, "loss": 0.4358, "num_tokens": 4939790869.0, "step": 6458 }, { "epoch": 2.3660346248969497, "grad_norm": 0.14632040068688168, "learning_rate": 2.442587612542981e-05, "loss": 0.5075, "num_tokens": 4940503305.0, "step": 6459 }, { "epoch": 2.3664010259228725, "grad_norm": 0.15117361947813943, "learning_rate": 2.4421642376569196e-05, "loss": 0.4633, "num_tokens": 4941234378.0, "step": 6460 }, { "epoch": 2.3667674269487953, "grad_norm": 0.13453340665755795, "learning_rate": 2.4417408491262256e-05, "loss": 0.4265, "num_tokens": 4942098860.0, "step": 6461 }, { "epoch": 2.3671338279747185, "grad_norm": 0.12195327517945581, "learning_rate": 2.4413174469747546e-05, "loss": 0.4122, "num_tokens": 4942773242.0, "step": 6462 }, { "epoch": 2.3675002290006413, "grad_norm": 0.12402369042669598, "learning_rate": 2.4408940312263618e-05, "loss": 0.4472, "num_tokens": 4943608273.0, "step": 6463 }, { "epoch": 2.367866630026564, "grad_norm": 0.13486247562204604, "learning_rate": 2.4404706019049053e-05, "loss": 0.4461, "num_tokens": 4944358877.0, "step": 6464 }, { "epoch": 2.368233031052487, "grad_norm": 0.13075820707888253, "learning_rate": 2.440047159034243e-05, "loss": 0.4393, "num_tokens": 4945181686.0, "step": 6465 }, { "epoch": 2.36859943207841, "grad_norm": 0.14369079626738432, "learning_rate": 2.4396237026382332e-05, "loss": 0.4705, "num_tokens": 4945979715.0, "step": 6466 }, { "epoch": 2.368965833104333, "grad_norm": 0.11486486322374811, "learning_rate": 2.4392002327407357e-05, "loss": 0.4442, "num_tokens": 4946769702.0, "step": 6467 }, { "epoch": 2.3693322341302556, "grad_norm": 0.14670982204231695, "learning_rate": 2.4387767493656118e-05, "loss": 0.4714, "num_tokens": 4947594109.0, "step": 6468 }, { "epoch": 2.3696986351561784, "grad_norm": 0.12714762133439622, "learning_rate": 2.43835325253672e-05, "loss": 0.4887, "num_tokens": 4948332650.0, "step": 6469 }, { "epoch": 2.370065036182101, "grad_norm": 0.14288790108438562, "learning_rate": 2.4379297422779244e-05, "loss": 0.4399, "num_tokens": 4948989654.0, "step": 6470 }, { "epoch": 2.3704314372080244, "grad_norm": 0.14746712158938988, "learning_rate": 2.437506218613086e-05, "loss": 0.4832, "num_tokens": 4949833960.0, "step": 6471 }, { "epoch": 2.370797838233947, "grad_norm": 0.1353032477638792, "learning_rate": 2.4370826815660678e-05, "loss": 0.4567, "num_tokens": 4950565240.0, "step": 6472 }, { "epoch": 2.37116423925987, "grad_norm": 0.13572662846641792, "learning_rate": 2.4366591311607355e-05, "loss": 0.4557, "num_tokens": 4951350834.0, "step": 6473 }, { "epoch": 2.3715306402857927, "grad_norm": 0.12804875011990777, "learning_rate": 2.436235567420952e-05, "loss": 0.4621, "num_tokens": 4952253082.0, "step": 6474 }, { "epoch": 2.3718970413117155, "grad_norm": 0.1399885209254971, "learning_rate": 2.4358119903705847e-05, "loss": 0.4279, "num_tokens": 4953042907.0, "step": 6475 }, { "epoch": 2.3722634423376388, "grad_norm": 0.13855124394371604, "learning_rate": 2.435388400033498e-05, "loss": 0.4474, "num_tokens": 4953747454.0, "step": 6476 }, { "epoch": 2.3726298433635615, "grad_norm": 0.1367272487883251, "learning_rate": 2.4349647964335606e-05, "loss": 0.4278, "num_tokens": 4954515629.0, "step": 6477 }, { "epoch": 2.3729962443894843, "grad_norm": 0.13216218171844907, "learning_rate": 2.4345411795946387e-05, "loss": 0.4754, "num_tokens": 4955457084.0, "step": 6478 }, { "epoch": 2.373362645415407, "grad_norm": 0.14449628085245952, "learning_rate": 2.4341175495406015e-05, "loss": 0.4685, "num_tokens": 4956161723.0, "step": 6479 }, { "epoch": 2.37372904644133, "grad_norm": 0.13364321133076587, "learning_rate": 2.433693906295319e-05, "loss": 0.4468, "num_tokens": 4956807956.0, "step": 6480 }, { "epoch": 2.374095447467253, "grad_norm": 0.15098804946216796, "learning_rate": 2.4332702498826594e-05, "loss": 0.4583, "num_tokens": 4957556141.0, "step": 6481 }, { "epoch": 2.374461848493176, "grad_norm": 0.1411105319106853, "learning_rate": 2.4328465803264946e-05, "loss": 0.4546, "num_tokens": 4958316915.0, "step": 6482 }, { "epoch": 2.3748282495190987, "grad_norm": 0.1369322783873022, "learning_rate": 2.432422897650696e-05, "loss": 0.4375, "num_tokens": 4959031328.0, "step": 6483 }, { "epoch": 2.3751946505450214, "grad_norm": 0.14602676322991462, "learning_rate": 2.431999201879136e-05, "loss": 0.423, "num_tokens": 4959896275.0, "step": 6484 }, { "epoch": 2.375561051570944, "grad_norm": 0.1235662842070698, "learning_rate": 2.4315754930356875e-05, "loss": 0.4543, "num_tokens": 4960683770.0, "step": 6485 }, { "epoch": 2.3759274525968674, "grad_norm": 0.16045593408532846, "learning_rate": 2.4311517711442233e-05, "loss": 0.4556, "num_tokens": 4961509711.0, "step": 6486 }, { "epoch": 2.37629385362279, "grad_norm": 0.13137417070133384, "learning_rate": 2.430728036228619e-05, "loss": 0.4755, "num_tokens": 4962214817.0, "step": 6487 }, { "epoch": 2.376660254648713, "grad_norm": 0.13910673919583247, "learning_rate": 2.430304288312749e-05, "loss": 0.4259, "num_tokens": 4963072518.0, "step": 6488 }, { "epoch": 2.377026655674636, "grad_norm": 0.12912443993361575, "learning_rate": 2.429880527420489e-05, "loss": 0.4727, "num_tokens": 4963784134.0, "step": 6489 }, { "epoch": 2.3773930567005586, "grad_norm": 0.14190629708940172, "learning_rate": 2.4294567535757158e-05, "loss": 0.4548, "num_tokens": 4964570547.0, "step": 6490 }, { "epoch": 2.377759457726482, "grad_norm": 0.13351806151260512, "learning_rate": 2.429032966802308e-05, "loss": 0.4543, "num_tokens": 4965303768.0, "step": 6491 }, { "epoch": 2.3781258587524046, "grad_norm": 0.14189360613866506, "learning_rate": 2.428609167124142e-05, "loss": 0.4148, "num_tokens": 4966150126.0, "step": 6492 }, { "epoch": 2.3784922597783273, "grad_norm": 0.14704035421575498, "learning_rate": 2.428185354565097e-05, "loss": 0.4508, "num_tokens": 4966877356.0, "step": 6493 }, { "epoch": 2.37885866080425, "grad_norm": 0.12330089724055063, "learning_rate": 2.427761529149053e-05, "loss": 0.4795, "num_tokens": 4967677707.0, "step": 6494 }, { "epoch": 2.379225061830173, "grad_norm": 0.13989920476671963, "learning_rate": 2.4273376908998904e-05, "loss": 0.4623, "num_tokens": 4968481055.0, "step": 6495 }, { "epoch": 2.379591462856096, "grad_norm": 0.14081855774746896, "learning_rate": 2.4269138398414888e-05, "loss": 0.4435, "num_tokens": 4969188508.0, "step": 6496 }, { "epoch": 2.379957863882019, "grad_norm": 0.1357904884263981, "learning_rate": 2.4264899759977313e-05, "loss": 0.4306, "num_tokens": 4969972412.0, "step": 6497 }, { "epoch": 2.3803242649079417, "grad_norm": 0.12757663440786346, "learning_rate": 2.4260660993925e-05, "loss": 0.4278, "num_tokens": 4970726624.0, "step": 6498 }, { "epoch": 2.3806906659338645, "grad_norm": 0.13359941709507134, "learning_rate": 2.425642210049678e-05, "loss": 0.4441, "num_tokens": 4971584205.0, "step": 6499 }, { "epoch": 2.3810570669597872, "grad_norm": 0.13503735136061223, "learning_rate": 2.4252183079931492e-05, "loss": 0.4684, "num_tokens": 4972434758.0, "step": 6500 }, { "epoch": 2.3814234679857105, "grad_norm": 0.13672800348179204, "learning_rate": 2.424794393246797e-05, "loss": 0.4458, "num_tokens": 4973214207.0, "step": 6501 }, { "epoch": 2.3817898690116333, "grad_norm": 0.14247358506544736, "learning_rate": 2.4243704658345087e-05, "loss": 0.4426, "num_tokens": 4973904799.0, "step": 6502 }, { "epoch": 2.382156270037556, "grad_norm": 0.1419742831356699, "learning_rate": 2.423946525780169e-05, "loss": 0.4617, "num_tokens": 4974741295.0, "step": 6503 }, { "epoch": 2.382522671063479, "grad_norm": 0.13385602054486842, "learning_rate": 2.4235225731076647e-05, "loss": 0.4495, "num_tokens": 4975484995.0, "step": 6504 }, { "epoch": 2.382889072089402, "grad_norm": 0.1349053550151628, "learning_rate": 2.423098607840884e-05, "loss": 0.4721, "num_tokens": 4976227052.0, "step": 6505 }, { "epoch": 2.383255473115325, "grad_norm": 0.13725430429734453, "learning_rate": 2.4226746300037138e-05, "loss": 0.4357, "num_tokens": 4976900992.0, "step": 6506 }, { "epoch": 2.3836218741412476, "grad_norm": 0.1534197579686172, "learning_rate": 2.4222506396200444e-05, "loss": 0.4668, "num_tokens": 4977655071.0, "step": 6507 }, { "epoch": 2.3839882751671704, "grad_norm": 0.12917917798843884, "learning_rate": 2.421826636713764e-05, "loss": 0.4365, "num_tokens": 4978400284.0, "step": 6508 }, { "epoch": 2.384354676193093, "grad_norm": 0.15441208216571506, "learning_rate": 2.4214026213087625e-05, "loss": 0.4923, "num_tokens": 4979171657.0, "step": 6509 }, { "epoch": 2.3847210772190164, "grad_norm": 0.14020418599061035, "learning_rate": 2.4209785934289337e-05, "loss": 0.4756, "num_tokens": 4979885145.0, "step": 6510 }, { "epoch": 2.385087478244939, "grad_norm": 0.14222788663759833, "learning_rate": 2.4205545530981655e-05, "loss": 0.4546, "num_tokens": 4980783704.0, "step": 6511 }, { "epoch": 2.385453879270862, "grad_norm": 0.12721282379527818, "learning_rate": 2.420130500340353e-05, "loss": 0.4666, "num_tokens": 4981648126.0, "step": 6512 }, { "epoch": 2.3858202802967847, "grad_norm": 0.13455789638426738, "learning_rate": 2.4197064351793878e-05, "loss": 0.4292, "num_tokens": 4982454106.0, "step": 6513 }, { "epoch": 2.386186681322708, "grad_norm": 0.13079359951775418, "learning_rate": 2.4192823576391646e-05, "loss": 0.4639, "num_tokens": 4983238686.0, "step": 6514 }, { "epoch": 2.3865530823486307, "grad_norm": 0.15227479502295327, "learning_rate": 2.418858267743577e-05, "loss": 0.4653, "num_tokens": 4984098327.0, "step": 6515 }, { "epoch": 2.3869194833745535, "grad_norm": 0.14169722913460905, "learning_rate": 2.4184341655165203e-05, "loss": 0.4556, "num_tokens": 4984705182.0, "step": 6516 }, { "epoch": 2.3872858844004763, "grad_norm": 0.15698360341591586, "learning_rate": 2.4180100509818915e-05, "loss": 0.4692, "num_tokens": 4985386506.0, "step": 6517 }, { "epoch": 2.387652285426399, "grad_norm": 0.14211804029322014, "learning_rate": 2.417585924163586e-05, "loss": 0.4686, "num_tokens": 4986114024.0, "step": 6518 }, { "epoch": 2.3880186864523223, "grad_norm": 0.14321005520045083, "learning_rate": 2.4171617850855008e-05, "loss": 0.4494, "num_tokens": 4986817878.0, "step": 6519 }, { "epoch": 2.388385087478245, "grad_norm": 0.14258687086129854, "learning_rate": 2.416737633771535e-05, "loss": 0.4587, "num_tokens": 4987536038.0, "step": 6520 }, { "epoch": 2.388751488504168, "grad_norm": 0.15432526666386692, "learning_rate": 2.4163134702455853e-05, "loss": 0.4601, "num_tokens": 4988230211.0, "step": 6521 }, { "epoch": 2.3891178895300906, "grad_norm": 0.14336989242404208, "learning_rate": 2.415889294531554e-05, "loss": 0.4359, "num_tokens": 4988997643.0, "step": 6522 }, { "epoch": 2.3894842905560134, "grad_norm": 0.17235569269594525, "learning_rate": 2.4154651066533378e-05, "loss": 0.4387, "num_tokens": 4989708725.0, "step": 6523 }, { "epoch": 2.3898506915819366, "grad_norm": 0.13634084205294927, "learning_rate": 2.4150409066348395e-05, "loss": 0.4408, "num_tokens": 4990475350.0, "step": 6524 }, { "epoch": 2.3902170926078594, "grad_norm": 0.13863279899565273, "learning_rate": 2.4146166944999602e-05, "loss": 0.4509, "num_tokens": 4991177490.0, "step": 6525 }, { "epoch": 2.390583493633782, "grad_norm": 0.1452835333370407, "learning_rate": 2.414192470272601e-05, "loss": 0.4426, "num_tokens": 4991965177.0, "step": 6526 }, { "epoch": 2.390949894659705, "grad_norm": 0.1272909032301065, "learning_rate": 2.4137682339766655e-05, "loss": 0.4777, "num_tokens": 4992671887.0, "step": 6527 }, { "epoch": 2.3913162956856278, "grad_norm": 0.14798036360067685, "learning_rate": 2.4133439856360564e-05, "loss": 0.4434, "num_tokens": 4993261726.0, "step": 6528 }, { "epoch": 2.391682696711551, "grad_norm": 0.14522791217733233, "learning_rate": 2.412919725274679e-05, "loss": 0.466, "num_tokens": 4994030028.0, "step": 6529 }, { "epoch": 2.3920490977374738, "grad_norm": 0.13753434719733348, "learning_rate": 2.4124954529164363e-05, "loss": 0.4824, "num_tokens": 4994738620.0, "step": 6530 }, { "epoch": 2.3924154987633965, "grad_norm": 0.13352772674310298, "learning_rate": 2.412071168585235e-05, "loss": 0.4474, "num_tokens": 4995524660.0, "step": 6531 }, { "epoch": 2.3927818997893193, "grad_norm": 0.13024078301134676, "learning_rate": 2.4116468723049805e-05, "loss": 0.4552, "num_tokens": 4996253894.0, "step": 6532 }, { "epoch": 2.393148300815242, "grad_norm": 0.1407516560291044, "learning_rate": 2.4112225640995806e-05, "loss": 0.4357, "num_tokens": 4997125856.0, "step": 6533 }, { "epoch": 2.3935147018411653, "grad_norm": 0.1601576026603575, "learning_rate": 2.410798243992942e-05, "loss": 0.4909, "num_tokens": 4997838790.0, "step": 6534 }, { "epoch": 2.393881102867088, "grad_norm": 0.14573839421226212, "learning_rate": 2.4103739120089723e-05, "loss": 0.488, "num_tokens": 4998482101.0, "step": 6535 }, { "epoch": 2.394247503893011, "grad_norm": 0.15285079021782694, "learning_rate": 2.409949568171581e-05, "loss": 0.4223, "num_tokens": 4999221152.0, "step": 6536 }, { "epoch": 2.3946139049189337, "grad_norm": 0.13875444833638315, "learning_rate": 2.409525212504678e-05, "loss": 0.4435, "num_tokens": 5000019755.0, "step": 6537 }, { "epoch": 2.3949803059448564, "grad_norm": 0.13652059522819426, "learning_rate": 2.4091008450321724e-05, "loss": 0.4453, "num_tokens": 5000809944.0, "step": 6538 }, { "epoch": 2.3953467069707797, "grad_norm": 0.1384773443326732, "learning_rate": 2.4086764657779762e-05, "loss": 0.4349, "num_tokens": 5001547578.0, "step": 6539 }, { "epoch": 2.3957131079967025, "grad_norm": 0.15626708247210083, "learning_rate": 2.4082520747659994e-05, "loss": 0.4802, "num_tokens": 5002338296.0, "step": 6540 }, { "epoch": 2.3960795090226252, "grad_norm": 0.14518695428994632, "learning_rate": 2.4078276720201555e-05, "loss": 0.4292, "num_tokens": 5003112895.0, "step": 6541 }, { "epoch": 2.396445910048548, "grad_norm": 0.1393514677591077, "learning_rate": 2.407403257564356e-05, "loss": 0.4583, "num_tokens": 5003762837.0, "step": 6542 }, { "epoch": 2.396812311074471, "grad_norm": 0.15413690392830012, "learning_rate": 2.4069788314225146e-05, "loss": 0.4257, "num_tokens": 5004558068.0, "step": 6543 }, { "epoch": 2.397178712100394, "grad_norm": 0.13552464741342207, "learning_rate": 2.4065543936185472e-05, "loss": 0.4347, "num_tokens": 5005347751.0, "step": 6544 }, { "epoch": 2.397545113126317, "grad_norm": 0.12929060538408912, "learning_rate": 2.406129944176366e-05, "loss": 0.4679, "num_tokens": 5006105803.0, "step": 6545 }, { "epoch": 2.3979115141522396, "grad_norm": 0.14231716053391977, "learning_rate": 2.4057054831198877e-05, "loss": 0.4634, "num_tokens": 5006818272.0, "step": 6546 }, { "epoch": 2.3982779151781624, "grad_norm": 0.15487570765941674, "learning_rate": 2.4052810104730283e-05, "loss": 0.4477, "num_tokens": 5007564676.0, "step": 6547 }, { "epoch": 2.398644316204085, "grad_norm": 0.15572173155916783, "learning_rate": 2.4048565262597046e-05, "loss": 0.461, "num_tokens": 5008342659.0, "step": 6548 }, { "epoch": 2.3990107172300084, "grad_norm": 0.13186531778538, "learning_rate": 2.4044320305038337e-05, "loss": 0.4457, "num_tokens": 5009142728.0, "step": 6549 }, { "epoch": 2.399377118255931, "grad_norm": 0.13125900654989972, "learning_rate": 2.404007523229334e-05, "loss": 0.4449, "num_tokens": 5009916013.0, "step": 6550 }, { "epoch": 2.399743519281854, "grad_norm": 0.13398877043339533, "learning_rate": 2.4035830044601228e-05, "loss": 0.4617, "num_tokens": 5010671912.0, "step": 6551 }, { "epoch": 2.4001099203077767, "grad_norm": 0.1358016940761346, "learning_rate": 2.403158474220122e-05, "loss": 0.4655, "num_tokens": 5011529420.0, "step": 6552 }, { "epoch": 2.4004763213337, "grad_norm": 0.14105210980721503, "learning_rate": 2.4027339325332486e-05, "loss": 0.472, "num_tokens": 5012360272.0, "step": 6553 }, { "epoch": 2.4008427223596227, "grad_norm": 0.13561301816901042, "learning_rate": 2.402309379423425e-05, "loss": 0.4231, "num_tokens": 5013099597.0, "step": 6554 }, { "epoch": 2.4012091233855455, "grad_norm": 0.1264179493124452, "learning_rate": 2.4018848149145726e-05, "loss": 0.4536, "num_tokens": 5014018895.0, "step": 6555 }, { "epoch": 2.4015755244114683, "grad_norm": 0.1289448270653473, "learning_rate": 2.4014602390306125e-05, "loss": 0.4622, "num_tokens": 5014818964.0, "step": 6556 }, { "epoch": 2.401941925437391, "grad_norm": 0.13759434723744568, "learning_rate": 2.4010356517954677e-05, "loss": 0.4386, "num_tokens": 5015679307.0, "step": 6557 }, { "epoch": 2.4023083264633143, "grad_norm": 0.11953847981523022, "learning_rate": 2.400611053233061e-05, "loss": 0.4403, "num_tokens": 5016459402.0, "step": 6558 }, { "epoch": 2.402674727489237, "grad_norm": 0.14211392272035211, "learning_rate": 2.4001864433673164e-05, "loss": 0.462, "num_tokens": 5017135326.0, "step": 6559 }, { "epoch": 2.40304112851516, "grad_norm": 0.13635238583472556, "learning_rate": 2.399761822222159e-05, "loss": 0.4459, "num_tokens": 5018003488.0, "step": 6560 }, { "epoch": 2.4034075295410826, "grad_norm": 0.14112501363231889, "learning_rate": 2.3993371898215127e-05, "loss": 0.4877, "num_tokens": 5018675105.0, "step": 6561 }, { "epoch": 2.403773930567006, "grad_norm": 0.14246672456729445, "learning_rate": 2.3989125461893035e-05, "loss": 0.466, "num_tokens": 5019436963.0, "step": 6562 }, { "epoch": 2.4041403315929286, "grad_norm": 0.1307796391990422, "learning_rate": 2.398487891349458e-05, "loss": 0.4345, "num_tokens": 5020176574.0, "step": 6563 }, { "epoch": 2.4045067326188514, "grad_norm": 0.13083205245802237, "learning_rate": 2.3980632253259036e-05, "loss": 0.4756, "num_tokens": 5020998464.0, "step": 6564 }, { "epoch": 2.404873133644774, "grad_norm": 0.13897573916132988, "learning_rate": 2.3976385481425672e-05, "loss": 0.4436, "num_tokens": 5021907334.0, "step": 6565 }, { "epoch": 2.405239534670697, "grad_norm": 0.13478239084983443, "learning_rate": 2.3972138598233774e-05, "loss": 0.466, "num_tokens": 5022803201.0, "step": 6566 }, { "epoch": 2.40560593569662, "grad_norm": 0.12462177736255219, "learning_rate": 2.3967891603922633e-05, "loss": 0.4683, "num_tokens": 5023540675.0, "step": 6567 }, { "epoch": 2.405972336722543, "grad_norm": 0.14676479411371446, "learning_rate": 2.396364449873154e-05, "loss": 0.4832, "num_tokens": 5024375651.0, "step": 6568 }, { "epoch": 2.4063387377484657, "grad_norm": 0.14562199399689355, "learning_rate": 2.3959397282899794e-05, "loss": 0.4741, "num_tokens": 5025207717.0, "step": 6569 }, { "epoch": 2.4067051387743885, "grad_norm": 0.142273552031599, "learning_rate": 2.3955149956666713e-05, "loss": 0.476, "num_tokens": 5025874730.0, "step": 6570 }, { "epoch": 2.4070715398003113, "grad_norm": 0.1410602473250493, "learning_rate": 2.39509025202716e-05, "loss": 0.4737, "num_tokens": 5026745764.0, "step": 6571 }, { "epoch": 2.4074379408262345, "grad_norm": 0.13496512275714811, "learning_rate": 2.3946654973953777e-05, "loss": 0.4698, "num_tokens": 5027354082.0, "step": 6572 }, { "epoch": 2.4078043418521573, "grad_norm": 0.1573403356367072, "learning_rate": 2.3942407317952565e-05, "loss": 0.4176, "num_tokens": 5028063821.0, "step": 6573 }, { "epoch": 2.40817074287808, "grad_norm": 0.12476311476880773, "learning_rate": 2.393815955250731e-05, "loss": 0.4475, "num_tokens": 5028804787.0, "step": 6574 }, { "epoch": 2.408537143904003, "grad_norm": 0.13792922344602682, "learning_rate": 2.3933911677857342e-05, "loss": 0.4012, "num_tokens": 5029652356.0, "step": 6575 }, { "epoch": 2.4089035449299256, "grad_norm": 0.12537862978826253, "learning_rate": 2.392966369424201e-05, "loss": 0.4482, "num_tokens": 5030520562.0, "step": 6576 }, { "epoch": 2.409269945955849, "grad_norm": 0.1369403313693816, "learning_rate": 2.3925415601900658e-05, "loss": 0.4972, "num_tokens": 5031419413.0, "step": 6577 }, { "epoch": 2.4096363469817716, "grad_norm": 0.13091133788238937, "learning_rate": 2.3921167401072642e-05, "loss": 0.4286, "num_tokens": 5032239013.0, "step": 6578 }, { "epoch": 2.4100027480076944, "grad_norm": 0.1272129315873751, "learning_rate": 2.3916919091997336e-05, "loss": 0.448, "num_tokens": 5032989865.0, "step": 6579 }, { "epoch": 2.410369149033617, "grad_norm": 0.14865586871698985, "learning_rate": 2.3912670674914094e-05, "loss": 0.458, "num_tokens": 5033715960.0, "step": 6580 }, { "epoch": 2.41073555005954, "grad_norm": 0.13930533657022698, "learning_rate": 2.390842215006231e-05, "loss": 0.4643, "num_tokens": 5034487363.0, "step": 6581 }, { "epoch": 2.411101951085463, "grad_norm": 0.141794728605898, "learning_rate": 2.3904173517681347e-05, "loss": 0.4543, "num_tokens": 5035242137.0, "step": 6582 }, { "epoch": 2.411468352111386, "grad_norm": 0.1402310949888908, "learning_rate": 2.3899924778010598e-05, "loss": 0.4773, "num_tokens": 5035957960.0, "step": 6583 }, { "epoch": 2.4118347531373088, "grad_norm": 0.1403044182190085, "learning_rate": 2.3895675931289456e-05, "loss": 0.5012, "num_tokens": 5036738745.0, "step": 6584 }, { "epoch": 2.4122011541632316, "grad_norm": 0.15260666708029527, "learning_rate": 2.3891426977757332e-05, "loss": 0.4549, "num_tokens": 5037535436.0, "step": 6585 }, { "epoch": 2.4125675551891543, "grad_norm": 0.1391553767428054, "learning_rate": 2.3887177917653612e-05, "loss": 0.4493, "num_tokens": 5038298267.0, "step": 6586 }, { "epoch": 2.4129339562150776, "grad_norm": 0.1240922492140027, "learning_rate": 2.3882928751217723e-05, "loss": 0.4477, "num_tokens": 5039114853.0, "step": 6587 }, { "epoch": 2.4133003572410003, "grad_norm": 0.1360634459991296, "learning_rate": 2.387867947868907e-05, "loss": 0.468, "num_tokens": 5039941511.0, "step": 6588 }, { "epoch": 2.413666758266923, "grad_norm": 0.13875942369553534, "learning_rate": 2.3874430100307087e-05, "loss": 0.4971, "num_tokens": 5040612950.0, "step": 6589 }, { "epoch": 2.414033159292846, "grad_norm": 0.13595029118133048, "learning_rate": 2.3870180616311197e-05, "loss": 0.449, "num_tokens": 5041380898.0, "step": 6590 }, { "epoch": 2.4143995603187687, "grad_norm": 0.14338209744291178, "learning_rate": 2.386593102694084e-05, "loss": 0.4405, "num_tokens": 5042091679.0, "step": 6591 }, { "epoch": 2.414765961344692, "grad_norm": 0.1457804462912727, "learning_rate": 2.3861681332435448e-05, "loss": 0.4483, "num_tokens": 5042783322.0, "step": 6592 }, { "epoch": 2.4151323623706147, "grad_norm": 0.13193330120187716, "learning_rate": 2.385743153303448e-05, "loss": 0.4246, "num_tokens": 5043652121.0, "step": 6593 }, { "epoch": 2.4154987633965375, "grad_norm": 0.14960710016056644, "learning_rate": 2.3853181628977385e-05, "loss": 0.4601, "num_tokens": 5044393022.0, "step": 6594 }, { "epoch": 2.4158651644224602, "grad_norm": 0.1323840010529835, "learning_rate": 2.3848931620503614e-05, "loss": 0.5015, "num_tokens": 5045064614.0, "step": 6595 }, { "epoch": 2.416231565448383, "grad_norm": 0.15650734831378055, "learning_rate": 2.384468150785264e-05, "loss": 0.4499, "num_tokens": 5045894819.0, "step": 6596 }, { "epoch": 2.4165979664743062, "grad_norm": 0.1504081560603282, "learning_rate": 2.3840431291263935e-05, "loss": 0.4857, "num_tokens": 5046562191.0, "step": 6597 }, { "epoch": 2.416964367500229, "grad_norm": 0.15928061098571902, "learning_rate": 2.3836180970976967e-05, "loss": 0.4709, "num_tokens": 5047312548.0, "step": 6598 }, { "epoch": 2.417330768526152, "grad_norm": 0.14234713554595893, "learning_rate": 2.3831930547231226e-05, "loss": 0.4621, "num_tokens": 5048109607.0, "step": 6599 }, { "epoch": 2.4176971695520746, "grad_norm": 0.149938109053793, "learning_rate": 2.382768002026619e-05, "loss": 0.4366, "num_tokens": 5048904528.0, "step": 6600 }, { "epoch": 2.418063570577998, "grad_norm": 0.1337384727724186, "learning_rate": 2.382342939032137e-05, "loss": 0.4329, "num_tokens": 5049733748.0, "step": 6601 }, { "epoch": 2.4184299716039206, "grad_norm": 0.12868073644084357, "learning_rate": 2.381917865763626e-05, "loss": 0.4545, "num_tokens": 5050461885.0, "step": 6602 }, { "epoch": 2.4187963726298434, "grad_norm": 0.15499626720630974, "learning_rate": 2.381492782245035e-05, "loss": 0.4412, "num_tokens": 5051325804.0, "step": 6603 }, { "epoch": 2.419162773655766, "grad_norm": 0.14422131701333166, "learning_rate": 2.3810676885003168e-05, "loss": 0.4756, "num_tokens": 5052156625.0, "step": 6604 }, { "epoch": 2.419529174681689, "grad_norm": 0.13643799598151327, "learning_rate": 2.3806425845534225e-05, "loss": 0.4849, "num_tokens": 5052908539.0, "step": 6605 }, { "epoch": 2.419895575707612, "grad_norm": 0.1417631248335894, "learning_rate": 2.3802174704283053e-05, "loss": 0.4703, "num_tokens": 5053697790.0, "step": 6606 }, { "epoch": 2.420261976733535, "grad_norm": 0.13411569304058163, "learning_rate": 2.3797923461489165e-05, "loss": 0.4759, "num_tokens": 5054464007.0, "step": 6607 }, { "epoch": 2.4206283777594577, "grad_norm": 0.136323238962025, "learning_rate": 2.3793672117392104e-05, "loss": 0.454, "num_tokens": 5055220719.0, "step": 6608 }, { "epoch": 2.4209947787853805, "grad_norm": 0.12420872667062446, "learning_rate": 2.3789420672231413e-05, "loss": 0.4641, "num_tokens": 5056027989.0, "step": 6609 }, { "epoch": 2.4213611798113037, "grad_norm": 0.14507221636586917, "learning_rate": 2.3785169126246635e-05, "loss": 0.4772, "num_tokens": 5056936087.0, "step": 6610 }, { "epoch": 2.4217275808372265, "grad_norm": 0.12026117687777294, "learning_rate": 2.3780917479677314e-05, "loss": 0.4766, "num_tokens": 5057817643.0, "step": 6611 }, { "epoch": 2.4220939818631493, "grad_norm": 0.1385583817585125, "learning_rate": 2.377666573276302e-05, "loss": 0.4654, "num_tokens": 5058639368.0, "step": 6612 }, { "epoch": 2.422460382889072, "grad_norm": 0.12662310304433272, "learning_rate": 2.3772413885743307e-05, "loss": 0.4298, "num_tokens": 5059344816.0, "step": 6613 }, { "epoch": 2.422826783914995, "grad_norm": 0.14098478969312672, "learning_rate": 2.3768161938857747e-05, "loss": 0.4661, "num_tokens": 5060055093.0, "step": 6614 }, { "epoch": 2.423193184940918, "grad_norm": 0.13585807287480917, "learning_rate": 2.3763909892345906e-05, "loss": 0.4621, "num_tokens": 5060724664.0, "step": 6615 }, { "epoch": 2.423559585966841, "grad_norm": 0.14815867616206277, "learning_rate": 2.3759657746447373e-05, "loss": 0.4654, "num_tokens": 5061418572.0, "step": 6616 }, { "epoch": 2.4239259869927636, "grad_norm": 0.14105928575311147, "learning_rate": 2.3755405501401736e-05, "loss": 0.4814, "num_tokens": 5062105282.0, "step": 6617 }, { "epoch": 2.4242923880186864, "grad_norm": 0.1430154868882301, "learning_rate": 2.3751153157448577e-05, "loss": 0.4603, "num_tokens": 5062946524.0, "step": 6618 }, { "epoch": 2.424658789044609, "grad_norm": 0.11956098374961192, "learning_rate": 2.3746900714827494e-05, "loss": 0.4505, "num_tokens": 5063873184.0, "step": 6619 }, { "epoch": 2.4250251900705324, "grad_norm": 0.136979533050107, "learning_rate": 2.3742648173778086e-05, "loss": 0.4428, "num_tokens": 5064647374.0, "step": 6620 }, { "epoch": 2.425391591096455, "grad_norm": 0.12556939035209932, "learning_rate": 2.373839553453997e-05, "loss": 0.4793, "num_tokens": 5065429624.0, "step": 6621 }, { "epoch": 2.425757992122378, "grad_norm": 0.1321424675674094, "learning_rate": 2.3734142797352763e-05, "loss": 0.4372, "num_tokens": 5066186541.0, "step": 6622 }, { "epoch": 2.4261243931483008, "grad_norm": 0.13306173732479112, "learning_rate": 2.372988996245606e-05, "loss": 0.4612, "num_tokens": 5066972611.0, "step": 6623 }, { "epoch": 2.4264907941742235, "grad_norm": 0.13469446049142744, "learning_rate": 2.3725637030089507e-05, "loss": 0.4355, "num_tokens": 5067731833.0, "step": 6624 }, { "epoch": 2.4268571952001468, "grad_norm": 0.1448189100432271, "learning_rate": 2.372138400049272e-05, "loss": 0.4755, "num_tokens": 5068463950.0, "step": 6625 }, { "epoch": 2.4272235962260695, "grad_norm": 0.1329125409353584, "learning_rate": 2.3717130873905348e-05, "loss": 0.424, "num_tokens": 5069241786.0, "step": 6626 }, { "epoch": 2.4275899972519923, "grad_norm": 0.13689045142960857, "learning_rate": 2.3712877650567018e-05, "loss": 0.4505, "num_tokens": 5070047887.0, "step": 6627 }, { "epoch": 2.427956398277915, "grad_norm": 0.12960570149847006, "learning_rate": 2.3708624330717385e-05, "loss": 0.4739, "num_tokens": 5070798857.0, "step": 6628 }, { "epoch": 2.428322799303838, "grad_norm": 0.13777170668423994, "learning_rate": 2.37043709145961e-05, "loss": 0.4588, "num_tokens": 5071590657.0, "step": 6629 }, { "epoch": 2.428689200329761, "grad_norm": 0.16010708388193867, "learning_rate": 2.3700117402442806e-05, "loss": 0.4656, "num_tokens": 5072217333.0, "step": 6630 }, { "epoch": 2.429055601355684, "grad_norm": 0.13607307272560706, "learning_rate": 2.3695863794497186e-05, "loss": 0.4534, "num_tokens": 5072935629.0, "step": 6631 }, { "epoch": 2.4294220023816067, "grad_norm": 0.14363634181035945, "learning_rate": 2.3691610090998897e-05, "loss": 0.4585, "num_tokens": 5073858588.0, "step": 6632 }, { "epoch": 2.4297884034075294, "grad_norm": 0.13520701745410907, "learning_rate": 2.3687356292187606e-05, "loss": 0.4575, "num_tokens": 5074602235.0, "step": 6633 }, { "epoch": 2.430154804433452, "grad_norm": 0.13917307487910197, "learning_rate": 2.3683102398303006e-05, "loss": 0.4449, "num_tokens": 5075281194.0, "step": 6634 }, { "epoch": 2.4305212054593754, "grad_norm": 0.12877295678083098, "learning_rate": 2.367884840958476e-05, "loss": 0.4517, "num_tokens": 5076125936.0, "step": 6635 }, { "epoch": 2.4308876064852982, "grad_norm": 0.1239396506250629, "learning_rate": 2.367459432627258e-05, "loss": 0.4199, "num_tokens": 5076962179.0, "step": 6636 }, { "epoch": 2.431254007511221, "grad_norm": 0.126391852997349, "learning_rate": 2.3670340148606142e-05, "loss": 0.4606, "num_tokens": 5077632484.0, "step": 6637 }, { "epoch": 2.431620408537144, "grad_norm": 0.14875729608850688, "learning_rate": 2.366608587682516e-05, "loss": 0.4595, "num_tokens": 5078351899.0, "step": 6638 }, { "epoch": 2.4319868095630666, "grad_norm": 0.14175513785002863, "learning_rate": 2.3661831511169333e-05, "loss": 0.4631, "num_tokens": 5079073948.0, "step": 6639 }, { "epoch": 2.43235321058899, "grad_norm": 0.12392807693106898, "learning_rate": 2.3657577051878365e-05, "loss": 0.4385, "num_tokens": 5079896866.0, "step": 6640 }, { "epoch": 2.4327196116149126, "grad_norm": 0.161310312068425, "learning_rate": 2.3653322499191986e-05, "loss": 0.4651, "num_tokens": 5080644821.0, "step": 6641 }, { "epoch": 2.4330860126408353, "grad_norm": 0.1278174262967481, "learning_rate": 2.3649067853349902e-05, "loss": 0.4458, "num_tokens": 5081355539.0, "step": 6642 }, { "epoch": 2.433452413666758, "grad_norm": 0.13849865307927264, "learning_rate": 2.364481311459184e-05, "loss": 0.4545, "num_tokens": 5082124730.0, "step": 6643 }, { "epoch": 2.433818814692681, "grad_norm": 0.13753525706787276, "learning_rate": 2.3640558283157548e-05, "loss": 0.4412, "num_tokens": 5082891800.0, "step": 6644 }, { "epoch": 2.434185215718604, "grad_norm": 0.13805218524452498, "learning_rate": 2.3636303359286745e-05, "loss": 0.4265, "num_tokens": 5083710358.0, "step": 6645 }, { "epoch": 2.434551616744527, "grad_norm": 0.13247454519213314, "learning_rate": 2.363204834321918e-05, "loss": 0.4678, "num_tokens": 5084441128.0, "step": 6646 }, { "epoch": 2.4349180177704497, "grad_norm": 0.14268695134911383, "learning_rate": 2.36277932351946e-05, "loss": 0.4725, "num_tokens": 5085293927.0, "step": 6647 }, { "epoch": 2.4352844187963725, "grad_norm": 0.1379251414455047, "learning_rate": 2.3623538035452756e-05, "loss": 0.4262, "num_tokens": 5085948950.0, "step": 6648 }, { "epoch": 2.4356508198222953, "grad_norm": 0.14082430527114198, "learning_rate": 2.36192827442334e-05, "loss": 0.4347, "num_tokens": 5086627882.0, "step": 6649 }, { "epoch": 2.4360172208482185, "grad_norm": 0.15285275667238046, "learning_rate": 2.36150273617763e-05, "loss": 0.4535, "num_tokens": 5087408154.0, "step": 6650 }, { "epoch": 2.4363836218741413, "grad_norm": 0.12966627655763935, "learning_rate": 2.3610771888321228e-05, "loss": 0.4731, "num_tokens": 5088103648.0, "step": 6651 }, { "epoch": 2.436750022900064, "grad_norm": 0.14608577607861686, "learning_rate": 2.3606516324107947e-05, "loss": 0.4546, "num_tokens": 5088986599.0, "step": 6652 }, { "epoch": 2.437116423925987, "grad_norm": 0.14838451333905683, "learning_rate": 2.3602260669376243e-05, "loss": 0.4697, "num_tokens": 5089704522.0, "step": 6653 }, { "epoch": 2.43748282495191, "grad_norm": 0.12900716877046983, "learning_rate": 2.3598004924365898e-05, "loss": 0.4751, "num_tokens": 5090444917.0, "step": 6654 }, { "epoch": 2.437849225977833, "grad_norm": 0.15606705855425718, "learning_rate": 2.35937490893167e-05, "loss": 0.4643, "num_tokens": 5091220199.0, "step": 6655 }, { "epoch": 2.4382156270037556, "grad_norm": 0.13174197996510914, "learning_rate": 2.3589493164468434e-05, "loss": 0.4461, "num_tokens": 5092113983.0, "step": 6656 }, { "epoch": 2.4385820280296784, "grad_norm": 0.13225366868867547, "learning_rate": 2.3585237150060906e-05, "loss": 0.4424, "num_tokens": 5092950636.0, "step": 6657 }, { "epoch": 2.438948429055601, "grad_norm": 0.12799328005635624, "learning_rate": 2.358098104633391e-05, "loss": 0.4408, "num_tokens": 5093693117.0, "step": 6658 }, { "epoch": 2.4393148300815244, "grad_norm": 0.14424151136849242, "learning_rate": 2.357672485352727e-05, "loss": 0.4504, "num_tokens": 5094383953.0, "step": 6659 }, { "epoch": 2.439681231107447, "grad_norm": 0.13002438307728012, "learning_rate": 2.357246857188079e-05, "loss": 0.4642, "num_tokens": 5095256170.0, "step": 6660 }, { "epoch": 2.44004763213337, "grad_norm": 0.1296709999493091, "learning_rate": 2.356821220163428e-05, "loss": 0.4452, "num_tokens": 5096051711.0, "step": 6661 }, { "epoch": 2.4404140331592927, "grad_norm": 0.13977523367798944, "learning_rate": 2.356395574302758e-05, "loss": 0.4672, "num_tokens": 5096878386.0, "step": 6662 }, { "epoch": 2.440780434185216, "grad_norm": 0.13433273774174884, "learning_rate": 2.3559699196300514e-05, "loss": 0.4467, "num_tokens": 5097560852.0, "step": 6663 }, { "epoch": 2.4411468352111387, "grad_norm": 0.13648452140907535, "learning_rate": 2.3555442561692904e-05, "loss": 0.4179, "num_tokens": 5098266745.0, "step": 6664 }, { "epoch": 2.4415132362370615, "grad_norm": 0.12343504591774405, "learning_rate": 2.3551185839444594e-05, "loss": 0.4814, "num_tokens": 5099171061.0, "step": 6665 }, { "epoch": 2.4418796372629843, "grad_norm": 0.1369601565769362, "learning_rate": 2.3546929029795433e-05, "loss": 0.4288, "num_tokens": 5099830760.0, "step": 6666 }, { "epoch": 2.442246038288907, "grad_norm": 0.14926835688941778, "learning_rate": 2.354267213298526e-05, "loss": 0.4615, "num_tokens": 5100629211.0, "step": 6667 }, { "epoch": 2.4426124393148303, "grad_norm": 0.1479602842499995, "learning_rate": 2.353841514925394e-05, "loss": 0.4758, "num_tokens": 5101271917.0, "step": 6668 }, { "epoch": 2.442978840340753, "grad_norm": 0.1541904943192632, "learning_rate": 2.353415807884132e-05, "loss": 0.4421, "num_tokens": 5101898362.0, "step": 6669 }, { "epoch": 2.443345241366676, "grad_norm": 0.154457101955294, "learning_rate": 2.352990092198726e-05, "loss": 0.4703, "num_tokens": 5102607236.0, "step": 6670 }, { "epoch": 2.4437116423925986, "grad_norm": 0.14299953940993387, "learning_rate": 2.3525643678931647e-05, "loss": 0.4595, "num_tokens": 5103373970.0, "step": 6671 }, { "epoch": 2.4440780434185214, "grad_norm": 0.14545389719304613, "learning_rate": 2.3521386349914325e-05, "loss": 0.4652, "num_tokens": 5104131963.0, "step": 6672 }, { "epoch": 2.4444444444444446, "grad_norm": 0.14763513763343464, "learning_rate": 2.3517128935175188e-05, "loss": 0.4259, "num_tokens": 5105039376.0, "step": 6673 }, { "epoch": 2.4448108454703674, "grad_norm": 0.13337640289300404, "learning_rate": 2.3512871434954123e-05, "loss": 0.4379, "num_tokens": 5105842530.0, "step": 6674 }, { "epoch": 2.44517724649629, "grad_norm": 0.14987862926123474, "learning_rate": 2.350861384949101e-05, "loss": 0.4291, "num_tokens": 5106590873.0, "step": 6675 }, { "epoch": 2.445543647522213, "grad_norm": 0.13483907259702457, "learning_rate": 2.3504356179025737e-05, "loss": 0.4425, "num_tokens": 5107352465.0, "step": 6676 }, { "epoch": 2.4459100485481358, "grad_norm": 0.14830831882864284, "learning_rate": 2.35000984237982e-05, "loss": 0.4526, "num_tokens": 5108069396.0, "step": 6677 }, { "epoch": 2.446276449574059, "grad_norm": 0.14938884595019705, "learning_rate": 2.3495840584048315e-05, "loss": 0.4589, "num_tokens": 5108843774.0, "step": 6678 }, { "epoch": 2.4466428505999818, "grad_norm": 0.15171613311357812, "learning_rate": 2.3491582660015975e-05, "loss": 0.4568, "num_tokens": 5109624330.0, "step": 6679 }, { "epoch": 2.4470092516259045, "grad_norm": 0.13039885568994491, "learning_rate": 2.3487324651941087e-05, "loss": 0.4469, "num_tokens": 5110383303.0, "step": 6680 }, { "epoch": 2.4473756526518273, "grad_norm": 0.14004418964840554, "learning_rate": 2.3483066560063578e-05, "loss": 0.4647, "num_tokens": 5111143320.0, "step": 6681 }, { "epoch": 2.44774205367775, "grad_norm": 0.13338947900981332, "learning_rate": 2.347880838462336e-05, "loss": 0.4707, "num_tokens": 5111897854.0, "step": 6682 }, { "epoch": 2.4481084547036733, "grad_norm": 0.15003701054570376, "learning_rate": 2.3474550125860365e-05, "loss": 0.4751, "num_tokens": 5112567646.0, "step": 6683 }, { "epoch": 2.448474855729596, "grad_norm": 0.15541516880704212, "learning_rate": 2.347029178401452e-05, "loss": 0.4499, "num_tokens": 5113270689.0, "step": 6684 }, { "epoch": 2.448841256755519, "grad_norm": 0.13954461927336498, "learning_rate": 2.3466033359325748e-05, "loss": 0.4454, "num_tokens": 5113980929.0, "step": 6685 }, { "epoch": 2.4492076577814417, "grad_norm": 0.12716020517215332, "learning_rate": 2.3461774852034008e-05, "loss": 0.4316, "num_tokens": 5114679683.0, "step": 6686 }, { "epoch": 2.4495740588073645, "grad_norm": 0.15075191503914656, "learning_rate": 2.345751626237923e-05, "loss": 0.4478, "num_tokens": 5115437701.0, "step": 6687 }, { "epoch": 2.4499404598332877, "grad_norm": 0.135554880334052, "learning_rate": 2.3453257590601366e-05, "loss": 0.4585, "num_tokens": 5116227524.0, "step": 6688 }, { "epoch": 2.4503068608592105, "grad_norm": 0.1453178794713375, "learning_rate": 2.344899883694037e-05, "loss": 0.4537, "num_tokens": 5116931573.0, "step": 6689 }, { "epoch": 2.4506732618851332, "grad_norm": 0.1488702830486656, "learning_rate": 2.34447400016362e-05, "loss": 0.4811, "num_tokens": 5117690200.0, "step": 6690 }, { "epoch": 2.451039662911056, "grad_norm": 0.14646146383411854, "learning_rate": 2.3440481084928814e-05, "loss": 0.4803, "num_tokens": 5118395373.0, "step": 6691 }, { "epoch": 2.451406063936979, "grad_norm": 0.1443830141110946, "learning_rate": 2.3436222087058176e-05, "loss": 0.4501, "num_tokens": 5119182310.0, "step": 6692 }, { "epoch": 2.451772464962902, "grad_norm": 0.14196616581288501, "learning_rate": 2.3431963008264274e-05, "loss": 0.4761, "num_tokens": 5120042056.0, "step": 6693 }, { "epoch": 2.452138865988825, "grad_norm": 0.1214193770547081, "learning_rate": 2.3427703848787062e-05, "loss": 0.4566, "num_tokens": 5120831233.0, "step": 6694 }, { "epoch": 2.4525052670147476, "grad_norm": 0.1303612161135677, "learning_rate": 2.342344460886653e-05, "loss": 0.4475, "num_tokens": 5121620062.0, "step": 6695 }, { "epoch": 2.4528716680406704, "grad_norm": 0.12857687861087874, "learning_rate": 2.341918528874267e-05, "loss": 0.4472, "num_tokens": 5122479880.0, "step": 6696 }, { "epoch": 2.453238069066593, "grad_norm": 0.13169421219516575, "learning_rate": 2.341492588865547e-05, "loss": 0.4867, "num_tokens": 5123211640.0, "step": 6697 }, { "epoch": 2.4536044700925164, "grad_norm": 0.13765768330227646, "learning_rate": 2.341066640884491e-05, "loss": 0.4509, "num_tokens": 5123941479.0, "step": 6698 }, { "epoch": 2.453970871118439, "grad_norm": 0.14514784189620464, "learning_rate": 2.3406406849550996e-05, "loss": 0.4331, "num_tokens": 5124731364.0, "step": 6699 }, { "epoch": 2.454337272144362, "grad_norm": 0.13337561802522144, "learning_rate": 2.3402147211013735e-05, "loss": 0.4742, "num_tokens": 5125441282.0, "step": 6700 }, { "epoch": 2.4547036731702847, "grad_norm": 0.12813193359166564, "learning_rate": 2.3397887493473138e-05, "loss": 0.4663, "num_tokens": 5126219852.0, "step": 6701 }, { "epoch": 2.455070074196208, "grad_norm": 0.13391630233987137, "learning_rate": 2.33936276971692e-05, "loss": 0.4563, "num_tokens": 5126988804.0, "step": 6702 }, { "epoch": 2.4554364752221307, "grad_norm": 0.1329014640381085, "learning_rate": 2.3389367822341957e-05, "loss": 0.4458, "num_tokens": 5127797267.0, "step": 6703 }, { "epoch": 2.4558028762480535, "grad_norm": 0.12490023642970123, "learning_rate": 2.3385107869231414e-05, "loss": 0.4822, "num_tokens": 5128640768.0, "step": 6704 }, { "epoch": 2.4561692772739763, "grad_norm": 0.1306563569220906, "learning_rate": 2.3380847838077603e-05, "loss": 0.4572, "num_tokens": 5129415443.0, "step": 6705 }, { "epoch": 2.456535678299899, "grad_norm": 0.1315343021980056, "learning_rate": 2.3376587729120567e-05, "loss": 0.4646, "num_tokens": 5130201764.0, "step": 6706 }, { "epoch": 2.4569020793258223, "grad_norm": 0.13412653650278092, "learning_rate": 2.3372327542600304e-05, "loss": 0.4377, "num_tokens": 5130944047.0, "step": 6707 }, { "epoch": 2.457268480351745, "grad_norm": 0.12791349518371192, "learning_rate": 2.336806727875689e-05, "loss": 0.4354, "num_tokens": 5131547157.0, "step": 6708 }, { "epoch": 2.457634881377668, "grad_norm": 0.145252093170989, "learning_rate": 2.3363806937830344e-05, "loss": 0.4386, "num_tokens": 5132267902.0, "step": 6709 }, { "epoch": 2.4580012824035906, "grad_norm": 0.13047452400202, "learning_rate": 2.335954652006073e-05, "loss": 0.4584, "num_tokens": 5133162078.0, "step": 6710 }, { "epoch": 2.458367683429514, "grad_norm": 0.12344806568385969, "learning_rate": 2.3355286025688078e-05, "loss": 0.4682, "num_tokens": 5133937170.0, "step": 6711 }, { "epoch": 2.4587340844554366, "grad_norm": 0.14176783415735564, "learning_rate": 2.335102545495246e-05, "loss": 0.46, "num_tokens": 5134691365.0, "step": 6712 }, { "epoch": 2.4591004854813594, "grad_norm": 0.13830220803473783, "learning_rate": 2.3346764808093938e-05, "loss": 0.4874, "num_tokens": 5135332429.0, "step": 6713 }, { "epoch": 2.459466886507282, "grad_norm": 0.15927196348861186, "learning_rate": 2.3342504085352566e-05, "loss": 0.4441, "num_tokens": 5136204125.0, "step": 6714 }, { "epoch": 2.459833287533205, "grad_norm": 0.13798185233152394, "learning_rate": 2.3338243286968414e-05, "loss": 0.4816, "num_tokens": 5136915348.0, "step": 6715 }, { "epoch": 2.460199688559128, "grad_norm": 0.14518083997201794, "learning_rate": 2.333398241318156e-05, "loss": 0.4755, "num_tokens": 5137709801.0, "step": 6716 }, { "epoch": 2.460566089585051, "grad_norm": 0.15926440459153468, "learning_rate": 2.332972146423208e-05, "loss": 0.4591, "num_tokens": 5138560432.0, "step": 6717 }, { "epoch": 2.4609324906109737, "grad_norm": 0.12148686606775728, "learning_rate": 2.3325460440360045e-05, "loss": 0.4314, "num_tokens": 5139352375.0, "step": 6718 }, { "epoch": 2.4612988916368965, "grad_norm": 0.1408947273037767, "learning_rate": 2.332119934180555e-05, "loss": 0.4577, "num_tokens": 5140188350.0, "step": 6719 }, { "epoch": 2.4616652926628193, "grad_norm": 0.14163579212216393, "learning_rate": 2.3316938168808694e-05, "loss": 0.4732, "num_tokens": 5140966573.0, "step": 6720 }, { "epoch": 2.4620316936887425, "grad_norm": 0.13232085183810202, "learning_rate": 2.3312676921609555e-05, "loss": 0.4516, "num_tokens": 5141733504.0, "step": 6721 }, { "epoch": 2.4623980947146653, "grad_norm": 0.13547104883526984, "learning_rate": 2.3308415600448232e-05, "loss": 0.465, "num_tokens": 5142532766.0, "step": 6722 }, { "epoch": 2.462764495740588, "grad_norm": 0.15459181922815293, "learning_rate": 2.3304154205564834e-05, "loss": 0.444, "num_tokens": 5143243227.0, "step": 6723 }, { "epoch": 2.463130896766511, "grad_norm": 0.14491817723909117, "learning_rate": 2.3299892737199464e-05, "loss": 0.4718, "num_tokens": 5143985563.0, "step": 6724 }, { "epoch": 2.4634972977924336, "grad_norm": 0.1318257636771814, "learning_rate": 2.3295631195592233e-05, "loss": 0.444, "num_tokens": 5144754628.0, "step": 6725 }, { "epoch": 2.463863698818357, "grad_norm": 0.14761444269168814, "learning_rate": 2.329136958098326e-05, "loss": 0.4924, "num_tokens": 5145600206.0, "step": 6726 }, { "epoch": 2.4642300998442797, "grad_norm": 0.1472254535103118, "learning_rate": 2.3287107893612656e-05, "loss": 0.4706, "num_tokens": 5146384640.0, "step": 6727 }, { "epoch": 2.4645965008702024, "grad_norm": 0.12296948608512483, "learning_rate": 2.328284613372055e-05, "loss": 0.454, "num_tokens": 5147306555.0, "step": 6728 }, { "epoch": 2.464962901896125, "grad_norm": 0.1415483173820446, "learning_rate": 2.3278584301547057e-05, "loss": 0.4507, "num_tokens": 5148073647.0, "step": 6729 }, { "epoch": 2.465329302922048, "grad_norm": 0.12247326250199925, "learning_rate": 2.3274322397332326e-05, "loss": 0.4172, "num_tokens": 5148900111.0, "step": 6730 }, { "epoch": 2.465695703947971, "grad_norm": 0.13130294858402977, "learning_rate": 2.3270060421316485e-05, "loss": 0.4808, "num_tokens": 5149596063.0, "step": 6731 }, { "epoch": 2.466062104973894, "grad_norm": 0.13017404746418337, "learning_rate": 2.3265798373739666e-05, "loss": 0.4684, "num_tokens": 5150338593.0, "step": 6732 }, { "epoch": 2.466428505999817, "grad_norm": 0.12501029853044013, "learning_rate": 2.326153625484202e-05, "loss": 0.4206, "num_tokens": 5151190693.0, "step": 6733 }, { "epoch": 2.4667949070257396, "grad_norm": 0.12614717960300295, "learning_rate": 2.3257274064863693e-05, "loss": 0.4262, "num_tokens": 5151976687.0, "step": 6734 }, { "epoch": 2.4671613080516623, "grad_norm": 0.12282246502303455, "learning_rate": 2.3253011804044835e-05, "loss": 0.4324, "num_tokens": 5152831568.0, "step": 6735 }, { "epoch": 2.4675277090775856, "grad_norm": 0.1331472590848759, "learning_rate": 2.3248749472625595e-05, "loss": 0.4706, "num_tokens": 5153631245.0, "step": 6736 }, { "epoch": 2.4678941101035083, "grad_norm": 0.11933302963198117, "learning_rate": 2.3244487070846146e-05, "loss": 0.444, "num_tokens": 5154539023.0, "step": 6737 }, { "epoch": 2.468260511129431, "grad_norm": 0.12049582647244847, "learning_rate": 2.3240224598946637e-05, "loss": 0.4481, "num_tokens": 5155251611.0, "step": 6738 }, { "epoch": 2.468626912155354, "grad_norm": 0.14009632273005762, "learning_rate": 2.323596205716724e-05, "loss": 0.5095, "num_tokens": 5155931648.0, "step": 6739 }, { "epoch": 2.4689933131812767, "grad_norm": 0.14353458392971913, "learning_rate": 2.323169944574813e-05, "loss": 0.4469, "num_tokens": 5156653936.0, "step": 6740 }, { "epoch": 2.4693597142072, "grad_norm": 0.12158309527896259, "learning_rate": 2.3227436764929484e-05, "loss": 0.4446, "num_tokens": 5157360805.0, "step": 6741 }, { "epoch": 2.4697261152331227, "grad_norm": 0.14361802964544929, "learning_rate": 2.322317401495147e-05, "loss": 0.4415, "num_tokens": 5158024787.0, "step": 6742 }, { "epoch": 2.4700925162590455, "grad_norm": 0.12402339369125216, "learning_rate": 2.3218911196054282e-05, "loss": 0.446, "num_tokens": 5158774320.0, "step": 6743 }, { "epoch": 2.4704589172849682, "grad_norm": 0.13791050101582214, "learning_rate": 2.321464830847809e-05, "loss": 0.4344, "num_tokens": 5159483889.0, "step": 6744 }, { "epoch": 2.470825318310891, "grad_norm": 0.11951548475526791, "learning_rate": 2.3210385352463108e-05, "loss": 0.4651, "num_tokens": 5160278848.0, "step": 6745 }, { "epoch": 2.4711917193368143, "grad_norm": 0.1445672739924356, "learning_rate": 2.320612232824951e-05, "loss": 0.4374, "num_tokens": 5161080769.0, "step": 6746 }, { "epoch": 2.471558120362737, "grad_norm": 0.13971430086201045, "learning_rate": 2.3201859236077505e-05, "loss": 0.4809, "num_tokens": 5161887718.0, "step": 6747 }, { "epoch": 2.47192452138866, "grad_norm": 0.13407501730299806, "learning_rate": 2.3197596076187292e-05, "loss": 0.4666, "num_tokens": 5162682061.0, "step": 6748 }, { "epoch": 2.4722909224145826, "grad_norm": 0.13302175581118889, "learning_rate": 2.3193332848819073e-05, "loss": 0.4863, "num_tokens": 5163431057.0, "step": 6749 }, { "epoch": 2.472657323440506, "grad_norm": 0.1436951737101787, "learning_rate": 2.3189069554213068e-05, "loss": 0.4881, "num_tokens": 5164180591.0, "step": 6750 }, { "epoch": 2.4730237244664286, "grad_norm": 0.13796145423436582, "learning_rate": 2.3184806192609477e-05, "loss": 0.4724, "num_tokens": 5164930153.0, "step": 6751 }, { "epoch": 2.4733901254923514, "grad_norm": 0.13528331526800216, "learning_rate": 2.318054276424853e-05, "loss": 0.4676, "num_tokens": 5165702287.0, "step": 6752 }, { "epoch": 2.473756526518274, "grad_norm": 0.13614818528132347, "learning_rate": 2.3176279269370436e-05, "loss": 0.4574, "num_tokens": 5166452236.0, "step": 6753 }, { "epoch": 2.474122927544197, "grad_norm": 0.13136792892119623, "learning_rate": 2.317201570821543e-05, "loss": 0.4444, "num_tokens": 5167152892.0, "step": 6754 }, { "epoch": 2.47448932857012, "grad_norm": 0.13461758468293689, "learning_rate": 2.3167752081023737e-05, "loss": 0.4562, "num_tokens": 5167854838.0, "step": 6755 }, { "epoch": 2.474855729596043, "grad_norm": 0.1459811691526435, "learning_rate": 2.316348838803558e-05, "loss": 0.4853, "num_tokens": 5168594042.0, "step": 6756 }, { "epoch": 2.4752221306219657, "grad_norm": 0.13201440731582612, "learning_rate": 2.315922462949121e-05, "loss": 0.4611, "num_tokens": 5169342545.0, "step": 6757 }, { "epoch": 2.4755885316478885, "grad_norm": 0.14234857464896658, "learning_rate": 2.3154960805630856e-05, "loss": 0.4373, "num_tokens": 5170180421.0, "step": 6758 }, { "epoch": 2.4759549326738117, "grad_norm": 0.1238179834457839, "learning_rate": 2.3150696916694764e-05, "loss": 0.4435, "num_tokens": 5170877522.0, "step": 6759 }, { "epoch": 2.4763213336997345, "grad_norm": 0.1342479870933872, "learning_rate": 2.3146432962923185e-05, "loss": 0.4297, "num_tokens": 5171762166.0, "step": 6760 }, { "epoch": 2.4766877347256573, "grad_norm": 0.13734058996916437, "learning_rate": 2.3142168944556366e-05, "loss": 0.4746, "num_tokens": 5172530754.0, "step": 6761 }, { "epoch": 2.47705413575158, "grad_norm": 0.1318113426184077, "learning_rate": 2.313790486183456e-05, "loss": 0.411, "num_tokens": 5173309069.0, "step": 6762 }, { "epoch": 2.477420536777503, "grad_norm": 0.1323353496027134, "learning_rate": 2.313364071499802e-05, "loss": 0.4687, "num_tokens": 5174072148.0, "step": 6763 }, { "epoch": 2.477786937803426, "grad_norm": 0.1346300958711674, "learning_rate": 2.3129376504287015e-05, "loss": 0.4625, "num_tokens": 5174701068.0, "step": 6764 }, { "epoch": 2.478153338829349, "grad_norm": 0.1416487019047168, "learning_rate": 2.3125112229941814e-05, "loss": 0.4291, "num_tokens": 5175498773.0, "step": 6765 }, { "epoch": 2.4785197398552716, "grad_norm": 0.1308873542782595, "learning_rate": 2.312084789220268e-05, "loss": 0.451, "num_tokens": 5176317847.0, "step": 6766 }, { "epoch": 2.4788861408811944, "grad_norm": 0.12436576200576431, "learning_rate": 2.311658349130988e-05, "loss": 0.4348, "num_tokens": 5177127598.0, "step": 6767 }, { "epoch": 2.479252541907117, "grad_norm": 0.13616269076892287, "learning_rate": 2.31123190275037e-05, "loss": 0.4419, "num_tokens": 5177849034.0, "step": 6768 }, { "epoch": 2.4796189429330404, "grad_norm": 0.13784188984320225, "learning_rate": 2.3108054501024407e-05, "loss": 0.4409, "num_tokens": 5178647183.0, "step": 6769 }, { "epoch": 2.479985343958963, "grad_norm": 0.12498860566804858, "learning_rate": 2.31037899121123e-05, "loss": 0.4479, "num_tokens": 5179432347.0, "step": 6770 }, { "epoch": 2.480351744984886, "grad_norm": 0.12811401912645043, "learning_rate": 2.3099525261007646e-05, "loss": 0.4337, "num_tokens": 5180274014.0, "step": 6771 }, { "epoch": 2.4807181460108088, "grad_norm": 0.1426208667305648, "learning_rate": 2.309526054795075e-05, "loss": 0.449, "num_tokens": 5180934202.0, "step": 6772 }, { "epoch": 2.4810845470367315, "grad_norm": 0.13486202980566536, "learning_rate": 2.3090995773181902e-05, "loss": 0.4632, "num_tokens": 5181650531.0, "step": 6773 }, { "epoch": 2.4814509480626548, "grad_norm": 0.1458001549101922, "learning_rate": 2.308673093694139e-05, "loss": 0.4485, "num_tokens": 5182372929.0, "step": 6774 }, { "epoch": 2.4818173490885775, "grad_norm": 0.1321229467934668, "learning_rate": 2.308246603946953e-05, "loss": 0.4715, "num_tokens": 5183092168.0, "step": 6775 }, { "epoch": 2.4821837501145003, "grad_norm": 0.13280553684543056, "learning_rate": 2.307820108100661e-05, "loss": 0.4434, "num_tokens": 5183917517.0, "step": 6776 }, { "epoch": 2.482550151140423, "grad_norm": 0.12509389458624665, "learning_rate": 2.3073936061792952e-05, "loss": 0.4232, "num_tokens": 5184713022.0, "step": 6777 }, { "epoch": 2.482916552166346, "grad_norm": 0.13161306445250226, "learning_rate": 2.3069670982068852e-05, "loss": 0.454, "num_tokens": 5185407657.0, "step": 6778 }, { "epoch": 2.483282953192269, "grad_norm": 0.13971970150292712, "learning_rate": 2.306540584207463e-05, "loss": 0.4687, "num_tokens": 5186147141.0, "step": 6779 }, { "epoch": 2.483649354218192, "grad_norm": 0.1527274317651677, "learning_rate": 2.306114064205061e-05, "loss": 0.4711, "num_tokens": 5186919307.0, "step": 6780 }, { "epoch": 2.4840157552441147, "grad_norm": 0.11990869036394806, "learning_rate": 2.30568753822371e-05, "loss": 0.4187, "num_tokens": 5187666954.0, "step": 6781 }, { "epoch": 2.4843821562700374, "grad_norm": 0.12492054166990448, "learning_rate": 2.3052610062874433e-05, "loss": 0.4381, "num_tokens": 5188452987.0, "step": 6782 }, { "epoch": 2.4847485572959602, "grad_norm": 0.1501891684621074, "learning_rate": 2.304834468420294e-05, "loss": 0.4899, "num_tokens": 5189240783.0, "step": 6783 }, { "epoch": 2.4851149583218834, "grad_norm": 0.14268373446338162, "learning_rate": 2.3044079246462933e-05, "loss": 0.4474, "num_tokens": 5190013261.0, "step": 6784 }, { "epoch": 2.4854813593478062, "grad_norm": 0.13212754150913525, "learning_rate": 2.3039813749894773e-05, "loss": 0.5054, "num_tokens": 5190757414.0, "step": 6785 }, { "epoch": 2.485847760373729, "grad_norm": 0.14449039985021098, "learning_rate": 2.3035548194738775e-05, "loss": 0.4618, "num_tokens": 5191555777.0, "step": 6786 }, { "epoch": 2.486214161399652, "grad_norm": 0.15189996377504753, "learning_rate": 2.303128258123529e-05, "loss": 0.4483, "num_tokens": 5192334160.0, "step": 6787 }, { "epoch": 2.4865805624255746, "grad_norm": 0.14656933481245343, "learning_rate": 2.302701690962466e-05, "loss": 0.4772, "num_tokens": 5193060326.0, "step": 6788 }, { "epoch": 2.486946963451498, "grad_norm": 0.14323382436395599, "learning_rate": 2.3022751180147237e-05, "loss": 0.4664, "num_tokens": 5193873444.0, "step": 6789 }, { "epoch": 2.4873133644774206, "grad_norm": 0.13023692662899816, "learning_rate": 2.3018485393043364e-05, "loss": 0.4615, "num_tokens": 5194611127.0, "step": 6790 }, { "epoch": 2.4876797655033434, "grad_norm": 0.14967035275110102, "learning_rate": 2.30142195485534e-05, "loss": 0.478, "num_tokens": 5195351607.0, "step": 6791 }, { "epoch": 2.488046166529266, "grad_norm": 0.14941275075430574, "learning_rate": 2.3009953646917696e-05, "loss": 0.4848, "num_tokens": 5196093102.0, "step": 6792 }, { "epoch": 2.488412567555189, "grad_norm": 0.12643575685639652, "learning_rate": 2.300568768837662e-05, "loss": 0.4477, "num_tokens": 5196775812.0, "step": 6793 }, { "epoch": 2.488778968581112, "grad_norm": 0.1608229830565824, "learning_rate": 2.300142167317053e-05, "loss": 0.4413, "num_tokens": 5197384961.0, "step": 6794 }, { "epoch": 2.489145369607035, "grad_norm": 0.14730046577257064, "learning_rate": 2.2997155601539798e-05, "loss": 0.4593, "num_tokens": 5198208818.0, "step": 6795 }, { "epoch": 2.4895117706329577, "grad_norm": 0.13046897007826222, "learning_rate": 2.299288947372479e-05, "loss": 0.4321, "num_tokens": 5198932139.0, "step": 6796 }, { "epoch": 2.4898781716588805, "grad_norm": 0.13237267649538567, "learning_rate": 2.2988623289965878e-05, "loss": 0.4274, "num_tokens": 5199633630.0, "step": 6797 }, { "epoch": 2.4902445726848037, "grad_norm": 0.1467240448957578, "learning_rate": 2.2984357050503437e-05, "loss": 0.4545, "num_tokens": 5200392243.0, "step": 6798 }, { "epoch": 2.4906109737107265, "grad_norm": 0.1463688223666132, "learning_rate": 2.298009075557785e-05, "loss": 0.4979, "num_tokens": 5201084759.0, "step": 6799 }, { "epoch": 2.4909773747366493, "grad_norm": 0.1373958461680323, "learning_rate": 2.29758244054295e-05, "loss": 0.4566, "num_tokens": 5201889164.0, "step": 6800 }, { "epoch": 2.491343775762572, "grad_norm": 0.1331039283940417, "learning_rate": 2.2971558000298773e-05, "loss": 0.4562, "num_tokens": 5202636291.0, "step": 6801 }, { "epoch": 2.491710176788495, "grad_norm": 0.15000816463856842, "learning_rate": 2.296729154042605e-05, "loss": 0.4482, "num_tokens": 5203465693.0, "step": 6802 }, { "epoch": 2.492076577814418, "grad_norm": 0.1292355577640753, "learning_rate": 2.296302502605173e-05, "loss": 0.4163, "num_tokens": 5204231742.0, "step": 6803 }, { "epoch": 2.492442978840341, "grad_norm": 0.13455745818514248, "learning_rate": 2.295875845741621e-05, "loss": 0.4358, "num_tokens": 5204956060.0, "step": 6804 }, { "epoch": 2.4928093798662636, "grad_norm": 0.1437703553496949, "learning_rate": 2.2954491834759884e-05, "loss": 0.4444, "num_tokens": 5205791521.0, "step": 6805 }, { "epoch": 2.4931757808921864, "grad_norm": 0.1288982560987394, "learning_rate": 2.2950225158323147e-05, "loss": 0.4589, "num_tokens": 5206514465.0, "step": 6806 }, { "epoch": 2.4935421819181096, "grad_norm": 0.14027054994232793, "learning_rate": 2.294595842834641e-05, "loss": 0.4343, "num_tokens": 5207231461.0, "step": 6807 }, { "epoch": 2.4939085829440324, "grad_norm": 0.15157518748031568, "learning_rate": 2.294169164507008e-05, "loss": 0.4841, "num_tokens": 5208049096.0, "step": 6808 }, { "epoch": 2.494274983969955, "grad_norm": 0.13473452521833043, "learning_rate": 2.2937424808734573e-05, "loss": 0.4522, "num_tokens": 5208809042.0, "step": 6809 }, { "epoch": 2.494641384995878, "grad_norm": 0.14075193251575824, "learning_rate": 2.2933157919580284e-05, "loss": 0.4408, "num_tokens": 5209548097.0, "step": 6810 }, { "epoch": 2.4950077860218007, "grad_norm": 0.11890012917299285, "learning_rate": 2.2928890977847637e-05, "loss": 0.4336, "num_tokens": 5210342957.0, "step": 6811 }, { "epoch": 2.495374187047724, "grad_norm": 0.15078355859519252, "learning_rate": 2.292462398377707e-05, "loss": 0.4391, "num_tokens": 5211139612.0, "step": 6812 }, { "epoch": 2.4957405880736467, "grad_norm": 0.13797419588005608, "learning_rate": 2.2920356937608973e-05, "loss": 0.4638, "num_tokens": 5211985007.0, "step": 6813 }, { "epoch": 2.4961069890995695, "grad_norm": 0.12236394978638221, "learning_rate": 2.291608983958379e-05, "loss": 0.4827, "num_tokens": 5212789248.0, "step": 6814 }, { "epoch": 2.4964733901254923, "grad_norm": 0.1306922060535306, "learning_rate": 2.2911822689941956e-05, "loss": 0.4168, "num_tokens": 5213645257.0, "step": 6815 }, { "epoch": 2.496839791151415, "grad_norm": 0.13901540000007645, "learning_rate": 2.290755548892387e-05, "loss": 0.4584, "num_tokens": 5214330413.0, "step": 6816 }, { "epoch": 2.4972061921773383, "grad_norm": 0.12580007146705946, "learning_rate": 2.2903288236769997e-05, "loss": 0.4602, "num_tokens": 5215048683.0, "step": 6817 }, { "epoch": 2.497572593203261, "grad_norm": 0.1424902858357898, "learning_rate": 2.2899020933720766e-05, "loss": 0.4589, "num_tokens": 5215747815.0, "step": 6818 }, { "epoch": 2.497938994229184, "grad_norm": 0.13554040271951137, "learning_rate": 2.289475358001661e-05, "loss": 0.4269, "num_tokens": 5216461946.0, "step": 6819 }, { "epoch": 2.4983053952551066, "grad_norm": 0.12544690507842013, "learning_rate": 2.289048617589797e-05, "loss": 0.4866, "num_tokens": 5217195946.0, "step": 6820 }, { "epoch": 2.4986717962810294, "grad_norm": 0.14290658494548797, "learning_rate": 2.2886218721605288e-05, "loss": 0.4294, "num_tokens": 5217948381.0, "step": 6821 }, { "epoch": 2.4990381973069526, "grad_norm": 0.1304552822484146, "learning_rate": 2.2881951217379027e-05, "loss": 0.4431, "num_tokens": 5218708721.0, "step": 6822 }, { "epoch": 2.4994045983328754, "grad_norm": 0.13871880336884826, "learning_rate": 2.287768366345963e-05, "loss": 0.4845, "num_tokens": 5219412755.0, "step": 6823 }, { "epoch": 2.499770999358798, "grad_norm": 0.1449500600005718, "learning_rate": 2.2873416060087546e-05, "loss": 0.4667, "num_tokens": 5220225262.0, "step": 6824 }, { "epoch": 2.500137400384721, "grad_norm": 0.14139172192928123, "learning_rate": 2.2869148407503237e-05, "loss": 0.4708, "num_tokens": 5220936304.0, "step": 6825 }, { "epoch": 2.5005038014106438, "grad_norm": 0.12446804942660644, "learning_rate": 2.286488070594715e-05, "loss": 0.465, "num_tokens": 5221708967.0, "step": 6826 }, { "epoch": 2.500870202436567, "grad_norm": 0.14407612164246514, "learning_rate": 2.2860612955659765e-05, "loss": 0.4955, "num_tokens": 5222434608.0, "step": 6827 }, { "epoch": 2.5012366034624898, "grad_norm": 0.15624109878845485, "learning_rate": 2.2856345156881537e-05, "loss": 0.4514, "num_tokens": 5223173081.0, "step": 6828 }, { "epoch": 2.5016030044884126, "grad_norm": 0.12743714512437287, "learning_rate": 2.2852077309852927e-05, "loss": 0.4645, "num_tokens": 5223932653.0, "step": 6829 }, { "epoch": 2.5019694055143353, "grad_norm": 0.13766885090964198, "learning_rate": 2.284780941481442e-05, "loss": 0.4508, "num_tokens": 5224692820.0, "step": 6830 }, { "epoch": 2.502335806540258, "grad_norm": 0.1313725711171329, "learning_rate": 2.2843541472006475e-05, "loss": 0.4251, "num_tokens": 5225479087.0, "step": 6831 }, { "epoch": 2.5027022075661813, "grad_norm": 0.13897871950854782, "learning_rate": 2.283927348166957e-05, "loss": 0.4292, "num_tokens": 5226191555.0, "step": 6832 }, { "epoch": 2.503068608592104, "grad_norm": 0.1400866183784984, "learning_rate": 2.2835005444044183e-05, "loss": 0.4539, "num_tokens": 5226821567.0, "step": 6833 }, { "epoch": 2.503435009618027, "grad_norm": 0.14058121900063103, "learning_rate": 2.2830737359370807e-05, "loss": 0.4574, "num_tokens": 5227538333.0, "step": 6834 }, { "epoch": 2.5038014106439497, "grad_norm": 0.13192820809832573, "learning_rate": 2.2826469227889912e-05, "loss": 0.4433, "num_tokens": 5228224863.0, "step": 6835 }, { "epoch": 2.5041678116698725, "grad_norm": 0.14645496829304674, "learning_rate": 2.2822201049841983e-05, "loss": 0.4594, "num_tokens": 5228989030.0, "step": 6836 }, { "epoch": 2.5045342126957957, "grad_norm": 0.127739834769322, "learning_rate": 2.2817932825467514e-05, "loss": 0.4407, "num_tokens": 5229810515.0, "step": 6837 }, { "epoch": 2.5049006137217185, "grad_norm": 0.13053418632850128, "learning_rate": 2.2813664555006995e-05, "loss": 0.4788, "num_tokens": 5230595787.0, "step": 6838 }, { "epoch": 2.5052670147476412, "grad_norm": 0.14035546219701261, "learning_rate": 2.280939623870092e-05, "loss": 0.4633, "num_tokens": 5231363126.0, "step": 6839 }, { "epoch": 2.505633415773564, "grad_norm": 0.13975593536084638, "learning_rate": 2.280512787678978e-05, "loss": 0.4398, "num_tokens": 5232172140.0, "step": 6840 }, { "epoch": 2.505999816799487, "grad_norm": 0.14484096025148507, "learning_rate": 2.280085946951408e-05, "loss": 0.4706, "num_tokens": 5232803762.0, "step": 6841 }, { "epoch": 2.50636621782541, "grad_norm": 0.13730357839943122, "learning_rate": 2.2796591017114328e-05, "loss": 0.4436, "num_tokens": 5233561897.0, "step": 6842 }, { "epoch": 2.506732618851333, "grad_norm": 0.13278803969336062, "learning_rate": 2.279232251983101e-05, "loss": 0.4443, "num_tokens": 5234304589.0, "step": 6843 }, { "epoch": 2.5070990198772556, "grad_norm": 0.12631798368941594, "learning_rate": 2.2788053977904648e-05, "loss": 0.4444, "num_tokens": 5235127518.0, "step": 6844 }, { "epoch": 2.5074654209031784, "grad_norm": 0.12768897759221784, "learning_rate": 2.2783785391575748e-05, "loss": 0.4448, "num_tokens": 5235813134.0, "step": 6845 }, { "epoch": 2.507831821929101, "grad_norm": 0.1393885264040527, "learning_rate": 2.2779516761084817e-05, "loss": 0.4461, "num_tokens": 5236572636.0, "step": 6846 }, { "epoch": 2.5081982229550244, "grad_norm": 0.1358348788053697, "learning_rate": 2.277524808667237e-05, "loss": 0.4522, "num_tokens": 5237358953.0, "step": 6847 }, { "epoch": 2.508564623980947, "grad_norm": 0.13761429572373948, "learning_rate": 2.277097936857892e-05, "loss": 0.444, "num_tokens": 5238070921.0, "step": 6848 }, { "epoch": 2.50893102500687, "grad_norm": 0.1429342023098752, "learning_rate": 2.2766710607045e-05, "loss": 0.4932, "num_tokens": 5238899174.0, "step": 6849 }, { "epoch": 2.509297426032793, "grad_norm": 0.133275481418855, "learning_rate": 2.276244180231112e-05, "loss": 0.4887, "num_tokens": 5239736113.0, "step": 6850 }, { "epoch": 2.5096638270587155, "grad_norm": 0.1292601361866237, "learning_rate": 2.2758172954617802e-05, "loss": 0.4326, "num_tokens": 5240424762.0, "step": 6851 }, { "epoch": 2.5100302280846387, "grad_norm": 0.13835806032122328, "learning_rate": 2.2753904064205575e-05, "loss": 0.4386, "num_tokens": 5241153805.0, "step": 6852 }, { "epoch": 2.5103966291105615, "grad_norm": 0.13938489946564595, "learning_rate": 2.274963513131497e-05, "loss": 0.4321, "num_tokens": 5241869156.0, "step": 6853 }, { "epoch": 2.5107630301364843, "grad_norm": 0.1307262919866529, "learning_rate": 2.2745366156186527e-05, "loss": 0.4457, "num_tokens": 5242655088.0, "step": 6854 }, { "epoch": 2.5111294311624075, "grad_norm": 0.13418145868054204, "learning_rate": 2.274109713906076e-05, "loss": 0.4242, "num_tokens": 5243490101.0, "step": 6855 }, { "epoch": 2.5114958321883303, "grad_norm": 0.12416915787842596, "learning_rate": 2.2736828080178213e-05, "loss": 0.4165, "num_tokens": 5244153472.0, "step": 6856 }, { "epoch": 2.511862233214253, "grad_norm": 0.15484274249068175, "learning_rate": 2.273255897977943e-05, "loss": 0.454, "num_tokens": 5245040154.0, "step": 6857 }, { "epoch": 2.512228634240176, "grad_norm": 0.14043405040086387, "learning_rate": 2.2728289838104943e-05, "loss": 0.4662, "num_tokens": 5245896752.0, "step": 6858 }, { "epoch": 2.5125950352660986, "grad_norm": 0.1346180796875929, "learning_rate": 2.27240206553953e-05, "loss": 0.4368, "num_tokens": 5246618301.0, "step": 6859 }, { "epoch": 2.512961436292022, "grad_norm": 0.15815992956041586, "learning_rate": 2.2719751431891046e-05, "loss": 0.4481, "num_tokens": 5247368406.0, "step": 6860 }, { "epoch": 2.5133278373179446, "grad_norm": 0.13637988024561903, "learning_rate": 2.2715482167832725e-05, "loss": 0.4602, "num_tokens": 5248161003.0, "step": 6861 }, { "epoch": 2.5136942383438674, "grad_norm": 0.5070678628512181, "learning_rate": 2.2711212863460894e-05, "loss": 0.4594, "num_tokens": 5248787794.0, "step": 6862 }, { "epoch": 2.51406063936979, "grad_norm": 0.15045882183910392, "learning_rate": 2.2706943519016093e-05, "loss": 0.4493, "num_tokens": 5249501207.0, "step": 6863 }, { "epoch": 2.514427040395713, "grad_norm": 0.13381763494457516, "learning_rate": 2.2702674134738887e-05, "loss": 0.5026, "num_tokens": 5250351991.0, "step": 6864 }, { "epoch": 2.514793441421636, "grad_norm": 0.1424759928653093, "learning_rate": 2.2698404710869833e-05, "loss": 0.4422, "num_tokens": 5251066310.0, "step": 6865 }, { "epoch": 2.515159842447559, "grad_norm": 0.14193621618074767, "learning_rate": 2.2694135247649482e-05, "loss": 0.4618, "num_tokens": 5251797920.0, "step": 6866 }, { "epoch": 2.5155262434734817, "grad_norm": 0.15061704332091827, "learning_rate": 2.2689865745318397e-05, "loss": 0.425, "num_tokens": 5252516065.0, "step": 6867 }, { "epoch": 2.5158926444994045, "grad_norm": 0.13107809943221282, "learning_rate": 2.268559620411714e-05, "loss": 0.4698, "num_tokens": 5253252098.0, "step": 6868 }, { "epoch": 2.5162590455253273, "grad_norm": 0.12489586350607013, "learning_rate": 2.2681326624286288e-05, "loss": 0.474, "num_tokens": 5254014614.0, "step": 6869 }, { "epoch": 2.5166254465512505, "grad_norm": 0.15021387302854147, "learning_rate": 2.2677057006066388e-05, "loss": 0.4586, "num_tokens": 5254826076.0, "step": 6870 }, { "epoch": 2.5169918475771733, "grad_norm": 0.1308601577216182, "learning_rate": 2.2672787349698025e-05, "loss": 0.4503, "num_tokens": 5255453479.0, "step": 6871 }, { "epoch": 2.517358248603096, "grad_norm": 0.14156451434364814, "learning_rate": 2.2668517655421773e-05, "loss": 0.4605, "num_tokens": 5256265135.0, "step": 6872 }, { "epoch": 2.517724649629019, "grad_norm": 0.12259727115732987, "learning_rate": 2.2664247923478193e-05, "loss": 0.4257, "num_tokens": 5257077943.0, "step": 6873 }, { "epoch": 2.5180910506549417, "grad_norm": 0.14076020435249884, "learning_rate": 2.2659978154107876e-05, "loss": 0.4401, "num_tokens": 5258066793.0, "step": 6874 }, { "epoch": 2.518457451680865, "grad_norm": 0.13176077253844887, "learning_rate": 2.2655708347551383e-05, "loss": 0.4505, "num_tokens": 5258785114.0, "step": 6875 }, { "epoch": 2.5188238527067877, "grad_norm": 0.13390816977069583, "learning_rate": 2.2651438504049312e-05, "loss": 0.4702, "num_tokens": 5259528676.0, "step": 6876 }, { "epoch": 2.5191902537327104, "grad_norm": 0.13726508968401832, "learning_rate": 2.2647168623842234e-05, "loss": 0.4825, "num_tokens": 5260350073.0, "step": 6877 }, { "epoch": 2.519556654758633, "grad_norm": 0.13041053635358454, "learning_rate": 2.2642898707170732e-05, "loss": 0.4325, "num_tokens": 5261011369.0, "step": 6878 }, { "epoch": 2.519923055784556, "grad_norm": 0.12904639560840803, "learning_rate": 2.26386287542754e-05, "loss": 0.453, "num_tokens": 5261810395.0, "step": 6879 }, { "epoch": 2.5202894568104792, "grad_norm": 0.14588708254688362, "learning_rate": 2.2634358765396823e-05, "loss": 0.4309, "num_tokens": 5262596804.0, "step": 6880 }, { "epoch": 2.520655857836402, "grad_norm": 0.12026528141673691, "learning_rate": 2.263008874077559e-05, "loss": 0.4848, "num_tokens": 5263344632.0, "step": 6881 }, { "epoch": 2.521022258862325, "grad_norm": 0.13805430113360848, "learning_rate": 2.26258186806523e-05, "loss": 0.4387, "num_tokens": 5264103449.0, "step": 6882 }, { "epoch": 2.5213886598882476, "grad_norm": 0.13872148349701854, "learning_rate": 2.2621548585267537e-05, "loss": 0.456, "num_tokens": 5264851927.0, "step": 6883 }, { "epoch": 2.5217550609141703, "grad_norm": 0.12736577728894047, "learning_rate": 2.2617278454861915e-05, "loss": 0.4575, "num_tokens": 5265658806.0, "step": 6884 }, { "epoch": 2.5221214619400936, "grad_norm": 0.12522910841131404, "learning_rate": 2.2613008289676012e-05, "loss": 0.4271, "num_tokens": 5266505061.0, "step": 6885 }, { "epoch": 2.5224878629660163, "grad_norm": 0.13281464402727547, "learning_rate": 2.2608738089950437e-05, "loss": 0.4674, "num_tokens": 5267263550.0, "step": 6886 }, { "epoch": 2.522854263991939, "grad_norm": 0.12649145105412848, "learning_rate": 2.26044678559258e-05, "loss": 0.4556, "num_tokens": 5268053772.0, "step": 6887 }, { "epoch": 2.523220665017862, "grad_norm": 0.1359671676632136, "learning_rate": 2.2600197587842698e-05, "loss": 0.4867, "num_tokens": 5268931714.0, "step": 6888 }, { "epoch": 2.5235870660437847, "grad_norm": 0.14177385461915987, "learning_rate": 2.2595927285941733e-05, "loss": 0.4499, "num_tokens": 5269646133.0, "step": 6889 }, { "epoch": 2.523953467069708, "grad_norm": 0.138113572863776, "learning_rate": 2.2591656950463516e-05, "loss": 0.4661, "num_tokens": 5270308601.0, "step": 6890 }, { "epoch": 2.5243198680956307, "grad_norm": 0.12145958549880051, "learning_rate": 2.2587386581648663e-05, "loss": 0.4386, "num_tokens": 5271182687.0, "step": 6891 }, { "epoch": 2.5246862691215535, "grad_norm": 0.1327765430004676, "learning_rate": 2.2583116179737784e-05, "loss": 0.4495, "num_tokens": 5271964390.0, "step": 6892 }, { "epoch": 2.5250526701474763, "grad_norm": 0.12204061864272042, "learning_rate": 2.257884574497149e-05, "loss": 0.4471, "num_tokens": 5272831893.0, "step": 6893 }, { "epoch": 2.525419071173399, "grad_norm": 0.11702854398200256, "learning_rate": 2.2574575277590394e-05, "loss": 0.4446, "num_tokens": 5273630318.0, "step": 6894 }, { "epoch": 2.5257854721993223, "grad_norm": 0.1472895249734491, "learning_rate": 2.257030477783512e-05, "loss": 0.4704, "num_tokens": 5274416876.0, "step": 6895 }, { "epoch": 2.526151873225245, "grad_norm": 0.13018677965259587, "learning_rate": 2.2566034245946282e-05, "loss": 0.4637, "num_tokens": 5275190996.0, "step": 6896 }, { "epoch": 2.526518274251168, "grad_norm": 0.13182009746132972, "learning_rate": 2.2561763682164513e-05, "loss": 0.4474, "num_tokens": 5276024254.0, "step": 6897 }, { "epoch": 2.526884675277091, "grad_norm": 0.12664519100607022, "learning_rate": 2.2557493086730414e-05, "loss": 0.4351, "num_tokens": 5276789584.0, "step": 6898 }, { "epoch": 2.5272510763030134, "grad_norm": 0.14548858816044688, "learning_rate": 2.2553222459884632e-05, "loss": 0.4777, "num_tokens": 5277517226.0, "step": 6899 }, { "epoch": 2.5276174773289366, "grad_norm": 0.13504742962341215, "learning_rate": 2.2548951801867775e-05, "loss": 0.4478, "num_tokens": 5278300101.0, "step": 6900 }, { "epoch": 2.5279838783548594, "grad_norm": 0.20205157754195843, "learning_rate": 2.254468111292049e-05, "loss": 0.4499, "num_tokens": 5279094176.0, "step": 6901 }, { "epoch": 2.528350279380782, "grad_norm": 0.11967722725146399, "learning_rate": 2.254041039328339e-05, "loss": 0.451, "num_tokens": 5279879799.0, "step": 6902 }, { "epoch": 2.5287166804067054, "grad_norm": 0.1416572255432704, "learning_rate": 2.253613964319712e-05, "loss": 0.4655, "num_tokens": 5280690964.0, "step": 6903 }, { "epoch": 2.529083081432628, "grad_norm": 0.1478217137526772, "learning_rate": 2.2531868862902302e-05, "loss": 0.4724, "num_tokens": 5281375457.0, "step": 6904 }, { "epoch": 2.529449482458551, "grad_norm": 0.13845683482262341, "learning_rate": 2.2527598052639576e-05, "loss": 0.492, "num_tokens": 5282194989.0, "step": 6905 }, { "epoch": 2.5298158834844737, "grad_norm": 0.1225335351326003, "learning_rate": 2.2523327212649582e-05, "loss": 0.4349, "num_tokens": 5283089335.0, "step": 6906 }, { "epoch": 2.5301822845103965, "grad_norm": 0.1466008764233794, "learning_rate": 2.2519056343172955e-05, "loss": 0.4502, "num_tokens": 5283846919.0, "step": 6907 }, { "epoch": 2.5305486855363197, "grad_norm": 0.14151042758121363, "learning_rate": 2.2514785444450343e-05, "loss": 0.4674, "num_tokens": 5284603665.0, "step": 6908 }, { "epoch": 2.5309150865622425, "grad_norm": 0.13895640923628982, "learning_rate": 2.2510514516722376e-05, "loss": 0.4831, "num_tokens": 5285413449.0, "step": 6909 }, { "epoch": 2.5312814875881653, "grad_norm": 0.12991543245631343, "learning_rate": 2.2506243560229698e-05, "loss": 0.4488, "num_tokens": 5286155761.0, "step": 6910 }, { "epoch": 2.531647888614088, "grad_norm": 0.14156724404232723, "learning_rate": 2.2501972575212965e-05, "loss": 0.4493, "num_tokens": 5286947929.0, "step": 6911 }, { "epoch": 2.532014289640011, "grad_norm": 0.12413997155071137, "learning_rate": 2.2497701561912813e-05, "loss": 0.4695, "num_tokens": 5287720119.0, "step": 6912 }, { "epoch": 2.532380690665934, "grad_norm": 0.13182375947502434, "learning_rate": 2.249343052056989e-05, "loss": 0.4455, "num_tokens": 5288557968.0, "step": 6913 }, { "epoch": 2.532747091691857, "grad_norm": 0.1271157603396339, "learning_rate": 2.248915945142486e-05, "loss": 0.4734, "num_tokens": 5289237096.0, "step": 6914 }, { "epoch": 2.5331134927177796, "grad_norm": 0.13278131614940422, "learning_rate": 2.2484888354718363e-05, "loss": 0.454, "num_tokens": 5290005451.0, "step": 6915 }, { "epoch": 2.5334798937437024, "grad_norm": 0.13692644147455837, "learning_rate": 2.2480617230691052e-05, "loss": 0.4476, "num_tokens": 5290774256.0, "step": 6916 }, { "epoch": 2.533846294769625, "grad_norm": 0.13128850635975417, "learning_rate": 2.2476346079583583e-05, "loss": 0.4671, "num_tokens": 5291467366.0, "step": 6917 }, { "epoch": 2.5342126957955484, "grad_norm": 0.1340226978401795, "learning_rate": 2.2472074901636615e-05, "loss": 0.4693, "num_tokens": 5292242593.0, "step": 6918 }, { "epoch": 2.534579096821471, "grad_norm": 0.13482925080842967, "learning_rate": 2.24678036970908e-05, "loss": 0.4407, "num_tokens": 5292941271.0, "step": 6919 }, { "epoch": 2.534945497847394, "grad_norm": 0.13302604038351984, "learning_rate": 2.2463532466186795e-05, "loss": 0.446, "num_tokens": 5293631764.0, "step": 6920 }, { "epoch": 2.5353118988733168, "grad_norm": 0.14313795755559436, "learning_rate": 2.2459261209165276e-05, "loss": 0.4542, "num_tokens": 5294373864.0, "step": 6921 }, { "epoch": 2.5356782998992395, "grad_norm": 0.13195133101055512, "learning_rate": 2.2454989926266887e-05, "loss": 0.4298, "num_tokens": 5295279150.0, "step": 6922 }, { "epoch": 2.5360447009251628, "grad_norm": 0.12182279997737282, "learning_rate": 2.24507186177323e-05, "loss": 0.4533, "num_tokens": 5295983462.0, "step": 6923 }, { "epoch": 2.5364111019510855, "grad_norm": 0.16043793751232263, "learning_rate": 2.244644728380218e-05, "loss": 0.4362, "num_tokens": 5296774951.0, "step": 6924 }, { "epoch": 2.5367775029770083, "grad_norm": 0.11886882030285044, "learning_rate": 2.244217592471719e-05, "loss": 0.4268, "num_tokens": 5297482868.0, "step": 6925 }, { "epoch": 2.537143904002931, "grad_norm": 0.15221094113502867, "learning_rate": 2.2437904540718005e-05, "loss": 0.4564, "num_tokens": 5298269822.0, "step": 6926 }, { "epoch": 2.537510305028854, "grad_norm": 0.14897674863156218, "learning_rate": 2.2433633132045286e-05, "loss": 0.4719, "num_tokens": 5299153933.0, "step": 6927 }, { "epoch": 2.537876706054777, "grad_norm": 0.12296694532253652, "learning_rate": 2.2429361698939708e-05, "loss": 0.4301, "num_tokens": 5299912542.0, "step": 6928 }, { "epoch": 2.5382431070807, "grad_norm": 0.13055742380583488, "learning_rate": 2.2425090241641945e-05, "loss": 0.4463, "num_tokens": 5300808371.0, "step": 6929 }, { "epoch": 2.5386095081066227, "grad_norm": 0.14037123636239182, "learning_rate": 2.2420818760392664e-05, "loss": 0.4894, "num_tokens": 5301655866.0, "step": 6930 }, { "epoch": 2.5389759091325454, "grad_norm": 0.12386303292420522, "learning_rate": 2.2416547255432546e-05, "loss": 0.4363, "num_tokens": 5302465684.0, "step": 6931 }, { "epoch": 2.5393423101584682, "grad_norm": 0.13858187257752294, "learning_rate": 2.2412275727002253e-05, "loss": 0.4636, "num_tokens": 5303244219.0, "step": 6932 }, { "epoch": 2.5397087111843915, "grad_norm": 0.1432151459747867, "learning_rate": 2.2408004175342478e-05, "loss": 0.4597, "num_tokens": 5304005938.0, "step": 6933 }, { "epoch": 2.5400751122103142, "grad_norm": 0.13759119963360444, "learning_rate": 2.2403732600693907e-05, "loss": 0.4513, "num_tokens": 5304804635.0, "step": 6934 }, { "epoch": 2.540441513236237, "grad_norm": 0.13757297506032376, "learning_rate": 2.2399461003297196e-05, "loss": 0.4976, "num_tokens": 5305557998.0, "step": 6935 }, { "epoch": 2.54080791426216, "grad_norm": 0.15118363610806562, "learning_rate": 2.2395189383393036e-05, "loss": 0.4751, "num_tokens": 5306236125.0, "step": 6936 }, { "epoch": 2.5411743152880826, "grad_norm": 0.13915378311444201, "learning_rate": 2.2390917741222116e-05, "loss": 0.4788, "num_tokens": 5307094378.0, "step": 6937 }, { "epoch": 2.541540716314006, "grad_norm": 0.13535747018487448, "learning_rate": 2.2386646077025116e-05, "loss": 0.4765, "num_tokens": 5307777625.0, "step": 6938 }, { "epoch": 2.5419071173399286, "grad_norm": 0.13988514481230882, "learning_rate": 2.2382374391042714e-05, "loss": 0.4635, "num_tokens": 5308667937.0, "step": 6939 }, { "epoch": 2.5422735183658514, "grad_norm": 0.1384617620287013, "learning_rate": 2.2378102683515602e-05, "loss": 0.4216, "num_tokens": 5309392925.0, "step": 6940 }, { "epoch": 2.542639919391774, "grad_norm": 0.12196377103550195, "learning_rate": 2.237383095468448e-05, "loss": 0.4339, "num_tokens": 5310195199.0, "step": 6941 }, { "epoch": 2.543006320417697, "grad_norm": 0.13425677756739696, "learning_rate": 2.2369559204790013e-05, "loss": 0.4595, "num_tokens": 5310965854.0, "step": 6942 }, { "epoch": 2.54337272144362, "grad_norm": 0.13878873382411022, "learning_rate": 2.2365287434072905e-05, "loss": 0.4241, "num_tokens": 5311650909.0, "step": 6943 }, { "epoch": 2.543739122469543, "grad_norm": 0.14847197585819, "learning_rate": 2.236101564277384e-05, "loss": 0.4345, "num_tokens": 5312495758.0, "step": 6944 }, { "epoch": 2.5441055234954657, "grad_norm": 0.13011321271037482, "learning_rate": 2.235674383113352e-05, "loss": 0.4474, "num_tokens": 5313189870.0, "step": 6945 }, { "epoch": 2.544471924521389, "grad_norm": 0.13821610839660642, "learning_rate": 2.235247199939263e-05, "loss": 0.4458, "num_tokens": 5313857405.0, "step": 6946 }, { "epoch": 2.5448383255473113, "grad_norm": 0.13959037443314443, "learning_rate": 2.234820014779186e-05, "loss": 0.4852, "num_tokens": 5314662490.0, "step": 6947 }, { "epoch": 2.5452047265732345, "grad_norm": 0.13808086600479516, "learning_rate": 2.234392827657192e-05, "loss": 0.45, "num_tokens": 5315420558.0, "step": 6948 }, { "epoch": 2.5455711275991573, "grad_norm": 0.1282182399050883, "learning_rate": 2.2339656385973502e-05, "loss": 0.468, "num_tokens": 5316313956.0, "step": 6949 }, { "epoch": 2.54593752862508, "grad_norm": 0.12230489568824027, "learning_rate": 2.233538447623729e-05, "loss": 0.4423, "num_tokens": 5317070115.0, "step": 6950 }, { "epoch": 2.5463039296510033, "grad_norm": 0.14657040513300912, "learning_rate": 2.2331112547604e-05, "loss": 0.4574, "num_tokens": 5317723688.0, "step": 6951 }, { "epoch": 2.546670330676926, "grad_norm": 0.14083867894002203, "learning_rate": 2.232684060031432e-05, "loss": 0.4608, "num_tokens": 5318481824.0, "step": 6952 }, { "epoch": 2.547036731702849, "grad_norm": 0.13227114100563295, "learning_rate": 2.2322568634608964e-05, "loss": 0.4764, "num_tokens": 5319295958.0, "step": 6953 }, { "epoch": 2.5474031327287716, "grad_norm": 0.1403819036775342, "learning_rate": 2.2318296650728625e-05, "loss": 0.4434, "num_tokens": 5320036575.0, "step": 6954 }, { "epoch": 2.5477695337546944, "grad_norm": 0.13397658489480616, "learning_rate": 2.2314024648914003e-05, "loss": 0.4593, "num_tokens": 5320871061.0, "step": 6955 }, { "epoch": 2.5481359347806176, "grad_norm": 0.1333687733946477, "learning_rate": 2.2309752629405805e-05, "loss": 0.497, "num_tokens": 5321604466.0, "step": 6956 }, { "epoch": 2.5485023358065404, "grad_norm": 0.1300724878899593, "learning_rate": 2.230548059244474e-05, "loss": 0.457, "num_tokens": 5322420107.0, "step": 6957 }, { "epoch": 2.548868736832463, "grad_norm": 0.1415456588710006, "learning_rate": 2.2301208538271512e-05, "loss": 0.4604, "num_tokens": 5323155368.0, "step": 6958 }, { "epoch": 2.549235137858386, "grad_norm": 0.14895144045109793, "learning_rate": 2.2296936467126827e-05, "loss": 0.4506, "num_tokens": 5323919481.0, "step": 6959 }, { "epoch": 2.5496015388843087, "grad_norm": 0.1329097717888289, "learning_rate": 2.2292664379251388e-05, "loss": 0.4517, "num_tokens": 5324760866.0, "step": 6960 }, { "epoch": 2.549967939910232, "grad_norm": 0.12724881842912003, "learning_rate": 2.2288392274885915e-05, "loss": 0.4357, "num_tokens": 5325506965.0, "step": 6961 }, { "epoch": 2.5503343409361547, "grad_norm": 0.1404945149139064, "learning_rate": 2.2284120154271104e-05, "loss": 0.4408, "num_tokens": 5326358497.0, "step": 6962 }, { "epoch": 2.5507007419620775, "grad_norm": 0.13784999060942962, "learning_rate": 2.227984801764768e-05, "loss": 0.458, "num_tokens": 5327120472.0, "step": 6963 }, { "epoch": 2.5510671429880003, "grad_norm": 0.12740425727494495, "learning_rate": 2.227557586525634e-05, "loss": 0.4448, "num_tokens": 5327873044.0, "step": 6964 }, { "epoch": 2.551433544013923, "grad_norm": 0.1252554659534191, "learning_rate": 2.2271303697337814e-05, "loss": 0.4607, "num_tokens": 5328712036.0, "step": 6965 }, { "epoch": 2.5517999450398463, "grad_norm": 0.131158274201992, "learning_rate": 2.22670315141328e-05, "loss": 0.48, "num_tokens": 5329494284.0, "step": 6966 }, { "epoch": 2.552166346065769, "grad_norm": 0.12987877457741417, "learning_rate": 2.2262759315882016e-05, "loss": 0.4374, "num_tokens": 5330226309.0, "step": 6967 }, { "epoch": 2.552532747091692, "grad_norm": 0.1518687530789544, "learning_rate": 2.2258487102826183e-05, "loss": 0.4572, "num_tokens": 5330912909.0, "step": 6968 }, { "epoch": 2.5528991481176146, "grad_norm": 0.1272035432587532, "learning_rate": 2.2254214875206008e-05, "loss": 0.4347, "num_tokens": 5331770209.0, "step": 6969 }, { "epoch": 2.5532655491435374, "grad_norm": 0.11987222093155403, "learning_rate": 2.224994263326221e-05, "loss": 0.4319, "num_tokens": 5332634362.0, "step": 6970 }, { "epoch": 2.5536319501694607, "grad_norm": 0.13627475477779105, "learning_rate": 2.2245670377235516e-05, "loss": 0.4298, "num_tokens": 5333465594.0, "step": 6971 }, { "epoch": 2.5539983511953834, "grad_norm": 0.13290761767253895, "learning_rate": 2.2241398107366635e-05, "loss": 0.4315, "num_tokens": 5334268610.0, "step": 6972 }, { "epoch": 2.554364752221306, "grad_norm": 0.12535603213259444, "learning_rate": 2.223712582389629e-05, "loss": 0.4585, "num_tokens": 5335058113.0, "step": 6973 }, { "epoch": 2.554731153247229, "grad_norm": 0.14076568823731744, "learning_rate": 2.2232853527065198e-05, "loss": 0.4628, "num_tokens": 5335836093.0, "step": 6974 }, { "epoch": 2.5550975542731518, "grad_norm": 0.13042801066736764, "learning_rate": 2.2228581217114074e-05, "loss": 0.4547, "num_tokens": 5336636324.0, "step": 6975 }, { "epoch": 2.555463955299075, "grad_norm": 0.12188071092040918, "learning_rate": 2.222430889428366e-05, "loss": 0.4415, "num_tokens": 5337424017.0, "step": 6976 }, { "epoch": 2.5558303563249978, "grad_norm": 0.13026745632985426, "learning_rate": 2.222003655881465e-05, "loss": 0.4367, "num_tokens": 5338194493.0, "step": 6977 }, { "epoch": 2.5561967573509206, "grad_norm": 0.13849012050014695, "learning_rate": 2.2215764210947785e-05, "loss": 0.4583, "num_tokens": 5339070631.0, "step": 6978 }, { "epoch": 2.5565631583768433, "grad_norm": 0.13853990044746345, "learning_rate": 2.221149185092379e-05, "loss": 0.4781, "num_tokens": 5339748473.0, "step": 6979 }, { "epoch": 2.556929559402766, "grad_norm": 0.14281769814672368, "learning_rate": 2.220721947898338e-05, "loss": 0.4245, "num_tokens": 5340570918.0, "step": 6980 }, { "epoch": 2.5572959604286893, "grad_norm": 0.1311692154236687, "learning_rate": 2.2202947095367283e-05, "loss": 0.4589, "num_tokens": 5341342044.0, "step": 6981 }, { "epoch": 2.557662361454612, "grad_norm": 0.13860023744359692, "learning_rate": 2.219867470031622e-05, "loss": 0.4393, "num_tokens": 5342020733.0, "step": 6982 }, { "epoch": 2.558028762480535, "grad_norm": 0.12843160594043615, "learning_rate": 2.2194402294070934e-05, "loss": 0.462, "num_tokens": 5342801923.0, "step": 6983 }, { "epoch": 2.5583951635064577, "grad_norm": 0.14231236447782147, "learning_rate": 2.2190129876872128e-05, "loss": 0.4705, "num_tokens": 5343492383.0, "step": 6984 }, { "epoch": 2.5587615645323805, "grad_norm": 0.1489126255932155, "learning_rate": 2.2185857448960543e-05, "loss": 0.4471, "num_tokens": 5344309293.0, "step": 6985 }, { "epoch": 2.5591279655583037, "grad_norm": 0.12289746913318794, "learning_rate": 2.2181585010576914e-05, "loss": 0.4592, "num_tokens": 5345137792.0, "step": 6986 }, { "epoch": 2.5594943665842265, "grad_norm": 0.1474225736785033, "learning_rate": 2.2177312561961957e-05, "loss": 0.4312, "num_tokens": 5345829651.0, "step": 6987 }, { "epoch": 2.5598607676101492, "grad_norm": 0.12888384231367145, "learning_rate": 2.21730401033564e-05, "loss": 0.4262, "num_tokens": 5346678138.0, "step": 6988 }, { "epoch": 2.560227168636072, "grad_norm": 0.13828216650044556, "learning_rate": 2.2168767635000976e-05, "loss": 0.4309, "num_tokens": 5347358857.0, "step": 6989 }, { "epoch": 2.560593569661995, "grad_norm": 0.12982579393640167, "learning_rate": 2.216449515713642e-05, "loss": 0.4394, "num_tokens": 5348259179.0, "step": 6990 }, { "epoch": 2.560959970687918, "grad_norm": 0.13566383252980285, "learning_rate": 2.2160222670003466e-05, "loss": 0.456, "num_tokens": 5349007957.0, "step": 6991 }, { "epoch": 2.561326371713841, "grad_norm": 0.14735787493247315, "learning_rate": 2.2155950173842836e-05, "loss": 0.4727, "num_tokens": 5349843121.0, "step": 6992 }, { "epoch": 2.5616927727397636, "grad_norm": 0.13550325028995758, "learning_rate": 2.2151677668895258e-05, "loss": 0.4311, "num_tokens": 5350754684.0, "step": 6993 }, { "epoch": 2.562059173765687, "grad_norm": 0.12030060053261833, "learning_rate": 2.214740515540148e-05, "loss": 0.482, "num_tokens": 5351468086.0, "step": 6994 }, { "epoch": 2.562425574791609, "grad_norm": 0.15489328719202225, "learning_rate": 2.214313263360222e-05, "loss": 0.4566, "num_tokens": 5352173162.0, "step": 6995 }, { "epoch": 2.5627919758175324, "grad_norm": 0.14453983620940994, "learning_rate": 2.2138860103738225e-05, "loss": 0.4729, "num_tokens": 5352869112.0, "step": 6996 }, { "epoch": 2.563158376843455, "grad_norm": 0.1306396477929725, "learning_rate": 2.2134587566050212e-05, "loss": 0.4906, "num_tokens": 5353629772.0, "step": 6997 }, { "epoch": 2.563524777869378, "grad_norm": 0.15686731218072933, "learning_rate": 2.2130315020778926e-05, "loss": 0.4343, "num_tokens": 5354463735.0, "step": 6998 }, { "epoch": 2.563891178895301, "grad_norm": 0.15414594728465356, "learning_rate": 2.2126042468165104e-05, "loss": 0.4496, "num_tokens": 5355179748.0, "step": 6999 }, { "epoch": 2.564257579921224, "grad_norm": 0.12021166906874874, "learning_rate": 2.2121769908449474e-05, "loss": 0.4587, "num_tokens": 5355951905.0, "step": 7000 }, { "epoch": 2.5646239809471467, "grad_norm": 0.1339950616233641, "learning_rate": 2.2117497341872775e-05, "loss": 0.4311, "num_tokens": 5356735180.0, "step": 7001 }, { "epoch": 2.5649903819730695, "grad_norm": 0.15270022928090166, "learning_rate": 2.2113224768675745e-05, "loss": 0.5051, "num_tokens": 5357616094.0, "step": 7002 }, { "epoch": 2.5653567829989923, "grad_norm": 0.1323483154144361, "learning_rate": 2.2108952189099116e-05, "loss": 0.4253, "num_tokens": 5358395555.0, "step": 7003 }, { "epoch": 2.5657231840249155, "grad_norm": 0.1488359634086388, "learning_rate": 2.210467960338362e-05, "loss": 0.4769, "num_tokens": 5359289525.0, "step": 7004 }, { "epoch": 2.5660895850508383, "grad_norm": 0.12302136398327042, "learning_rate": 2.210040701177e-05, "loss": 0.4393, "num_tokens": 5360195197.0, "step": 7005 }, { "epoch": 2.566455986076761, "grad_norm": 0.14574754258447378, "learning_rate": 2.2096134414499e-05, "loss": 0.4836, "num_tokens": 5360915295.0, "step": 7006 }, { "epoch": 2.566822387102684, "grad_norm": 0.13834120056172658, "learning_rate": 2.209186181181135e-05, "loss": 0.4201, "num_tokens": 5361694567.0, "step": 7007 }, { "epoch": 2.5671887881286066, "grad_norm": 0.144519570421745, "learning_rate": 2.2087589203947785e-05, "loss": 0.4772, "num_tokens": 5362397975.0, "step": 7008 }, { "epoch": 2.56755518915453, "grad_norm": 0.13979853671347103, "learning_rate": 2.2083316591149044e-05, "loss": 0.5004, "num_tokens": 5363086443.0, "step": 7009 }, { "epoch": 2.5679215901804526, "grad_norm": 0.15222081469204304, "learning_rate": 2.2079043973655873e-05, "loss": 0.4345, "num_tokens": 5363846593.0, "step": 7010 }, { "epoch": 2.5682879912063754, "grad_norm": 0.13623340963772748, "learning_rate": 2.2074771351709003e-05, "loss": 0.4686, "num_tokens": 5364588196.0, "step": 7011 }, { "epoch": 2.568654392232298, "grad_norm": 0.15317876076066209, "learning_rate": 2.2070498725549174e-05, "loss": 0.4574, "num_tokens": 5365305176.0, "step": 7012 }, { "epoch": 2.569020793258221, "grad_norm": 0.14541297025845185, "learning_rate": 2.206622609541713e-05, "loss": 0.4746, "num_tokens": 5366012446.0, "step": 7013 }, { "epoch": 2.569387194284144, "grad_norm": 0.1537681518535447, "learning_rate": 2.20619534615536e-05, "loss": 0.4371, "num_tokens": 5366873863.0, "step": 7014 }, { "epoch": 2.569753595310067, "grad_norm": 0.15745675217416566, "learning_rate": 2.205768082419934e-05, "loss": 0.4681, "num_tokens": 5367675771.0, "step": 7015 }, { "epoch": 2.5701199963359898, "grad_norm": 0.1501957456003972, "learning_rate": 2.205340818359508e-05, "loss": 0.4859, "num_tokens": 5368442311.0, "step": 7016 }, { "epoch": 2.5704863973619125, "grad_norm": 0.146117177288958, "learning_rate": 2.2049135539981553e-05, "loss": 0.4756, "num_tokens": 5369214158.0, "step": 7017 }, { "epoch": 2.5708527983878353, "grad_norm": 0.14830275525314526, "learning_rate": 2.2044862893599514e-05, "loss": 0.4611, "num_tokens": 5370059614.0, "step": 7018 }, { "epoch": 2.5712191994137585, "grad_norm": 0.1628286092780151, "learning_rate": 2.2040590244689688e-05, "loss": 0.5005, "num_tokens": 5370916174.0, "step": 7019 }, { "epoch": 2.5715856004396813, "grad_norm": 0.13145256877450157, "learning_rate": 2.2036317593492833e-05, "loss": 0.4503, "num_tokens": 5371637041.0, "step": 7020 }, { "epoch": 2.571952001465604, "grad_norm": 0.1528088534312381, "learning_rate": 2.2032044940249667e-05, "loss": 0.4638, "num_tokens": 5372415012.0, "step": 7021 }, { "epoch": 2.572318402491527, "grad_norm": 0.17067896292051804, "learning_rate": 2.2027772285200956e-05, "loss": 0.4751, "num_tokens": 5373117033.0, "step": 7022 }, { "epoch": 2.5726848035174497, "grad_norm": 0.1325552822805557, "learning_rate": 2.2023499628587427e-05, "loss": 0.4224, "num_tokens": 5373908503.0, "step": 7023 }, { "epoch": 2.573051204543373, "grad_norm": 0.15811937832307257, "learning_rate": 2.2019226970649812e-05, "loss": 0.4832, "num_tokens": 5374721389.0, "step": 7024 }, { "epoch": 2.5734176055692957, "grad_norm": 0.1475478002631169, "learning_rate": 2.201495431162887e-05, "loss": 0.4444, "num_tokens": 5375475847.0, "step": 7025 }, { "epoch": 2.5737840065952184, "grad_norm": 0.13979013255723088, "learning_rate": 2.2010681651765328e-05, "loss": 0.4556, "num_tokens": 5376335380.0, "step": 7026 }, { "epoch": 2.5741504076211412, "grad_norm": 0.1480916082071689, "learning_rate": 2.2006408991299943e-05, "loss": 0.4375, "num_tokens": 5377028996.0, "step": 7027 }, { "epoch": 2.574516808647064, "grad_norm": 0.13725686091941583, "learning_rate": 2.2002136330473437e-05, "loss": 0.4593, "num_tokens": 5377853745.0, "step": 7028 }, { "epoch": 2.5748832096729872, "grad_norm": 0.14083484680292385, "learning_rate": 2.199786366952657e-05, "loss": 0.4695, "num_tokens": 5378609020.0, "step": 7029 }, { "epoch": 2.57524961069891, "grad_norm": 0.13603540093894415, "learning_rate": 2.1993591008700063e-05, "loss": 0.4415, "num_tokens": 5379446544.0, "step": 7030 }, { "epoch": 2.575616011724833, "grad_norm": 0.1454712888671843, "learning_rate": 2.1989318348234674e-05, "loss": 0.4329, "num_tokens": 5380130236.0, "step": 7031 }, { "epoch": 2.5759824127507556, "grad_norm": 0.13794981734494924, "learning_rate": 2.1985045688371137e-05, "loss": 0.4389, "num_tokens": 5380896841.0, "step": 7032 }, { "epoch": 2.5763488137766783, "grad_norm": 0.139737204234053, "learning_rate": 2.198077302935019e-05, "loss": 0.4425, "num_tokens": 5381738921.0, "step": 7033 }, { "epoch": 2.5767152148026016, "grad_norm": 0.15081678612125377, "learning_rate": 2.1976500371412586e-05, "loss": 0.4606, "num_tokens": 5382512393.0, "step": 7034 }, { "epoch": 2.5770816158285244, "grad_norm": 0.12569926060013842, "learning_rate": 2.1972227714799056e-05, "loss": 0.4529, "num_tokens": 5383345948.0, "step": 7035 }, { "epoch": 2.577448016854447, "grad_norm": 0.14916736181180665, "learning_rate": 2.196795505975034e-05, "loss": 0.4807, "num_tokens": 5383999971.0, "step": 7036 }, { "epoch": 2.57781441788037, "grad_norm": 0.15315962372251654, "learning_rate": 2.196368240650718e-05, "loss": 0.4543, "num_tokens": 5384686058.0, "step": 7037 }, { "epoch": 2.5781808189062927, "grad_norm": 0.1366032255028655, "learning_rate": 2.1959409755310314e-05, "loss": 0.4407, "num_tokens": 5385302067.0, "step": 7038 }, { "epoch": 2.578547219932216, "grad_norm": 0.13513439185718756, "learning_rate": 2.195513710640049e-05, "loss": 0.4408, "num_tokens": 5386106699.0, "step": 7039 }, { "epoch": 2.5789136209581387, "grad_norm": 0.15380414497147113, "learning_rate": 2.1950864460018452e-05, "loss": 0.449, "num_tokens": 5386869878.0, "step": 7040 }, { "epoch": 2.5792800219840615, "grad_norm": 0.13291922134952477, "learning_rate": 2.1946591816404927e-05, "loss": 0.4591, "num_tokens": 5387742195.0, "step": 7041 }, { "epoch": 2.5796464230099847, "grad_norm": 0.14166257650895164, "learning_rate": 2.1942319175800665e-05, "loss": 0.4638, "num_tokens": 5388453312.0, "step": 7042 }, { "epoch": 2.580012824035907, "grad_norm": 0.13896115645270216, "learning_rate": 2.19380465384464e-05, "loss": 0.4748, "num_tokens": 5389182555.0, "step": 7043 }, { "epoch": 2.5803792250618303, "grad_norm": 0.13739585061521023, "learning_rate": 2.1933773904582872e-05, "loss": 0.4603, "num_tokens": 5389977131.0, "step": 7044 }, { "epoch": 2.580745626087753, "grad_norm": 0.14437273418436203, "learning_rate": 2.192950127445083e-05, "loss": 0.4468, "num_tokens": 5390711835.0, "step": 7045 }, { "epoch": 2.581112027113676, "grad_norm": 0.12933843406321113, "learning_rate": 2.1925228648291003e-05, "loss": 0.4347, "num_tokens": 5391540068.0, "step": 7046 }, { "epoch": 2.581478428139599, "grad_norm": 0.13311025300313142, "learning_rate": 2.1920956026344133e-05, "loss": 0.445, "num_tokens": 5392289278.0, "step": 7047 }, { "epoch": 2.581844829165522, "grad_norm": 0.1405052223362825, "learning_rate": 2.1916683408850962e-05, "loss": 0.4483, "num_tokens": 5393058711.0, "step": 7048 }, { "epoch": 2.5822112301914446, "grad_norm": 0.1544679351088432, "learning_rate": 2.191241079605222e-05, "loss": 0.45, "num_tokens": 5393950210.0, "step": 7049 }, { "epoch": 2.5825776312173674, "grad_norm": 0.11943766595150632, "learning_rate": 2.1908138188188663e-05, "loss": 0.4569, "num_tokens": 5394860517.0, "step": 7050 }, { "epoch": 2.58294403224329, "grad_norm": 0.1334104942397948, "learning_rate": 2.1903865585501008e-05, "loss": 0.4516, "num_tokens": 5395518671.0, "step": 7051 }, { "epoch": 2.5833104332692134, "grad_norm": 0.15256050829878537, "learning_rate": 2.189959298823e-05, "loss": 0.4675, "num_tokens": 5396277490.0, "step": 7052 }, { "epoch": 2.583676834295136, "grad_norm": 0.12990128379805496, "learning_rate": 2.189532039661639e-05, "loss": 0.4769, "num_tokens": 5397056957.0, "step": 7053 }, { "epoch": 2.584043235321059, "grad_norm": 0.14286026097138338, "learning_rate": 2.1891047810900896e-05, "loss": 0.4676, "num_tokens": 5397671765.0, "step": 7054 }, { "epoch": 2.5844096363469817, "grad_norm": 0.14864949344878617, "learning_rate": 2.188677523132426e-05, "loss": 0.4822, "num_tokens": 5398314507.0, "step": 7055 }, { "epoch": 2.5847760373729045, "grad_norm": 0.15620459320275257, "learning_rate": 2.188250265812723e-05, "loss": 0.4837, "num_tokens": 5399054050.0, "step": 7056 }, { "epoch": 2.5851424383988277, "grad_norm": 0.1259427344606721, "learning_rate": 2.1878230091550528e-05, "loss": 0.4394, "num_tokens": 5399856800.0, "step": 7057 }, { "epoch": 2.5855088394247505, "grad_norm": 0.15738890664098723, "learning_rate": 2.1873957531834905e-05, "loss": 0.454, "num_tokens": 5400458029.0, "step": 7058 }, { "epoch": 2.5858752404506733, "grad_norm": 0.17670304592502514, "learning_rate": 2.186968497922108e-05, "loss": 0.463, "num_tokens": 5401204727.0, "step": 7059 }, { "epoch": 2.586241641476596, "grad_norm": 0.13884442686165716, "learning_rate": 2.186541243394979e-05, "loss": 0.4704, "num_tokens": 5401924385.0, "step": 7060 }, { "epoch": 2.586608042502519, "grad_norm": 0.1397402045287764, "learning_rate": 2.1861139896261784e-05, "loss": 0.5047, "num_tokens": 5402609099.0, "step": 7061 }, { "epoch": 2.586974443528442, "grad_norm": 0.19589796935361709, "learning_rate": 2.1856867366397786e-05, "loss": 0.5073, "num_tokens": 5403392543.0, "step": 7062 }, { "epoch": 2.587340844554365, "grad_norm": 0.14551420454798425, "learning_rate": 2.1852594844598525e-05, "loss": 0.4201, "num_tokens": 5404154293.0, "step": 7063 }, { "epoch": 2.5877072455802876, "grad_norm": 0.12491970790396872, "learning_rate": 2.1848322331104744e-05, "loss": 0.4644, "num_tokens": 5404835436.0, "step": 7064 }, { "epoch": 2.5880736466062104, "grad_norm": 0.1504593669404261, "learning_rate": 2.1844049826157177e-05, "loss": 0.4344, "num_tokens": 5405642418.0, "step": 7065 }, { "epoch": 2.588440047632133, "grad_norm": 0.14829576062405608, "learning_rate": 2.1839777329996536e-05, "loss": 0.4383, "num_tokens": 5406351279.0, "step": 7066 }, { "epoch": 2.5888064486580564, "grad_norm": 0.1429399061479228, "learning_rate": 2.1835504842863582e-05, "loss": 0.4384, "num_tokens": 5407087731.0, "step": 7067 }, { "epoch": 2.589172849683979, "grad_norm": 0.13529380756080106, "learning_rate": 2.1831232364999026e-05, "loss": 0.4776, "num_tokens": 5407745158.0, "step": 7068 }, { "epoch": 2.589539250709902, "grad_norm": 0.1540982064785992, "learning_rate": 2.1826959896643607e-05, "loss": 0.4568, "num_tokens": 5408515832.0, "step": 7069 }, { "epoch": 2.5899056517358248, "grad_norm": 0.12884240121027332, "learning_rate": 2.182268743803806e-05, "loss": 0.4752, "num_tokens": 5409266035.0, "step": 7070 }, { "epoch": 2.5902720527617475, "grad_norm": 0.13464526292732373, "learning_rate": 2.1818414989423088e-05, "loss": 0.4412, "num_tokens": 5410072341.0, "step": 7071 }, { "epoch": 2.5906384537876708, "grad_norm": 0.12750609407757715, "learning_rate": 2.181414255103946e-05, "loss": 0.4573, "num_tokens": 5410799631.0, "step": 7072 }, { "epoch": 2.5910048548135935, "grad_norm": 0.12878381826103047, "learning_rate": 2.1809870123127878e-05, "loss": 0.4639, "num_tokens": 5411582895.0, "step": 7073 }, { "epoch": 2.5913712558395163, "grad_norm": 0.1368611772974953, "learning_rate": 2.1805597705929075e-05, "loss": 0.4786, "num_tokens": 5412326288.0, "step": 7074 }, { "epoch": 2.591737656865439, "grad_norm": 0.15429463285349315, "learning_rate": 2.1801325299683786e-05, "loss": 0.4804, "num_tokens": 5413036941.0, "step": 7075 }, { "epoch": 2.592104057891362, "grad_norm": 0.1328893707681989, "learning_rate": 2.1797052904632726e-05, "loss": 0.4474, "num_tokens": 5413679738.0, "step": 7076 }, { "epoch": 2.592470458917285, "grad_norm": 0.13997440619775864, "learning_rate": 2.1792780521016628e-05, "loss": 0.4315, "num_tokens": 5414504661.0, "step": 7077 }, { "epoch": 2.592836859943208, "grad_norm": 0.13168909523317476, "learning_rate": 2.1788508149076218e-05, "loss": 0.4608, "num_tokens": 5415272810.0, "step": 7078 }, { "epoch": 2.5932032609691307, "grad_norm": 0.14140877766533178, "learning_rate": 2.1784235789052217e-05, "loss": 0.4392, "num_tokens": 5415916201.0, "step": 7079 }, { "epoch": 2.5935696619950535, "grad_norm": 0.13618388583573313, "learning_rate": 2.1779963441185354e-05, "loss": 0.4081, "num_tokens": 5416704551.0, "step": 7080 }, { "epoch": 2.5939360630209762, "grad_norm": 0.12323027671654987, "learning_rate": 2.1775691105716354e-05, "loss": 0.4413, "num_tokens": 5417595634.0, "step": 7081 }, { "epoch": 2.5943024640468995, "grad_norm": 0.1391554093218632, "learning_rate": 2.1771418782885925e-05, "loss": 0.4441, "num_tokens": 5418437298.0, "step": 7082 }, { "epoch": 2.5946688650728222, "grad_norm": 0.12897198140938568, "learning_rate": 2.1767146472934808e-05, "loss": 0.4558, "num_tokens": 5419238110.0, "step": 7083 }, { "epoch": 2.595035266098745, "grad_norm": 0.12613988594472678, "learning_rate": 2.176287417610372e-05, "loss": 0.4367, "num_tokens": 5420093323.0, "step": 7084 }, { "epoch": 2.595401667124668, "grad_norm": 0.12932008104082873, "learning_rate": 2.1758601892633367e-05, "loss": 0.4472, "num_tokens": 5420811916.0, "step": 7085 }, { "epoch": 2.5957680681505906, "grad_norm": 0.1330247572532673, "learning_rate": 2.1754329622764487e-05, "loss": 0.4844, "num_tokens": 5421490803.0, "step": 7086 }, { "epoch": 2.596134469176514, "grad_norm": 0.14833888723599312, "learning_rate": 2.175005736673779e-05, "loss": 0.4466, "num_tokens": 5422257938.0, "step": 7087 }, { "epoch": 2.5965008702024366, "grad_norm": 0.15375812780366305, "learning_rate": 2.1745785124793998e-05, "loss": 0.4538, "num_tokens": 5422946257.0, "step": 7088 }, { "epoch": 2.5968672712283594, "grad_norm": 0.130626235618175, "learning_rate": 2.174151289717383e-05, "loss": 0.4288, "num_tokens": 5423820441.0, "step": 7089 }, { "epoch": 2.5972336722542826, "grad_norm": 0.12501447206983884, "learning_rate": 2.173724068411799e-05, "loss": 0.429, "num_tokens": 5424621713.0, "step": 7090 }, { "epoch": 2.597600073280205, "grad_norm": 0.13655601512581403, "learning_rate": 2.173296848586721e-05, "loss": 0.4484, "num_tokens": 5425411677.0, "step": 7091 }, { "epoch": 2.597966474306128, "grad_norm": 0.1257847476425487, "learning_rate": 2.17286963026622e-05, "loss": 0.4579, "num_tokens": 5426201291.0, "step": 7092 }, { "epoch": 2.598332875332051, "grad_norm": 0.1412179419910851, "learning_rate": 2.1724424134743667e-05, "loss": 0.4493, "num_tokens": 5426971258.0, "step": 7093 }, { "epoch": 2.5986992763579737, "grad_norm": 0.14138469997207048, "learning_rate": 2.1720151982352327e-05, "loss": 0.4778, "num_tokens": 5427768408.0, "step": 7094 }, { "epoch": 2.599065677383897, "grad_norm": 0.14274217684033838, "learning_rate": 2.17158798457289e-05, "loss": 0.4501, "num_tokens": 5428527733.0, "step": 7095 }, { "epoch": 2.5994320784098197, "grad_norm": 0.1397052928960281, "learning_rate": 2.171160772511409e-05, "loss": 0.4411, "num_tokens": 5429280636.0, "step": 7096 }, { "epoch": 2.5997984794357425, "grad_norm": 0.13545016738539284, "learning_rate": 2.170733562074862e-05, "loss": 0.4572, "num_tokens": 5430007258.0, "step": 7097 }, { "epoch": 2.6001648804616653, "grad_norm": 0.13464648277319125, "learning_rate": 2.1703063532873182e-05, "loss": 0.4446, "num_tokens": 5430833234.0, "step": 7098 }, { "epoch": 2.600531281487588, "grad_norm": 0.15381402921912735, "learning_rate": 2.1698791461728493e-05, "loss": 0.4374, "num_tokens": 5431553081.0, "step": 7099 }, { "epoch": 2.6008976825135113, "grad_norm": 0.14100709962345068, "learning_rate": 2.1694519407555265e-05, "loss": 0.4638, "num_tokens": 5432303365.0, "step": 7100 }, { "epoch": 2.601264083539434, "grad_norm": 0.13850678765815383, "learning_rate": 2.1690247370594197e-05, "loss": 0.4748, "num_tokens": 5433017337.0, "step": 7101 }, { "epoch": 2.601630484565357, "grad_norm": 0.15918345594835895, "learning_rate": 2.1685975351086002e-05, "loss": 0.4596, "num_tokens": 5433715905.0, "step": 7102 }, { "epoch": 2.6019968855912796, "grad_norm": 0.133487454121156, "learning_rate": 2.1681703349271387e-05, "loss": 0.4293, "num_tokens": 5434518002.0, "step": 7103 }, { "epoch": 2.6023632866172024, "grad_norm": 0.1325474971565264, "learning_rate": 2.1677431365391038e-05, "loss": 0.4563, "num_tokens": 5435246383.0, "step": 7104 }, { "epoch": 2.6027296876431256, "grad_norm": 0.13103293379183764, "learning_rate": 2.1673159399685685e-05, "loss": 0.4487, "num_tokens": 5436023950.0, "step": 7105 }, { "epoch": 2.6030960886690484, "grad_norm": 0.13745564845983743, "learning_rate": 2.1668887452396e-05, "loss": 0.4665, "num_tokens": 5436680993.0, "step": 7106 }, { "epoch": 2.603462489694971, "grad_norm": 0.15102084365060667, "learning_rate": 2.1664615523762715e-05, "loss": 0.4573, "num_tokens": 5437351099.0, "step": 7107 }, { "epoch": 2.603828890720894, "grad_norm": 0.14608493831315345, "learning_rate": 2.166034361402651e-05, "loss": 0.4155, "num_tokens": 5438128691.0, "step": 7108 }, { "epoch": 2.6041952917468167, "grad_norm": 0.13339198266680574, "learning_rate": 2.1656071723428084e-05, "loss": 0.4642, "num_tokens": 5438813869.0, "step": 7109 }, { "epoch": 2.60456169277274, "grad_norm": 0.15297417547009748, "learning_rate": 2.1651799852208145e-05, "loss": 0.484, "num_tokens": 5439636436.0, "step": 7110 }, { "epoch": 2.6049280937986627, "grad_norm": 0.13900038728872074, "learning_rate": 2.1647528000607383e-05, "loss": 0.4501, "num_tokens": 5440492026.0, "step": 7111 }, { "epoch": 2.6052944948245855, "grad_norm": 0.14230349349094512, "learning_rate": 2.164325616886649e-05, "loss": 0.4643, "num_tokens": 5441173172.0, "step": 7112 }, { "epoch": 2.6056608958505083, "grad_norm": 0.14623342186764832, "learning_rate": 2.1638984357226164e-05, "loss": 0.4352, "num_tokens": 5441967013.0, "step": 7113 }, { "epoch": 2.606027296876431, "grad_norm": 0.13294091701704258, "learning_rate": 2.1634712565927097e-05, "loss": 0.4425, "num_tokens": 5442818793.0, "step": 7114 }, { "epoch": 2.6063936979023543, "grad_norm": 0.12607150906250814, "learning_rate": 2.163044079521e-05, "loss": 0.4628, "num_tokens": 5443566327.0, "step": 7115 }, { "epoch": 2.606760098928277, "grad_norm": 0.14979250915004885, "learning_rate": 2.1626169045315532e-05, "loss": 0.4546, "num_tokens": 5444304281.0, "step": 7116 }, { "epoch": 2.6071264999542, "grad_norm": 0.13498965804297486, "learning_rate": 2.16218973164844e-05, "loss": 0.4519, "num_tokens": 5445033570.0, "step": 7117 }, { "epoch": 2.6074929009801227, "grad_norm": 0.1461895403076819, "learning_rate": 2.161762560895729e-05, "loss": 0.4599, "num_tokens": 5445875631.0, "step": 7118 }, { "epoch": 2.6078593020060454, "grad_norm": 0.12324910034092695, "learning_rate": 2.1613353922974896e-05, "loss": 0.4555, "num_tokens": 5446727273.0, "step": 7119 }, { "epoch": 2.6082257030319687, "grad_norm": 0.14204411556639274, "learning_rate": 2.160908225877789e-05, "loss": 0.4553, "num_tokens": 5447398904.0, "step": 7120 }, { "epoch": 2.6085921040578914, "grad_norm": 0.14658788852332216, "learning_rate": 2.160481061660697e-05, "loss": 0.4481, "num_tokens": 5448179103.0, "step": 7121 }, { "epoch": 2.608958505083814, "grad_norm": 0.13074500157675234, "learning_rate": 2.160053899670282e-05, "loss": 0.4576, "num_tokens": 5448906420.0, "step": 7122 }, { "epoch": 2.609324906109737, "grad_norm": 0.13595994980796827, "learning_rate": 2.1596267399306102e-05, "loss": 0.4437, "num_tokens": 5449782863.0, "step": 7123 }, { "epoch": 2.6096913071356598, "grad_norm": 0.11957373901280766, "learning_rate": 2.1591995824657524e-05, "loss": 0.4621, "num_tokens": 5450636408.0, "step": 7124 }, { "epoch": 2.610057708161583, "grad_norm": 0.13985508889425033, "learning_rate": 2.158772427299775e-05, "loss": 0.4477, "num_tokens": 5451330174.0, "step": 7125 }, { "epoch": 2.610424109187506, "grad_norm": 0.14440804882973024, "learning_rate": 2.1583452744567463e-05, "loss": 0.4589, "num_tokens": 5452123139.0, "step": 7126 }, { "epoch": 2.6107905102134286, "grad_norm": 0.12862821564203097, "learning_rate": 2.1579181239607348e-05, "loss": 0.4986, "num_tokens": 5452933893.0, "step": 7127 }, { "epoch": 2.6111569112393513, "grad_norm": 0.14072345177128262, "learning_rate": 2.1574909758358058e-05, "loss": 0.4431, "num_tokens": 5453702591.0, "step": 7128 }, { "epoch": 2.611523312265274, "grad_norm": 0.13700662909611672, "learning_rate": 2.1570638301060297e-05, "loss": 0.4825, "num_tokens": 5454401078.0, "step": 7129 }, { "epoch": 2.6118897132911973, "grad_norm": 0.12879133538591003, "learning_rate": 2.156636686795472e-05, "loss": 0.4216, "num_tokens": 5455208452.0, "step": 7130 }, { "epoch": 2.61225611431712, "grad_norm": 0.13266514360482298, "learning_rate": 2.1562095459281997e-05, "loss": 0.4443, "num_tokens": 5456054806.0, "step": 7131 }, { "epoch": 2.612622515343043, "grad_norm": 0.11996652685227133, "learning_rate": 2.155782407528282e-05, "loss": 0.4205, "num_tokens": 5456713655.0, "step": 7132 }, { "epoch": 2.6129889163689657, "grad_norm": 0.1450018700803581, "learning_rate": 2.1553552716197824e-05, "loss": 0.4389, "num_tokens": 5457476058.0, "step": 7133 }, { "epoch": 2.6133553173948885, "grad_norm": 0.1462190382389811, "learning_rate": 2.1549281382267706e-05, "loss": 0.4758, "num_tokens": 5458187218.0, "step": 7134 }, { "epoch": 2.6137217184208117, "grad_norm": 0.1294279387211764, "learning_rate": 2.1545010073733122e-05, "loss": 0.4643, "num_tokens": 5458996018.0, "step": 7135 }, { "epoch": 2.6140881194467345, "grad_norm": 0.14537345356654507, "learning_rate": 2.1540738790834733e-05, "loss": 0.4809, "num_tokens": 5459790294.0, "step": 7136 }, { "epoch": 2.6144545204726573, "grad_norm": 0.136753914219434, "learning_rate": 2.1536467533813214e-05, "loss": 0.4544, "num_tokens": 5460548879.0, "step": 7137 }, { "epoch": 2.61482092149858, "grad_norm": 0.12942922420076497, "learning_rate": 2.1532196302909213e-05, "loss": 0.455, "num_tokens": 5461407443.0, "step": 7138 }, { "epoch": 2.615187322524503, "grad_norm": 0.14278546472036058, "learning_rate": 2.1527925098363394e-05, "loss": 0.4371, "num_tokens": 5462118109.0, "step": 7139 }, { "epoch": 2.615553723550426, "grad_norm": 0.13410551319313171, "learning_rate": 2.1523653920416426e-05, "loss": 0.4561, "num_tokens": 5462902422.0, "step": 7140 }, { "epoch": 2.615920124576349, "grad_norm": 0.13776601011081316, "learning_rate": 2.151938276930896e-05, "loss": 0.4566, "num_tokens": 5463618451.0, "step": 7141 }, { "epoch": 2.6162865256022716, "grad_norm": 0.13757428389281695, "learning_rate": 2.151511164528165e-05, "loss": 0.4515, "num_tokens": 5464384967.0, "step": 7142 }, { "epoch": 2.616652926628195, "grad_norm": 0.12980254637839705, "learning_rate": 2.1510840548575146e-05, "loss": 0.435, "num_tokens": 5465162058.0, "step": 7143 }, { "epoch": 2.617019327654117, "grad_norm": 0.13329285132362928, "learning_rate": 2.150656947943011e-05, "loss": 0.4375, "num_tokens": 5465932487.0, "step": 7144 }, { "epoch": 2.6173857286800404, "grad_norm": 0.12131861548084945, "learning_rate": 2.1502298438087196e-05, "loss": 0.4686, "num_tokens": 5466758476.0, "step": 7145 }, { "epoch": 2.617752129705963, "grad_norm": 0.1373332646669391, "learning_rate": 2.1498027424787047e-05, "loss": 0.4672, "num_tokens": 5467535038.0, "step": 7146 }, { "epoch": 2.618118530731886, "grad_norm": 0.12397714714797857, "learning_rate": 2.149375643977031e-05, "loss": 0.4404, "num_tokens": 5468285913.0, "step": 7147 }, { "epoch": 2.618484931757809, "grad_norm": 0.13304799275418133, "learning_rate": 2.1489485483277633e-05, "loss": 0.49, "num_tokens": 5468979209.0, "step": 7148 }, { "epoch": 2.618851332783732, "grad_norm": 0.139303593548837, "learning_rate": 2.148521455554967e-05, "loss": 0.4689, "num_tokens": 5469817530.0, "step": 7149 }, { "epoch": 2.6192177338096547, "grad_norm": 0.13706755714148403, "learning_rate": 2.1480943656827048e-05, "loss": 0.4786, "num_tokens": 5470616159.0, "step": 7150 }, { "epoch": 2.6195841348355775, "grad_norm": 0.12056682753782608, "learning_rate": 2.1476672787350427e-05, "loss": 0.4406, "num_tokens": 5471421427.0, "step": 7151 }, { "epoch": 2.6199505358615003, "grad_norm": 0.1243388628197459, "learning_rate": 2.1472401947360426e-05, "loss": 0.4736, "num_tokens": 5472157427.0, "step": 7152 }, { "epoch": 2.6203169368874235, "grad_norm": 0.1290341406455375, "learning_rate": 2.1468131137097704e-05, "loss": 0.4383, "num_tokens": 5473035539.0, "step": 7153 }, { "epoch": 2.6206833379133463, "grad_norm": 0.13852551023760415, "learning_rate": 2.1463860356802892e-05, "loss": 0.4692, "num_tokens": 5473784437.0, "step": 7154 }, { "epoch": 2.621049738939269, "grad_norm": 0.12708132567903951, "learning_rate": 2.1459589606716612e-05, "loss": 0.4603, "num_tokens": 5474570239.0, "step": 7155 }, { "epoch": 2.621416139965192, "grad_norm": 0.12619774260461314, "learning_rate": 2.145531888707952e-05, "loss": 0.4539, "num_tokens": 5475361232.0, "step": 7156 }, { "epoch": 2.6217825409911146, "grad_norm": 0.14478882354128156, "learning_rate": 2.145104819813223e-05, "loss": 0.4319, "num_tokens": 5475994119.0, "step": 7157 }, { "epoch": 2.622148942017038, "grad_norm": 0.1246616518038792, "learning_rate": 2.1446777540115377e-05, "loss": 0.4246, "num_tokens": 5476701723.0, "step": 7158 }, { "epoch": 2.6225153430429606, "grad_norm": 0.13986318914510215, "learning_rate": 2.1442506913269595e-05, "loss": 0.4405, "num_tokens": 5477587953.0, "step": 7159 }, { "epoch": 2.6228817440688834, "grad_norm": 0.12081648508524798, "learning_rate": 2.1438236317835496e-05, "loss": 0.4324, "num_tokens": 5478352930.0, "step": 7160 }, { "epoch": 2.623248145094806, "grad_norm": 0.1266022982967262, "learning_rate": 2.143396575405372e-05, "loss": 0.4386, "num_tokens": 5479142415.0, "step": 7161 }, { "epoch": 2.623614546120729, "grad_norm": 0.1235432534681589, "learning_rate": 2.142969522216489e-05, "loss": 0.4347, "num_tokens": 5479831749.0, "step": 7162 }, { "epoch": 2.623980947146652, "grad_norm": 0.1380979557114485, "learning_rate": 2.142542472240961e-05, "loss": 0.4414, "num_tokens": 5480538715.0, "step": 7163 }, { "epoch": 2.624347348172575, "grad_norm": 0.12568723004167834, "learning_rate": 2.1421154255028524e-05, "loss": 0.4275, "num_tokens": 5481409703.0, "step": 7164 }, { "epoch": 2.6247137491984978, "grad_norm": 0.13230661776400487, "learning_rate": 2.1416883820262225e-05, "loss": 0.4774, "num_tokens": 5482115346.0, "step": 7165 }, { "epoch": 2.6250801502244205, "grad_norm": 0.13996660409730524, "learning_rate": 2.1412613418351342e-05, "loss": 0.4478, "num_tokens": 5482847938.0, "step": 7166 }, { "epoch": 2.6254465512503433, "grad_norm": 0.14004703263518006, "learning_rate": 2.140834304953649e-05, "loss": 0.4658, "num_tokens": 5483612563.0, "step": 7167 }, { "epoch": 2.6258129522762665, "grad_norm": 0.13171640492440545, "learning_rate": 2.140407271405828e-05, "loss": 0.428, "num_tokens": 5484410713.0, "step": 7168 }, { "epoch": 2.6261793533021893, "grad_norm": 0.13203261886632367, "learning_rate": 2.139980241215731e-05, "loss": 0.4535, "num_tokens": 5485174491.0, "step": 7169 }, { "epoch": 2.626545754328112, "grad_norm": 0.13306656484356683, "learning_rate": 2.139553214407421e-05, "loss": 0.4311, "num_tokens": 5485883877.0, "step": 7170 }, { "epoch": 2.626912155354035, "grad_norm": 0.14075766837244735, "learning_rate": 2.1391261910049566e-05, "loss": 0.4709, "num_tokens": 5486562776.0, "step": 7171 }, { "epoch": 2.6272785563799577, "grad_norm": 0.1236425877851932, "learning_rate": 2.1386991710323994e-05, "loss": 0.4372, "num_tokens": 5487343377.0, "step": 7172 }, { "epoch": 2.627644957405881, "grad_norm": 0.12788538696725824, "learning_rate": 2.1382721545138098e-05, "loss": 0.4634, "num_tokens": 5488193199.0, "step": 7173 }, { "epoch": 2.6280113584318037, "grad_norm": 0.13098855156201938, "learning_rate": 2.1378451414732462e-05, "loss": 0.4469, "num_tokens": 5488969979.0, "step": 7174 }, { "epoch": 2.6283777594577264, "grad_norm": 0.130647218776846, "learning_rate": 2.1374181319347702e-05, "loss": 0.4406, "num_tokens": 5489724002.0, "step": 7175 }, { "epoch": 2.6287441604836492, "grad_norm": 0.12885715684133245, "learning_rate": 2.1369911259224415e-05, "loss": 0.4672, "num_tokens": 5490530697.0, "step": 7176 }, { "epoch": 2.629110561509572, "grad_norm": 0.13530898708837538, "learning_rate": 2.1365641234603186e-05, "loss": 0.4614, "num_tokens": 5491189596.0, "step": 7177 }, { "epoch": 2.6294769625354952, "grad_norm": 0.15231980539055212, "learning_rate": 2.1361371245724606e-05, "loss": 0.4687, "num_tokens": 5491893312.0, "step": 7178 }, { "epoch": 2.629843363561418, "grad_norm": 0.1437848847215602, "learning_rate": 2.1357101292829273e-05, "loss": 0.4779, "num_tokens": 5492531256.0, "step": 7179 }, { "epoch": 2.630209764587341, "grad_norm": 0.1405451093379363, "learning_rate": 2.1352831376157775e-05, "loss": 0.4536, "num_tokens": 5493308069.0, "step": 7180 }, { "epoch": 2.6305761656132636, "grad_norm": 0.12739801795673616, "learning_rate": 2.13485614959507e-05, "loss": 0.4432, "num_tokens": 5494115929.0, "step": 7181 }, { "epoch": 2.6309425666391864, "grad_norm": 0.13410609068343252, "learning_rate": 2.1344291652448622e-05, "loss": 0.4533, "num_tokens": 5494769555.0, "step": 7182 }, { "epoch": 2.6313089676651096, "grad_norm": 0.13682369843382405, "learning_rate": 2.1340021845892133e-05, "loss": 0.4491, "num_tokens": 5495565982.0, "step": 7183 }, { "epoch": 2.6316753686910324, "grad_norm": 0.14529922698262585, "learning_rate": 2.1335752076521816e-05, "loss": 0.4466, "num_tokens": 5496407817.0, "step": 7184 }, { "epoch": 2.632041769716955, "grad_norm": 0.1332652807503029, "learning_rate": 2.133148234457823e-05, "loss": 0.4533, "num_tokens": 5497175441.0, "step": 7185 }, { "epoch": 2.632408170742878, "grad_norm": 0.12962793626047714, "learning_rate": 2.132721265030198e-05, "loss": 0.4399, "num_tokens": 5498100288.0, "step": 7186 }, { "epoch": 2.6327745717688007, "grad_norm": 0.15724099016848925, "learning_rate": 2.132294299393362e-05, "loss": 0.4879, "num_tokens": 5498800597.0, "step": 7187 }, { "epoch": 2.633140972794724, "grad_norm": 0.13999857699960228, "learning_rate": 2.131867337571372e-05, "loss": 0.4544, "num_tokens": 5499534261.0, "step": 7188 }, { "epoch": 2.6335073738206467, "grad_norm": 0.1250925688150636, "learning_rate": 2.131440379588287e-05, "loss": 0.4581, "num_tokens": 5500388503.0, "step": 7189 }, { "epoch": 2.6338737748465695, "grad_norm": 0.15409973403280738, "learning_rate": 2.1310134254681612e-05, "loss": 0.4702, "num_tokens": 5501137234.0, "step": 7190 }, { "epoch": 2.6342401758724927, "grad_norm": 0.12368336679327968, "learning_rate": 2.130586475235053e-05, "loss": 0.4356, "num_tokens": 5501911118.0, "step": 7191 }, { "epoch": 2.634606576898415, "grad_norm": 0.15668582642532755, "learning_rate": 2.130159528913018e-05, "loss": 0.493, "num_tokens": 5502523487.0, "step": 7192 }, { "epoch": 2.6349729779243383, "grad_norm": 0.14556006773093594, "learning_rate": 2.1297325865261115e-05, "loss": 0.4709, "num_tokens": 5503357724.0, "step": 7193 }, { "epoch": 2.635339378950261, "grad_norm": 0.12366315711516283, "learning_rate": 2.1293056480983913e-05, "loss": 0.462, "num_tokens": 5504111597.0, "step": 7194 }, { "epoch": 2.635705779976184, "grad_norm": 0.14770057512972676, "learning_rate": 2.1288787136539115e-05, "loss": 0.4689, "num_tokens": 5504847439.0, "step": 7195 }, { "epoch": 2.636072181002107, "grad_norm": 0.14089843005945754, "learning_rate": 2.128451783216728e-05, "loss": 0.4533, "num_tokens": 5505541722.0, "step": 7196 }, { "epoch": 2.63643858202803, "grad_norm": 0.13602888601200674, "learning_rate": 2.128024856810896e-05, "loss": 0.4505, "num_tokens": 5506315033.0, "step": 7197 }, { "epoch": 2.6368049830539526, "grad_norm": 0.1274547538952452, "learning_rate": 2.1275979344604702e-05, "loss": 0.4906, "num_tokens": 5507132512.0, "step": 7198 }, { "epoch": 2.6371713840798754, "grad_norm": 0.13462296534479432, "learning_rate": 2.1271710161895066e-05, "loss": 0.4963, "num_tokens": 5507835095.0, "step": 7199 }, { "epoch": 2.637537785105798, "grad_norm": 0.14507022737881392, "learning_rate": 2.1267441020220582e-05, "loss": 0.4622, "num_tokens": 5508622527.0, "step": 7200 }, { "epoch": 2.6379041861317214, "grad_norm": 0.1334536743921859, "learning_rate": 2.126317191982179e-05, "loss": 0.439, "num_tokens": 5509386575.0, "step": 7201 }, { "epoch": 2.638270587157644, "grad_norm": 0.1278375371033304, "learning_rate": 2.125890286093925e-05, "loss": 0.4497, "num_tokens": 5510107382.0, "step": 7202 }, { "epoch": 2.638636988183567, "grad_norm": 0.1248486790323969, "learning_rate": 2.1254633843813485e-05, "loss": 0.4736, "num_tokens": 5510845917.0, "step": 7203 }, { "epoch": 2.6390033892094897, "grad_norm": 0.14558204715741074, "learning_rate": 2.125036486868503e-05, "loss": 0.4658, "num_tokens": 5511625705.0, "step": 7204 }, { "epoch": 2.6393697902354125, "grad_norm": 0.13046921294292518, "learning_rate": 2.124609593579443e-05, "loss": 0.4688, "num_tokens": 5512374051.0, "step": 7205 }, { "epoch": 2.6397361912613357, "grad_norm": 0.13671398146926406, "learning_rate": 2.124182704538221e-05, "loss": 0.4475, "num_tokens": 5513223579.0, "step": 7206 }, { "epoch": 2.6401025922872585, "grad_norm": 0.1306250055524355, "learning_rate": 2.123755819768889e-05, "loss": 0.4825, "num_tokens": 5513940096.0, "step": 7207 }, { "epoch": 2.6404689933131813, "grad_norm": 0.1353646118978017, "learning_rate": 2.123328939295501e-05, "loss": 0.427, "num_tokens": 5514623698.0, "step": 7208 }, { "epoch": 2.640835394339104, "grad_norm": 0.13136577884459422, "learning_rate": 2.122902063142108e-05, "loss": 0.4881, "num_tokens": 5515407847.0, "step": 7209 }, { "epoch": 2.641201795365027, "grad_norm": 0.14898334631048288, "learning_rate": 2.1224751913327637e-05, "loss": 0.4636, "num_tokens": 5516183787.0, "step": 7210 }, { "epoch": 2.64156819639095, "grad_norm": 0.14122934096367099, "learning_rate": 2.1220483238915195e-05, "loss": 0.4603, "num_tokens": 5516895333.0, "step": 7211 }, { "epoch": 2.641934597416873, "grad_norm": 0.1250415261602163, "learning_rate": 2.121621460842426e-05, "loss": 0.4749, "num_tokens": 5517637232.0, "step": 7212 }, { "epoch": 2.6423009984427956, "grad_norm": 0.13388379245826654, "learning_rate": 2.1211946022095358e-05, "loss": 0.4366, "num_tokens": 5518515515.0, "step": 7213 }, { "epoch": 2.6426673994687184, "grad_norm": 0.1260014546571464, "learning_rate": 2.1207677480168996e-05, "loss": 0.4413, "num_tokens": 5519277443.0, "step": 7214 }, { "epoch": 2.643033800494641, "grad_norm": 0.1436756616963329, "learning_rate": 2.1203408982885678e-05, "loss": 0.4596, "num_tokens": 5520031620.0, "step": 7215 }, { "epoch": 2.6434002015205644, "grad_norm": 0.12366943844317559, "learning_rate": 2.1199140530485924e-05, "loss": 0.4407, "num_tokens": 5520858837.0, "step": 7216 }, { "epoch": 2.643766602546487, "grad_norm": 0.1460294578008817, "learning_rate": 2.119487212321022e-05, "loss": 0.4456, "num_tokens": 5521624724.0, "step": 7217 }, { "epoch": 2.64413300357241, "grad_norm": 0.1279542623572129, "learning_rate": 2.119060376129909e-05, "loss": 0.4336, "num_tokens": 5522367147.0, "step": 7218 }, { "epoch": 2.6444994045983328, "grad_norm": 0.12211157682164815, "learning_rate": 2.1186335444993018e-05, "loss": 0.4363, "num_tokens": 5523208828.0, "step": 7219 }, { "epoch": 2.6448658056242556, "grad_norm": 0.13150382173264794, "learning_rate": 2.118206717453249e-05, "loss": 0.4688, "num_tokens": 5524071536.0, "step": 7220 }, { "epoch": 2.6452322066501788, "grad_norm": 0.14488838457455477, "learning_rate": 2.1177798950158026e-05, "loss": 0.4862, "num_tokens": 5524735795.0, "step": 7221 }, { "epoch": 2.6455986076761016, "grad_norm": 0.1368404577637695, "learning_rate": 2.1173530772110097e-05, "loss": 0.4621, "num_tokens": 5525360047.0, "step": 7222 }, { "epoch": 2.6459650087020243, "grad_norm": 0.15508452312553928, "learning_rate": 2.11692626406292e-05, "loss": 0.4549, "num_tokens": 5526136434.0, "step": 7223 }, { "epoch": 2.646331409727947, "grad_norm": 0.1354613194308906, "learning_rate": 2.116499455595582e-05, "loss": 0.4334, "num_tokens": 5527025293.0, "step": 7224 }, { "epoch": 2.64669781075387, "grad_norm": 0.12140496612251722, "learning_rate": 2.116072651833044e-05, "loss": 0.4188, "num_tokens": 5527756902.0, "step": 7225 }, { "epoch": 2.647064211779793, "grad_norm": 0.12118431378526939, "learning_rate": 2.1156458527993534e-05, "loss": 0.4329, "num_tokens": 5528644252.0, "step": 7226 }, { "epoch": 2.647430612805716, "grad_norm": 0.13735599735738424, "learning_rate": 2.1152190585185586e-05, "loss": 0.4655, "num_tokens": 5529404983.0, "step": 7227 }, { "epoch": 2.6477970138316387, "grad_norm": 0.13567028230784453, "learning_rate": 2.1147922690147075e-05, "loss": 0.4795, "num_tokens": 5530091076.0, "step": 7228 }, { "epoch": 2.6481634148575615, "grad_norm": 0.1406034626631764, "learning_rate": 2.1143654843118476e-05, "loss": 0.4352, "num_tokens": 5530977993.0, "step": 7229 }, { "epoch": 2.6485298158834842, "grad_norm": 0.12471673922337412, "learning_rate": 2.1139387044340244e-05, "loss": 0.4331, "num_tokens": 5531774037.0, "step": 7230 }, { "epoch": 2.6488962169094075, "grad_norm": 0.15639846325072743, "learning_rate": 2.1135119294052853e-05, "loss": 0.4695, "num_tokens": 5532479539.0, "step": 7231 }, { "epoch": 2.6492626179353302, "grad_norm": 0.139439461315278, "learning_rate": 2.113085159249677e-05, "loss": 0.45, "num_tokens": 5533152052.0, "step": 7232 }, { "epoch": 2.649629018961253, "grad_norm": 0.14366100885837738, "learning_rate": 2.1126583939912467e-05, "loss": 0.4561, "num_tokens": 5534033878.0, "step": 7233 }, { "epoch": 2.649995419987176, "grad_norm": 0.13048456502452524, "learning_rate": 2.112231633654038e-05, "loss": 0.4577, "num_tokens": 5534838401.0, "step": 7234 }, { "epoch": 2.6503618210130986, "grad_norm": 0.14098607824866408, "learning_rate": 2.1118048782620975e-05, "loss": 0.4528, "num_tokens": 5535675334.0, "step": 7235 }, { "epoch": 2.650728222039022, "grad_norm": 0.12611451567908816, "learning_rate": 2.1113781278394714e-05, "loss": 0.4823, "num_tokens": 5536369562.0, "step": 7236 }, { "epoch": 2.6510946230649446, "grad_norm": 0.1539037070813383, "learning_rate": 2.1109513824102038e-05, "loss": 0.4497, "num_tokens": 5537105078.0, "step": 7237 }, { "epoch": 2.6514610240908674, "grad_norm": 0.13745097700237957, "learning_rate": 2.1105246419983403e-05, "loss": 0.4705, "num_tokens": 5537842312.0, "step": 7238 }, { "epoch": 2.6518274251167906, "grad_norm": 0.14293129280708852, "learning_rate": 2.110097906627924e-05, "loss": 0.4378, "num_tokens": 5538528053.0, "step": 7239 }, { "epoch": 2.652193826142713, "grad_norm": 0.11895054592153148, "learning_rate": 2.1096711763230005e-05, "loss": 0.4507, "num_tokens": 5539343111.0, "step": 7240 }, { "epoch": 2.652560227168636, "grad_norm": 0.1413056744639887, "learning_rate": 2.1092444511076135e-05, "loss": 0.4897, "num_tokens": 5540079561.0, "step": 7241 }, { "epoch": 2.652926628194559, "grad_norm": 0.14279819068786803, "learning_rate": 2.1088177310058057e-05, "loss": 0.4352, "num_tokens": 5540797644.0, "step": 7242 }, { "epoch": 2.6532930292204817, "grad_norm": 0.13920182191641386, "learning_rate": 2.1083910160416213e-05, "loss": 0.4273, "num_tokens": 5541471036.0, "step": 7243 }, { "epoch": 2.653659430246405, "grad_norm": 0.14310651631497526, "learning_rate": 2.1079643062391032e-05, "loss": 0.4811, "num_tokens": 5542072598.0, "step": 7244 }, { "epoch": 2.6540258312723277, "grad_norm": 0.1511987254961622, "learning_rate": 2.1075376016222937e-05, "loss": 0.4713, "num_tokens": 5542803351.0, "step": 7245 }, { "epoch": 2.6543922322982505, "grad_norm": 0.134830938024068, "learning_rate": 2.1071109022152365e-05, "loss": 0.4413, "num_tokens": 5543545074.0, "step": 7246 }, { "epoch": 2.6547586333241733, "grad_norm": 0.13918779422222097, "learning_rate": 2.1066842080419718e-05, "loss": 0.478, "num_tokens": 5544346674.0, "step": 7247 }, { "epoch": 2.655125034350096, "grad_norm": 0.1276413798973421, "learning_rate": 2.106257519126544e-05, "loss": 0.4616, "num_tokens": 5545123400.0, "step": 7248 }, { "epoch": 2.6554914353760193, "grad_norm": 0.12664254730558105, "learning_rate": 2.1058308354929925e-05, "loss": 0.4399, "num_tokens": 5545945787.0, "step": 7249 }, { "epoch": 2.655857836401942, "grad_norm": 0.15008061161530964, "learning_rate": 2.1054041571653592e-05, "loss": 0.4567, "num_tokens": 5546614919.0, "step": 7250 }, { "epoch": 2.656224237427865, "grad_norm": 0.14318348057800934, "learning_rate": 2.1049774841676858e-05, "loss": 0.4551, "num_tokens": 5547272633.0, "step": 7251 }, { "epoch": 2.6565906384537876, "grad_norm": 0.12566570695167098, "learning_rate": 2.1045508165240128e-05, "loss": 0.4526, "num_tokens": 5548009560.0, "step": 7252 }, { "epoch": 2.6569570394797104, "grad_norm": 0.1288588147577034, "learning_rate": 2.1041241542583797e-05, "loss": 0.4603, "num_tokens": 5548887205.0, "step": 7253 }, { "epoch": 2.6573234405056336, "grad_norm": 0.127623535403572, "learning_rate": 2.1036974973948272e-05, "loss": 0.4758, "num_tokens": 5549623640.0, "step": 7254 }, { "epoch": 2.6576898415315564, "grad_norm": 0.15387104861638687, "learning_rate": 2.1032708459573953e-05, "loss": 0.4446, "num_tokens": 5550283010.0, "step": 7255 }, { "epoch": 2.658056242557479, "grad_norm": 0.12800003427821785, "learning_rate": 2.102844199970124e-05, "loss": 0.4631, "num_tokens": 5551036577.0, "step": 7256 }, { "epoch": 2.658422643583402, "grad_norm": 0.14350289841955877, "learning_rate": 2.1024175594570508e-05, "loss": 0.4667, "num_tokens": 5551811457.0, "step": 7257 }, { "epoch": 2.6587890446093247, "grad_norm": 0.13818985601326308, "learning_rate": 2.101990924442215e-05, "loss": 0.4529, "num_tokens": 5552594641.0, "step": 7258 }, { "epoch": 2.659155445635248, "grad_norm": 0.13174053461050148, "learning_rate": 2.1015642949496572e-05, "loss": 0.4522, "num_tokens": 5553471199.0, "step": 7259 }, { "epoch": 2.6595218466611708, "grad_norm": 0.145833330603951, "learning_rate": 2.1011376710034134e-05, "loss": 0.4678, "num_tokens": 5554131461.0, "step": 7260 }, { "epoch": 2.6598882476870935, "grad_norm": 0.1381026271094872, "learning_rate": 2.100711052627522e-05, "loss": 0.479, "num_tokens": 5554833194.0, "step": 7261 }, { "epoch": 2.6602546487130163, "grad_norm": 0.14399233714413567, "learning_rate": 2.1002844398460208e-05, "loss": 0.4816, "num_tokens": 5555603802.0, "step": 7262 }, { "epoch": 2.660621049738939, "grad_norm": 0.1522423813870835, "learning_rate": 2.0998578326829474e-05, "loss": 0.472, "num_tokens": 5556428770.0, "step": 7263 }, { "epoch": 2.6609874507648623, "grad_norm": 0.1444490470667281, "learning_rate": 2.0994312311623387e-05, "loss": 0.4664, "num_tokens": 5557222841.0, "step": 7264 }, { "epoch": 2.661353851790785, "grad_norm": 0.1372547259547933, "learning_rate": 2.099004635308231e-05, "loss": 0.4324, "num_tokens": 5557971352.0, "step": 7265 }, { "epoch": 2.661720252816708, "grad_norm": 0.13339562506757108, "learning_rate": 2.0985780451446606e-05, "loss": 0.4367, "num_tokens": 5558768765.0, "step": 7266 }, { "epoch": 2.6620866538426307, "grad_norm": 0.13538494442813503, "learning_rate": 2.098151460695664e-05, "loss": 0.4967, "num_tokens": 5559489578.0, "step": 7267 }, { "epoch": 2.6624530548685534, "grad_norm": 0.14671942831174448, "learning_rate": 2.0977248819852772e-05, "loss": 0.4747, "num_tokens": 5560239796.0, "step": 7268 }, { "epoch": 2.6628194558944767, "grad_norm": 0.14603690908507877, "learning_rate": 2.0972983090375343e-05, "loss": 0.4573, "num_tokens": 5560994335.0, "step": 7269 }, { "epoch": 2.6631858569203994, "grad_norm": 0.13194394354003075, "learning_rate": 2.0968717418764718e-05, "loss": 0.4495, "num_tokens": 5561754718.0, "step": 7270 }, { "epoch": 2.663552257946322, "grad_norm": 0.13528811906105717, "learning_rate": 2.096445180526123e-05, "loss": 0.4435, "num_tokens": 5562652830.0, "step": 7271 }, { "epoch": 2.663918658972245, "grad_norm": 0.12953687784036771, "learning_rate": 2.0960186250105236e-05, "loss": 0.4286, "num_tokens": 5563532706.0, "step": 7272 }, { "epoch": 2.664285059998168, "grad_norm": 0.13674304983102117, "learning_rate": 2.0955920753537073e-05, "loss": 0.4439, "num_tokens": 5564261342.0, "step": 7273 }, { "epoch": 2.664651461024091, "grad_norm": 0.13883208214073856, "learning_rate": 2.095165531579707e-05, "loss": 0.453, "num_tokens": 5564981252.0, "step": 7274 }, { "epoch": 2.665017862050014, "grad_norm": 0.13932716442410994, "learning_rate": 2.0947389937125572e-05, "loss": 0.4309, "num_tokens": 5565773548.0, "step": 7275 }, { "epoch": 2.6653842630759366, "grad_norm": 0.14167346292691707, "learning_rate": 2.0943124617762906e-05, "loss": 0.442, "num_tokens": 5566650901.0, "step": 7276 }, { "epoch": 2.6657506641018593, "grad_norm": 0.12932677608160686, "learning_rate": 2.0938859357949396e-05, "loss": 0.4649, "num_tokens": 5567366817.0, "step": 7277 }, { "epoch": 2.666117065127782, "grad_norm": 0.14660976174608648, "learning_rate": 2.0934594157925378e-05, "loss": 0.4269, "num_tokens": 5568243723.0, "step": 7278 }, { "epoch": 2.6664834661537054, "grad_norm": 0.1274036907886359, "learning_rate": 2.0930329017931153e-05, "loss": 0.4588, "num_tokens": 5568969415.0, "step": 7279 }, { "epoch": 2.666849867179628, "grad_norm": 0.14006881066526872, "learning_rate": 2.0926063938207054e-05, "loss": 0.4581, "num_tokens": 5569819487.0, "step": 7280 }, { "epoch": 2.667216268205551, "grad_norm": 0.14427798774181635, "learning_rate": 2.09217989189934e-05, "loss": 0.473, "num_tokens": 5570554906.0, "step": 7281 }, { "epoch": 2.6675826692314737, "grad_norm": 0.13987171467750606, "learning_rate": 2.091753396053047e-05, "loss": 0.4679, "num_tokens": 5571357586.0, "step": 7282 }, { "epoch": 2.6679490702573965, "grad_norm": 0.14152272042262415, "learning_rate": 2.091326906305861e-05, "loss": 0.439, "num_tokens": 5572107111.0, "step": 7283 }, { "epoch": 2.6683154712833197, "grad_norm": 0.1334709442631775, "learning_rate": 2.0909004226818107e-05, "loss": 0.4422, "num_tokens": 5572841277.0, "step": 7284 }, { "epoch": 2.6686818723092425, "grad_norm": 0.13315195871959604, "learning_rate": 2.0904739452049253e-05, "loss": 0.4394, "num_tokens": 5573560259.0, "step": 7285 }, { "epoch": 2.6690482733351653, "grad_norm": 0.13699627223995087, "learning_rate": 2.0900474738992363e-05, "loss": 0.4429, "num_tokens": 5574338807.0, "step": 7286 }, { "epoch": 2.6694146743610885, "grad_norm": 0.13255880562894348, "learning_rate": 2.0896210087887714e-05, "loss": 0.4234, "num_tokens": 5575087639.0, "step": 7287 }, { "epoch": 2.669781075387011, "grad_norm": 0.13520897968060755, "learning_rate": 2.0891945498975595e-05, "loss": 0.4594, "num_tokens": 5575897817.0, "step": 7288 }, { "epoch": 2.670147476412934, "grad_norm": 0.11851103430038365, "learning_rate": 2.0887680972496306e-05, "loss": 0.4274, "num_tokens": 5576781531.0, "step": 7289 }, { "epoch": 2.670513877438857, "grad_norm": 0.12984858937640584, "learning_rate": 2.088341650869013e-05, "loss": 0.4487, "num_tokens": 5577548392.0, "step": 7290 }, { "epoch": 2.6708802784647796, "grad_norm": 0.1434494089521905, "learning_rate": 2.0879152107797326e-05, "loss": 0.4532, "num_tokens": 5578252927.0, "step": 7291 }, { "epoch": 2.671246679490703, "grad_norm": 0.13866092297529883, "learning_rate": 2.0874887770058195e-05, "loss": 0.4642, "num_tokens": 5579020219.0, "step": 7292 }, { "epoch": 2.6716130805166256, "grad_norm": 0.14296650517065188, "learning_rate": 2.0870623495712984e-05, "loss": 0.4942, "num_tokens": 5579719310.0, "step": 7293 }, { "epoch": 2.6719794815425484, "grad_norm": 0.15305691395513613, "learning_rate": 2.0866359285001984e-05, "loss": 0.4535, "num_tokens": 5580444896.0, "step": 7294 }, { "epoch": 2.672345882568471, "grad_norm": 0.13174850224223547, "learning_rate": 2.0862095138165454e-05, "loss": 0.4299, "num_tokens": 5581248809.0, "step": 7295 }, { "epoch": 2.672712283594394, "grad_norm": 0.11960785480244102, "learning_rate": 2.0857831055443643e-05, "loss": 0.4304, "num_tokens": 5582002278.0, "step": 7296 }, { "epoch": 2.673078684620317, "grad_norm": 0.13512427242667505, "learning_rate": 2.0853567037076824e-05, "loss": 0.4582, "num_tokens": 5582809394.0, "step": 7297 }, { "epoch": 2.67344508564624, "grad_norm": 0.13734263141624048, "learning_rate": 2.0849303083305245e-05, "loss": 0.4747, "num_tokens": 5583528384.0, "step": 7298 }, { "epoch": 2.6738114866721627, "grad_norm": 0.1340485451797387, "learning_rate": 2.084503919436915e-05, "loss": 0.4455, "num_tokens": 5584202558.0, "step": 7299 }, { "epoch": 2.6741778876980855, "grad_norm": 0.13871566210033276, "learning_rate": 2.08407753705088e-05, "loss": 0.4332, "num_tokens": 5585035485.0, "step": 7300 }, { "epoch": 2.6745442887240083, "grad_norm": 0.14377002019695656, "learning_rate": 2.0836511611964424e-05, "loss": 0.453, "num_tokens": 5585961041.0, "step": 7301 }, { "epoch": 2.6749106897499315, "grad_norm": 0.13243776066055205, "learning_rate": 2.0832247918976272e-05, "loss": 0.4739, "num_tokens": 5586720731.0, "step": 7302 }, { "epoch": 2.6752770907758543, "grad_norm": 0.13175068655409958, "learning_rate": 2.0827984291784582e-05, "loss": 0.4943, "num_tokens": 5587458629.0, "step": 7303 }, { "epoch": 2.675643491801777, "grad_norm": 0.13516813525812768, "learning_rate": 2.0823720730629566e-05, "loss": 0.4366, "num_tokens": 5588251554.0, "step": 7304 }, { "epoch": 2.6760098928277, "grad_norm": 0.1350186045162279, "learning_rate": 2.0819457235751477e-05, "loss": 0.4648, "num_tokens": 5589204979.0, "step": 7305 }, { "epoch": 2.6763762938536226, "grad_norm": 0.11902623190158766, "learning_rate": 2.081519380739053e-05, "loss": 0.4542, "num_tokens": 5589980057.0, "step": 7306 }, { "epoch": 2.676742694879546, "grad_norm": 0.14021013099652213, "learning_rate": 2.0810930445786938e-05, "loss": 0.4704, "num_tokens": 5590661504.0, "step": 7307 }, { "epoch": 2.6771090959054686, "grad_norm": 0.12976527564769372, "learning_rate": 2.0806667151180932e-05, "loss": 0.4519, "num_tokens": 5591471367.0, "step": 7308 }, { "epoch": 2.6774754969313914, "grad_norm": 0.13417331323320714, "learning_rate": 2.080240392381272e-05, "loss": 0.457, "num_tokens": 5592185303.0, "step": 7309 }, { "epoch": 2.677841897957314, "grad_norm": 0.13269300908472967, "learning_rate": 2.0798140763922504e-05, "loss": 0.4793, "num_tokens": 5592874009.0, "step": 7310 }, { "epoch": 2.678208298983237, "grad_norm": 0.13920058822152453, "learning_rate": 2.0793877671750495e-05, "loss": 0.4515, "num_tokens": 5593673944.0, "step": 7311 }, { "epoch": 2.67857470000916, "grad_norm": 0.135291080088005, "learning_rate": 2.07896146475369e-05, "loss": 0.4731, "num_tokens": 5594321227.0, "step": 7312 }, { "epoch": 2.678941101035083, "grad_norm": 0.1402016300490969, "learning_rate": 2.078535169152191e-05, "loss": 0.4796, "num_tokens": 5595007829.0, "step": 7313 }, { "epoch": 2.6793075020610058, "grad_norm": 0.15067723205025735, "learning_rate": 2.0781088803945727e-05, "loss": 0.4494, "num_tokens": 5595715367.0, "step": 7314 }, { "epoch": 2.6796739030869285, "grad_norm": 0.1437497224594116, "learning_rate": 2.0776825985048532e-05, "loss": 0.4465, "num_tokens": 5596451608.0, "step": 7315 }, { "epoch": 2.6800403041128513, "grad_norm": 0.1199753677370842, "learning_rate": 2.0772563235070522e-05, "loss": 0.4455, "num_tokens": 5597157221.0, "step": 7316 }, { "epoch": 2.6804067051387745, "grad_norm": 0.38762727410066666, "learning_rate": 2.0768300554251875e-05, "loss": 0.4498, "num_tokens": 5597916149.0, "step": 7317 }, { "epoch": 2.6807731061646973, "grad_norm": 0.13121274516455286, "learning_rate": 2.076403794283276e-05, "loss": 0.4397, "num_tokens": 5598745260.0, "step": 7318 }, { "epoch": 2.68113950719062, "grad_norm": 0.13123993744976278, "learning_rate": 2.075977540105337e-05, "loss": 0.454, "num_tokens": 5599549624.0, "step": 7319 }, { "epoch": 2.681505908216543, "grad_norm": 0.14114884311568643, "learning_rate": 2.075551292915386e-05, "loss": 0.4512, "num_tokens": 5600271305.0, "step": 7320 }, { "epoch": 2.6818723092424657, "grad_norm": 0.1619072655458569, "learning_rate": 2.075125052737441e-05, "loss": 0.483, "num_tokens": 5601084127.0, "step": 7321 }, { "epoch": 2.682238710268389, "grad_norm": 0.13436229326041085, "learning_rate": 2.0746988195955174e-05, "loss": 0.4764, "num_tokens": 5601764968.0, "step": 7322 }, { "epoch": 2.6826051112943117, "grad_norm": 0.14294720781890838, "learning_rate": 2.0742725935136316e-05, "loss": 0.4639, "num_tokens": 5602439705.0, "step": 7323 }, { "epoch": 2.6829715123202345, "grad_norm": 0.14334489108335097, "learning_rate": 2.0738463745157988e-05, "loss": 0.4862, "num_tokens": 5603194780.0, "step": 7324 }, { "epoch": 2.6833379133461572, "grad_norm": 0.12288911475823215, "learning_rate": 2.0734201626260343e-05, "loss": 0.4392, "num_tokens": 5603935601.0, "step": 7325 }, { "epoch": 2.68370431437208, "grad_norm": 0.14482477061775428, "learning_rate": 2.072993957868352e-05, "loss": 0.457, "num_tokens": 5604610393.0, "step": 7326 }, { "epoch": 2.6840707153980032, "grad_norm": 0.14309807899749583, "learning_rate": 2.0725677602667677e-05, "loss": 0.4761, "num_tokens": 5605320654.0, "step": 7327 }, { "epoch": 2.684437116423926, "grad_norm": 0.1236830474300815, "learning_rate": 2.0721415698452945e-05, "loss": 0.4481, "num_tokens": 5606131099.0, "step": 7328 }, { "epoch": 2.684803517449849, "grad_norm": 0.12256852772406178, "learning_rate": 2.0717153866279457e-05, "loss": 0.44, "num_tokens": 5606875892.0, "step": 7329 }, { "epoch": 2.6851699184757716, "grad_norm": 0.1436054064519598, "learning_rate": 2.0712892106387353e-05, "loss": 0.4809, "num_tokens": 5607763742.0, "step": 7330 }, { "epoch": 2.6855363195016944, "grad_norm": 0.12635084067285107, "learning_rate": 2.070863041901675e-05, "loss": 0.4425, "num_tokens": 5608605921.0, "step": 7331 }, { "epoch": 2.6859027205276176, "grad_norm": 0.12930127926264492, "learning_rate": 2.0704368804407773e-05, "loss": 0.4494, "num_tokens": 5609409858.0, "step": 7332 }, { "epoch": 2.6862691215535404, "grad_norm": 0.13800171195074834, "learning_rate": 2.0700107262800545e-05, "loss": 0.4904, "num_tokens": 5610144289.0, "step": 7333 }, { "epoch": 2.686635522579463, "grad_norm": 0.16158768941910745, "learning_rate": 2.069584579443517e-05, "loss": 0.4634, "num_tokens": 5611044677.0, "step": 7334 }, { "epoch": 2.6870019236053864, "grad_norm": 0.1211074766493671, "learning_rate": 2.0691584399551777e-05, "loss": 0.4461, "num_tokens": 5611862511.0, "step": 7335 }, { "epoch": 2.6873683246313087, "grad_norm": 0.12666424121000927, "learning_rate": 2.0687323078390457e-05, "loss": 0.4406, "num_tokens": 5612686742.0, "step": 7336 }, { "epoch": 2.687734725657232, "grad_norm": 0.1310208858616047, "learning_rate": 2.0683061831191312e-05, "loss": 0.4794, "num_tokens": 5613469719.0, "step": 7337 }, { "epoch": 2.6881011266831547, "grad_norm": 0.13883358764359777, "learning_rate": 2.0678800658194452e-05, "loss": 0.459, "num_tokens": 5614244429.0, "step": 7338 }, { "epoch": 2.6884675277090775, "grad_norm": 0.15465322620022737, "learning_rate": 2.0674539559639957e-05, "loss": 0.4699, "num_tokens": 5614906387.0, "step": 7339 }, { "epoch": 2.6888339287350007, "grad_norm": 0.14353001023371884, "learning_rate": 2.067027853576793e-05, "loss": 0.462, "num_tokens": 5615764577.0, "step": 7340 }, { "epoch": 2.6892003297609235, "grad_norm": 0.12084055678007888, "learning_rate": 2.066601758681845e-05, "loss": 0.4221, "num_tokens": 5616508040.0, "step": 7341 }, { "epoch": 2.6895667307868463, "grad_norm": 0.37364518342600234, "learning_rate": 2.066175671303159e-05, "loss": 0.4825, "num_tokens": 5617274552.0, "step": 7342 }, { "epoch": 2.689933131812769, "grad_norm": 0.15979274558562798, "learning_rate": 2.0657495914647443e-05, "loss": 0.4749, "num_tokens": 5617918268.0, "step": 7343 }, { "epoch": 2.690299532838692, "grad_norm": 0.1498512082921529, "learning_rate": 2.0653235191906068e-05, "loss": 0.4378, "num_tokens": 5618734582.0, "step": 7344 }, { "epoch": 2.690665933864615, "grad_norm": 0.12498097492517742, "learning_rate": 2.0648974545047542e-05, "loss": 0.4498, "num_tokens": 5619426341.0, "step": 7345 }, { "epoch": 2.691032334890538, "grad_norm": 0.14917690499711905, "learning_rate": 2.0644713974311927e-05, "loss": 0.4358, "num_tokens": 5620150324.0, "step": 7346 }, { "epoch": 2.6913987359164606, "grad_norm": 0.13680872685502524, "learning_rate": 2.0640453479939288e-05, "loss": 0.4567, "num_tokens": 5620890428.0, "step": 7347 }, { "epoch": 2.6917651369423834, "grad_norm": 0.15363827520207474, "learning_rate": 2.0636193062169662e-05, "loss": 0.4744, "num_tokens": 5621601354.0, "step": 7348 }, { "epoch": 2.692131537968306, "grad_norm": 0.13867127953655628, "learning_rate": 2.063193272124312e-05, "loss": 0.46, "num_tokens": 5622349461.0, "step": 7349 }, { "epoch": 2.6924979389942294, "grad_norm": 0.16288715537378065, "learning_rate": 2.0627672457399695e-05, "loss": 0.4678, "num_tokens": 5623070297.0, "step": 7350 }, { "epoch": 2.692864340020152, "grad_norm": 0.13218703157376144, "learning_rate": 2.0623412270879445e-05, "loss": 0.4324, "num_tokens": 5623813876.0, "step": 7351 }, { "epoch": 2.693230741046075, "grad_norm": 0.1363409264726399, "learning_rate": 2.0619152161922402e-05, "loss": 0.4615, "num_tokens": 5624584929.0, "step": 7352 }, { "epoch": 2.6935971420719977, "grad_norm": 0.1217045774572866, "learning_rate": 2.0614892130768588e-05, "loss": 0.4779, "num_tokens": 5625427781.0, "step": 7353 }, { "epoch": 2.6939635430979205, "grad_norm": 0.14394139395328123, "learning_rate": 2.061063217765805e-05, "loss": 0.4753, "num_tokens": 5626195258.0, "step": 7354 }, { "epoch": 2.6943299441238437, "grad_norm": 0.13558512276325085, "learning_rate": 2.060637230283081e-05, "loss": 0.4773, "num_tokens": 5626938477.0, "step": 7355 }, { "epoch": 2.6946963451497665, "grad_norm": 0.14247315842703168, "learning_rate": 2.060211250652687e-05, "loss": 0.4807, "num_tokens": 5627735718.0, "step": 7356 }, { "epoch": 2.6950627461756893, "grad_norm": 0.12550298645246108, "learning_rate": 2.059785278898627e-05, "loss": 0.4356, "num_tokens": 5628411889.0, "step": 7357 }, { "epoch": 2.695429147201612, "grad_norm": 0.14030154470828807, "learning_rate": 2.0593593150449006e-05, "loss": 0.4459, "num_tokens": 5629258215.0, "step": 7358 }, { "epoch": 2.695795548227535, "grad_norm": 0.12140040815291826, "learning_rate": 2.0589333591155102e-05, "loss": 0.4565, "num_tokens": 5630108685.0, "step": 7359 }, { "epoch": 2.696161949253458, "grad_norm": 0.1412964826702115, "learning_rate": 2.0585074111344547e-05, "loss": 0.4472, "num_tokens": 5630895812.0, "step": 7360 }, { "epoch": 2.696528350279381, "grad_norm": 0.12562228299481795, "learning_rate": 2.0580814711257333e-05, "loss": 0.4673, "num_tokens": 5631614960.0, "step": 7361 }, { "epoch": 2.6968947513053037, "grad_norm": 0.13822727954243685, "learning_rate": 2.0576555391133475e-05, "loss": 0.4535, "num_tokens": 5632325393.0, "step": 7362 }, { "epoch": 2.6972611523312264, "grad_norm": 0.14429543436293882, "learning_rate": 2.0572296151212944e-05, "loss": 0.4737, "num_tokens": 5632920963.0, "step": 7363 }, { "epoch": 2.697627553357149, "grad_norm": 0.14158056444522651, "learning_rate": 2.0568036991735738e-05, "loss": 0.4552, "num_tokens": 5633670597.0, "step": 7364 }, { "epoch": 2.6979939543830724, "grad_norm": 0.1411451124305975, "learning_rate": 2.0563777912941833e-05, "loss": 0.4461, "num_tokens": 5634349568.0, "step": 7365 }, { "epoch": 2.698360355408995, "grad_norm": 0.1473873603825236, "learning_rate": 2.05595189150712e-05, "loss": 0.4687, "num_tokens": 5635020967.0, "step": 7366 }, { "epoch": 2.698726756434918, "grad_norm": 0.14570180315232148, "learning_rate": 2.0555259998363807e-05, "loss": 0.4624, "num_tokens": 5635773976.0, "step": 7367 }, { "epoch": 2.6990931574608408, "grad_norm": 0.13786071892369514, "learning_rate": 2.0551001163059636e-05, "loss": 0.4486, "num_tokens": 5636484352.0, "step": 7368 }, { "epoch": 2.6994595584867636, "grad_norm": 0.13943601156081606, "learning_rate": 2.0546742409398636e-05, "loss": 0.4352, "num_tokens": 5637306530.0, "step": 7369 }, { "epoch": 2.699825959512687, "grad_norm": 0.14554128488147014, "learning_rate": 2.054248373762078e-05, "loss": 0.4791, "num_tokens": 5638017902.0, "step": 7370 }, { "epoch": 2.7001923605386096, "grad_norm": 0.138241899352265, "learning_rate": 2.0538225147966e-05, "loss": 0.4487, "num_tokens": 5638766268.0, "step": 7371 }, { "epoch": 2.7005587615645323, "grad_norm": 0.1348891779731975, "learning_rate": 2.0533966640674255e-05, "loss": 0.4516, "num_tokens": 5639500122.0, "step": 7372 }, { "epoch": 2.700925162590455, "grad_norm": 0.14999051482958958, "learning_rate": 2.0529708215985486e-05, "loss": 0.4504, "num_tokens": 5640317323.0, "step": 7373 }, { "epoch": 2.701291563616378, "grad_norm": 0.13816087346591843, "learning_rate": 2.0525449874139644e-05, "loss": 0.4748, "num_tokens": 5641123867.0, "step": 7374 }, { "epoch": 2.701657964642301, "grad_norm": 0.13314491189878683, "learning_rate": 2.0521191615376644e-05, "loss": 0.4331, "num_tokens": 5641830948.0, "step": 7375 }, { "epoch": 2.702024365668224, "grad_norm": 0.14200742082170617, "learning_rate": 2.0516933439936428e-05, "loss": 0.4603, "num_tokens": 5642573705.0, "step": 7376 }, { "epoch": 2.7023907666941467, "grad_norm": 0.14632880430525244, "learning_rate": 2.0512675348058915e-05, "loss": 0.4462, "num_tokens": 5643311796.0, "step": 7377 }, { "epoch": 2.7027571677200695, "grad_norm": 0.1407658752826887, "learning_rate": 2.0508417339984034e-05, "loss": 0.4982, "num_tokens": 5644157612.0, "step": 7378 }, { "epoch": 2.7031235687459922, "grad_norm": 0.14343895168315474, "learning_rate": 2.050415941595169e-05, "loss": 0.4493, "num_tokens": 5644937432.0, "step": 7379 }, { "epoch": 2.7034899697719155, "grad_norm": 0.1354905189680814, "learning_rate": 2.04999015762018e-05, "loss": 0.4633, "num_tokens": 5645625671.0, "step": 7380 }, { "epoch": 2.7038563707978382, "grad_norm": 0.1464217537162211, "learning_rate": 2.049564382097427e-05, "loss": 0.4641, "num_tokens": 5646284394.0, "step": 7381 }, { "epoch": 2.704222771823761, "grad_norm": 0.13348552396573687, "learning_rate": 2.0491386150509003e-05, "loss": 0.4292, "num_tokens": 5647065041.0, "step": 7382 }, { "epoch": 2.7045891728496843, "grad_norm": 0.12737055677599365, "learning_rate": 2.0487128565045883e-05, "loss": 0.4748, "num_tokens": 5647834609.0, "step": 7383 }, { "epoch": 2.7049555738756066, "grad_norm": 0.1572695297645526, "learning_rate": 2.0482871064824815e-05, "loss": 0.4479, "num_tokens": 5648683985.0, "step": 7384 }, { "epoch": 2.70532197490153, "grad_norm": 0.12604873764307156, "learning_rate": 2.0478613650085677e-05, "loss": 0.487, "num_tokens": 5649422877.0, "step": 7385 }, { "epoch": 2.7056883759274526, "grad_norm": 0.15582155116075025, "learning_rate": 2.0474356321068365e-05, "loss": 0.4512, "num_tokens": 5650131905.0, "step": 7386 }, { "epoch": 2.7060547769533754, "grad_norm": 0.1471741431931246, "learning_rate": 2.0470099078012747e-05, "loss": 0.4902, "num_tokens": 5650917576.0, "step": 7387 }, { "epoch": 2.7064211779792986, "grad_norm": 0.13744887346875673, "learning_rate": 2.046584192115869e-05, "loss": 0.4721, "num_tokens": 5651654895.0, "step": 7388 }, { "epoch": 2.7067875790052214, "grad_norm": 0.13737125618737384, "learning_rate": 2.0461584850746066e-05, "loss": 0.4531, "num_tokens": 5652401752.0, "step": 7389 }, { "epoch": 2.707153980031144, "grad_norm": 0.12797897299575298, "learning_rate": 2.0457327867014744e-05, "loss": 0.4534, "num_tokens": 5653292392.0, "step": 7390 }, { "epoch": 2.707520381057067, "grad_norm": 0.14075717950038663, "learning_rate": 2.045307097020457e-05, "loss": 0.4685, "num_tokens": 5654014164.0, "step": 7391 }, { "epoch": 2.7078867820829897, "grad_norm": 0.14460128691618163, "learning_rate": 2.044881416055541e-05, "loss": 0.4403, "num_tokens": 5654826674.0, "step": 7392 }, { "epoch": 2.708253183108913, "grad_norm": 0.13460968457948422, "learning_rate": 2.0444557438307105e-05, "loss": 0.4611, "num_tokens": 5655526131.0, "step": 7393 }, { "epoch": 2.7086195841348357, "grad_norm": 0.15392730211821573, "learning_rate": 2.0440300803699492e-05, "loss": 0.4373, "num_tokens": 5656237926.0, "step": 7394 }, { "epoch": 2.7089859851607585, "grad_norm": 0.1490962586205731, "learning_rate": 2.0436044256972428e-05, "loss": 0.4483, "num_tokens": 5656926735.0, "step": 7395 }, { "epoch": 2.7093523861866813, "grad_norm": 0.12645361528406823, "learning_rate": 2.0431787798365718e-05, "loss": 0.4449, "num_tokens": 5657629094.0, "step": 7396 }, { "epoch": 2.709718787212604, "grad_norm": 0.14352208311164766, "learning_rate": 2.042753142811922e-05, "loss": 0.4265, "num_tokens": 5658374804.0, "step": 7397 }, { "epoch": 2.7100851882385273, "grad_norm": 0.14563585518728245, "learning_rate": 2.0423275146472735e-05, "loss": 0.4885, "num_tokens": 5659192457.0, "step": 7398 }, { "epoch": 2.71045158926445, "grad_norm": 0.13843623434869914, "learning_rate": 2.0419018953666092e-05, "loss": 0.4557, "num_tokens": 5659860802.0, "step": 7399 }, { "epoch": 2.710817990290373, "grad_norm": 0.13264780649395938, "learning_rate": 2.0414762849939106e-05, "loss": 0.4479, "num_tokens": 5660683128.0, "step": 7400 }, { "epoch": 2.7111843913162956, "grad_norm": 0.1430654522301995, "learning_rate": 2.0410506835531575e-05, "loss": 0.4671, "num_tokens": 5661515324.0, "step": 7401 }, { "epoch": 2.7115507923422184, "grad_norm": 0.12132320679410649, "learning_rate": 2.040625091068331e-05, "loss": 0.4282, "num_tokens": 5662300897.0, "step": 7402 }, { "epoch": 2.7119171933681416, "grad_norm": 0.13426224881377374, "learning_rate": 2.0401995075634105e-05, "loss": 0.4561, "num_tokens": 5663092947.0, "step": 7403 }, { "epoch": 2.7122835943940644, "grad_norm": 0.14530010724242082, "learning_rate": 2.0397739330623752e-05, "loss": 0.4445, "num_tokens": 5663833509.0, "step": 7404 }, { "epoch": 2.712649995419987, "grad_norm": 0.12338919354172675, "learning_rate": 2.0393483675892055e-05, "loss": 0.443, "num_tokens": 5664655517.0, "step": 7405 }, { "epoch": 2.71301639644591, "grad_norm": 0.13242669407149002, "learning_rate": 2.0389228111678778e-05, "loss": 0.4583, "num_tokens": 5665414351.0, "step": 7406 }, { "epoch": 2.7133827974718328, "grad_norm": 0.1481484637031854, "learning_rate": 2.0384972638223702e-05, "loss": 0.4248, "num_tokens": 5666134955.0, "step": 7407 }, { "epoch": 2.713749198497756, "grad_norm": 0.1169259106515159, "learning_rate": 2.0380717255766605e-05, "loss": 0.5147, "num_tokens": 5667027752.0, "step": 7408 }, { "epoch": 2.7141155995236788, "grad_norm": 0.12506505492330908, "learning_rate": 2.037646196454726e-05, "loss": 0.4401, "num_tokens": 5668054226.0, "step": 7409 }, { "epoch": 2.7144820005496015, "grad_norm": 0.12808257777715673, "learning_rate": 2.0372206764805412e-05, "loss": 0.4668, "num_tokens": 5668747197.0, "step": 7410 }, { "epoch": 2.7148484015755243, "grad_norm": 0.1477081531172127, "learning_rate": 2.0367951656780828e-05, "loss": 0.4675, "num_tokens": 5669460250.0, "step": 7411 }, { "epoch": 2.715214802601447, "grad_norm": 0.1331135441661268, "learning_rate": 2.036369664071327e-05, "loss": 0.4608, "num_tokens": 5670321178.0, "step": 7412 }, { "epoch": 2.7155812036273703, "grad_norm": 0.13782752780207166, "learning_rate": 2.0359441716842458e-05, "loss": 0.4501, "num_tokens": 5671172516.0, "step": 7413 }, { "epoch": 2.715947604653293, "grad_norm": 0.1340890679902328, "learning_rate": 2.0355186885408168e-05, "loss": 0.4619, "num_tokens": 5671994753.0, "step": 7414 }, { "epoch": 2.716314005679216, "grad_norm": 0.14708858284563084, "learning_rate": 2.0350932146650107e-05, "loss": 0.4288, "num_tokens": 5672713795.0, "step": 7415 }, { "epoch": 2.7166804067051387, "grad_norm": 0.12226651147176656, "learning_rate": 2.0346677500808023e-05, "loss": 0.4579, "num_tokens": 5673508585.0, "step": 7416 }, { "epoch": 2.7170468077310614, "grad_norm": 0.1495473879082699, "learning_rate": 2.0342422948121644e-05, "loss": 0.4835, "num_tokens": 5674290510.0, "step": 7417 }, { "epoch": 2.7174132087569847, "grad_norm": 0.13405993336865846, "learning_rate": 2.0338168488830673e-05, "loss": 0.4355, "num_tokens": 5675096367.0, "step": 7418 }, { "epoch": 2.7177796097829074, "grad_norm": 0.13512846978195822, "learning_rate": 2.0333914123174847e-05, "loss": 0.46, "num_tokens": 5675895108.0, "step": 7419 }, { "epoch": 2.7181460108088302, "grad_norm": 0.129563465734399, "learning_rate": 2.0329659851393867e-05, "loss": 0.4226, "num_tokens": 5676767187.0, "step": 7420 }, { "epoch": 2.718512411834753, "grad_norm": 0.13652077436889323, "learning_rate": 2.032540567372743e-05, "loss": 0.4729, "num_tokens": 5677548798.0, "step": 7421 }, { "epoch": 2.718878812860676, "grad_norm": 0.13472395260398248, "learning_rate": 2.0321151590415246e-05, "loss": 0.4411, "num_tokens": 5678266290.0, "step": 7422 }, { "epoch": 2.719245213886599, "grad_norm": 0.12778424559816598, "learning_rate": 2.0316897601697007e-05, "loss": 0.4905, "num_tokens": 5679097027.0, "step": 7423 }, { "epoch": 2.719611614912522, "grad_norm": 0.1533376720814535, "learning_rate": 2.0312643707812403e-05, "loss": 0.4352, "num_tokens": 5679865041.0, "step": 7424 }, { "epoch": 2.7199780159384446, "grad_norm": 0.14253020296935243, "learning_rate": 2.0308389909001115e-05, "loss": 0.4559, "num_tokens": 5680602894.0, "step": 7425 }, { "epoch": 2.7203444169643674, "grad_norm": 0.13831132698420398, "learning_rate": 2.0304136205502817e-05, "loss": 0.4373, "num_tokens": 5681408441.0, "step": 7426 }, { "epoch": 2.72071081799029, "grad_norm": 0.13045292734963468, "learning_rate": 2.0299882597557197e-05, "loss": 0.4712, "num_tokens": 5682174084.0, "step": 7427 }, { "epoch": 2.7210772190162134, "grad_norm": 0.14727865207348612, "learning_rate": 2.0295629085403908e-05, "loss": 0.4403, "num_tokens": 5682954352.0, "step": 7428 }, { "epoch": 2.721443620042136, "grad_norm": 0.13380001760508797, "learning_rate": 2.0291375669282618e-05, "loss": 0.4791, "num_tokens": 5683668575.0, "step": 7429 }, { "epoch": 2.721810021068059, "grad_norm": 0.1349487759585306, "learning_rate": 2.0287122349432984e-05, "loss": 0.4588, "num_tokens": 5684495698.0, "step": 7430 }, { "epoch": 2.722176422093982, "grad_norm": 0.13372024777161784, "learning_rate": 2.028286912609466e-05, "loss": 0.398, "num_tokens": 5685303976.0, "step": 7431 }, { "epoch": 2.7225428231199045, "grad_norm": 0.13740770126215612, "learning_rate": 2.0278615999507284e-05, "loss": 0.4988, "num_tokens": 5686024886.0, "step": 7432 }, { "epoch": 2.7229092241458277, "grad_norm": 0.14227191209997822, "learning_rate": 2.0274362969910505e-05, "loss": 0.4126, "num_tokens": 5686653716.0, "step": 7433 }, { "epoch": 2.7232756251717505, "grad_norm": 0.1311905547884532, "learning_rate": 2.0270110037543948e-05, "loss": 0.4173, "num_tokens": 5687289468.0, "step": 7434 }, { "epoch": 2.7236420261976733, "grad_norm": 0.15448743796951503, "learning_rate": 2.0265857202647246e-05, "loss": 0.4722, "num_tokens": 5687953586.0, "step": 7435 }, { "epoch": 2.7240084272235965, "grad_norm": 0.1293016015526479, "learning_rate": 2.0261604465460034e-05, "loss": 0.463, "num_tokens": 5688704816.0, "step": 7436 }, { "epoch": 2.7243748282495193, "grad_norm": 0.14086257633253116, "learning_rate": 2.025735182622192e-05, "loss": 0.4766, "num_tokens": 5689450633.0, "step": 7437 }, { "epoch": 2.724741229275442, "grad_norm": 0.1458353308998726, "learning_rate": 2.025309928517252e-05, "loss": 0.4719, "num_tokens": 5690185995.0, "step": 7438 }, { "epoch": 2.725107630301365, "grad_norm": 0.13802342572357992, "learning_rate": 2.024884684255144e-05, "loss": 0.4578, "num_tokens": 5690928086.0, "step": 7439 }, { "epoch": 2.7254740313272876, "grad_norm": 0.13073545920073976, "learning_rate": 2.0244594498598277e-05, "loss": 0.4375, "num_tokens": 5691739055.0, "step": 7440 }, { "epoch": 2.725840432353211, "grad_norm": 0.12977668101862308, "learning_rate": 2.0240342253552633e-05, "loss": 0.463, "num_tokens": 5692591754.0, "step": 7441 }, { "epoch": 2.7262068333791336, "grad_norm": 0.11730642070226097, "learning_rate": 2.02360901076541e-05, "loss": 0.4425, "num_tokens": 5693585636.0, "step": 7442 }, { "epoch": 2.7265732344050564, "grad_norm": 0.14979244473701603, "learning_rate": 2.0231838061142262e-05, "loss": 0.5056, "num_tokens": 5694274011.0, "step": 7443 }, { "epoch": 2.726939635430979, "grad_norm": 0.12844187381689065, "learning_rate": 2.0227586114256702e-05, "loss": 0.4927, "num_tokens": 5695076786.0, "step": 7444 }, { "epoch": 2.727306036456902, "grad_norm": 0.1409527743599124, "learning_rate": 2.022333426723699e-05, "loss": 0.4676, "num_tokens": 5695950678.0, "step": 7445 }, { "epoch": 2.727672437482825, "grad_norm": 0.11931749969408628, "learning_rate": 2.0219082520322692e-05, "loss": 0.4726, "num_tokens": 5696745836.0, "step": 7446 }, { "epoch": 2.728038838508748, "grad_norm": 0.13266950007019668, "learning_rate": 2.0214830873753378e-05, "loss": 0.4273, "num_tokens": 5697504021.0, "step": 7447 }, { "epoch": 2.7284052395346707, "grad_norm": 0.12625438819348084, "learning_rate": 2.021057932776859e-05, "loss": 0.4452, "num_tokens": 5698282986.0, "step": 7448 }, { "epoch": 2.7287716405605935, "grad_norm": 0.13937401640711272, "learning_rate": 2.02063278826079e-05, "loss": 0.4401, "num_tokens": 5698991729.0, "step": 7449 }, { "epoch": 2.7291380415865163, "grad_norm": 0.11556906783401669, "learning_rate": 2.020207653851084e-05, "loss": 0.4457, "num_tokens": 5699735232.0, "step": 7450 }, { "epoch": 2.7295044426124395, "grad_norm": 0.15051748850352187, "learning_rate": 2.0197825295716956e-05, "loss": 0.4655, "num_tokens": 5700492488.0, "step": 7451 }, { "epoch": 2.7298708436383623, "grad_norm": 0.13932351316172925, "learning_rate": 2.019357415446578e-05, "loss": 0.4891, "num_tokens": 5701096218.0, "step": 7452 }, { "epoch": 2.730237244664285, "grad_norm": 0.13914075137902185, "learning_rate": 2.0189323114996835e-05, "loss": 0.4947, "num_tokens": 5701797960.0, "step": 7453 }, { "epoch": 2.730603645690208, "grad_norm": 0.16819626267486598, "learning_rate": 2.0185072177549657e-05, "loss": 0.4724, "num_tokens": 5702584956.0, "step": 7454 }, { "epoch": 2.7309700467161306, "grad_norm": 0.14873794930549517, "learning_rate": 2.0180821342363754e-05, "loss": 0.4724, "num_tokens": 5703374947.0, "step": 7455 }, { "epoch": 2.731336447742054, "grad_norm": 0.12542242362503828, "learning_rate": 2.0176570609678632e-05, "loss": 0.4698, "num_tokens": 5704151507.0, "step": 7456 }, { "epoch": 2.7317028487679766, "grad_norm": 0.12015727693358554, "learning_rate": 2.0172319979733817e-05, "loss": 0.4486, "num_tokens": 5704892926.0, "step": 7457 }, { "epoch": 2.7320692497938994, "grad_norm": 0.14554205570314613, "learning_rate": 2.0168069452768786e-05, "loss": 0.4526, "num_tokens": 5705652489.0, "step": 7458 }, { "epoch": 2.732435650819822, "grad_norm": 0.127781421199235, "learning_rate": 2.0163819029023042e-05, "loss": 0.4568, "num_tokens": 5706493261.0, "step": 7459 }, { "epoch": 2.732802051845745, "grad_norm": 0.14134406845062467, "learning_rate": 2.0159568708736077e-05, "loss": 0.4677, "num_tokens": 5707256504.0, "step": 7460 }, { "epoch": 2.733168452871668, "grad_norm": 0.13623988083648242, "learning_rate": 2.015531849214736e-05, "loss": 0.4752, "num_tokens": 5707912583.0, "step": 7461 }, { "epoch": 2.733534853897591, "grad_norm": 0.14816959158646056, "learning_rate": 2.0151068379496395e-05, "loss": 0.4615, "num_tokens": 5708708408.0, "step": 7462 }, { "epoch": 2.7339012549235138, "grad_norm": 0.1255516786568634, "learning_rate": 2.0146818371022627e-05, "loss": 0.4258, "num_tokens": 5709541023.0, "step": 7463 }, { "epoch": 2.7342676559494365, "grad_norm": 0.12704734310952182, "learning_rate": 2.0142568466965525e-05, "loss": 0.4768, "num_tokens": 5710283320.0, "step": 7464 }, { "epoch": 2.7346340569753593, "grad_norm": 0.14041790752937738, "learning_rate": 2.0138318667564554e-05, "loss": 0.4567, "num_tokens": 5711074044.0, "step": 7465 }, { "epoch": 2.7350004580012826, "grad_norm": 0.11913255220143017, "learning_rate": 2.013406897305917e-05, "loss": 0.412, "num_tokens": 5711896174.0, "step": 7466 }, { "epoch": 2.7353668590272053, "grad_norm": 0.1246596496791172, "learning_rate": 2.012981938368881e-05, "loss": 0.4255, "num_tokens": 5712603770.0, "step": 7467 }, { "epoch": 2.735733260053128, "grad_norm": 0.14367206645609937, "learning_rate": 2.012556989969292e-05, "loss": 0.4731, "num_tokens": 5713312159.0, "step": 7468 }, { "epoch": 2.736099661079051, "grad_norm": 0.13276201817363464, "learning_rate": 2.0121320521310942e-05, "loss": 0.4506, "num_tokens": 5714038851.0, "step": 7469 }, { "epoch": 2.7364660621049737, "grad_norm": 0.13032707727470177, "learning_rate": 2.0117071248782282e-05, "loss": 0.4519, "num_tokens": 5714773473.0, "step": 7470 }, { "epoch": 2.736832463130897, "grad_norm": 0.13506194958216544, "learning_rate": 2.0112822082346393e-05, "loss": 0.4509, "num_tokens": 5715394874.0, "step": 7471 }, { "epoch": 2.7371988641568197, "grad_norm": 0.15279874754773642, "learning_rate": 2.0108573022242677e-05, "loss": 0.4621, "num_tokens": 5716147113.0, "step": 7472 }, { "epoch": 2.7375652651827425, "grad_norm": 0.11820927127459095, "learning_rate": 2.0104324068710546e-05, "loss": 0.4311, "num_tokens": 5716893589.0, "step": 7473 }, { "epoch": 2.7379316662086652, "grad_norm": 0.14277635840538574, "learning_rate": 2.0100075221989415e-05, "loss": 0.4687, "num_tokens": 5717669089.0, "step": 7474 }, { "epoch": 2.738298067234588, "grad_norm": 0.12048321478396891, "learning_rate": 2.0095826482318662e-05, "loss": 0.4515, "num_tokens": 5718439208.0, "step": 7475 }, { "epoch": 2.7386644682605112, "grad_norm": 0.1420119318305186, "learning_rate": 2.00915778499377e-05, "loss": 0.4878, "num_tokens": 5719266025.0, "step": 7476 }, { "epoch": 2.739030869286434, "grad_norm": 0.13406442855986447, "learning_rate": 2.008732932508591e-05, "loss": 0.4501, "num_tokens": 5720066831.0, "step": 7477 }, { "epoch": 2.739397270312357, "grad_norm": 0.13984607619067113, "learning_rate": 2.008308090800267e-05, "loss": 0.4648, "num_tokens": 5720884722.0, "step": 7478 }, { "epoch": 2.7397636713382796, "grad_norm": 0.13210027476684574, "learning_rate": 2.0078832598927364e-05, "loss": 0.4308, "num_tokens": 5721638336.0, "step": 7479 }, { "epoch": 2.7401300723642024, "grad_norm": 0.13076151964263327, "learning_rate": 2.0074584398099345e-05, "loss": 0.4638, "num_tokens": 5722496043.0, "step": 7480 }, { "epoch": 2.7404964733901256, "grad_norm": 0.13187417546700042, "learning_rate": 2.0070336305758e-05, "loss": 0.4388, "num_tokens": 5723442048.0, "step": 7481 }, { "epoch": 2.7408628744160484, "grad_norm": 0.11718225300902427, "learning_rate": 2.006608832214266e-05, "loss": 0.4646, "num_tokens": 5724256327.0, "step": 7482 }, { "epoch": 2.741229275441971, "grad_norm": 0.1391495509493855, "learning_rate": 2.006184044749269e-05, "loss": 0.4443, "num_tokens": 5725011507.0, "step": 7483 }, { "epoch": 2.7415956764678944, "grad_norm": 0.12492448395406426, "learning_rate": 2.0057592682047437e-05, "loss": 0.436, "num_tokens": 5725840805.0, "step": 7484 }, { "epoch": 2.7419620774938167, "grad_norm": 0.1266706030256854, "learning_rate": 2.0053345026046232e-05, "loss": 0.4624, "num_tokens": 5726536645.0, "step": 7485 }, { "epoch": 2.74232847851974, "grad_norm": 0.13431348008338476, "learning_rate": 2.004909747972841e-05, "loss": 0.4442, "num_tokens": 5727250981.0, "step": 7486 }, { "epoch": 2.7426948795456627, "grad_norm": 0.1352653440237131, "learning_rate": 2.0044850043333293e-05, "loss": 0.4324, "num_tokens": 5727960875.0, "step": 7487 }, { "epoch": 2.7430612805715855, "grad_norm": 0.14548291524433413, "learning_rate": 2.0040602717100212e-05, "loss": 0.4624, "num_tokens": 5728674536.0, "step": 7488 }, { "epoch": 2.7434276815975087, "grad_norm": 0.12874537213189463, "learning_rate": 2.0036355501268464e-05, "loss": 0.4587, "num_tokens": 5729441811.0, "step": 7489 }, { "epoch": 2.7437940826234315, "grad_norm": 0.12820860174048385, "learning_rate": 2.0032108396077376e-05, "loss": 0.4646, "num_tokens": 5730301518.0, "step": 7490 }, { "epoch": 2.7441604836493543, "grad_norm": 0.13704536534505163, "learning_rate": 2.0027861401766228e-05, "loss": 0.4622, "num_tokens": 5731109610.0, "step": 7491 }, { "epoch": 2.744526884675277, "grad_norm": 0.13849076523940537, "learning_rate": 2.002361451857433e-05, "loss": 0.4771, "num_tokens": 5731751032.0, "step": 7492 }, { "epoch": 2.7448932857012, "grad_norm": 0.12924716505000233, "learning_rate": 2.0019367746740973e-05, "loss": 0.4659, "num_tokens": 5732468900.0, "step": 7493 }, { "epoch": 2.745259686727123, "grad_norm": 0.13614126585314942, "learning_rate": 2.0015121086505423e-05, "loss": 0.4552, "num_tokens": 5733239613.0, "step": 7494 }, { "epoch": 2.745626087753046, "grad_norm": 0.1272800469740634, "learning_rate": 2.001087453810697e-05, "loss": 0.4748, "num_tokens": 5734048587.0, "step": 7495 }, { "epoch": 2.7459924887789686, "grad_norm": 0.13792375066711374, "learning_rate": 2.0006628101784885e-05, "loss": 0.4299, "num_tokens": 5734834721.0, "step": 7496 }, { "epoch": 2.7463588898048914, "grad_norm": 0.13801461042585508, "learning_rate": 2.0002381777778424e-05, "loss": 0.4717, "num_tokens": 5735548066.0, "step": 7497 }, { "epoch": 2.746725290830814, "grad_norm": 0.1400557382493623, "learning_rate": 1.9998135566326842e-05, "loss": 0.4661, "num_tokens": 5736276750.0, "step": 7498 }, { "epoch": 2.7470916918567374, "grad_norm": 0.1223715354231347, "learning_rate": 1.9993889467669392e-05, "loss": 0.4695, "num_tokens": 5737052651.0, "step": 7499 }, { "epoch": 2.74745809288266, "grad_norm": 0.13280005896006336, "learning_rate": 1.998964348204533e-05, "loss": 0.425, "num_tokens": 5737761075.0, "step": 7500 }, { "epoch": 2.747824493908583, "grad_norm": 0.1284397273263828, "learning_rate": 1.998539760969388e-05, "loss": 0.4727, "num_tokens": 5738434530.0, "step": 7501 }, { "epoch": 2.7481908949345057, "grad_norm": 0.14886987037261729, "learning_rate": 1.998115185085428e-05, "loss": 0.4533, "num_tokens": 5739166048.0, "step": 7502 }, { "epoch": 2.7485572959604285, "grad_norm": 0.13451997206512753, "learning_rate": 1.997690620576575e-05, "loss": 0.4903, "num_tokens": 5739913738.0, "step": 7503 }, { "epoch": 2.7489236969863518, "grad_norm": 0.13603885686364917, "learning_rate": 1.9972660674667523e-05, "loss": 0.4552, "num_tokens": 5740761504.0, "step": 7504 }, { "epoch": 2.7492900980122745, "grad_norm": 0.13742352307255948, "learning_rate": 1.996841525779879e-05, "loss": 0.4616, "num_tokens": 5741478432.0, "step": 7505 }, { "epoch": 2.7496564990381973, "grad_norm": 0.1532499450806657, "learning_rate": 1.9964169955398778e-05, "loss": 0.4957, "num_tokens": 5742215838.0, "step": 7506 }, { "epoch": 2.75002290006412, "grad_norm": 0.1505472239147453, "learning_rate": 1.995992476770667e-05, "loss": 0.4484, "num_tokens": 5743068501.0, "step": 7507 }, { "epoch": 2.750389301090043, "grad_norm": 0.11894415805608143, "learning_rate": 1.9955679694961668e-05, "loss": 0.4579, "num_tokens": 5743893320.0, "step": 7508 }, { "epoch": 2.750755702115966, "grad_norm": 0.12688405927549284, "learning_rate": 1.9951434737402957e-05, "loss": 0.465, "num_tokens": 5744573417.0, "step": 7509 }, { "epoch": 2.751122103141889, "grad_norm": 0.15814425507191104, "learning_rate": 1.994718989526972e-05, "loss": 0.4498, "num_tokens": 5745345735.0, "step": 7510 }, { "epoch": 2.7514885041678117, "grad_norm": 0.125400422686426, "learning_rate": 1.9942945168801125e-05, "loss": 0.4451, "num_tokens": 5746206542.0, "step": 7511 }, { "epoch": 2.7518549051937344, "grad_norm": 0.12895982628313765, "learning_rate": 1.9938700558236345e-05, "loss": 0.4723, "num_tokens": 5747091547.0, "step": 7512 }, { "epoch": 2.752221306219657, "grad_norm": 0.13332674997382535, "learning_rate": 1.9934456063814537e-05, "loss": 0.4221, "num_tokens": 5747721659.0, "step": 7513 }, { "epoch": 2.7525877072455804, "grad_norm": 0.13425236122667666, "learning_rate": 1.9930211685774857e-05, "loss": 0.4705, "num_tokens": 5748565648.0, "step": 7514 }, { "epoch": 2.752954108271503, "grad_norm": 0.12807649153192488, "learning_rate": 1.992596742435645e-05, "loss": 0.4548, "num_tokens": 5749287398.0, "step": 7515 }, { "epoch": 2.753320509297426, "grad_norm": 0.14086678090476587, "learning_rate": 1.9921723279798454e-05, "loss": 0.4549, "num_tokens": 5750024006.0, "step": 7516 }, { "epoch": 2.753686910323349, "grad_norm": 0.12966336461362862, "learning_rate": 1.991747925234001e-05, "loss": 0.4466, "num_tokens": 5750947374.0, "step": 7517 }, { "epoch": 2.7540533113492716, "grad_norm": 0.13140254096805637, "learning_rate": 1.991323534222024e-05, "loss": 0.4615, "num_tokens": 5751625405.0, "step": 7518 }, { "epoch": 2.754419712375195, "grad_norm": 0.13682404362536565, "learning_rate": 1.9908991549678278e-05, "loss": 0.4885, "num_tokens": 5752379807.0, "step": 7519 }, { "epoch": 2.7547861134011176, "grad_norm": 0.12934213061228214, "learning_rate": 1.9904747874953228e-05, "loss": 0.4519, "num_tokens": 5753195355.0, "step": 7520 }, { "epoch": 2.7551525144270403, "grad_norm": 0.1287318294632833, "learning_rate": 1.990050431828419e-05, "loss": 0.4407, "num_tokens": 5753889574.0, "step": 7521 }, { "epoch": 2.755518915452963, "grad_norm": 0.13589505097083607, "learning_rate": 1.9896260879910282e-05, "loss": 0.4684, "num_tokens": 5754559052.0, "step": 7522 }, { "epoch": 2.755885316478886, "grad_norm": 0.1383347599822881, "learning_rate": 1.9892017560070594e-05, "loss": 0.4547, "num_tokens": 5755387993.0, "step": 7523 }, { "epoch": 2.756251717504809, "grad_norm": 0.138891551835636, "learning_rate": 1.9887774359004203e-05, "loss": 0.4493, "num_tokens": 5756152328.0, "step": 7524 }, { "epoch": 2.756618118530732, "grad_norm": 0.12131926650400739, "learning_rate": 1.98835312769502e-05, "loss": 0.4508, "num_tokens": 5757008794.0, "step": 7525 }, { "epoch": 2.7569845195566547, "grad_norm": 0.12445556809934435, "learning_rate": 1.9879288314147652e-05, "loss": 0.4765, "num_tokens": 5757823266.0, "step": 7526 }, { "epoch": 2.7573509205825775, "grad_norm": 0.14018299725793423, "learning_rate": 1.9875045470835642e-05, "loss": 0.4439, "num_tokens": 5758572418.0, "step": 7527 }, { "epoch": 2.7577173216085002, "grad_norm": 0.11702074210454495, "learning_rate": 1.9870802747253223e-05, "loss": 0.4427, "num_tokens": 5759436321.0, "step": 7528 }, { "epoch": 2.7580837226344235, "grad_norm": 0.12888398166336779, "learning_rate": 1.986656014363944e-05, "loss": 0.4456, "num_tokens": 5760296082.0, "step": 7529 }, { "epoch": 2.7584501236603463, "grad_norm": 0.12598526869366258, "learning_rate": 1.9862317660233354e-05, "loss": 0.4311, "num_tokens": 5761001074.0, "step": 7530 }, { "epoch": 2.758816524686269, "grad_norm": 0.12294372402139091, "learning_rate": 1.9858075297274003e-05, "loss": 0.4467, "num_tokens": 5761749639.0, "step": 7531 }, { "epoch": 2.7591829257121923, "grad_norm": 0.15003342507038878, "learning_rate": 1.9853833055000404e-05, "loss": 0.483, "num_tokens": 5762565010.0, "step": 7532 }, { "epoch": 2.7595493267381146, "grad_norm": 0.12912058031628607, "learning_rate": 1.984959093365161e-05, "loss": 0.4443, "num_tokens": 5763363132.0, "step": 7533 }, { "epoch": 2.759915727764038, "grad_norm": 0.12869343696420482, "learning_rate": 1.9845348933466624e-05, "loss": 0.437, "num_tokens": 5764105728.0, "step": 7534 }, { "epoch": 2.7602821287899606, "grad_norm": 0.12653042669794978, "learning_rate": 1.984110705468447e-05, "loss": 0.4513, "num_tokens": 5764884410.0, "step": 7535 }, { "epoch": 2.7606485298158834, "grad_norm": 0.1272374255758925, "learning_rate": 1.983686529754415e-05, "loss": 0.4924, "num_tokens": 5765648182.0, "step": 7536 }, { "epoch": 2.7610149308418066, "grad_norm": 0.13536109600503235, "learning_rate": 1.9832623662284653e-05, "loss": 0.4788, "num_tokens": 5766420999.0, "step": 7537 }, { "epoch": 2.7613813318677294, "grad_norm": 0.1295388206264573, "learning_rate": 1.9828382149144998e-05, "loss": 0.4372, "num_tokens": 5767235872.0, "step": 7538 }, { "epoch": 2.761747732893652, "grad_norm": 0.11566518328189504, "learning_rate": 1.982414075836415e-05, "loss": 0.456, "num_tokens": 5768078474.0, "step": 7539 }, { "epoch": 2.762114133919575, "grad_norm": 0.13779143098426969, "learning_rate": 1.9819899490181087e-05, "loss": 0.4641, "num_tokens": 5768759695.0, "step": 7540 }, { "epoch": 2.7624805349454977, "grad_norm": 0.140293651150195, "learning_rate": 1.98156583448348e-05, "loss": 0.4543, "num_tokens": 5769410413.0, "step": 7541 }, { "epoch": 2.762846935971421, "grad_norm": 0.13777591102836145, "learning_rate": 1.9811417322564235e-05, "loss": 0.4562, "num_tokens": 5770219985.0, "step": 7542 }, { "epoch": 2.7632133369973437, "grad_norm": 0.1317762531313261, "learning_rate": 1.980717642360836e-05, "loss": 0.4516, "num_tokens": 5771108995.0, "step": 7543 }, { "epoch": 2.7635797380232665, "grad_norm": 0.1266236638058503, "learning_rate": 1.9802935648206127e-05, "loss": 0.4665, "num_tokens": 5771959156.0, "step": 7544 }, { "epoch": 2.7639461390491893, "grad_norm": 0.12588541292765784, "learning_rate": 1.9798694996596476e-05, "loss": 0.4744, "num_tokens": 5772700176.0, "step": 7545 }, { "epoch": 2.764312540075112, "grad_norm": 0.15506261687090409, "learning_rate": 1.979445446901835e-05, "loss": 0.4958, "num_tokens": 5773312777.0, "step": 7546 }, { "epoch": 2.7646789411010353, "grad_norm": 0.14445045710498233, "learning_rate": 1.9790214065710675e-05, "loss": 0.4846, "num_tokens": 5774000046.0, "step": 7547 }, { "epoch": 2.765045342126958, "grad_norm": 0.15028575333976885, "learning_rate": 1.9785973786912374e-05, "loss": 0.4381, "num_tokens": 5774779066.0, "step": 7548 }, { "epoch": 2.765411743152881, "grad_norm": 0.12648970624185785, "learning_rate": 1.978173363286237e-05, "loss": 0.4687, "num_tokens": 5775517501.0, "step": 7549 }, { "epoch": 2.7657781441788036, "grad_norm": 0.1472847974415491, "learning_rate": 1.977749360379957e-05, "loss": 0.5002, "num_tokens": 5776330792.0, "step": 7550 }, { "epoch": 2.7661445452047264, "grad_norm": 0.14850130377666285, "learning_rate": 1.9773253699962868e-05, "loss": 0.4574, "num_tokens": 5777010516.0, "step": 7551 }, { "epoch": 2.7665109462306496, "grad_norm": 0.14508407624038142, "learning_rate": 1.9769013921591168e-05, "loss": 0.4593, "num_tokens": 5777788886.0, "step": 7552 }, { "epoch": 2.7668773472565724, "grad_norm": 0.14545592782252714, "learning_rate": 1.976477426892336e-05, "loss": 0.4504, "num_tokens": 5778596217.0, "step": 7553 }, { "epoch": 2.767243748282495, "grad_norm": 0.12727899729357256, "learning_rate": 1.9760534742198313e-05, "loss": 0.4691, "num_tokens": 5779464687.0, "step": 7554 }, { "epoch": 2.767610149308418, "grad_norm": 0.12791972707396765, "learning_rate": 1.9756295341654916e-05, "loss": 0.4372, "num_tokens": 5780239380.0, "step": 7555 }, { "epoch": 2.7679765503343408, "grad_norm": 0.13505112908683453, "learning_rate": 1.975205606753203e-05, "loss": 0.4745, "num_tokens": 5781012640.0, "step": 7556 }, { "epoch": 2.768342951360264, "grad_norm": 0.12547148010330555, "learning_rate": 1.9747816920068514e-05, "loss": 0.4364, "num_tokens": 5781798399.0, "step": 7557 }, { "epoch": 2.7687093523861868, "grad_norm": 0.12906551764460908, "learning_rate": 1.9743577899503225e-05, "loss": 0.4662, "num_tokens": 5782601449.0, "step": 7558 }, { "epoch": 2.7690757534121095, "grad_norm": 0.13527316238127093, "learning_rate": 1.9739339006075e-05, "loss": 0.4555, "num_tokens": 5783313025.0, "step": 7559 }, { "epoch": 2.7694421544380323, "grad_norm": 0.13073744226331033, "learning_rate": 1.973510024002269e-05, "loss": 0.5097, "num_tokens": 5784033189.0, "step": 7560 }, { "epoch": 2.769808555463955, "grad_norm": 0.14794434261155645, "learning_rate": 1.9730861601585118e-05, "loss": 0.4337, "num_tokens": 5784701810.0, "step": 7561 }, { "epoch": 2.7701749564898783, "grad_norm": 0.12473585574271734, "learning_rate": 1.97266230910011e-05, "loss": 0.4327, "num_tokens": 5785658038.0, "step": 7562 }, { "epoch": 2.770541357515801, "grad_norm": 0.13018299696924077, "learning_rate": 1.9722384708509476e-05, "loss": 0.4486, "num_tokens": 5786298164.0, "step": 7563 }, { "epoch": 2.770907758541724, "grad_norm": 0.1660414615263941, "learning_rate": 1.971814645434903e-05, "loss": 0.487, "num_tokens": 5787037891.0, "step": 7564 }, { "epoch": 2.7712741595676467, "grad_norm": 0.1384396392999466, "learning_rate": 1.9713908328758586e-05, "loss": 0.4497, "num_tokens": 5787805764.0, "step": 7565 }, { "epoch": 2.7716405605935694, "grad_norm": 0.12235929809899451, "learning_rate": 1.9709670331976925e-05, "loss": 0.452, "num_tokens": 5788579220.0, "step": 7566 }, { "epoch": 2.7720069616194927, "grad_norm": 0.1310425468007586, "learning_rate": 1.970543246424284e-05, "loss": 0.455, "num_tokens": 5789360435.0, "step": 7567 }, { "epoch": 2.7723733626454155, "grad_norm": 0.14387096676675565, "learning_rate": 1.9701194725795116e-05, "loss": 0.4198, "num_tokens": 5790189291.0, "step": 7568 }, { "epoch": 2.7727397636713382, "grad_norm": 0.13081856274403988, "learning_rate": 1.9696957116872522e-05, "loss": 0.4237, "num_tokens": 5790893099.0, "step": 7569 }, { "epoch": 2.773106164697261, "grad_norm": 0.12977576712808117, "learning_rate": 1.969271963771382e-05, "loss": 0.4661, "num_tokens": 5791662548.0, "step": 7570 }, { "epoch": 2.773472565723184, "grad_norm": 0.13896188942167972, "learning_rate": 1.9688482288557776e-05, "loss": 0.4632, "num_tokens": 5792458657.0, "step": 7571 }, { "epoch": 2.773838966749107, "grad_norm": 0.126147780353413, "learning_rate": 1.9684245069643134e-05, "loss": 0.4548, "num_tokens": 5793247241.0, "step": 7572 }, { "epoch": 2.77420536777503, "grad_norm": 0.14592100460821436, "learning_rate": 1.9680007981208645e-05, "loss": 0.4387, "num_tokens": 5793976959.0, "step": 7573 }, { "epoch": 2.7745717688009526, "grad_norm": 0.12764425499579435, "learning_rate": 1.967577102349304e-05, "loss": 0.476, "num_tokens": 5794815836.0, "step": 7574 }, { "epoch": 2.7749381698268754, "grad_norm": 0.14538044020363391, "learning_rate": 1.9671534196735056e-05, "loss": 0.4351, "num_tokens": 5795521699.0, "step": 7575 }, { "epoch": 2.775304570852798, "grad_norm": 0.1223074824345873, "learning_rate": 1.9667297501173412e-05, "loss": 0.4609, "num_tokens": 5796330815.0, "step": 7576 }, { "epoch": 2.7756709718787214, "grad_norm": 0.1326535867276308, "learning_rate": 1.9663060937046822e-05, "loss": 0.4555, "num_tokens": 5797023342.0, "step": 7577 }, { "epoch": 2.776037372904644, "grad_norm": 0.1431773082231732, "learning_rate": 1.965882450459399e-05, "loss": 0.48, "num_tokens": 5797733235.0, "step": 7578 }, { "epoch": 2.776403773930567, "grad_norm": 0.14052919100504546, "learning_rate": 1.965458820405362e-05, "loss": 0.4805, "num_tokens": 5798533586.0, "step": 7579 }, { "epoch": 2.77677017495649, "grad_norm": 0.14820236524717972, "learning_rate": 1.9650352035664406e-05, "loss": 0.4462, "num_tokens": 5799300468.0, "step": 7580 }, { "epoch": 2.7771365759824125, "grad_norm": 0.1308927679215123, "learning_rate": 1.9646115999665024e-05, "loss": 0.461, "num_tokens": 5800030318.0, "step": 7581 }, { "epoch": 2.7775029770083357, "grad_norm": 0.12658714258998643, "learning_rate": 1.9641880096294162e-05, "loss": 0.4533, "num_tokens": 5800922655.0, "step": 7582 }, { "epoch": 2.7778693780342585, "grad_norm": 0.12480051594470028, "learning_rate": 1.963764432579048e-05, "loss": 0.4643, "num_tokens": 5801729982.0, "step": 7583 }, { "epoch": 2.7782357790601813, "grad_norm": 0.15142297571484856, "learning_rate": 1.963340868839265e-05, "loss": 0.4549, "num_tokens": 5802457482.0, "step": 7584 }, { "epoch": 2.7786021800861045, "grad_norm": 0.1276401874485351, "learning_rate": 1.962917318433933e-05, "loss": 0.4365, "num_tokens": 5803183440.0, "step": 7585 }, { "epoch": 2.7789685811120273, "grad_norm": 0.14381286507306354, "learning_rate": 1.962493781386915e-05, "loss": 0.436, "num_tokens": 5804007570.0, "step": 7586 }, { "epoch": 2.77933498213795, "grad_norm": 0.13199932491324304, "learning_rate": 1.962070257722077e-05, "loss": 0.4213, "num_tokens": 5804666105.0, "step": 7587 }, { "epoch": 2.779701383163873, "grad_norm": 0.13968797473234607, "learning_rate": 1.961646747463281e-05, "loss": 0.4927, "num_tokens": 5805381095.0, "step": 7588 }, { "epoch": 2.7800677841897956, "grad_norm": 0.1292557388863871, "learning_rate": 1.9612232506343888e-05, "loss": 0.4664, "num_tokens": 5806190992.0, "step": 7589 }, { "epoch": 2.780434185215719, "grad_norm": 0.13530452096793713, "learning_rate": 1.9607997672592645e-05, "loss": 0.4298, "num_tokens": 5806987342.0, "step": 7590 }, { "epoch": 2.7808005862416416, "grad_norm": 0.13247209160762644, "learning_rate": 1.9603762973617674e-05, "loss": 0.4209, "num_tokens": 5807693841.0, "step": 7591 }, { "epoch": 2.7811669872675644, "grad_norm": 0.12338381612484871, "learning_rate": 1.9599528409657576e-05, "loss": 0.457, "num_tokens": 5808430251.0, "step": 7592 }, { "epoch": 2.781533388293487, "grad_norm": 0.1340383725421084, "learning_rate": 1.9595293980950956e-05, "loss": 0.465, "num_tokens": 5809128048.0, "step": 7593 }, { "epoch": 2.78189978931941, "grad_norm": 0.14331473958226953, "learning_rate": 1.9591059687736385e-05, "loss": 0.4639, "num_tokens": 5809863699.0, "step": 7594 }, { "epoch": 2.782266190345333, "grad_norm": 0.13586137998668146, "learning_rate": 1.9586825530252463e-05, "loss": 0.4236, "num_tokens": 5810701851.0, "step": 7595 }, { "epoch": 2.782632591371256, "grad_norm": 0.12637638942982377, "learning_rate": 1.958259150873775e-05, "loss": 0.4602, "num_tokens": 5811384465.0, "step": 7596 }, { "epoch": 2.7829989923971787, "grad_norm": 0.13053322198872458, "learning_rate": 1.9578357623430802e-05, "loss": 0.4422, "num_tokens": 5812223358.0, "step": 7597 }, { "epoch": 2.7833653934231015, "grad_norm": 0.13547082929877136, "learning_rate": 1.9574123874570198e-05, "loss": 0.446, "num_tokens": 5813109875.0, "step": 7598 }, { "epoch": 2.7837317944490243, "grad_norm": 0.127389261024506, "learning_rate": 1.9569890262394466e-05, "loss": 0.4667, "num_tokens": 5813907470.0, "step": 7599 }, { "epoch": 2.7840981954749475, "grad_norm": 0.14492625669070838, "learning_rate": 1.9565656787142152e-05, "loss": 0.4636, "num_tokens": 5814621401.0, "step": 7600 }, { "epoch": 2.7844645965008703, "grad_norm": 0.12920481824937707, "learning_rate": 1.956142344905179e-05, "loss": 0.4354, "num_tokens": 5815406968.0, "step": 7601 }, { "epoch": 2.784830997526793, "grad_norm": 0.12786583476331093, "learning_rate": 1.9557190248361904e-05, "loss": 0.421, "num_tokens": 5816254309.0, "step": 7602 }, { "epoch": 2.785197398552716, "grad_norm": 0.14321408419877116, "learning_rate": 1.955295718531103e-05, "loss": 0.4476, "num_tokens": 5817088357.0, "step": 7603 }, { "epoch": 2.7855637995786386, "grad_norm": 0.12282441172800174, "learning_rate": 1.954872426013765e-05, "loss": 0.4135, "num_tokens": 5817852029.0, "step": 7604 }, { "epoch": 2.785930200604562, "grad_norm": 0.1406144005484617, "learning_rate": 1.9544491473080276e-05, "loss": 0.4752, "num_tokens": 5818628117.0, "step": 7605 }, { "epoch": 2.7862966016304846, "grad_norm": 0.1384252995734399, "learning_rate": 1.954025882437741e-05, "loss": 0.4583, "num_tokens": 5819430882.0, "step": 7606 }, { "epoch": 2.7866630026564074, "grad_norm": 0.12411089857430295, "learning_rate": 1.9536026314267538e-05, "loss": 0.4491, "num_tokens": 5820277006.0, "step": 7607 }, { "epoch": 2.78702940368233, "grad_norm": 0.12565833305200905, "learning_rate": 1.9531793942989125e-05, "loss": 0.4725, "num_tokens": 5821056879.0, "step": 7608 }, { "epoch": 2.787395804708253, "grad_norm": 0.14040294982766593, "learning_rate": 1.9527561710780656e-05, "loss": 0.479, "num_tokens": 5821803982.0, "step": 7609 }, { "epoch": 2.787762205734176, "grad_norm": 0.13444042699955971, "learning_rate": 1.9523329617880593e-05, "loss": 0.4598, "num_tokens": 5822477310.0, "step": 7610 }, { "epoch": 2.788128606760099, "grad_norm": 0.13909537357167617, "learning_rate": 1.951909766452738e-05, "loss": 0.4523, "num_tokens": 5823182208.0, "step": 7611 }, { "epoch": 2.7884950077860218, "grad_norm": 0.14418362186875486, "learning_rate": 1.9514865850959483e-05, "loss": 0.4649, "num_tokens": 5823946553.0, "step": 7612 }, { "epoch": 2.7888614088119446, "grad_norm": 0.13581214943678602, "learning_rate": 1.9510634177415322e-05, "loss": 0.4666, "num_tokens": 5824799486.0, "step": 7613 }, { "epoch": 2.7892278098378673, "grad_norm": 0.13346642956564345, "learning_rate": 1.950640264413334e-05, "loss": 0.4258, "num_tokens": 5825524323.0, "step": 7614 }, { "epoch": 2.7895942108637906, "grad_norm": 0.125084369220916, "learning_rate": 1.9502171251351964e-05, "loss": 0.439, "num_tokens": 5826344445.0, "step": 7615 }, { "epoch": 2.7899606118897133, "grad_norm": 0.1338365751646394, "learning_rate": 1.9497939999309595e-05, "loss": 0.4481, "num_tokens": 5827125973.0, "step": 7616 }, { "epoch": 2.790327012915636, "grad_norm": 0.13949312383975668, "learning_rate": 1.949370888824466e-05, "loss": 0.4607, "num_tokens": 5828018201.0, "step": 7617 }, { "epoch": 2.790693413941559, "grad_norm": 0.12119280489616822, "learning_rate": 1.9489477918395545e-05, "loss": 0.4892, "num_tokens": 5828706447.0, "step": 7618 }, { "epoch": 2.7910598149674817, "grad_norm": 0.13089972725008175, "learning_rate": 1.9485247090000642e-05, "loss": 0.4319, "num_tokens": 5829510829.0, "step": 7619 }, { "epoch": 2.791426215993405, "grad_norm": 0.1283319018058521, "learning_rate": 1.948101640329835e-05, "loss": 0.464, "num_tokens": 5830383486.0, "step": 7620 }, { "epoch": 2.7917926170193277, "grad_norm": 0.13038950187320625, "learning_rate": 1.9476785858527027e-05, "loss": 0.4494, "num_tokens": 5831126979.0, "step": 7621 }, { "epoch": 2.7921590180452505, "grad_norm": 0.13900854174424201, "learning_rate": 1.9472555455925053e-05, "loss": 0.4793, "num_tokens": 5831897519.0, "step": 7622 }, { "epoch": 2.7925254190711732, "grad_norm": 0.15008903983673097, "learning_rate": 1.946832519573078e-05, "loss": 0.463, "num_tokens": 5832583818.0, "step": 7623 }, { "epoch": 2.792891820097096, "grad_norm": 0.12907388215178553, "learning_rate": 1.9464095078182563e-05, "loss": 0.4596, "num_tokens": 5833387126.0, "step": 7624 }, { "epoch": 2.7932582211230192, "grad_norm": 0.13779568231116182, "learning_rate": 1.9459865103518757e-05, "loss": 0.4775, "num_tokens": 5834087002.0, "step": 7625 }, { "epoch": 2.793624622148942, "grad_norm": 0.1505088610561773, "learning_rate": 1.9455635271977678e-05, "loss": 0.4795, "num_tokens": 5834741159.0, "step": 7626 }, { "epoch": 2.793991023174865, "grad_norm": 0.12393756043660023, "learning_rate": 1.9451405583797664e-05, "loss": 0.4514, "num_tokens": 5835508806.0, "step": 7627 }, { "epoch": 2.794357424200788, "grad_norm": 0.15002483802344022, "learning_rate": 1.9447176039217043e-05, "loss": 0.478, "num_tokens": 5836388101.0, "step": 7628 }, { "epoch": 2.7947238252267104, "grad_norm": 0.12339621850492172, "learning_rate": 1.944294663847411e-05, "loss": 0.4316, "num_tokens": 5837155304.0, "step": 7629 }, { "epoch": 2.7950902262526336, "grad_norm": 0.1430495146102371, "learning_rate": 1.9438717381807183e-05, "loss": 0.4343, "num_tokens": 5838005056.0, "step": 7630 }, { "epoch": 2.7954566272785564, "grad_norm": 0.11438106746662666, "learning_rate": 1.943448826945455e-05, "loss": 0.4545, "num_tokens": 5838834987.0, "step": 7631 }, { "epoch": 2.795823028304479, "grad_norm": 0.1318794251639858, "learning_rate": 1.9430259301654498e-05, "loss": 0.434, "num_tokens": 5839701010.0, "step": 7632 }, { "epoch": 2.7961894293304024, "grad_norm": 0.14421042722322835, "learning_rate": 1.9426030478645314e-05, "loss": 0.4578, "num_tokens": 5840427467.0, "step": 7633 }, { "epoch": 2.796555830356325, "grad_norm": 0.13428396890430172, "learning_rate": 1.942180180066526e-05, "loss": 0.459, "num_tokens": 5841227893.0, "step": 7634 }, { "epoch": 2.796922231382248, "grad_norm": 0.13725694908458985, "learning_rate": 1.94175732679526e-05, "loss": 0.4506, "num_tokens": 5841942443.0, "step": 7635 }, { "epoch": 2.7972886324081707, "grad_norm": 0.13055914345221972, "learning_rate": 1.941334488074559e-05, "loss": 0.4694, "num_tokens": 5842739969.0, "step": 7636 }, { "epoch": 2.7976550334340935, "grad_norm": 0.12148320225519635, "learning_rate": 1.9409116639282485e-05, "loss": 0.4372, "num_tokens": 5843485343.0, "step": 7637 }, { "epoch": 2.7980214344600167, "grad_norm": 0.12903516652962363, "learning_rate": 1.940488854380151e-05, "loss": 0.4343, "num_tokens": 5844273830.0, "step": 7638 }, { "epoch": 2.7983878354859395, "grad_norm": 0.1302666397058732, "learning_rate": 1.9400660594540902e-05, "loss": 0.433, "num_tokens": 5844918780.0, "step": 7639 }, { "epoch": 2.7987542365118623, "grad_norm": 0.1271724213744409, "learning_rate": 1.939643279173888e-05, "loss": 0.4126, "num_tokens": 5845773733.0, "step": 7640 }, { "epoch": 2.799120637537785, "grad_norm": 0.137098719782317, "learning_rate": 1.9392205135633665e-05, "loss": 0.4263, "num_tokens": 5846517577.0, "step": 7641 }, { "epoch": 2.799487038563708, "grad_norm": 0.14030850475828782, "learning_rate": 1.9387977626463455e-05, "loss": 0.4444, "num_tokens": 5847235035.0, "step": 7642 }, { "epoch": 2.799853439589631, "grad_norm": 0.1258734880475673, "learning_rate": 1.9383750264466446e-05, "loss": 0.4467, "num_tokens": 5847928900.0, "step": 7643 }, { "epoch": 2.800219840615554, "grad_norm": 0.13931535464869804, "learning_rate": 1.9379523049880834e-05, "loss": 0.4751, "num_tokens": 5848681065.0, "step": 7644 }, { "epoch": 2.8005862416414766, "grad_norm": 0.1279031670731789, "learning_rate": 1.9375295982944795e-05, "loss": 0.4515, "num_tokens": 5849491263.0, "step": 7645 }, { "epoch": 2.8009526426673994, "grad_norm": 0.14410926293970616, "learning_rate": 1.9371069063896493e-05, "loss": 0.4704, "num_tokens": 5850145005.0, "step": 7646 }, { "epoch": 2.801319043693322, "grad_norm": 0.1498807993542659, "learning_rate": 1.936684229297411e-05, "loss": 0.5035, "num_tokens": 5850817905.0, "step": 7647 }, { "epoch": 2.8016854447192454, "grad_norm": 0.1489673960079075, "learning_rate": 1.9362615670415788e-05, "loss": 0.4423, "num_tokens": 5851619871.0, "step": 7648 }, { "epoch": 2.802051845745168, "grad_norm": 0.13344562442094182, "learning_rate": 1.935838919645968e-05, "loss": 0.428, "num_tokens": 5852386549.0, "step": 7649 }, { "epoch": 2.802418246771091, "grad_norm": 0.1240283854161489, "learning_rate": 1.9354162871343926e-05, "loss": 0.4726, "num_tokens": 5853179339.0, "step": 7650 }, { "epoch": 2.8027846477970138, "grad_norm": 0.1410142339472919, "learning_rate": 1.934993669530664e-05, "loss": 0.4507, "num_tokens": 5853969956.0, "step": 7651 }, { "epoch": 2.8031510488229365, "grad_norm": 0.11934351333492939, "learning_rate": 1.9345710668585966e-05, "loss": 0.4771, "num_tokens": 5854721852.0, "step": 7652 }, { "epoch": 2.8035174498488598, "grad_norm": 0.13400417447762372, "learning_rate": 1.934148479142001e-05, "loss": 0.4609, "num_tokens": 5855559331.0, "step": 7653 }, { "epoch": 2.8038838508747825, "grad_norm": 0.12961707130807296, "learning_rate": 1.9337259064046868e-05, "loss": 0.4646, "num_tokens": 5856381337.0, "step": 7654 }, { "epoch": 2.8042502519007053, "grad_norm": 0.12914188356713188, "learning_rate": 1.9333033486704657e-05, "loss": 0.4794, "num_tokens": 5857191809.0, "step": 7655 }, { "epoch": 2.804616652926628, "grad_norm": 0.13368396402430502, "learning_rate": 1.932880805963144e-05, "loss": 0.48, "num_tokens": 5858032220.0, "step": 7656 }, { "epoch": 2.804983053952551, "grad_norm": 0.13049802553391987, "learning_rate": 1.9324582783065313e-05, "loss": 0.4353, "num_tokens": 5858811507.0, "step": 7657 }, { "epoch": 2.805349454978474, "grad_norm": 0.1557647069041567, "learning_rate": 1.932035765724434e-05, "loss": 0.455, "num_tokens": 5859465712.0, "step": 7658 }, { "epoch": 2.805715856004397, "grad_norm": 0.1275462702059613, "learning_rate": 1.931613268240659e-05, "loss": 0.4746, "num_tokens": 5860233124.0, "step": 7659 }, { "epoch": 2.8060822570303197, "grad_norm": 0.13660785262308098, "learning_rate": 1.9311907858790117e-05, "loss": 0.4855, "num_tokens": 5860951931.0, "step": 7660 }, { "epoch": 2.8064486580562424, "grad_norm": 0.13371317028177382, "learning_rate": 1.9307683186632964e-05, "loss": 0.4388, "num_tokens": 5861752748.0, "step": 7661 }, { "epoch": 2.806815059082165, "grad_norm": 0.1381687612808067, "learning_rate": 1.9303458666173165e-05, "loss": 0.4656, "num_tokens": 5862479217.0, "step": 7662 }, { "epoch": 2.8071814601080884, "grad_norm": 0.12031052148122956, "learning_rate": 1.9299234297648756e-05, "loss": 0.4611, "num_tokens": 5863351231.0, "step": 7663 }, { "epoch": 2.8075478611340112, "grad_norm": 0.1355751313823843, "learning_rate": 1.9295010081297753e-05, "loss": 0.4501, "num_tokens": 5864063306.0, "step": 7664 }, { "epoch": 2.807914262159934, "grad_norm": 0.1315808334308226, "learning_rate": 1.9290786017358165e-05, "loss": 0.4739, "num_tokens": 5864769650.0, "step": 7665 }, { "epoch": 2.808280663185857, "grad_norm": 0.12954036974246796, "learning_rate": 1.9286562106068e-05, "loss": 0.4324, "num_tokens": 5865600693.0, "step": 7666 }, { "epoch": 2.8086470642117796, "grad_norm": 0.14293760421660615, "learning_rate": 1.9282338347665243e-05, "loss": 0.4791, "num_tokens": 5866312564.0, "step": 7667 }, { "epoch": 2.809013465237703, "grad_norm": 0.12281185126975007, "learning_rate": 1.9278114742387894e-05, "loss": 0.4418, "num_tokens": 5867057447.0, "step": 7668 }, { "epoch": 2.8093798662636256, "grad_norm": 0.1346843674741414, "learning_rate": 1.9273891290473924e-05, "loss": 0.4703, "num_tokens": 5867806483.0, "step": 7669 }, { "epoch": 2.8097462672895483, "grad_norm": 0.14290889380205485, "learning_rate": 1.926966799216129e-05, "loss": 0.4724, "num_tokens": 5868593472.0, "step": 7670 }, { "epoch": 2.810112668315471, "grad_norm": 0.12260708538042718, "learning_rate": 1.9265444847687975e-05, "loss": 0.4461, "num_tokens": 5869375426.0, "step": 7671 }, { "epoch": 2.810479069341394, "grad_norm": 0.14389849108284303, "learning_rate": 1.9261221857291918e-05, "loss": 0.4682, "num_tokens": 5870069550.0, "step": 7672 }, { "epoch": 2.810845470367317, "grad_norm": 0.13186594413490174, "learning_rate": 1.925699902121105e-05, "loss": 0.4383, "num_tokens": 5870855756.0, "step": 7673 }, { "epoch": 2.81121187139324, "grad_norm": 0.13246878554089012, "learning_rate": 1.925277633968332e-05, "loss": 0.4675, "num_tokens": 5871689005.0, "step": 7674 }, { "epoch": 2.8115782724191627, "grad_norm": 0.13567449915776736, "learning_rate": 1.924855381294665e-05, "loss": 0.4739, "num_tokens": 5872452797.0, "step": 7675 }, { "epoch": 2.811944673445086, "grad_norm": 0.13290164341994096, "learning_rate": 1.9244331441238954e-05, "loss": 0.4695, "num_tokens": 5873198438.0, "step": 7676 }, { "epoch": 2.8123110744710083, "grad_norm": 0.13270450684972085, "learning_rate": 1.924010922479814e-05, "loss": 0.3909, "num_tokens": 5873951455.0, "step": 7677 }, { "epoch": 2.8126774754969315, "grad_norm": 0.1426746340105434, "learning_rate": 1.9235887163862106e-05, "loss": 0.47, "num_tokens": 5874611492.0, "step": 7678 }, { "epoch": 2.8130438765228543, "grad_norm": 0.13204225678833853, "learning_rate": 1.9231665258668743e-05, "loss": 0.4613, "num_tokens": 5875296213.0, "step": 7679 }, { "epoch": 2.813410277548777, "grad_norm": 0.1394159029420864, "learning_rate": 1.9227443509455938e-05, "loss": 0.4447, "num_tokens": 5876009290.0, "step": 7680 }, { "epoch": 2.8137766785747003, "grad_norm": 0.13455445606623395, "learning_rate": 1.922322191646155e-05, "loss": 0.4402, "num_tokens": 5876893985.0, "step": 7681 }, { "epoch": 2.814143079600623, "grad_norm": 0.13234271800590494, "learning_rate": 1.921900047992346e-05, "loss": 0.48, "num_tokens": 5877633529.0, "step": 7682 }, { "epoch": 2.814509480626546, "grad_norm": 0.13815146251822474, "learning_rate": 1.9214779200079513e-05, "loss": 0.4353, "num_tokens": 5878436379.0, "step": 7683 }, { "epoch": 2.8148758816524686, "grad_norm": 0.1367360677826293, "learning_rate": 1.9210558077167548e-05, "loss": 0.4351, "num_tokens": 5879082122.0, "step": 7684 }, { "epoch": 2.8152422826783914, "grad_norm": 0.13234827803844695, "learning_rate": 1.9206337111425416e-05, "loss": 0.4398, "num_tokens": 5879929151.0, "step": 7685 }, { "epoch": 2.8156086837043146, "grad_norm": 0.14378870266776556, "learning_rate": 1.9202116303090937e-05, "loss": 0.5152, "num_tokens": 5880735999.0, "step": 7686 }, { "epoch": 2.8159750847302374, "grad_norm": 0.14822495218254245, "learning_rate": 1.9197895652401938e-05, "loss": 0.4567, "num_tokens": 5881494737.0, "step": 7687 }, { "epoch": 2.81634148575616, "grad_norm": 0.12648032593693806, "learning_rate": 1.919367515959622e-05, "loss": 0.4576, "num_tokens": 5882281402.0, "step": 7688 }, { "epoch": 2.816707886782083, "grad_norm": 0.15092368616591018, "learning_rate": 1.918945482491159e-05, "loss": 0.464, "num_tokens": 5883004824.0, "step": 7689 }, { "epoch": 2.8170742878080057, "grad_norm": 0.11523436985659542, "learning_rate": 1.9185234648585847e-05, "loss": 0.4498, "num_tokens": 5884001332.0, "step": 7690 }, { "epoch": 2.817440688833929, "grad_norm": 0.13239621908657212, "learning_rate": 1.9181014630856763e-05, "loss": 0.4675, "num_tokens": 5884771865.0, "step": 7691 }, { "epoch": 2.8178070898598517, "grad_norm": 0.13566671073511297, "learning_rate": 1.9176794771962114e-05, "loss": 0.4567, "num_tokens": 5885507116.0, "step": 7692 }, { "epoch": 2.8181734908857745, "grad_norm": 0.1434245943284679, "learning_rate": 1.9172575072139677e-05, "loss": 0.4616, "num_tokens": 5886279116.0, "step": 7693 }, { "epoch": 2.8185398919116973, "grad_norm": 0.13182453490643395, "learning_rate": 1.91683555316272e-05, "loss": 0.4278, "num_tokens": 5887126249.0, "step": 7694 }, { "epoch": 2.81890629293762, "grad_norm": 0.1119375610504871, "learning_rate": 1.9164136150662428e-05, "loss": 0.4819, "num_tokens": 5887900977.0, "step": 7695 }, { "epoch": 2.8192726939635433, "grad_norm": 0.14089969785006012, "learning_rate": 1.9159916929483114e-05, "loss": 0.4477, "num_tokens": 5888726827.0, "step": 7696 }, { "epoch": 2.819639094989466, "grad_norm": 0.13738123629456264, "learning_rate": 1.915569786832697e-05, "loss": 0.4547, "num_tokens": 5889519427.0, "step": 7697 }, { "epoch": 2.820005496015389, "grad_norm": 0.13794061757367637, "learning_rate": 1.9151478967431733e-05, "loss": 0.4508, "num_tokens": 5890291811.0, "step": 7698 }, { "epoch": 2.8203718970413116, "grad_norm": 0.1298245469828392, "learning_rate": 1.9147260227035112e-05, "loss": 0.4794, "num_tokens": 5891011106.0, "step": 7699 }, { "epoch": 2.8207382980672344, "grad_norm": 0.14754815907830698, "learning_rate": 1.9143041647374798e-05, "loss": 0.4557, "num_tokens": 5891873881.0, "step": 7700 }, { "epoch": 2.8211046990931576, "grad_norm": 0.12477744739579913, "learning_rate": 1.91388232286885e-05, "loss": 0.4447, "num_tokens": 5892662828.0, "step": 7701 }, { "epoch": 2.8214711001190804, "grad_norm": 0.12981492017429203, "learning_rate": 1.9134604971213895e-05, "loss": 0.4577, "num_tokens": 5893429710.0, "step": 7702 }, { "epoch": 2.821837501145003, "grad_norm": 0.13025295040500295, "learning_rate": 1.913038687518866e-05, "loss": 0.4739, "num_tokens": 5894273681.0, "step": 7703 }, { "epoch": 2.822203902170926, "grad_norm": 0.12186349936019468, "learning_rate": 1.912616894085046e-05, "loss": 0.4303, "num_tokens": 5895133027.0, "step": 7704 }, { "epoch": 2.8225703031968488, "grad_norm": 0.11982462314552912, "learning_rate": 1.9121951168436954e-05, "loss": 0.4313, "num_tokens": 5895969803.0, "step": 7705 }, { "epoch": 2.822936704222772, "grad_norm": 0.13105707659295143, "learning_rate": 1.9117733558185793e-05, "loss": 0.4505, "num_tokens": 5896717502.0, "step": 7706 }, { "epoch": 2.8233031052486948, "grad_norm": 0.1440428970327973, "learning_rate": 1.9113516110334616e-05, "loss": 0.4558, "num_tokens": 5897425735.0, "step": 7707 }, { "epoch": 2.8236695062746175, "grad_norm": 0.13867224905145614, "learning_rate": 1.9109298825121043e-05, "loss": 0.4787, "num_tokens": 5898142379.0, "step": 7708 }, { "epoch": 2.8240359073005403, "grad_norm": 0.13202359814873355, "learning_rate": 1.9105081702782714e-05, "loss": 0.4434, "num_tokens": 5898943978.0, "step": 7709 }, { "epoch": 2.824402308326463, "grad_norm": 0.13603979317570003, "learning_rate": 1.9100864743557227e-05, "loss": 0.471, "num_tokens": 5899721972.0, "step": 7710 }, { "epoch": 2.8247687093523863, "grad_norm": 0.13835416534225087, "learning_rate": 1.9096647947682184e-05, "loss": 0.4819, "num_tokens": 5900446330.0, "step": 7711 }, { "epoch": 2.825135110378309, "grad_norm": 0.12466674124311525, "learning_rate": 1.9092431315395186e-05, "loss": 0.4691, "num_tokens": 5901257771.0, "step": 7712 }, { "epoch": 2.825501511404232, "grad_norm": 0.1385700445617402, "learning_rate": 1.9088214846933817e-05, "loss": 0.4495, "num_tokens": 5901994156.0, "step": 7713 }, { "epoch": 2.8258679124301547, "grad_norm": 0.12901201864320264, "learning_rate": 1.9083998542535638e-05, "loss": 0.4418, "num_tokens": 5902728661.0, "step": 7714 }, { "epoch": 2.8262343134560775, "grad_norm": 0.15508977466720536, "learning_rate": 1.9079782402438232e-05, "loss": 0.4305, "num_tokens": 5903485778.0, "step": 7715 }, { "epoch": 2.8266007144820007, "grad_norm": 0.12408457751560655, "learning_rate": 1.9075566426879145e-05, "loss": 0.445, "num_tokens": 5904336937.0, "step": 7716 }, { "epoch": 2.8269671155079235, "grad_norm": 0.13348705714005532, "learning_rate": 1.9071350616095935e-05, "loss": 0.4392, "num_tokens": 5905111528.0, "step": 7717 }, { "epoch": 2.8273335165338462, "grad_norm": 0.1361599230327324, "learning_rate": 1.906713497032613e-05, "loss": 0.4689, "num_tokens": 5905790357.0, "step": 7718 }, { "epoch": 2.827699917559769, "grad_norm": 0.1378126965886189, "learning_rate": 1.9062919489807256e-05, "loss": 0.438, "num_tokens": 5906623922.0, "step": 7719 }, { "epoch": 2.828066318585692, "grad_norm": 0.12320302739039614, "learning_rate": 1.9058704174776845e-05, "loss": 0.4435, "num_tokens": 5907481933.0, "step": 7720 }, { "epoch": 2.828432719611615, "grad_norm": 0.13096432873197197, "learning_rate": 1.90544890254724e-05, "loss": 0.4459, "num_tokens": 5908246824.0, "step": 7721 }, { "epoch": 2.828799120637538, "grad_norm": 0.12940691597427126, "learning_rate": 1.9050274042131414e-05, "loss": 0.475, "num_tokens": 5908954291.0, "step": 7722 }, { "epoch": 2.8291655216634606, "grad_norm": 0.1511156647876041, "learning_rate": 1.904605922499139e-05, "loss": 0.4465, "num_tokens": 5909749541.0, "step": 7723 }, { "epoch": 2.829531922689384, "grad_norm": 0.13421256518089122, "learning_rate": 1.90418445742898e-05, "loss": 0.4294, "num_tokens": 5910463763.0, "step": 7724 }, { "epoch": 2.829898323715306, "grad_norm": 0.13049759063493918, "learning_rate": 1.9037630090264128e-05, "loss": 0.4879, "num_tokens": 5911296245.0, "step": 7725 }, { "epoch": 2.8302647247412294, "grad_norm": 0.12652685589859802, "learning_rate": 1.9033415773151834e-05, "loss": 0.4385, "num_tokens": 5912037944.0, "step": 7726 }, { "epoch": 2.830631125767152, "grad_norm": 0.16145865517179078, "learning_rate": 1.9029201623190366e-05, "loss": 0.4712, "num_tokens": 5912696946.0, "step": 7727 }, { "epoch": 2.830997526793075, "grad_norm": 0.13088035299249087, "learning_rate": 1.902498764061717e-05, "loss": 0.4639, "num_tokens": 5913517495.0, "step": 7728 }, { "epoch": 2.831363927818998, "grad_norm": 0.1329128581344429, "learning_rate": 1.902077382566969e-05, "loss": 0.4634, "num_tokens": 5914283330.0, "step": 7729 }, { "epoch": 2.831730328844921, "grad_norm": 0.13183426432584394, "learning_rate": 1.9016560178585334e-05, "loss": 0.451, "num_tokens": 5915050258.0, "step": 7730 }, { "epoch": 2.8320967298708437, "grad_norm": 0.139322692840864, "learning_rate": 1.9012346699601533e-05, "loss": 0.4144, "num_tokens": 5915875098.0, "step": 7731 }, { "epoch": 2.8324631308967665, "grad_norm": 0.1281760325431863, "learning_rate": 1.9008133388955685e-05, "loss": 0.4727, "num_tokens": 5916753199.0, "step": 7732 }, { "epoch": 2.8328295319226893, "grad_norm": 0.13290510711857026, "learning_rate": 1.9003920246885195e-05, "loss": 0.4443, "num_tokens": 5917560762.0, "step": 7733 }, { "epoch": 2.8331959329486125, "grad_norm": 0.13416928371095196, "learning_rate": 1.899970727362745e-05, "loss": 0.4525, "num_tokens": 5918336140.0, "step": 7734 }, { "epoch": 2.8335623339745353, "grad_norm": 0.1253091604923174, "learning_rate": 1.8995494469419816e-05, "loss": 0.432, "num_tokens": 5919168878.0, "step": 7735 }, { "epoch": 2.833928735000458, "grad_norm": 0.12126509493618011, "learning_rate": 1.8991281834499672e-05, "loss": 0.4435, "num_tokens": 5919958175.0, "step": 7736 }, { "epoch": 2.834295136026381, "grad_norm": 0.12111499903525162, "learning_rate": 1.8987069369104377e-05, "loss": 0.4682, "num_tokens": 5920790761.0, "step": 7737 }, { "epoch": 2.8346615370523036, "grad_norm": 0.1400039932903477, "learning_rate": 1.8982857073471272e-05, "loss": 0.4461, "num_tokens": 5921478032.0, "step": 7738 }, { "epoch": 2.835027938078227, "grad_norm": 0.1292167437126476, "learning_rate": 1.8978644947837713e-05, "loss": 0.4965, "num_tokens": 5922184267.0, "step": 7739 }, { "epoch": 2.8353943391041496, "grad_norm": 0.161674765301949, "learning_rate": 1.8974432992441013e-05, "loss": 0.4685, "num_tokens": 5922867949.0, "step": 7740 }, { "epoch": 2.8357607401300724, "grad_norm": 0.12967657787995152, "learning_rate": 1.89702212075185e-05, "loss": 0.4451, "num_tokens": 5923593970.0, "step": 7741 }, { "epoch": 2.836127141155995, "grad_norm": 0.1271434064060463, "learning_rate": 1.8966009593307483e-05, "loss": 0.4623, "num_tokens": 5924487555.0, "step": 7742 }, { "epoch": 2.836493542181918, "grad_norm": 0.131609191556966, "learning_rate": 1.8961798150045263e-05, "loss": 0.4333, "num_tokens": 5925194640.0, "step": 7743 }, { "epoch": 2.836859943207841, "grad_norm": 0.136207430850327, "learning_rate": 1.8957586877969146e-05, "loss": 0.4291, "num_tokens": 5925971849.0, "step": 7744 }, { "epoch": 2.837226344233764, "grad_norm": 0.1270526111538827, "learning_rate": 1.8953375777316395e-05, "loss": 0.4673, "num_tokens": 5926702571.0, "step": 7745 }, { "epoch": 2.8375927452596867, "grad_norm": 0.1578951631637378, "learning_rate": 1.8949164848324293e-05, "loss": 0.4864, "num_tokens": 5927354476.0, "step": 7746 }, { "epoch": 2.8379591462856095, "grad_norm": 0.140438662886284, "learning_rate": 1.8944954091230104e-05, "loss": 0.4641, "num_tokens": 5928080589.0, "step": 7747 }, { "epoch": 2.8383255473115323, "grad_norm": 0.13249474838168462, "learning_rate": 1.8940743506271068e-05, "loss": 0.4505, "num_tokens": 5928817202.0, "step": 7748 }, { "epoch": 2.8386919483374555, "grad_norm": 0.1374050710524343, "learning_rate": 1.893653309368444e-05, "loss": 0.4512, "num_tokens": 5929483920.0, "step": 7749 }, { "epoch": 2.8390583493633783, "grad_norm": 0.12520857403323865, "learning_rate": 1.8932322853707454e-05, "loss": 0.4424, "num_tokens": 5930305957.0, "step": 7750 }, { "epoch": 2.839424750389301, "grad_norm": 0.1404553232857818, "learning_rate": 1.8928112786577325e-05, "loss": 0.4631, "num_tokens": 5931055687.0, "step": 7751 }, { "epoch": 2.839791151415224, "grad_norm": 0.13133794927703943, "learning_rate": 1.8923902892531284e-05, "loss": 0.4818, "num_tokens": 5931881583.0, "step": 7752 }, { "epoch": 2.8401575524411466, "grad_norm": 0.13052516772678574, "learning_rate": 1.891969317180652e-05, "loss": 0.4697, "num_tokens": 5932791031.0, "step": 7753 }, { "epoch": 2.84052395346707, "grad_norm": 0.12852847596812014, "learning_rate": 1.891548362464023e-05, "loss": 0.4431, "num_tokens": 5933616856.0, "step": 7754 }, { "epoch": 2.8408903544929927, "grad_norm": 0.1238929966541012, "learning_rate": 1.891127425126961e-05, "loss": 0.4561, "num_tokens": 5934405670.0, "step": 7755 }, { "epoch": 2.8412567555189154, "grad_norm": 0.12780823528028012, "learning_rate": 1.8907065051931822e-05, "loss": 0.461, "num_tokens": 5935246346.0, "step": 7756 }, { "epoch": 2.841623156544838, "grad_norm": 0.1205605032053193, "learning_rate": 1.8902856026864037e-05, "loss": 0.4352, "num_tokens": 5935995859.0, "step": 7757 }, { "epoch": 2.841989557570761, "grad_norm": 0.13623756935173836, "learning_rate": 1.8898647176303407e-05, "loss": 0.4374, "num_tokens": 5936684240.0, "step": 7758 }, { "epoch": 2.842355958596684, "grad_norm": 0.12844707751981652, "learning_rate": 1.889443850048709e-05, "loss": 0.4508, "num_tokens": 5937412233.0, "step": 7759 }, { "epoch": 2.842722359622607, "grad_norm": 0.1368896257962863, "learning_rate": 1.8890229999652204e-05, "loss": 0.4751, "num_tokens": 5938176345.0, "step": 7760 }, { "epoch": 2.84308876064853, "grad_norm": 0.11808643932173825, "learning_rate": 1.888602167403589e-05, "loss": 0.4386, "num_tokens": 5938978064.0, "step": 7761 }, { "epoch": 2.8434551616744526, "grad_norm": 0.14002191883941617, "learning_rate": 1.8881813523875253e-05, "loss": 0.4819, "num_tokens": 5939649544.0, "step": 7762 }, { "epoch": 2.8438215627003753, "grad_norm": 0.1404226204760925, "learning_rate": 1.8877605549407407e-05, "loss": 0.5031, "num_tokens": 5940334098.0, "step": 7763 }, { "epoch": 2.8441879637262986, "grad_norm": 0.13462827778113168, "learning_rate": 1.887339775086945e-05, "loss": 0.4535, "num_tokens": 5941127176.0, "step": 7764 }, { "epoch": 2.8445543647522213, "grad_norm": 0.11770586756318113, "learning_rate": 1.886919012849845e-05, "loss": 0.4362, "num_tokens": 5942011125.0, "step": 7765 }, { "epoch": 2.844920765778144, "grad_norm": 0.11548910487332432, "learning_rate": 1.886498268253152e-05, "loss": 0.4718, "num_tokens": 5942808423.0, "step": 7766 }, { "epoch": 2.845287166804067, "grad_norm": 0.14196526419704628, "learning_rate": 1.8860775413205686e-05, "loss": 0.4654, "num_tokens": 5943595025.0, "step": 7767 }, { "epoch": 2.8456535678299897, "grad_norm": 0.1272096591175388, "learning_rate": 1.8856568320758032e-05, "loss": 0.4178, "num_tokens": 5944291144.0, "step": 7768 }, { "epoch": 2.846019968855913, "grad_norm": 0.11941726012959522, "learning_rate": 1.8852361405425597e-05, "loss": 0.4409, "num_tokens": 5945126180.0, "step": 7769 }, { "epoch": 2.8463863698818357, "grad_norm": 0.12737181176735057, "learning_rate": 1.8848154667445406e-05, "loss": 0.4572, "num_tokens": 5945974458.0, "step": 7770 }, { "epoch": 2.8467527709077585, "grad_norm": 0.1348929217677575, "learning_rate": 1.8843948107054508e-05, "loss": 0.4575, "num_tokens": 5946594014.0, "step": 7771 }, { "epoch": 2.8471191719336817, "grad_norm": 0.13768604975106194, "learning_rate": 1.8839741724489906e-05, "loss": 0.4798, "num_tokens": 5947321258.0, "step": 7772 }, { "epoch": 2.847485572959604, "grad_norm": 0.12978316504354598, "learning_rate": 1.8835535519988602e-05, "loss": 0.4496, "num_tokens": 5948110324.0, "step": 7773 }, { "epoch": 2.8478519739855273, "grad_norm": 0.11843764845599312, "learning_rate": 1.8831329493787607e-05, "loss": 0.4656, "num_tokens": 5949001851.0, "step": 7774 }, { "epoch": 2.84821837501145, "grad_norm": 0.12519638356899224, "learning_rate": 1.88271236461239e-05, "loss": 0.4453, "num_tokens": 5949885613.0, "step": 7775 }, { "epoch": 2.848584776037373, "grad_norm": 0.12066843144774411, "learning_rate": 1.8822917977234453e-05, "loss": 0.4837, "num_tokens": 5950667266.0, "step": 7776 }, { "epoch": 2.848951177063296, "grad_norm": 0.13673938797897392, "learning_rate": 1.8818712487356235e-05, "loss": 0.4865, "num_tokens": 5951401091.0, "step": 7777 }, { "epoch": 2.8493175780892184, "grad_norm": 0.12892361280479286, "learning_rate": 1.8814507176726215e-05, "loss": 0.4445, "num_tokens": 5952228083.0, "step": 7778 }, { "epoch": 2.8496839791151416, "grad_norm": 0.12600104602765003, "learning_rate": 1.881030204558132e-05, "loss": 0.4389, "num_tokens": 5952989172.0, "step": 7779 }, { "epoch": 2.8500503801410644, "grad_norm": 0.12901145528561372, "learning_rate": 1.8806097094158494e-05, "loss": 0.4468, "num_tokens": 5953710687.0, "step": 7780 }, { "epoch": 2.850416781166987, "grad_norm": 0.12934967194553143, "learning_rate": 1.8801892322694666e-05, "loss": 0.454, "num_tokens": 5954495350.0, "step": 7781 }, { "epoch": 2.8507831821929104, "grad_norm": 0.12956230693772477, "learning_rate": 1.879768773142675e-05, "loss": 0.4585, "num_tokens": 5955218149.0, "step": 7782 }, { "epoch": 2.851149583218833, "grad_norm": 0.12328177202747523, "learning_rate": 1.8793483320591656e-05, "loss": 0.4435, "num_tokens": 5956049425.0, "step": 7783 }, { "epoch": 2.851515984244756, "grad_norm": 0.12945284724545184, "learning_rate": 1.8789279090426265e-05, "loss": 0.4352, "num_tokens": 5956836064.0, "step": 7784 }, { "epoch": 2.8518823852706787, "grad_norm": 0.12181647070915906, "learning_rate": 1.8785075041167484e-05, "loss": 0.4371, "num_tokens": 5957672201.0, "step": 7785 }, { "epoch": 2.8522487862966015, "grad_norm": 0.13565199119697127, "learning_rate": 1.8780871173052174e-05, "loss": 0.4913, "num_tokens": 5958389345.0, "step": 7786 }, { "epoch": 2.8526151873225247, "grad_norm": 0.14049659412349164, "learning_rate": 1.8776667486317196e-05, "loss": 0.4923, "num_tokens": 5959177815.0, "step": 7787 }, { "epoch": 2.8529815883484475, "grad_norm": 0.1444443371615228, "learning_rate": 1.8772463981199425e-05, "loss": 0.4529, "num_tokens": 5959823245.0, "step": 7788 }, { "epoch": 2.8533479893743703, "grad_norm": 0.12747687323787768, "learning_rate": 1.876826065793568e-05, "loss": 0.4643, "num_tokens": 5960640408.0, "step": 7789 }, { "epoch": 2.853714390400293, "grad_norm": 0.1433556636016812, "learning_rate": 1.8764057516762812e-05, "loss": 0.4746, "num_tokens": 5961407438.0, "step": 7790 }, { "epoch": 2.854080791426216, "grad_norm": 0.13926167930044459, "learning_rate": 1.8759854557917645e-05, "loss": 0.4247, "num_tokens": 5962191125.0, "step": 7791 }, { "epoch": 2.854447192452139, "grad_norm": 0.129478540030579, "learning_rate": 1.8755651781636982e-05, "loss": 0.4595, "num_tokens": 5962909732.0, "step": 7792 }, { "epoch": 2.854813593478062, "grad_norm": 0.13031082485026985, "learning_rate": 1.8751449188157638e-05, "loss": 0.4717, "num_tokens": 5963678923.0, "step": 7793 }, { "epoch": 2.8551799945039846, "grad_norm": 0.14955447167650865, "learning_rate": 1.87472467777164e-05, "loss": 0.4834, "num_tokens": 5964413265.0, "step": 7794 }, { "epoch": 2.8555463955299074, "grad_norm": 0.1311490691880966, "learning_rate": 1.8743044550550044e-05, "loss": 0.4775, "num_tokens": 5965225167.0, "step": 7795 }, { "epoch": 2.85591279655583, "grad_norm": 0.15090944166385856, "learning_rate": 1.8738842506895364e-05, "loss": 0.4503, "num_tokens": 5965854974.0, "step": 7796 }, { "epoch": 2.8562791975817534, "grad_norm": 0.1351580006376223, "learning_rate": 1.8734640646989103e-05, "loss": 0.4743, "num_tokens": 5966585587.0, "step": 7797 }, { "epoch": 2.856645598607676, "grad_norm": 0.16971432125999208, "learning_rate": 1.8730438971068014e-05, "loss": 0.4599, "num_tokens": 5967233816.0, "step": 7798 }, { "epoch": 2.857011999633599, "grad_norm": 0.1429175611600722, "learning_rate": 1.872623747936885e-05, "loss": 0.4694, "num_tokens": 5967904906.0, "step": 7799 }, { "epoch": 2.8573784006595218, "grad_norm": 0.14408658570785557, "learning_rate": 1.872203617212833e-05, "loss": 0.4653, "num_tokens": 5968651618.0, "step": 7800 }, { "epoch": 2.8577448016854445, "grad_norm": 0.13942414791831487, "learning_rate": 1.8717835049583186e-05, "loss": 0.4535, "num_tokens": 5969325889.0, "step": 7801 }, { "epoch": 2.8581112027113678, "grad_norm": 0.1439852170798788, "learning_rate": 1.8713634111970118e-05, "loss": 0.465, "num_tokens": 5970163377.0, "step": 7802 }, { "epoch": 2.8584776037372905, "grad_norm": 0.13715718708313906, "learning_rate": 1.8709433359525833e-05, "loss": 0.4469, "num_tokens": 5971013746.0, "step": 7803 }, { "epoch": 2.8588440047632133, "grad_norm": 0.12314170709823366, "learning_rate": 1.870523279248702e-05, "loss": 0.4475, "num_tokens": 5971788456.0, "step": 7804 }, { "epoch": 2.859210405789136, "grad_norm": 0.12580490179803108, "learning_rate": 1.8701032411090355e-05, "loss": 0.4314, "num_tokens": 5972603199.0, "step": 7805 }, { "epoch": 2.859576806815059, "grad_norm": 0.13407722770214658, "learning_rate": 1.8696832215572502e-05, "loss": 0.424, "num_tokens": 5973436270.0, "step": 7806 }, { "epoch": 2.859943207840982, "grad_norm": 0.12452224297550271, "learning_rate": 1.869263220617013e-05, "loss": 0.4206, "num_tokens": 5974167500.0, "step": 7807 }, { "epoch": 2.860309608866905, "grad_norm": 0.1307832925706471, "learning_rate": 1.8688432383119873e-05, "loss": 0.4596, "num_tokens": 5974918194.0, "step": 7808 }, { "epoch": 2.8606760098928277, "grad_norm": 0.14238826564189097, "learning_rate": 1.8684232746658385e-05, "loss": 0.4454, "num_tokens": 5975668182.0, "step": 7809 }, { "epoch": 2.8610424109187504, "grad_norm": 0.1357469015166715, "learning_rate": 1.8680033297022282e-05, "loss": 0.465, "num_tokens": 5976386386.0, "step": 7810 }, { "epoch": 2.8614088119446732, "grad_norm": 0.1517790958950826, "learning_rate": 1.867583403444818e-05, "loss": 0.4943, "num_tokens": 5977125879.0, "step": 7811 }, { "epoch": 2.8617752129705964, "grad_norm": 0.13721167877811016, "learning_rate": 1.8671634959172685e-05, "loss": 0.4554, "num_tokens": 5977831159.0, "step": 7812 }, { "epoch": 2.8621416139965192, "grad_norm": 0.12368747466175856, "learning_rate": 1.8667436071432405e-05, "loss": 0.4568, "num_tokens": 5978652737.0, "step": 7813 }, { "epoch": 2.862508015022442, "grad_norm": 0.1441832566371681, "learning_rate": 1.86632373714639e-05, "loss": 0.4573, "num_tokens": 5979378974.0, "step": 7814 }, { "epoch": 2.862874416048365, "grad_norm": 0.13107966944250773, "learning_rate": 1.8659038859503762e-05, "loss": 0.4311, "num_tokens": 5980234405.0, "step": 7815 }, { "epoch": 2.8632408170742876, "grad_norm": 0.13011296530214247, "learning_rate": 1.8654840535788542e-05, "loss": 0.4246, "num_tokens": 5980981784.0, "step": 7816 }, { "epoch": 2.863607218100211, "grad_norm": 0.11501461214181372, "learning_rate": 1.8650642400554805e-05, "loss": 0.4761, "num_tokens": 5981733305.0, "step": 7817 }, { "epoch": 2.8639736191261336, "grad_norm": 0.14291793472787323, "learning_rate": 1.864644445403909e-05, "loss": 0.4675, "num_tokens": 5982439210.0, "step": 7818 }, { "epoch": 2.8643400201520564, "grad_norm": 0.14298704823269182, "learning_rate": 1.864224669647792e-05, "loss": 0.4663, "num_tokens": 5983219359.0, "step": 7819 }, { "epoch": 2.864706421177979, "grad_norm": 0.1424928770078046, "learning_rate": 1.8638049128107825e-05, "loss": 0.4771, "num_tokens": 5984001072.0, "step": 7820 }, { "epoch": 2.865072822203902, "grad_norm": 0.13279647358915705, "learning_rate": 1.8633851749165318e-05, "loss": 0.4548, "num_tokens": 5984809619.0, "step": 7821 }, { "epoch": 2.865439223229825, "grad_norm": 0.12420355143482169, "learning_rate": 1.8629654559886877e-05, "loss": 0.4647, "num_tokens": 5985516927.0, "step": 7822 }, { "epoch": 2.865805624255748, "grad_norm": 0.13090489268606537, "learning_rate": 1.862545756050902e-05, "loss": 0.4324, "num_tokens": 5986354188.0, "step": 7823 }, { "epoch": 2.8661720252816707, "grad_norm": 0.1312177772384564, "learning_rate": 1.8621260751268203e-05, "loss": 0.5106, "num_tokens": 5987075653.0, "step": 7824 }, { "epoch": 2.866538426307594, "grad_norm": 0.1372342277916918, "learning_rate": 1.8617064132400902e-05, "loss": 0.4546, "num_tokens": 5987912034.0, "step": 7825 }, { "epoch": 2.8669048273335163, "grad_norm": 0.13197808764577143, "learning_rate": 1.8612867704143577e-05, "loss": 0.5047, "num_tokens": 5988742256.0, "step": 7826 }, { "epoch": 2.8672712283594395, "grad_norm": 0.13403202973624223, "learning_rate": 1.8608671466732658e-05, "loss": 0.4515, "num_tokens": 5989439454.0, "step": 7827 }, { "epoch": 2.8676376293853623, "grad_norm": 0.12937501258044973, "learning_rate": 1.860447542040461e-05, "loss": 0.4561, "num_tokens": 5990213221.0, "step": 7828 }, { "epoch": 2.868004030411285, "grad_norm": 0.1410049640547318, "learning_rate": 1.8600279565395825e-05, "loss": 0.4765, "num_tokens": 5990931166.0, "step": 7829 }, { "epoch": 2.8683704314372083, "grad_norm": 0.12722481481476722, "learning_rate": 1.8596083901942732e-05, "loss": 0.4376, "num_tokens": 5991800979.0, "step": 7830 }, { "epoch": 2.868736832463131, "grad_norm": 0.1354869699067122, "learning_rate": 1.8591888430281737e-05, "loss": 0.4724, "num_tokens": 5992589531.0, "step": 7831 }, { "epoch": 2.869103233489054, "grad_norm": 0.1355150222045079, "learning_rate": 1.858769315064923e-05, "loss": 0.4679, "num_tokens": 5993214911.0, "step": 7832 }, { "epoch": 2.8694696345149766, "grad_norm": 0.14043886423513408, "learning_rate": 1.8583498063281578e-05, "loss": 0.4222, "num_tokens": 5993990947.0, "step": 7833 }, { "epoch": 2.8698360355408994, "grad_norm": 0.129458968017636, "learning_rate": 1.857930316841517e-05, "loss": 0.447, "num_tokens": 5994756643.0, "step": 7834 }, { "epoch": 2.8702024365668226, "grad_norm": 0.1303083489127114, "learning_rate": 1.857510846628636e-05, "loss": 0.4518, "num_tokens": 5995554955.0, "step": 7835 }, { "epoch": 2.8705688375927454, "grad_norm": 0.13760152753016722, "learning_rate": 1.8570913957131483e-05, "loss": 0.4555, "num_tokens": 5996300592.0, "step": 7836 }, { "epoch": 2.870935238618668, "grad_norm": 0.11639472303823144, "learning_rate": 1.85667196411869e-05, "loss": 0.4586, "num_tokens": 5997124765.0, "step": 7837 }, { "epoch": 2.871301639644591, "grad_norm": 0.1320475333477907, "learning_rate": 1.8562525518688915e-05, "loss": 0.4229, "num_tokens": 5997870531.0, "step": 7838 }, { "epoch": 2.8716680406705137, "grad_norm": 0.1302413574019111, "learning_rate": 1.8558331589873863e-05, "loss": 0.4369, "num_tokens": 5998645747.0, "step": 7839 }, { "epoch": 2.872034441696437, "grad_norm": 0.14649547185139344, "learning_rate": 1.8554137854978042e-05, "loss": 0.4782, "num_tokens": 5999242285.0, "step": 7840 }, { "epoch": 2.8724008427223597, "grad_norm": 0.13310998814790573, "learning_rate": 1.8549944314237738e-05, "loss": 0.4442, "num_tokens": 6000043239.0, "step": 7841 }, { "epoch": 2.8727672437482825, "grad_norm": 0.13123428296256848, "learning_rate": 1.8545750967889246e-05, "loss": 0.4677, "num_tokens": 6000811542.0, "step": 7842 }, { "epoch": 2.8731336447742053, "grad_norm": 0.13205778112180486, "learning_rate": 1.8541557816168833e-05, "loss": 0.4717, "num_tokens": 6001544259.0, "step": 7843 }, { "epoch": 2.873500045800128, "grad_norm": 0.13179890392268714, "learning_rate": 1.8537364859312754e-05, "loss": 0.4789, "num_tokens": 6002312904.0, "step": 7844 }, { "epoch": 2.8738664468260513, "grad_norm": 0.12513986347783485, "learning_rate": 1.8533172097557278e-05, "loss": 0.4463, "num_tokens": 6003207762.0, "step": 7845 }, { "epoch": 2.874232847851974, "grad_norm": 0.13278974579768804, "learning_rate": 1.852897953113862e-05, "loss": 0.4847, "num_tokens": 6003941361.0, "step": 7846 }, { "epoch": 2.874599248877897, "grad_norm": 0.12772276959172624, "learning_rate": 1.8524787160293027e-05, "loss": 0.4423, "num_tokens": 6004790195.0, "step": 7847 }, { "epoch": 2.8749656499038196, "grad_norm": 0.1340655389010595, "learning_rate": 1.8520594985256714e-05, "loss": 0.4553, "num_tokens": 6005599954.0, "step": 7848 }, { "epoch": 2.8753320509297424, "grad_norm": 0.12276830727366034, "learning_rate": 1.851640300626587e-05, "loss": 0.4434, "num_tokens": 6006377635.0, "step": 7849 }, { "epoch": 2.8756984519556656, "grad_norm": 0.1324464850495759, "learning_rate": 1.851221122355671e-05, "loss": 0.4557, "num_tokens": 6007055346.0, "step": 7850 }, { "epoch": 2.8760648529815884, "grad_norm": 0.13319522996601238, "learning_rate": 1.850801963736541e-05, "loss": 0.4857, "num_tokens": 6007808777.0, "step": 7851 }, { "epoch": 2.876431254007511, "grad_norm": 0.1325104668007193, "learning_rate": 1.850382824792814e-05, "loss": 0.451, "num_tokens": 6008658930.0, "step": 7852 }, { "epoch": 2.876797655033434, "grad_norm": 0.13267431370868388, "learning_rate": 1.8499637055481074e-05, "loss": 0.4422, "num_tokens": 6009455161.0, "step": 7853 }, { "epoch": 2.8771640560593568, "grad_norm": 0.14609799488498937, "learning_rate": 1.8495446060260348e-05, "loss": 0.4494, "num_tokens": 6010229531.0, "step": 7854 }, { "epoch": 2.87753045708528, "grad_norm": 0.11800413287102524, "learning_rate": 1.8491255262502104e-05, "loss": 0.4264, "num_tokens": 6010883123.0, "step": 7855 }, { "epoch": 2.8778968581112028, "grad_norm": 0.1397362750752337, "learning_rate": 1.8487064662442485e-05, "loss": 0.5061, "num_tokens": 6011527592.0, "step": 7856 }, { "epoch": 2.8782632591371256, "grad_norm": 0.14059510551451954, "learning_rate": 1.848287426031759e-05, "loss": 0.4488, "num_tokens": 6012333678.0, "step": 7857 }, { "epoch": 2.8786296601630483, "grad_norm": 0.12969762448212138, "learning_rate": 1.8478684056363544e-05, "loss": 0.4448, "num_tokens": 6013069732.0, "step": 7858 }, { "epoch": 2.878996061188971, "grad_norm": 0.12072493538919872, "learning_rate": 1.847449405081642e-05, "loss": 0.449, "num_tokens": 6013878842.0, "step": 7859 }, { "epoch": 2.8793624622148943, "grad_norm": 0.13036225093272275, "learning_rate": 1.8470304243912318e-05, "loss": 0.489, "num_tokens": 6014633913.0, "step": 7860 }, { "epoch": 2.879728863240817, "grad_norm": 0.15694556524601952, "learning_rate": 1.8466114635887313e-05, "loss": 0.4957, "num_tokens": 6015204678.0, "step": 7861 }, { "epoch": 2.88009526426674, "grad_norm": 0.13590651185132926, "learning_rate": 1.8461925226977456e-05, "loss": 0.434, "num_tokens": 6015947706.0, "step": 7862 }, { "epoch": 2.8804616652926627, "grad_norm": 0.1161575689522879, "learning_rate": 1.8457736017418795e-05, "loss": 0.4697, "num_tokens": 6016890982.0, "step": 7863 }, { "epoch": 2.8808280663185855, "grad_norm": 0.12724134827758546, "learning_rate": 1.845354700744738e-05, "loss": 0.4139, "num_tokens": 6017548874.0, "step": 7864 }, { "epoch": 2.8811944673445087, "grad_norm": 0.14176702178589065, "learning_rate": 1.8449358197299227e-05, "loss": 0.4437, "num_tokens": 6018306953.0, "step": 7865 }, { "epoch": 2.8815608683704315, "grad_norm": 0.11526327255812698, "learning_rate": 1.8445169587210374e-05, "loss": 0.4642, "num_tokens": 6019076400.0, "step": 7866 }, { "epoch": 2.8819272693963542, "grad_norm": 0.13358605144520783, "learning_rate": 1.8440981177416804e-05, "loss": 0.4786, "num_tokens": 6019832055.0, "step": 7867 }, { "epoch": 2.882293670422277, "grad_norm": 0.12294957312414746, "learning_rate": 1.8436792968154513e-05, "loss": 0.4503, "num_tokens": 6020708958.0, "step": 7868 }, { "epoch": 2.8826600714482, "grad_norm": 0.12651541901910368, "learning_rate": 1.8432604959659498e-05, "loss": 0.4661, "num_tokens": 6021451641.0, "step": 7869 }, { "epoch": 2.883026472474123, "grad_norm": 0.13110829006847066, "learning_rate": 1.8428417152167725e-05, "loss": 0.4397, "num_tokens": 6022144707.0, "step": 7870 }, { "epoch": 2.883392873500046, "grad_norm": 0.1406434438832592, "learning_rate": 1.8424229545915145e-05, "loss": 0.4449, "num_tokens": 6022800979.0, "step": 7871 }, { "epoch": 2.8837592745259686, "grad_norm": 0.13275851292922974, "learning_rate": 1.8420042141137717e-05, "loss": 0.4783, "num_tokens": 6023608961.0, "step": 7872 }, { "epoch": 2.884125675551892, "grad_norm": 0.12506428787684473, "learning_rate": 1.8415854938071366e-05, "loss": 0.4498, "num_tokens": 6024384058.0, "step": 7873 }, { "epoch": 2.884492076577814, "grad_norm": 0.13820830289641564, "learning_rate": 1.8411667936952032e-05, "loss": 0.4638, "num_tokens": 6025196953.0, "step": 7874 }, { "epoch": 2.8848584776037374, "grad_norm": 0.1291022716573509, "learning_rate": 1.840748113801563e-05, "loss": 0.4426, "num_tokens": 6026026982.0, "step": 7875 }, { "epoch": 2.88522487862966, "grad_norm": 0.13057357508139023, "learning_rate": 1.8403294541498047e-05, "loss": 0.4677, "num_tokens": 6026827039.0, "step": 7876 }, { "epoch": 2.885591279655583, "grad_norm": 0.13023583458067212, "learning_rate": 1.8399108147635193e-05, "loss": 0.4311, "num_tokens": 6027616984.0, "step": 7877 }, { "epoch": 2.885957680681506, "grad_norm": 0.13053165675797343, "learning_rate": 1.8394921956662944e-05, "loss": 0.4407, "num_tokens": 6028424391.0, "step": 7878 }, { "epoch": 2.886324081707429, "grad_norm": 0.12446441196669279, "learning_rate": 1.839073596881715e-05, "loss": 0.4411, "num_tokens": 6029214141.0, "step": 7879 }, { "epoch": 2.8866904827333517, "grad_norm": 0.13591151931222614, "learning_rate": 1.8386550184333703e-05, "loss": 0.4685, "num_tokens": 6029926339.0, "step": 7880 }, { "epoch": 2.8870568837592745, "grad_norm": 0.1276340135185549, "learning_rate": 1.838236460344842e-05, "loss": 0.4389, "num_tokens": 6030654156.0, "step": 7881 }, { "epoch": 2.8874232847851973, "grad_norm": 0.14242259350293643, "learning_rate": 1.8378179226397144e-05, "loss": 0.4415, "num_tokens": 6031424228.0, "step": 7882 }, { "epoch": 2.8877896858111205, "grad_norm": 0.13485418368959431, "learning_rate": 1.837399405341571e-05, "loss": 0.4402, "num_tokens": 6032159335.0, "step": 7883 }, { "epoch": 2.8881560868370433, "grad_norm": 0.14107834411375939, "learning_rate": 1.8369809084739903e-05, "loss": 0.456, "num_tokens": 6032870523.0, "step": 7884 }, { "epoch": 2.888522487862966, "grad_norm": 0.14965221316428998, "learning_rate": 1.8365624320605557e-05, "loss": 0.4691, "num_tokens": 6033648985.0, "step": 7885 }, { "epoch": 2.888888888888889, "grad_norm": 0.1277349966354971, "learning_rate": 1.8361439761248442e-05, "loss": 0.4724, "num_tokens": 6034417941.0, "step": 7886 }, { "epoch": 2.8892552899148116, "grad_norm": 0.12738751549763672, "learning_rate": 1.8357255406904326e-05, "loss": 0.4458, "num_tokens": 6035202735.0, "step": 7887 }, { "epoch": 2.889621690940735, "grad_norm": 0.14865750173029027, "learning_rate": 1.8353071257809e-05, "loss": 0.4546, "num_tokens": 6035999460.0, "step": 7888 }, { "epoch": 2.8899880919666576, "grad_norm": 0.139852321393006, "learning_rate": 1.8348887314198192e-05, "loss": 0.4942, "num_tokens": 6036772456.0, "step": 7889 }, { "epoch": 2.8903544929925804, "grad_norm": 0.14160839197982883, "learning_rate": 1.8344703576307657e-05, "loss": 0.503, "num_tokens": 6037502250.0, "step": 7890 }, { "epoch": 2.890720894018503, "grad_norm": 0.14689425453995889, "learning_rate": 1.834052004437313e-05, "loss": 0.4622, "num_tokens": 6038224700.0, "step": 7891 }, { "epoch": 2.891087295044426, "grad_norm": 0.13684851254009425, "learning_rate": 1.8336336718630316e-05, "loss": 0.4491, "num_tokens": 6038939667.0, "step": 7892 }, { "epoch": 2.891453696070349, "grad_norm": 0.1278001477528609, "learning_rate": 1.8332153599314938e-05, "loss": 0.46, "num_tokens": 6039711344.0, "step": 7893 }, { "epoch": 2.891820097096272, "grad_norm": 0.1256328689957731, "learning_rate": 1.832797068666268e-05, "loss": 0.4328, "num_tokens": 6040488600.0, "step": 7894 }, { "epoch": 2.8921864981221947, "grad_norm": 0.1310656534609568, "learning_rate": 1.8323787980909235e-05, "loss": 0.4587, "num_tokens": 6041213006.0, "step": 7895 }, { "epoch": 2.8925528991481175, "grad_norm": 0.14985645741096879, "learning_rate": 1.8319605482290274e-05, "loss": 0.4487, "num_tokens": 6041921488.0, "step": 7896 }, { "epoch": 2.8929193001740403, "grad_norm": 0.141784014746743, "learning_rate": 1.8315423191041458e-05, "loss": 0.4402, "num_tokens": 6042644855.0, "step": 7897 }, { "epoch": 2.8932857011999635, "grad_norm": 0.13661246039986114, "learning_rate": 1.831124110739842e-05, "loss": 0.455, "num_tokens": 6043353144.0, "step": 7898 }, { "epoch": 2.8936521022258863, "grad_norm": 0.13729869791620505, "learning_rate": 1.8307059231596827e-05, "loss": 0.4436, "num_tokens": 6044125590.0, "step": 7899 }, { "epoch": 2.894018503251809, "grad_norm": 0.1392041121147139, "learning_rate": 1.8302877563872287e-05, "loss": 0.4493, "num_tokens": 6044936308.0, "step": 7900 }, { "epoch": 2.894384904277732, "grad_norm": 0.12740480390446673, "learning_rate": 1.829869610446041e-05, "loss": 0.4467, "num_tokens": 6045748935.0, "step": 7901 }, { "epoch": 2.8947513053036547, "grad_norm": 0.12906281467230413, "learning_rate": 1.8294514853596813e-05, "loss": 0.447, "num_tokens": 6046667482.0, "step": 7902 }, { "epoch": 2.895117706329578, "grad_norm": 0.11681205913129952, "learning_rate": 1.8290333811517073e-05, "loss": 0.4433, "num_tokens": 6047473263.0, "step": 7903 }, { "epoch": 2.8954841073555007, "grad_norm": 0.1358727028839107, "learning_rate": 1.8286152978456777e-05, "loss": 0.4515, "num_tokens": 6048204100.0, "step": 7904 }, { "epoch": 2.8958505083814234, "grad_norm": 0.13921958908970666, "learning_rate": 1.828197235465149e-05, "loss": 0.4617, "num_tokens": 6048927344.0, "step": 7905 }, { "epoch": 2.896216909407346, "grad_norm": 0.13206679211028383, "learning_rate": 1.827779194033677e-05, "loss": 0.4826, "num_tokens": 6049569430.0, "step": 7906 }, { "epoch": 2.896583310433269, "grad_norm": 0.13224882537323956, "learning_rate": 1.8273611735748153e-05, "loss": 0.438, "num_tokens": 6050378200.0, "step": 7907 }, { "epoch": 2.8969497114591922, "grad_norm": 0.1469397034484565, "learning_rate": 1.826943174112118e-05, "loss": 0.4908, "num_tokens": 6051113364.0, "step": 7908 }, { "epoch": 2.897316112485115, "grad_norm": 0.13335373353462426, "learning_rate": 1.826525195669136e-05, "loss": 0.4363, "num_tokens": 6051867658.0, "step": 7909 }, { "epoch": 2.897682513511038, "grad_norm": 0.12701906767819482, "learning_rate": 1.8261072382694216e-05, "loss": 0.4405, "num_tokens": 6052564077.0, "step": 7910 }, { "epoch": 2.8980489145369606, "grad_norm": 0.14722923263654442, "learning_rate": 1.8256893019365232e-05, "loss": 0.4366, "num_tokens": 6053331363.0, "step": 7911 }, { "epoch": 2.8984153155628833, "grad_norm": 0.12086262846234325, "learning_rate": 1.8252713866939896e-05, "loss": 0.4611, "num_tokens": 6054086328.0, "step": 7912 }, { "epoch": 2.8987817165888066, "grad_norm": 0.14109885321018065, "learning_rate": 1.824853492565368e-05, "loss": 0.4677, "num_tokens": 6054792403.0, "step": 7913 }, { "epoch": 2.8991481176147293, "grad_norm": 0.1348226512753872, "learning_rate": 1.8244356195742044e-05, "loss": 0.4335, "num_tokens": 6055611677.0, "step": 7914 }, { "epoch": 2.899514518640652, "grad_norm": 0.13475411422279424, "learning_rate": 1.824017767744044e-05, "loss": 0.4868, "num_tokens": 6056370220.0, "step": 7915 }, { "epoch": 2.899880919666575, "grad_norm": 0.12963828131747684, "learning_rate": 1.8235999370984304e-05, "loss": 0.4461, "num_tokens": 6057075377.0, "step": 7916 }, { "epoch": 2.9002473206924977, "grad_norm": 0.13957018661259926, "learning_rate": 1.8231821276609052e-05, "loss": 0.4369, "num_tokens": 6057757252.0, "step": 7917 }, { "epoch": 2.900613721718421, "grad_norm": 0.14682901684501717, "learning_rate": 1.822764339455011e-05, "loss": 0.4315, "num_tokens": 6058554586.0, "step": 7918 }, { "epoch": 2.9009801227443437, "grad_norm": 0.10798025274671123, "learning_rate": 1.8223465725042873e-05, "loss": 0.4619, "num_tokens": 6059536753.0, "step": 7919 }, { "epoch": 2.9013465237702665, "grad_norm": 0.12539369039019108, "learning_rate": 1.8219288268322725e-05, "loss": 0.4485, "num_tokens": 6060349196.0, "step": 7920 }, { "epoch": 2.9017129247961897, "grad_norm": 0.13089631509048963, "learning_rate": 1.8215111024625048e-05, "loss": 0.4383, "num_tokens": 6061201481.0, "step": 7921 }, { "epoch": 2.902079325822112, "grad_norm": 0.13673717945253464, "learning_rate": 1.821093399418521e-05, "loss": 0.5136, "num_tokens": 6061888717.0, "step": 7922 }, { "epoch": 2.9024457268480353, "grad_norm": 0.1438662894660189, "learning_rate": 1.820675717723856e-05, "loss": 0.4562, "num_tokens": 6062720987.0, "step": 7923 }, { "epoch": 2.902812127873958, "grad_norm": 0.12759222132174156, "learning_rate": 1.820258057402044e-05, "loss": 0.4554, "num_tokens": 6063511366.0, "step": 7924 }, { "epoch": 2.903178528899881, "grad_norm": 0.12375881984576366, "learning_rate": 1.8198404184766173e-05, "loss": 0.4911, "num_tokens": 6064220600.0, "step": 7925 }, { "epoch": 2.903544929925804, "grad_norm": 0.14391785795545234, "learning_rate": 1.8194228009711083e-05, "loss": 0.4568, "num_tokens": 6065011980.0, "step": 7926 }, { "epoch": 2.903911330951727, "grad_norm": 0.12838465598873824, "learning_rate": 1.8190052049090475e-05, "loss": 0.4765, "num_tokens": 6065766971.0, "step": 7927 }, { "epoch": 2.9042777319776496, "grad_norm": 0.1332978337162018, "learning_rate": 1.8185876303139633e-05, "loss": 0.4155, "num_tokens": 6066492162.0, "step": 7928 }, { "epoch": 2.9046441330035724, "grad_norm": 0.13665529562899578, "learning_rate": 1.8181700772093848e-05, "loss": 0.4592, "num_tokens": 6067237822.0, "step": 7929 }, { "epoch": 2.905010534029495, "grad_norm": 0.12964478789410389, "learning_rate": 1.817752545618838e-05, "loss": 0.4271, "num_tokens": 6067986525.0, "step": 7930 }, { "epoch": 2.9053769350554184, "grad_norm": 0.13617665892233308, "learning_rate": 1.817335035565849e-05, "loss": 0.4477, "num_tokens": 6068874258.0, "step": 7931 }, { "epoch": 2.905743336081341, "grad_norm": 0.11682889447261564, "learning_rate": 1.8169175470739428e-05, "loss": 0.4293, "num_tokens": 6069703314.0, "step": 7932 }, { "epoch": 2.906109737107264, "grad_norm": 0.13976766535813398, "learning_rate": 1.816500080166641e-05, "loss": 0.5116, "num_tokens": 6070400730.0, "step": 7933 }, { "epoch": 2.9064761381331867, "grad_norm": 0.1360752940691497, "learning_rate": 1.816082634867467e-05, "loss": 0.4663, "num_tokens": 6071230318.0, "step": 7934 }, { "epoch": 2.9068425391591095, "grad_norm": 0.13637064685401032, "learning_rate": 1.815665211199941e-05, "loss": 0.4759, "num_tokens": 6071990759.0, "step": 7935 }, { "epoch": 2.9072089401850327, "grad_norm": 0.12600032748685863, "learning_rate": 1.8152478091875816e-05, "loss": 0.4459, "num_tokens": 6072713359.0, "step": 7936 }, { "epoch": 2.9075753412109555, "grad_norm": 0.13338286668499982, "learning_rate": 1.81483042885391e-05, "loss": 0.4928, "num_tokens": 6073460469.0, "step": 7937 }, { "epoch": 2.9079417422368783, "grad_norm": 0.1364150835663648, "learning_rate": 1.8144130702224405e-05, "loss": 0.4309, "num_tokens": 6074246520.0, "step": 7938 }, { "epoch": 2.908308143262801, "grad_norm": 0.130152114107298, "learning_rate": 1.8139957333166896e-05, "loss": 0.4684, "num_tokens": 6074924077.0, "step": 7939 }, { "epoch": 2.908674544288724, "grad_norm": 0.12963781273756672, "learning_rate": 1.8135784181601726e-05, "loss": 0.4547, "num_tokens": 6075674668.0, "step": 7940 }, { "epoch": 2.909040945314647, "grad_norm": 0.1391632719732164, "learning_rate": 1.813161124776402e-05, "loss": 0.4717, "num_tokens": 6076397118.0, "step": 7941 }, { "epoch": 2.90940734634057, "grad_norm": 0.13784018215178453, "learning_rate": 1.8127438531888914e-05, "loss": 0.4385, "num_tokens": 6077246562.0, "step": 7942 }, { "epoch": 2.9097737473664926, "grad_norm": 0.1248679986250385, "learning_rate": 1.8123266034211512e-05, "loss": 0.4282, "num_tokens": 6078086026.0, "step": 7943 }, { "epoch": 2.9101401483924154, "grad_norm": 0.12300566420544427, "learning_rate": 1.8119093754966898e-05, "loss": 0.4441, "num_tokens": 6078797342.0, "step": 7944 }, { "epoch": 2.910506549418338, "grad_norm": 0.13186113937997007, "learning_rate": 1.811492169439018e-05, "loss": 0.4413, "num_tokens": 6079508147.0, "step": 7945 }, { "epoch": 2.9108729504442614, "grad_norm": 0.13471308736555085, "learning_rate": 1.8110749852716413e-05, "loss": 0.4674, "num_tokens": 6080225750.0, "step": 7946 }, { "epoch": 2.911239351470184, "grad_norm": 0.12718161557501953, "learning_rate": 1.810657823018066e-05, "loss": 0.4613, "num_tokens": 6080968136.0, "step": 7947 }, { "epoch": 2.911605752496107, "grad_norm": 0.14351326233934836, "learning_rate": 1.8102406827017983e-05, "loss": 0.4437, "num_tokens": 6081639456.0, "step": 7948 }, { "epoch": 2.9119721535220298, "grad_norm": 0.1261167508751488, "learning_rate": 1.8098235643463396e-05, "loss": 0.4697, "num_tokens": 6082381129.0, "step": 7949 }, { "epoch": 2.9123385545479525, "grad_norm": 0.1476538116897633, "learning_rate": 1.8094064679751945e-05, "loss": 0.4639, "num_tokens": 6083266513.0, "step": 7950 }, { "epoch": 2.9127049555738758, "grad_norm": 0.12457515185894791, "learning_rate": 1.8089893936118626e-05, "loss": 0.4592, "num_tokens": 6083942614.0, "step": 7951 }, { "epoch": 2.9130713565997985, "grad_norm": 0.1502424580338273, "learning_rate": 1.808572341279844e-05, "loss": 0.4545, "num_tokens": 6084730085.0, "step": 7952 }, { "epoch": 2.9134377576257213, "grad_norm": 0.12917017031674533, "learning_rate": 1.8081553110026376e-05, "loss": 0.4471, "num_tokens": 6085489407.0, "step": 7953 }, { "epoch": 2.913804158651644, "grad_norm": 0.11400944947920888, "learning_rate": 1.8077383028037415e-05, "loss": 0.4536, "num_tokens": 6086286558.0, "step": 7954 }, { "epoch": 2.914170559677567, "grad_norm": 0.13334159020535574, "learning_rate": 1.80732131670665e-05, "loss": 0.4234, "num_tokens": 6087073050.0, "step": 7955 }, { "epoch": 2.91453696070349, "grad_norm": 0.13193027489280976, "learning_rate": 1.806904352734859e-05, "loss": 0.4629, "num_tokens": 6087874749.0, "step": 7956 }, { "epoch": 2.914903361729413, "grad_norm": 0.12905868938679985, "learning_rate": 1.8064874109118633e-05, "loss": 0.4295, "num_tokens": 6088601193.0, "step": 7957 }, { "epoch": 2.9152697627553357, "grad_norm": 0.13734064085715508, "learning_rate": 1.8060704912611525e-05, "loss": 0.4814, "num_tokens": 6089315507.0, "step": 7958 }, { "epoch": 2.9156361637812584, "grad_norm": 0.13340050321046112, "learning_rate": 1.8056535938062204e-05, "loss": 0.4614, "num_tokens": 6090066696.0, "step": 7959 }, { "epoch": 2.9160025648071812, "grad_norm": 0.12939286097394495, "learning_rate": 1.8052367185705556e-05, "loss": 0.468, "num_tokens": 6090851493.0, "step": 7960 }, { "epoch": 2.9163689658331045, "grad_norm": 0.1431702643813988, "learning_rate": 1.8048198655776468e-05, "loss": 0.4831, "num_tokens": 6091549145.0, "step": 7961 }, { "epoch": 2.9167353668590272, "grad_norm": 0.12673349621316704, "learning_rate": 1.8044030348509823e-05, "loss": 0.4348, "num_tokens": 6092230058.0, "step": 7962 }, { "epoch": 2.91710176788495, "grad_norm": 0.14617416743246972, "learning_rate": 1.8039862264140475e-05, "loss": 0.4538, "num_tokens": 6092933328.0, "step": 7963 }, { "epoch": 2.917468168910873, "grad_norm": 0.13177659930776014, "learning_rate": 1.803569440290327e-05, "loss": 0.4664, "num_tokens": 6093675367.0, "step": 7964 }, { "epoch": 2.9178345699367956, "grad_norm": 0.13498835653162755, "learning_rate": 1.803152676503305e-05, "loss": 0.469, "num_tokens": 6094427364.0, "step": 7965 }, { "epoch": 2.918200970962719, "grad_norm": 0.1437959563398841, "learning_rate": 1.8027359350764628e-05, "loss": 0.4478, "num_tokens": 6095177491.0, "step": 7966 }, { "epoch": 2.9185673719886416, "grad_norm": 0.13331750093891437, "learning_rate": 1.8023192160332836e-05, "loss": 0.469, "num_tokens": 6095912758.0, "step": 7967 }, { "epoch": 2.9189337730145644, "grad_norm": 0.14104702810712605, "learning_rate": 1.8019025193972456e-05, "loss": 0.419, "num_tokens": 6096491715.0, "step": 7968 }, { "epoch": 2.9193001740404876, "grad_norm": 0.1486523386638439, "learning_rate": 1.8014858451918272e-05, "loss": 0.4373, "num_tokens": 6097209418.0, "step": 7969 }, { "epoch": 2.91966657506641, "grad_norm": 0.13919133891533803, "learning_rate": 1.801069193440507e-05, "loss": 0.4374, "num_tokens": 6097882271.0, "step": 7970 }, { "epoch": 2.920032976092333, "grad_norm": 0.12834774351196807, "learning_rate": 1.8006525641667596e-05, "loss": 0.4614, "num_tokens": 6098622902.0, "step": 7971 }, { "epoch": 2.920399377118256, "grad_norm": 0.1259505358424034, "learning_rate": 1.8002359573940614e-05, "loss": 0.4525, "num_tokens": 6099405846.0, "step": 7972 }, { "epoch": 2.9207657781441787, "grad_norm": 0.1406685231866564, "learning_rate": 1.799819373145885e-05, "loss": 0.4872, "num_tokens": 6100147922.0, "step": 7973 }, { "epoch": 2.921132179170102, "grad_norm": 0.141384372846145, "learning_rate": 1.7994028114457015e-05, "loss": 0.4577, "num_tokens": 6100862575.0, "step": 7974 }, { "epoch": 2.9214985801960247, "grad_norm": 0.12936892510509565, "learning_rate": 1.798986272316984e-05, "loss": 0.4742, "num_tokens": 6101582823.0, "step": 7975 }, { "epoch": 2.9218649812219475, "grad_norm": 0.139769471226426, "learning_rate": 1.7985697557832007e-05, "loss": 0.4742, "num_tokens": 6102349981.0, "step": 7976 }, { "epoch": 2.9222313822478703, "grad_norm": 0.13874357763802175, "learning_rate": 1.7981532618678216e-05, "loss": 0.4734, "num_tokens": 6103147737.0, "step": 7977 }, { "epoch": 2.922597783273793, "grad_norm": 0.13510220157698627, "learning_rate": 1.7977367905943118e-05, "loss": 0.4199, "num_tokens": 6103923575.0, "step": 7978 }, { "epoch": 2.9229641842997163, "grad_norm": 0.13197179113039792, "learning_rate": 1.797320341986139e-05, "loss": 0.469, "num_tokens": 6104685258.0, "step": 7979 }, { "epoch": 2.923330585325639, "grad_norm": 0.12720683817095807, "learning_rate": 1.796903916066767e-05, "loss": 0.4535, "num_tokens": 6105489713.0, "step": 7980 }, { "epoch": 2.923696986351562, "grad_norm": 0.1563613118378562, "learning_rate": 1.796487512859659e-05, "loss": 0.4484, "num_tokens": 6106202469.0, "step": 7981 }, { "epoch": 2.9240633873774846, "grad_norm": 0.14858888909233092, "learning_rate": 1.7960711323882765e-05, "loss": 0.472, "num_tokens": 6106965230.0, "step": 7982 }, { "epoch": 2.9244297884034074, "grad_norm": 0.12721328320978542, "learning_rate": 1.795654774676082e-05, "loss": 0.477, "num_tokens": 6107741828.0, "step": 7983 }, { "epoch": 2.9247961894293306, "grad_norm": 0.14860947854413456, "learning_rate": 1.795238439746534e-05, "loss": 0.471, "num_tokens": 6108507670.0, "step": 7984 }, { "epoch": 2.9251625904552534, "grad_norm": 0.13141829704771402, "learning_rate": 1.7948221276230898e-05, "loss": 0.4347, "num_tokens": 6109222885.0, "step": 7985 }, { "epoch": 2.925528991481176, "grad_norm": 0.13558687628663463, "learning_rate": 1.7944058383292077e-05, "loss": 0.4527, "num_tokens": 6110035923.0, "step": 7986 }, { "epoch": 2.925895392507099, "grad_norm": 0.12523324890422138, "learning_rate": 1.793989571888343e-05, "loss": 0.4309, "num_tokens": 6110750115.0, "step": 7987 }, { "epoch": 2.9262617935330217, "grad_norm": 0.12369974494344803, "learning_rate": 1.7935733283239495e-05, "loss": 0.4463, "num_tokens": 6111498837.0, "step": 7988 }, { "epoch": 2.926628194558945, "grad_norm": 0.13638156826856992, "learning_rate": 1.7931571076594815e-05, "loss": 0.4504, "num_tokens": 6112297697.0, "step": 7989 }, { "epoch": 2.9269945955848677, "grad_norm": 0.1203931144369287, "learning_rate": 1.7927409099183887e-05, "loss": 0.433, "num_tokens": 6113027175.0, "step": 7990 }, { "epoch": 2.9273609966107905, "grad_norm": 0.14018886518862406, "learning_rate": 1.7923247351241233e-05, "loss": 0.4576, "num_tokens": 6113787756.0, "step": 7991 }, { "epoch": 2.9277273976367133, "grad_norm": 0.14184399095653516, "learning_rate": 1.7919085833001343e-05, "loss": 0.4323, "num_tokens": 6114542649.0, "step": 7992 }, { "epoch": 2.928093798662636, "grad_norm": 0.12756755955049456, "learning_rate": 1.791492454469868e-05, "loss": 0.4436, "num_tokens": 6115359594.0, "step": 7993 }, { "epoch": 2.9284601996885593, "grad_norm": 0.11326589635260144, "learning_rate": 1.7910763486567736e-05, "loss": 0.4459, "num_tokens": 6116189109.0, "step": 7994 }, { "epoch": 2.928826600714482, "grad_norm": 0.1240754318302919, "learning_rate": 1.7906602658842943e-05, "loss": 0.4264, "num_tokens": 6117049747.0, "step": 7995 }, { "epoch": 2.929193001740405, "grad_norm": 0.1255847962682845, "learning_rate": 1.7902442061758746e-05, "loss": 0.4585, "num_tokens": 6117862950.0, "step": 7996 }, { "epoch": 2.9295594027663276, "grad_norm": 0.14246914766850338, "learning_rate": 1.789828169554958e-05, "loss": 0.4249, "num_tokens": 6118584102.0, "step": 7997 }, { "epoch": 2.9299258037922504, "grad_norm": 0.1262719592276961, "learning_rate": 1.7894121560449845e-05, "loss": 0.4667, "num_tokens": 6119245597.0, "step": 7998 }, { "epoch": 2.9302922048181737, "grad_norm": 0.14969427085687984, "learning_rate": 1.788996165669396e-05, "loss": 0.4756, "num_tokens": 6119955436.0, "step": 7999 }, { "epoch": 2.9306586058440964, "grad_norm": 0.14531116149642645, "learning_rate": 1.7885801984516295e-05, "loss": 0.4628, "num_tokens": 6120711161.0, "step": 8000 }, { "epoch": 2.931025006870019, "grad_norm": 0.1426370864325346, "learning_rate": 1.7881642544151235e-05, "loss": 0.46, "num_tokens": 6121508029.0, "step": 8001 }, { "epoch": 2.931391407895942, "grad_norm": 0.12726455942930184, "learning_rate": 1.787748333583314e-05, "loss": 0.4729, "num_tokens": 6122253692.0, "step": 8002 }, { "epoch": 2.9317578089218648, "grad_norm": 0.13268020340945147, "learning_rate": 1.787332435979636e-05, "loss": 0.4667, "num_tokens": 6123048544.0, "step": 8003 }, { "epoch": 2.932124209947788, "grad_norm": 0.1333124052683734, "learning_rate": 1.7869165616275222e-05, "loss": 0.4783, "num_tokens": 6123832425.0, "step": 8004 }, { "epoch": 2.9324906109737108, "grad_norm": 0.13381767551039003, "learning_rate": 1.7865007105504054e-05, "loss": 0.4642, "num_tokens": 6124546326.0, "step": 8005 }, { "epoch": 2.9328570119996336, "grad_norm": 0.13528934418554833, "learning_rate": 1.7860848827717165e-05, "loss": 0.4593, "num_tokens": 6125238031.0, "step": 8006 }, { "epoch": 2.9332234130255563, "grad_norm": 0.12453953618326424, "learning_rate": 1.7856690783148862e-05, "loss": 0.4291, "num_tokens": 6126050283.0, "step": 8007 }, { "epoch": 2.933589814051479, "grad_norm": 0.13526595728715704, "learning_rate": 1.7852532972033417e-05, "loss": 0.4668, "num_tokens": 6126743414.0, "step": 8008 }, { "epoch": 2.9339562150774023, "grad_norm": 0.13553140865581295, "learning_rate": 1.7848375394605093e-05, "loss": 0.4492, "num_tokens": 6127451708.0, "step": 8009 }, { "epoch": 2.934322616103325, "grad_norm": 0.12571609431434025, "learning_rate": 1.7844218051098164e-05, "loss": 0.4683, "num_tokens": 6128146738.0, "step": 8010 }, { "epoch": 2.934689017129248, "grad_norm": 0.15413368189124022, "learning_rate": 1.7840060941746866e-05, "loss": 0.4834, "num_tokens": 6128910504.0, "step": 8011 }, { "epoch": 2.9350554181551707, "grad_norm": 0.12537171576458422, "learning_rate": 1.7835904066785418e-05, "loss": 0.4518, "num_tokens": 6129804459.0, "step": 8012 }, { "epoch": 2.9354218191810935, "grad_norm": 0.13424064975610292, "learning_rate": 1.7831747426448057e-05, "loss": 0.4539, "num_tokens": 6130566929.0, "step": 8013 }, { "epoch": 2.9357882202070167, "grad_norm": 0.1302159314677068, "learning_rate": 1.7827591020968972e-05, "loss": 0.4806, "num_tokens": 6131254539.0, "step": 8014 }, { "epoch": 2.9361546212329395, "grad_norm": 0.1373414671291786, "learning_rate": 1.7823434850582363e-05, "loss": 0.4529, "num_tokens": 6132084259.0, "step": 8015 }, { "epoch": 2.9365210222588622, "grad_norm": 0.12937515255216536, "learning_rate": 1.7819278915522406e-05, "loss": 0.4106, "num_tokens": 6132828868.0, "step": 8016 }, { "epoch": 2.9368874232847855, "grad_norm": 0.11977644035133866, "learning_rate": 1.781512321602326e-05, "loss": 0.444, "num_tokens": 6133611122.0, "step": 8017 }, { "epoch": 2.937253824310708, "grad_norm": 0.12950982002668032, "learning_rate": 1.781096775231908e-05, "loss": 0.4357, "num_tokens": 6134394825.0, "step": 8018 }, { "epoch": 2.937620225336631, "grad_norm": 0.1278596899313048, "learning_rate": 1.7806812524644005e-05, "loss": 0.4611, "num_tokens": 6135206947.0, "step": 8019 }, { "epoch": 2.937986626362554, "grad_norm": 0.12293111803313556, "learning_rate": 1.780265753323215e-05, "loss": 0.4403, "num_tokens": 6135993902.0, "step": 8020 }, { "epoch": 2.9383530273884766, "grad_norm": 0.137570197405374, "learning_rate": 1.779850277831764e-05, "loss": 0.4655, "num_tokens": 6136736960.0, "step": 8021 }, { "epoch": 2.9387194284144, "grad_norm": 0.14338950749270887, "learning_rate": 1.7794348260134567e-05, "loss": 0.4491, "num_tokens": 6137419705.0, "step": 8022 }, { "epoch": 2.9390858294403226, "grad_norm": 0.12377854998534012, "learning_rate": 1.779019397891701e-05, "loss": 0.4377, "num_tokens": 6138166678.0, "step": 8023 }, { "epoch": 2.9394522304662454, "grad_norm": 0.12704284934077284, "learning_rate": 1.778603993489905e-05, "loss": 0.4775, "num_tokens": 6138918449.0, "step": 8024 }, { "epoch": 2.939818631492168, "grad_norm": 0.13441689389754427, "learning_rate": 1.7781886128314732e-05, "loss": 0.4186, "num_tokens": 6139744729.0, "step": 8025 }, { "epoch": 2.940185032518091, "grad_norm": 0.13629502322370865, "learning_rate": 1.777773255939811e-05, "loss": 0.4724, "num_tokens": 6140493182.0, "step": 8026 }, { "epoch": 2.940551433544014, "grad_norm": 0.13299671564489782, "learning_rate": 1.7773579228383218e-05, "loss": 0.423, "num_tokens": 6141292711.0, "step": 8027 }, { "epoch": 2.940917834569937, "grad_norm": 0.12649489602244632, "learning_rate": 1.7769426135504063e-05, "loss": 0.4369, "num_tokens": 6141983457.0, "step": 8028 }, { "epoch": 2.9412842355958597, "grad_norm": 0.12369627665639507, "learning_rate": 1.7765273280994656e-05, "loss": 0.4366, "num_tokens": 6142739000.0, "step": 8029 }, { "epoch": 2.9416506366217825, "grad_norm": 0.1290534923776512, "learning_rate": 1.776112066508899e-05, "loss": 0.447, "num_tokens": 6143484956.0, "step": 8030 }, { "epoch": 2.9420170376477053, "grad_norm": 0.12430605362760627, "learning_rate": 1.775696828802103e-05, "loss": 0.463, "num_tokens": 6144451595.0, "step": 8031 }, { "epoch": 2.9423834386736285, "grad_norm": 0.11671889130826454, "learning_rate": 1.775281615002475e-05, "loss": 0.4282, "num_tokens": 6145236811.0, "step": 8032 }, { "epoch": 2.9427498396995513, "grad_norm": 0.13736577757063914, "learning_rate": 1.77486642513341e-05, "loss": 0.439, "num_tokens": 6146059992.0, "step": 8033 }, { "epoch": 2.943116240725474, "grad_norm": 0.13365424440989046, "learning_rate": 1.7744512592183016e-05, "loss": 0.4825, "num_tokens": 6146835988.0, "step": 8034 }, { "epoch": 2.943482641751397, "grad_norm": 0.1300377398174687, "learning_rate": 1.774036117280542e-05, "loss": 0.4624, "num_tokens": 6147609182.0, "step": 8035 }, { "epoch": 2.9438490427773196, "grad_norm": 0.1428674314981844, "learning_rate": 1.7736209993435216e-05, "loss": 0.4689, "num_tokens": 6148323807.0, "step": 8036 }, { "epoch": 2.944215443803243, "grad_norm": 0.13528330246882803, "learning_rate": 1.773205905430632e-05, "loss": 0.4271, "num_tokens": 6149134764.0, "step": 8037 }, { "epoch": 2.9445818448291656, "grad_norm": 0.1461835942034974, "learning_rate": 1.7727908355652596e-05, "loss": 0.4551, "num_tokens": 6149950409.0, "step": 8038 }, { "epoch": 2.9449482458550884, "grad_norm": 0.1297374743226966, "learning_rate": 1.772375789770791e-05, "loss": 0.4339, "num_tokens": 6150750682.0, "step": 8039 }, { "epoch": 2.945314646881011, "grad_norm": 0.14253344109288735, "learning_rate": 1.7719607680706134e-05, "loss": 0.475, "num_tokens": 6151485868.0, "step": 8040 }, { "epoch": 2.945681047906934, "grad_norm": 0.15343173826466397, "learning_rate": 1.7715457704881106e-05, "loss": 0.4461, "num_tokens": 6152272292.0, "step": 8041 }, { "epoch": 2.946047448932857, "grad_norm": 0.12427273516797166, "learning_rate": 1.7711307970466637e-05, "loss": 0.4299, "num_tokens": 6152980293.0, "step": 8042 }, { "epoch": 2.94641384995878, "grad_norm": 0.1393963856635992, "learning_rate": 1.7707158477696567e-05, "loss": 0.4331, "num_tokens": 6153800257.0, "step": 8043 }, { "epoch": 2.9467802509847028, "grad_norm": 0.13052992303752747, "learning_rate": 1.770300922680468e-05, "loss": 0.4451, "num_tokens": 6154506148.0, "step": 8044 }, { "epoch": 2.9471466520106255, "grad_norm": 0.13620584351745027, "learning_rate": 1.7698860218024774e-05, "loss": 0.4627, "num_tokens": 6155219539.0, "step": 8045 }, { "epoch": 2.9475130530365483, "grad_norm": 0.14506760454176235, "learning_rate": 1.769471145159062e-05, "loss": 0.4726, "num_tokens": 6155907599.0, "step": 8046 }, { "epoch": 2.9478794540624715, "grad_norm": 0.13806727925339823, "learning_rate": 1.769056292773597e-05, "loss": 0.4866, "num_tokens": 6156712621.0, "step": 8047 }, { "epoch": 2.9482458550883943, "grad_norm": 0.13608872276999076, "learning_rate": 1.7686414646694584e-05, "loss": 0.4869, "num_tokens": 6157504303.0, "step": 8048 }, { "epoch": 2.948612256114317, "grad_norm": 0.1364560188759127, "learning_rate": 1.7682266608700185e-05, "loss": 0.4451, "num_tokens": 6158215617.0, "step": 8049 }, { "epoch": 2.94897865714024, "grad_norm": 0.1261725284363221, "learning_rate": 1.7678118813986494e-05, "loss": 0.4727, "num_tokens": 6159076334.0, "step": 8050 }, { "epoch": 2.9493450581661627, "grad_norm": 0.12825854037656897, "learning_rate": 1.7673971262787226e-05, "loss": 0.4487, "num_tokens": 6159921878.0, "step": 8051 }, { "epoch": 2.949711459192086, "grad_norm": 0.11974514894559497, "learning_rate": 1.7669823955336055e-05, "loss": 0.4505, "num_tokens": 6160739181.0, "step": 8052 }, { "epoch": 2.9500778602180087, "grad_norm": 0.13023204889298662, "learning_rate": 1.7665676891866676e-05, "loss": 0.4495, "num_tokens": 6161603618.0, "step": 8053 }, { "epoch": 2.9504442612439314, "grad_norm": 0.13809638429011578, "learning_rate": 1.7661530072612752e-05, "loss": 0.4393, "num_tokens": 6162361215.0, "step": 8054 }, { "epoch": 2.9508106622698542, "grad_norm": 0.11460636635564227, "learning_rate": 1.765738349780792e-05, "loss": 0.4603, "num_tokens": 6163221823.0, "step": 8055 }, { "epoch": 2.951177063295777, "grad_norm": 0.1360206820331436, "learning_rate": 1.765323716768583e-05, "loss": 0.4632, "num_tokens": 6164032946.0, "step": 8056 }, { "epoch": 2.9515434643217002, "grad_norm": 0.1357771548726299, "learning_rate": 1.7649091082480096e-05, "loss": 0.471, "num_tokens": 6164794999.0, "step": 8057 }, { "epoch": 2.951909865347623, "grad_norm": 0.13774886274796563, "learning_rate": 1.764494524242433e-05, "loss": 0.4393, "num_tokens": 6165625451.0, "step": 8058 }, { "epoch": 2.952276266373546, "grad_norm": 0.13081036758098916, "learning_rate": 1.764079964775214e-05, "loss": 0.4473, "num_tokens": 6166306087.0, "step": 8059 }, { "epoch": 2.9526426673994686, "grad_norm": 0.14595701125020466, "learning_rate": 1.763665429869709e-05, "loss": 0.4616, "num_tokens": 6167000451.0, "step": 8060 }, { "epoch": 2.9530090684253913, "grad_norm": 0.13935045072054808, "learning_rate": 1.7632509195492754e-05, "loss": 0.4508, "num_tokens": 6167749784.0, "step": 8061 }, { "epoch": 2.9533754694513146, "grad_norm": 0.13362617533589483, "learning_rate": 1.7628364338372685e-05, "loss": 0.466, "num_tokens": 6168443693.0, "step": 8062 }, { "epoch": 2.9537418704772374, "grad_norm": 0.14007951110661016, "learning_rate": 1.7624219727570424e-05, "loss": 0.4693, "num_tokens": 6169153829.0, "step": 8063 }, { "epoch": 2.95410827150316, "grad_norm": 0.15887773198357452, "learning_rate": 1.76200753633195e-05, "loss": 0.4738, "num_tokens": 6169948527.0, "step": 8064 }, { "epoch": 2.9544746725290834, "grad_norm": 0.1377897133715436, "learning_rate": 1.761593124585343e-05, "loss": 0.424, "num_tokens": 6170658479.0, "step": 8065 }, { "epoch": 2.9548410735550057, "grad_norm": 0.12265177226391563, "learning_rate": 1.7611787375405694e-05, "loss": 0.4474, "num_tokens": 6171461711.0, "step": 8066 }, { "epoch": 2.955207474580929, "grad_norm": 0.1361560562518908, "learning_rate": 1.7607643752209792e-05, "loss": 0.4087, "num_tokens": 6172203982.0, "step": 8067 }, { "epoch": 2.9555738756068517, "grad_norm": 0.13775841466873312, "learning_rate": 1.7603500376499198e-05, "loss": 0.4783, "num_tokens": 6173012604.0, "step": 8068 }, { "epoch": 2.9559402766327745, "grad_norm": 0.12888354011180367, "learning_rate": 1.7599357248507353e-05, "loss": 0.4311, "num_tokens": 6173764742.0, "step": 8069 }, { "epoch": 2.9563066776586977, "grad_norm": 0.12511838281472643, "learning_rate": 1.7595214368467706e-05, "loss": 0.4826, "num_tokens": 6174613428.0, "step": 8070 }, { "epoch": 2.9566730786846205, "grad_norm": 0.1404628561349181, "learning_rate": 1.7591071736613687e-05, "loss": 0.4943, "num_tokens": 6175257233.0, "step": 8071 }, { "epoch": 2.9570394797105433, "grad_norm": 0.1402494910967136, "learning_rate": 1.7586929353178717e-05, "loss": 0.4705, "num_tokens": 6175992232.0, "step": 8072 }, { "epoch": 2.957405880736466, "grad_norm": 0.13977199543465074, "learning_rate": 1.7582787218396195e-05, "loss": 0.4704, "num_tokens": 6176672131.0, "step": 8073 }, { "epoch": 2.957772281762389, "grad_norm": 0.13707170644643235, "learning_rate": 1.7578645332499495e-05, "loss": 0.4411, "num_tokens": 6177420818.0, "step": 8074 }, { "epoch": 2.958138682788312, "grad_norm": 0.13029124666879532, "learning_rate": 1.7574503695722e-05, "loss": 0.4346, "num_tokens": 6178137792.0, "step": 8075 }, { "epoch": 2.958505083814235, "grad_norm": 0.12223459803598102, "learning_rate": 1.757036230829707e-05, "loss": 0.4488, "num_tokens": 6178847932.0, "step": 8076 }, { "epoch": 2.9588714848401576, "grad_norm": 0.12874335590111305, "learning_rate": 1.7566221170458034e-05, "loss": 0.4175, "num_tokens": 6179734860.0, "step": 8077 }, { "epoch": 2.9592378858660804, "grad_norm": 0.1221368710345394, "learning_rate": 1.756208028243825e-05, "loss": 0.4871, "num_tokens": 6180569906.0, "step": 8078 }, { "epoch": 2.959604286892003, "grad_norm": 0.14447588577528686, "learning_rate": 1.7557939644471015e-05, "loss": 0.488, "num_tokens": 6181203970.0, "step": 8079 }, { "epoch": 2.9599706879179264, "grad_norm": 0.13919689410829184, "learning_rate": 1.7553799256789634e-05, "loss": 0.4597, "num_tokens": 6181890192.0, "step": 8080 }, { "epoch": 2.960337088943849, "grad_norm": 0.14214472922202698, "learning_rate": 1.7549659119627402e-05, "loss": 0.4524, "num_tokens": 6182568009.0, "step": 8081 }, { "epoch": 2.960703489969772, "grad_norm": 0.12542702758116533, "learning_rate": 1.754551923321758e-05, "loss": 0.4439, "num_tokens": 6183489818.0, "step": 8082 }, { "epoch": 2.9610698909956947, "grad_norm": 0.13807956421795603, "learning_rate": 1.7541379597793442e-05, "loss": 0.4392, "num_tokens": 6184333448.0, "step": 8083 }, { "epoch": 2.9614362920216175, "grad_norm": 0.13189995313020159, "learning_rate": 1.7537240213588226e-05, "loss": 0.4778, "num_tokens": 6185058038.0, "step": 8084 }, { "epoch": 2.9618026930475407, "grad_norm": 0.131395062888923, "learning_rate": 1.753310108083516e-05, "loss": 0.4259, "num_tokens": 6185776825.0, "step": 8085 }, { "epoch": 2.9621690940734635, "grad_norm": 0.13538916354953898, "learning_rate": 1.752896219976748e-05, "loss": 0.4722, "num_tokens": 6186509029.0, "step": 8086 }, { "epoch": 2.9625354950993863, "grad_norm": 0.13235649731342203, "learning_rate": 1.7524823570618366e-05, "loss": 0.4297, "num_tokens": 6187318218.0, "step": 8087 }, { "epoch": 2.962901896125309, "grad_norm": 0.12951456225733443, "learning_rate": 1.7520685193621017e-05, "loss": 0.4827, "num_tokens": 6188134438.0, "step": 8088 }, { "epoch": 2.963268297151232, "grad_norm": 0.13882930013449793, "learning_rate": 1.7516547069008617e-05, "loss": 0.4697, "num_tokens": 6188805613.0, "step": 8089 }, { "epoch": 2.963634698177155, "grad_norm": 0.13339709069342703, "learning_rate": 1.7512409197014307e-05, "loss": 0.4568, "num_tokens": 6189529216.0, "step": 8090 }, { "epoch": 2.964001099203078, "grad_norm": 0.1459136418103115, "learning_rate": 1.7508271577871258e-05, "loss": 0.4376, "num_tokens": 6190161420.0, "step": 8091 }, { "epoch": 2.9643675002290006, "grad_norm": 0.13627800984575925, "learning_rate": 1.7504134211812586e-05, "loss": 0.4681, "num_tokens": 6190880331.0, "step": 8092 }, { "epoch": 2.9647339012549234, "grad_norm": 0.14576473588340488, "learning_rate": 1.7499997099071406e-05, "loss": 0.4715, "num_tokens": 6191494796.0, "step": 8093 }, { "epoch": 2.965100302280846, "grad_norm": 0.1393208613443358, "learning_rate": 1.7495860239880835e-05, "loss": 0.4523, "num_tokens": 6192323972.0, "step": 8094 }, { "epoch": 2.9654667033067694, "grad_norm": 0.13427507329103933, "learning_rate": 1.7491723634473953e-05, "loss": 0.4668, "num_tokens": 6193021855.0, "step": 8095 }, { "epoch": 2.965833104332692, "grad_norm": 0.14992233358232776, "learning_rate": 1.748758728308383e-05, "loss": 0.4731, "num_tokens": 6193699006.0, "step": 8096 }, { "epoch": 2.966199505358615, "grad_norm": 0.13231714807990544, "learning_rate": 1.748345118594354e-05, "loss": 0.4231, "num_tokens": 6194467966.0, "step": 8097 }, { "epoch": 2.9665659063845378, "grad_norm": 0.14195287645559168, "learning_rate": 1.747931534328612e-05, "loss": 0.4541, "num_tokens": 6195162316.0, "step": 8098 }, { "epoch": 2.9669323074104605, "grad_norm": 0.13086464831323225, "learning_rate": 1.7475179755344617e-05, "loss": 0.4569, "num_tokens": 6195936440.0, "step": 8099 }, { "epoch": 2.9672987084363838, "grad_norm": 0.12962927202931887, "learning_rate": 1.747104442235203e-05, "loss": 0.437, "num_tokens": 6196703602.0, "step": 8100 }, { "epoch": 2.9676651094623065, "grad_norm": 0.12664241285924532, "learning_rate": 1.746690934454137e-05, "loss": 0.433, "num_tokens": 6197421515.0, "step": 8101 }, { "epoch": 2.9680315104882293, "grad_norm": 0.1468495234552349, "learning_rate": 1.7462774522145624e-05, "loss": 0.4523, "num_tokens": 6198244998.0, "step": 8102 }, { "epoch": 2.968397911514152, "grad_norm": 0.13499471696103132, "learning_rate": 1.7458639955397778e-05, "loss": 0.4713, "num_tokens": 6198906584.0, "step": 8103 }, { "epoch": 2.968764312540075, "grad_norm": 0.13634972982134663, "learning_rate": 1.7454505644530772e-05, "loss": 0.4751, "num_tokens": 6199637961.0, "step": 8104 }, { "epoch": 2.969130713565998, "grad_norm": 0.12757754194643214, "learning_rate": 1.7450371589777568e-05, "loss": 0.4559, "num_tokens": 6200337629.0, "step": 8105 }, { "epoch": 2.969497114591921, "grad_norm": 0.13152481248039913, "learning_rate": 1.74462377913711e-05, "loss": 0.48, "num_tokens": 6201042891.0, "step": 8106 }, { "epoch": 2.9698635156178437, "grad_norm": 0.13513978325258288, "learning_rate": 1.7442104249544263e-05, "loss": 0.4756, "num_tokens": 6201837146.0, "step": 8107 }, { "epoch": 2.9702299166437665, "grad_norm": 0.14657864117870853, "learning_rate": 1.743797096452999e-05, "loss": 0.4662, "num_tokens": 6202647325.0, "step": 8108 }, { "epoch": 2.9705963176696892, "grad_norm": 0.1331236772343204, "learning_rate": 1.7433837936561135e-05, "loss": 0.4678, "num_tokens": 6203504282.0, "step": 8109 }, { "epoch": 2.9709627186956125, "grad_norm": 0.12569662868125525, "learning_rate": 1.74297051658706e-05, "loss": 0.4717, "num_tokens": 6204306158.0, "step": 8110 }, { "epoch": 2.9713291197215352, "grad_norm": 0.13180945159743807, "learning_rate": 1.7425572652691237e-05, "loss": 0.4529, "num_tokens": 6205060426.0, "step": 8111 }, { "epoch": 2.971695520747458, "grad_norm": 0.145095744676129, "learning_rate": 1.7421440397255876e-05, "loss": 0.4481, "num_tokens": 6205876677.0, "step": 8112 }, { "epoch": 2.972061921773381, "grad_norm": 0.13982965029742137, "learning_rate": 1.741730839979737e-05, "loss": 0.4317, "num_tokens": 6206736026.0, "step": 8113 }, { "epoch": 2.9724283227993036, "grad_norm": 0.11258569163049827, "learning_rate": 1.741317666054852e-05, "loss": 0.4492, "num_tokens": 6207572057.0, "step": 8114 }, { "epoch": 2.972794723825227, "grad_norm": 0.13079885203819422, "learning_rate": 1.740904517974212e-05, "loss": 0.4724, "num_tokens": 6208456196.0, "step": 8115 }, { "epoch": 2.9731611248511496, "grad_norm": 0.11920108145990806, "learning_rate": 1.740491395761098e-05, "loss": 0.4259, "num_tokens": 6209413439.0, "step": 8116 }, { "epoch": 2.9735275258770724, "grad_norm": 0.12874443437352007, "learning_rate": 1.7400782994387846e-05, "loss": 0.498, "num_tokens": 6210113881.0, "step": 8117 }, { "epoch": 2.9738939269029956, "grad_norm": 0.12912994807742426, "learning_rate": 1.7396652290305494e-05, "loss": 0.4653, "num_tokens": 6210948081.0, "step": 8118 }, { "epoch": 2.974260327928918, "grad_norm": 0.1435407100267536, "learning_rate": 1.7392521845596658e-05, "loss": 0.4572, "num_tokens": 6211721177.0, "step": 8119 }, { "epoch": 2.974626728954841, "grad_norm": 0.1440395271012327, "learning_rate": 1.738839166049407e-05, "loss": 0.4591, "num_tokens": 6212328232.0, "step": 8120 }, { "epoch": 2.974993129980764, "grad_norm": 0.13707576935876092, "learning_rate": 1.738426173523044e-05, "loss": 0.4558, "num_tokens": 6213049198.0, "step": 8121 }, { "epoch": 2.9753595310066867, "grad_norm": 0.13425305207885294, "learning_rate": 1.738013207003847e-05, "loss": 0.4337, "num_tokens": 6213800857.0, "step": 8122 }, { "epoch": 2.97572593203261, "grad_norm": 0.13006495626366468, "learning_rate": 1.7376002665150836e-05, "loss": 0.4653, "num_tokens": 6214586722.0, "step": 8123 }, { "epoch": 2.9760923330585327, "grad_norm": 0.12411352715390843, "learning_rate": 1.737187352080021e-05, "loss": 0.3951, "num_tokens": 6215322132.0, "step": 8124 }, { "epoch": 2.9764587340844555, "grad_norm": 0.15059090288399832, "learning_rate": 1.7367744637219268e-05, "loss": 0.5039, "num_tokens": 6216010111.0, "step": 8125 }, { "epoch": 2.9768251351103783, "grad_norm": 0.14701506085421845, "learning_rate": 1.7363616014640617e-05, "loss": 0.4573, "num_tokens": 6216760212.0, "step": 8126 }, { "epoch": 2.977191536136301, "grad_norm": 0.11978540355890122, "learning_rate": 1.73594876532969e-05, "loss": 0.466, "num_tokens": 6217655562.0, "step": 8127 }, { "epoch": 2.9775579371622243, "grad_norm": 0.1427051767384442, "learning_rate": 1.7355359553420724e-05, "loss": 0.4567, "num_tokens": 6218435511.0, "step": 8128 }, { "epoch": 2.977924338188147, "grad_norm": 0.11831292159675236, "learning_rate": 1.7351231715244693e-05, "loss": 0.4617, "num_tokens": 6219317433.0, "step": 8129 }, { "epoch": 2.97829073921407, "grad_norm": 0.12459675516326929, "learning_rate": 1.734710413900138e-05, "loss": 0.4622, "num_tokens": 6220045668.0, "step": 8130 }, { "epoch": 2.9786571402399926, "grad_norm": 0.12952368247666612, "learning_rate": 1.7342976824923345e-05, "loss": 0.4189, "num_tokens": 6220854202.0, "step": 8131 }, { "epoch": 2.9790235412659154, "grad_norm": 0.12204116775122943, "learning_rate": 1.7338849773243155e-05, "loss": 0.4729, "num_tokens": 6221607288.0, "step": 8132 }, { "epoch": 2.9793899422918386, "grad_norm": 0.13914800949355843, "learning_rate": 1.733472298419334e-05, "loss": 0.4635, "num_tokens": 6222408239.0, "step": 8133 }, { "epoch": 2.9797563433177614, "grad_norm": 0.11266253019199814, "learning_rate": 1.733059645800641e-05, "loss": 0.4227, "num_tokens": 6223138533.0, "step": 8134 }, { "epoch": 2.980122744343684, "grad_norm": 0.12642132733920455, "learning_rate": 1.7326470194914894e-05, "loss": 0.466, "num_tokens": 6223931445.0, "step": 8135 }, { "epoch": 2.980489145369607, "grad_norm": 0.13004677858460997, "learning_rate": 1.7322344195151274e-05, "loss": 0.4405, "num_tokens": 6224702658.0, "step": 8136 }, { "epoch": 2.9808555463955297, "grad_norm": 0.13211544238337924, "learning_rate": 1.731821845894802e-05, "loss": 0.4448, "num_tokens": 6225516644.0, "step": 8137 }, { "epoch": 2.981221947421453, "grad_norm": 0.13158744828648097, "learning_rate": 1.7314092986537612e-05, "loss": 0.4825, "num_tokens": 6226176151.0, "step": 8138 }, { "epoch": 2.9815883484473757, "grad_norm": 0.13290821350451876, "learning_rate": 1.7309967778152477e-05, "loss": 0.4828, "num_tokens": 6226951687.0, "step": 8139 }, { "epoch": 2.9819547494732985, "grad_norm": 0.14121981180461168, "learning_rate": 1.7305842834025067e-05, "loss": 0.4904, "num_tokens": 6227745563.0, "step": 8140 }, { "epoch": 2.9823211504992213, "grad_norm": 0.12852768519545626, "learning_rate": 1.7301718154387787e-05, "loss": 0.4429, "num_tokens": 6228424279.0, "step": 8141 }, { "epoch": 2.982687551525144, "grad_norm": 0.13681715300095068, "learning_rate": 1.7297593739473044e-05, "loss": 0.4608, "num_tokens": 6229111397.0, "step": 8142 }, { "epoch": 2.9830539525510673, "grad_norm": 0.13699462838129597, "learning_rate": 1.729346958951324e-05, "loss": 0.4702, "num_tokens": 6229876426.0, "step": 8143 }, { "epoch": 2.98342035357699, "grad_norm": 0.1404659434376797, "learning_rate": 1.7289345704740723e-05, "loss": 0.4438, "num_tokens": 6230585597.0, "step": 8144 }, { "epoch": 2.983786754602913, "grad_norm": 0.14345012766922427, "learning_rate": 1.728522208538787e-05, "loss": 0.431, "num_tokens": 6231314286.0, "step": 8145 }, { "epoch": 2.9841531556288357, "grad_norm": 0.13292387061220043, "learning_rate": 1.7281098731687013e-05, "loss": 0.4525, "num_tokens": 6232073351.0, "step": 8146 }, { "epoch": 2.9845195566547584, "grad_norm": 0.14851481270389616, "learning_rate": 1.7276975643870488e-05, "loss": 0.4652, "num_tokens": 6232837549.0, "step": 8147 }, { "epoch": 2.9848859576806817, "grad_norm": 0.1231274334299727, "learning_rate": 1.7272852822170613e-05, "loss": 0.423, "num_tokens": 6233574730.0, "step": 8148 }, { "epoch": 2.9852523587066044, "grad_norm": 0.1314981305727629, "learning_rate": 1.7268730266819673e-05, "loss": 0.4524, "num_tokens": 6234311625.0, "step": 8149 }, { "epoch": 2.985618759732527, "grad_norm": 0.13795006560090994, "learning_rate": 1.726460797804996e-05, "loss": 0.4573, "num_tokens": 6235080104.0, "step": 8150 }, { "epoch": 2.98598516075845, "grad_norm": 0.13058528069764389, "learning_rate": 1.726048595609374e-05, "loss": 0.4677, "num_tokens": 6235906606.0, "step": 8151 }, { "epoch": 2.9863515617843728, "grad_norm": 0.12184853783561518, "learning_rate": 1.7256364201183276e-05, "loss": 0.4583, "num_tokens": 6236786783.0, "step": 8152 }, { "epoch": 2.986717962810296, "grad_norm": 0.12662129228947347, "learning_rate": 1.7252242713550786e-05, "loss": 0.483, "num_tokens": 6237513381.0, "step": 8153 }, { "epoch": 2.987084363836219, "grad_norm": 0.12680465583018966, "learning_rate": 1.7248121493428512e-05, "loss": 0.4427, "num_tokens": 6238257034.0, "step": 8154 }, { "epoch": 2.9874507648621416, "grad_norm": 0.1242572818629463, "learning_rate": 1.7244000541048647e-05, "loss": 0.4591, "num_tokens": 6239072801.0, "step": 8155 }, { "epoch": 2.9878171658880643, "grad_norm": 0.13354282760333472, "learning_rate": 1.7239879856643407e-05, "loss": 0.4584, "num_tokens": 6239836550.0, "step": 8156 }, { "epoch": 2.988183566913987, "grad_norm": 0.1169580717766093, "learning_rate": 1.723575944044495e-05, "loss": 0.4126, "num_tokens": 6240510508.0, "step": 8157 }, { "epoch": 2.9885499679399103, "grad_norm": 0.13479153908038533, "learning_rate": 1.723163929268544e-05, "loss": 0.4538, "num_tokens": 6241405257.0, "step": 8158 }, { "epoch": 2.988916368965833, "grad_norm": 0.1294193170181259, "learning_rate": 1.7227519413597033e-05, "loss": 0.4753, "num_tokens": 6242239410.0, "step": 8159 }, { "epoch": 2.989282769991756, "grad_norm": 0.1199744136025598, "learning_rate": 1.7223399803411864e-05, "loss": 0.4306, "num_tokens": 6242999218.0, "step": 8160 }, { "epoch": 2.9896491710176787, "grad_norm": 0.1353813992107143, "learning_rate": 1.7219280462362036e-05, "loss": 0.461, "num_tokens": 6243843227.0, "step": 8161 }, { "epoch": 2.9900155720436015, "grad_norm": 0.12687907193888007, "learning_rate": 1.721516139067966e-05, "loss": 0.4635, "num_tokens": 6244561073.0, "step": 8162 }, { "epoch": 2.9903819730695247, "grad_norm": 0.12535942012645376, "learning_rate": 1.7211042588596825e-05, "loss": 0.4382, "num_tokens": 6245323678.0, "step": 8163 }, { "epoch": 2.9907483740954475, "grad_norm": 0.13018542727212035, "learning_rate": 1.72069240563456e-05, "loss": 0.4475, "num_tokens": 6246122379.0, "step": 8164 }, { "epoch": 2.9911147751213703, "grad_norm": 0.12673290554071648, "learning_rate": 1.720280579415805e-05, "loss": 0.4781, "num_tokens": 6246785668.0, "step": 8165 }, { "epoch": 2.9914811761472935, "grad_norm": 0.1475023816501585, "learning_rate": 1.7198687802266202e-05, "loss": 0.4892, "num_tokens": 6247396893.0, "step": 8166 }, { "epoch": 2.991847577173216, "grad_norm": 0.1408891108348157, "learning_rate": 1.7194570080902096e-05, "loss": 0.4493, "num_tokens": 6248176346.0, "step": 8167 }, { "epoch": 2.992213978199139, "grad_norm": 0.12441930390855611, "learning_rate": 1.719045263029774e-05, "loss": 0.4824, "num_tokens": 6248933530.0, "step": 8168 }, { "epoch": 2.992580379225062, "grad_norm": 0.1390951591218261, "learning_rate": 1.7186335450685117e-05, "loss": 0.4405, "num_tokens": 6249673538.0, "step": 8169 }, { "epoch": 2.9929467802509846, "grad_norm": 0.13149174098595595, "learning_rate": 1.7182218542296224e-05, "loss": 0.471, "num_tokens": 6250405287.0, "step": 8170 }, { "epoch": 2.993313181276908, "grad_norm": 0.1366409610496494, "learning_rate": 1.7178101905363024e-05, "loss": 0.4751, "num_tokens": 6251032670.0, "step": 8171 }, { "epoch": 2.9936795823028306, "grad_norm": 0.13224265468201454, "learning_rate": 1.717398554011745e-05, "loss": 0.4425, "num_tokens": 6251769887.0, "step": 8172 }, { "epoch": 2.9940459833287534, "grad_norm": 0.13575430171390096, "learning_rate": 1.7169869446791463e-05, "loss": 0.4812, "num_tokens": 6252437701.0, "step": 8173 }, { "epoch": 2.994412384354676, "grad_norm": 0.14842237703376068, "learning_rate": 1.7165753625616962e-05, "loss": 0.5231, "num_tokens": 6253143541.0, "step": 8174 }, { "epoch": 2.994778785380599, "grad_norm": 0.14070411708388214, "learning_rate": 1.716163807682586e-05, "loss": 0.4338, "num_tokens": 6253863745.0, "step": 8175 }, { "epoch": 2.995145186406522, "grad_norm": 0.13712744414995043, "learning_rate": 1.715752280065005e-05, "loss": 0.4621, "num_tokens": 6254649932.0, "step": 8176 }, { "epoch": 2.995511587432445, "grad_norm": 0.14941276037370085, "learning_rate": 1.7153407797321383e-05, "loss": 0.4473, "num_tokens": 6255424851.0, "step": 8177 }, { "epoch": 2.9958779884583677, "grad_norm": 0.13028151251333042, "learning_rate": 1.7149293067071755e-05, "loss": 0.4828, "num_tokens": 6256170271.0, "step": 8178 }, { "epoch": 2.9962443894842905, "grad_norm": 0.1308978101242735, "learning_rate": 1.7145178610132976e-05, "loss": 0.4624, "num_tokens": 6256979270.0, "step": 8179 }, { "epoch": 2.9966107905102133, "grad_norm": 0.1327003205580607, "learning_rate": 1.714106442673688e-05, "loss": 0.4423, "num_tokens": 6257790083.0, "step": 8180 }, { "epoch": 2.9969771915361365, "grad_norm": 0.13182865831055907, "learning_rate": 1.713695051711529e-05, "loss": 0.4217, "num_tokens": 6258454093.0, "step": 8181 }, { "epoch": 2.9973435925620593, "grad_norm": 0.12413868169080862, "learning_rate": 1.7132836881499993e-05, "loss": 0.4606, "num_tokens": 6259396909.0, "step": 8182 }, { "epoch": 2.997709993587982, "grad_norm": 0.12845143047307478, "learning_rate": 1.7128723520122767e-05, "loss": 0.4202, "num_tokens": 6260270876.0, "step": 8183 }, { "epoch": 2.998076394613905, "grad_norm": 0.11199758242228885, "learning_rate": 1.7124610433215386e-05, "loss": 0.4434, "num_tokens": 6261102902.0, "step": 8184 }, { "epoch": 2.9984427956398276, "grad_norm": 0.1336062713841795, "learning_rate": 1.7120497621009596e-05, "loss": 0.4706, "num_tokens": 6261930774.0, "step": 8185 }, { "epoch": 2.998809196665751, "grad_norm": 0.12477160651984767, "learning_rate": 1.711638508373713e-05, "loss": 0.4525, "num_tokens": 6262659388.0, "step": 8186 }, { "epoch": 2.9991755976916736, "grad_norm": 0.13494840751527834, "learning_rate": 1.7112272821629723e-05, "loss": 0.46, "num_tokens": 6263307148.0, "step": 8187 }, { "epoch": 2.9995419987175964, "grad_norm": 0.1455479605253679, "learning_rate": 1.710816083491905e-05, "loss": 0.4357, "num_tokens": 6264083394.0, "step": 8188 }, { "epoch": 2.999908399743519, "grad_norm": 0.12276537492629398, "learning_rate": 1.710404912383682e-05, "loss": 0.4562, "num_tokens": 6264783483.0, "step": 8189 }, { "epoch": 3.0, "grad_norm": 0.12276537492629398, "learning_rate": 1.70999376886147e-05, "loss": 0.476, "num_tokens": 6264969622.0, "step": 8190 }, { "epoch": 3.000366401025923, "grad_norm": 0.3038361456170196, "learning_rate": 1.709582652948434e-05, "loss": 0.4167, "num_tokens": 6265701056.0, "step": 8191 }, { "epoch": 3.0007328020518456, "grad_norm": 0.19192644787822208, "learning_rate": 1.7091715646677392e-05, "loss": 0.4398, "num_tokens": 6266417421.0, "step": 8192 }, { "epoch": 3.001099203077769, "grad_norm": 0.15090076881845654, "learning_rate": 1.7087605040425473e-05, "loss": 0.424, "num_tokens": 6267271852.0, "step": 8193 }, { "epoch": 3.0014656041036916, "grad_norm": 0.18609767806548447, "learning_rate": 1.70834947109602e-05, "loss": 0.3993, "num_tokens": 6268131801.0, "step": 8194 }, { "epoch": 3.0018320051296143, "grad_norm": 0.1557485114793272, "learning_rate": 1.7079384658513172e-05, "loss": 0.4021, "num_tokens": 6268967936.0, "step": 8195 }, { "epoch": 3.002198406155537, "grad_norm": 0.14678987211320552, "learning_rate": 1.707527488331595e-05, "loss": 0.3708, "num_tokens": 6269657314.0, "step": 8196 }, { "epoch": 3.00256480718146, "grad_norm": 0.15197329765023412, "learning_rate": 1.7071165385600115e-05, "loss": 0.4065, "num_tokens": 6270281206.0, "step": 8197 }, { "epoch": 3.002931208207383, "grad_norm": 0.18254660071045015, "learning_rate": 1.7067056165597205e-05, "loss": 0.4194, "num_tokens": 6271064352.0, "step": 8198 }, { "epoch": 3.003297609233306, "grad_norm": 0.1666329970972335, "learning_rate": 1.7062947223538758e-05, "loss": 0.4329, "num_tokens": 6271738869.0, "step": 8199 }, { "epoch": 3.0036640102592287, "grad_norm": 0.15992102828743177, "learning_rate": 1.705883855965629e-05, "loss": 0.3917, "num_tokens": 6272545987.0, "step": 8200 }, { "epoch": 3.0040304112851515, "grad_norm": 0.15847267497214051, "learning_rate": 1.7054730174181294e-05, "loss": 0.3999, "num_tokens": 6273347307.0, "step": 8201 }, { "epoch": 3.0043968123110742, "grad_norm": 0.154142010942842, "learning_rate": 1.7050622067345265e-05, "loss": 0.3967, "num_tokens": 6274020662.0, "step": 8202 }, { "epoch": 3.0047632133369975, "grad_norm": 0.15788462363884898, "learning_rate": 1.7046514239379668e-05, "loss": 0.3935, "num_tokens": 6274715195.0, "step": 8203 }, { "epoch": 3.0051296143629203, "grad_norm": 0.14124909692066698, "learning_rate": 1.704240669051595e-05, "loss": 0.4155, "num_tokens": 6275437641.0, "step": 8204 }, { "epoch": 3.005496015388843, "grad_norm": 0.1592085609146472, "learning_rate": 1.7038299420985566e-05, "loss": 0.4365, "num_tokens": 6276156603.0, "step": 8205 }, { "epoch": 3.005862416414766, "grad_norm": 0.15385958702240055, "learning_rate": 1.7034192431019924e-05, "loss": 0.408, "num_tokens": 6276905898.0, "step": 8206 }, { "epoch": 3.006228817440689, "grad_norm": 0.1365568553693534, "learning_rate": 1.7030085720850433e-05, "loss": 0.4247, "num_tokens": 6277629959.0, "step": 8207 }, { "epoch": 3.006595218466612, "grad_norm": 0.1482565367071965, "learning_rate": 1.7025979290708482e-05, "loss": 0.3992, "num_tokens": 6278458559.0, "step": 8208 }, { "epoch": 3.0069616194925346, "grad_norm": 0.13808083118800074, "learning_rate": 1.7021873140825458e-05, "loss": 0.3876, "num_tokens": 6279294868.0, "step": 8209 }, { "epoch": 3.0073280205184574, "grad_norm": 0.1449480650322731, "learning_rate": 1.7017767271432702e-05, "loss": 0.3777, "num_tokens": 6280057646.0, "step": 8210 }, { "epoch": 3.00769442154438, "grad_norm": 0.1419612026983006, "learning_rate": 1.701366168276158e-05, "loss": 0.412, "num_tokens": 6280796509.0, "step": 8211 }, { "epoch": 3.0080608225703034, "grad_norm": 0.14191099729356524, "learning_rate": 1.7009556375043387e-05, "loss": 0.3902, "num_tokens": 6281630793.0, "step": 8212 }, { "epoch": 3.008427223596226, "grad_norm": 0.14179126593228134, "learning_rate": 1.700545134850947e-05, "loss": 0.4029, "num_tokens": 6282423018.0, "step": 8213 }, { "epoch": 3.008793624622149, "grad_norm": 0.14228639560583006, "learning_rate": 1.7001346603391105e-05, "loss": 0.3754, "num_tokens": 6283176312.0, "step": 8214 }, { "epoch": 3.0091600256480717, "grad_norm": 0.13609708453868777, "learning_rate": 1.6997242139919565e-05, "loss": 0.4215, "num_tokens": 6283979499.0, "step": 8215 }, { "epoch": 3.0095264266739945, "grad_norm": 0.15006201033704475, "learning_rate": 1.6993137958326137e-05, "loss": 0.3913, "num_tokens": 6284731309.0, "step": 8216 }, { "epoch": 3.0098928276999177, "grad_norm": 0.1346350593488732, "learning_rate": 1.698903405884206e-05, "loss": 0.3945, "num_tokens": 6285486647.0, "step": 8217 }, { "epoch": 3.0102592287258405, "grad_norm": 0.1457372548549537, "learning_rate": 1.698493044169855e-05, "loss": 0.3854, "num_tokens": 6286292130.0, "step": 8218 }, { "epoch": 3.0106256297517633, "grad_norm": 0.13018056108179837, "learning_rate": 1.6980827107126848e-05, "loss": 0.3674, "num_tokens": 6287066979.0, "step": 8219 }, { "epoch": 3.010992030777686, "grad_norm": 0.15120700711071697, "learning_rate": 1.6976724055358128e-05, "loss": 0.3946, "num_tokens": 6287742353.0, "step": 8220 }, { "epoch": 3.011358431803609, "grad_norm": 0.14211254061049025, "learning_rate": 1.69726212866236e-05, "loss": 0.4401, "num_tokens": 6288402752.0, "step": 8221 }, { "epoch": 3.011724832829532, "grad_norm": 0.1411590013782143, "learning_rate": 1.696851880115443e-05, "loss": 0.3939, "num_tokens": 6289207354.0, "step": 8222 }, { "epoch": 3.012091233855455, "grad_norm": 0.14268854018528007, "learning_rate": 1.696441659918175e-05, "loss": 0.3414, "num_tokens": 6289997990.0, "step": 8223 }, { "epoch": 3.0124576348813776, "grad_norm": 0.13662512570573715, "learning_rate": 1.6960314680936714e-05, "loss": 0.4078, "num_tokens": 6290742970.0, "step": 8224 }, { "epoch": 3.0128240359073004, "grad_norm": 0.14116654607805212, "learning_rate": 1.6956213046650446e-05, "loss": 0.4154, "num_tokens": 6291487416.0, "step": 8225 }, { "epoch": 3.013190436933223, "grad_norm": 0.1423714637986087, "learning_rate": 1.695211169655403e-05, "loss": 0.4009, "num_tokens": 6292301706.0, "step": 8226 }, { "epoch": 3.0135568379591464, "grad_norm": 0.1303776800883774, "learning_rate": 1.694801063087858e-05, "loss": 0.4133, "num_tokens": 6293078594.0, "step": 8227 }, { "epoch": 3.013923238985069, "grad_norm": 0.15316588901596723, "learning_rate": 1.6943909849855153e-05, "loss": 0.4254, "num_tokens": 6293770602.0, "step": 8228 }, { "epoch": 3.014289640010992, "grad_norm": 0.149135659163272, "learning_rate": 1.6939809353714804e-05, "loss": 0.4045, "num_tokens": 6294532955.0, "step": 8229 }, { "epoch": 3.0146560410369148, "grad_norm": 0.14282891228675187, "learning_rate": 1.6935709142688588e-05, "loss": 0.3981, "num_tokens": 6295256973.0, "step": 8230 }, { "epoch": 3.015022442062838, "grad_norm": 0.14510403870838492, "learning_rate": 1.6931609217007507e-05, "loss": 0.3966, "num_tokens": 6296056217.0, "step": 8231 }, { "epoch": 3.0153888430887608, "grad_norm": 0.13678466727876346, "learning_rate": 1.6927509576902595e-05, "loss": 0.4128, "num_tokens": 6296800971.0, "step": 8232 }, { "epoch": 3.0157552441146835, "grad_norm": 0.1500104327698446, "learning_rate": 1.6923410222604828e-05, "loss": 0.4189, "num_tokens": 6297552222.0, "step": 8233 }, { "epoch": 3.0161216451406063, "grad_norm": 0.161719335307808, "learning_rate": 1.6919311154345184e-05, "loss": 0.3939, "num_tokens": 6298378468.0, "step": 8234 }, { "epoch": 3.016488046166529, "grad_norm": 0.14178410611155015, "learning_rate": 1.691521237235463e-05, "loss": 0.3858, "num_tokens": 6299288362.0, "step": 8235 }, { "epoch": 3.0168544471924523, "grad_norm": 0.1327469028717474, "learning_rate": 1.6911113876864106e-05, "loss": 0.3826, "num_tokens": 6299865977.0, "step": 8236 }, { "epoch": 3.017220848218375, "grad_norm": 0.1613274209064489, "learning_rate": 1.690701566810453e-05, "loss": 0.4145, "num_tokens": 6300528374.0, "step": 8237 }, { "epoch": 3.017587249244298, "grad_norm": 0.15487403066978617, "learning_rate": 1.6902917746306825e-05, "loss": 0.4173, "num_tokens": 6301200900.0, "step": 8238 }, { "epoch": 3.0179536502702207, "grad_norm": 0.1522512109147418, "learning_rate": 1.6898820111701887e-05, "loss": 0.4092, "num_tokens": 6302042460.0, "step": 8239 }, { "epoch": 3.0183200512961434, "grad_norm": 0.14884221210738546, "learning_rate": 1.6894722764520598e-05, "loss": 0.3835, "num_tokens": 6302869063.0, "step": 8240 }, { "epoch": 3.0186864523220667, "grad_norm": 0.13123524808530512, "learning_rate": 1.6890625704993815e-05, "loss": 0.4016, "num_tokens": 6303535460.0, "step": 8241 }, { "epoch": 3.0190528533479895, "grad_norm": 0.14930321966161209, "learning_rate": 1.688652893335238e-05, "loss": 0.3862, "num_tokens": 6304300591.0, "step": 8242 }, { "epoch": 3.0194192543739122, "grad_norm": 0.15211623365306237, "learning_rate": 1.6882432449827133e-05, "loss": 0.4238, "num_tokens": 6305031269.0, "step": 8243 }, { "epoch": 3.019785655399835, "grad_norm": 0.16069993484135023, "learning_rate": 1.6878336254648888e-05, "loss": 0.3997, "num_tokens": 6305673362.0, "step": 8244 }, { "epoch": 3.020152056425758, "grad_norm": 0.13587429883348526, "learning_rate": 1.687424034804843e-05, "loss": 0.391, "num_tokens": 6306556911.0, "step": 8245 }, { "epoch": 3.020518457451681, "grad_norm": 0.1314278291927176, "learning_rate": 1.687014473025656e-05, "loss": 0.4018, "num_tokens": 6307286998.0, "step": 8246 }, { "epoch": 3.020884858477604, "grad_norm": 0.14700294131581046, "learning_rate": 1.686604940150404e-05, "loss": 0.3964, "num_tokens": 6307969300.0, "step": 8247 }, { "epoch": 3.0212512595035266, "grad_norm": 0.15025610004931422, "learning_rate": 1.68619543620216e-05, "loss": 0.3862, "num_tokens": 6308734810.0, "step": 8248 }, { "epoch": 3.0216176605294494, "grad_norm": 0.14420232432008454, "learning_rate": 1.685785961204e-05, "loss": 0.3907, "num_tokens": 6309486307.0, "step": 8249 }, { "epoch": 3.021984061555372, "grad_norm": 0.13587534590194422, "learning_rate": 1.6853765151789936e-05, "loss": 0.3743, "num_tokens": 6310304113.0, "step": 8250 }, { "epoch": 3.0223504625812954, "grad_norm": 0.13987044595588982, "learning_rate": 1.684967098150212e-05, "loss": 0.3943, "num_tokens": 6311042358.0, "step": 8251 }, { "epoch": 3.022716863607218, "grad_norm": 0.1308215629601627, "learning_rate": 1.684557710140724e-05, "loss": 0.4037, "num_tokens": 6311893552.0, "step": 8252 }, { "epoch": 3.023083264633141, "grad_norm": 0.14721873775337507, "learning_rate": 1.6841483511735942e-05, "loss": 0.4255, "num_tokens": 6312657733.0, "step": 8253 }, { "epoch": 3.0234496656590637, "grad_norm": 0.16252618318667683, "learning_rate": 1.6837390212718906e-05, "loss": 0.4357, "num_tokens": 6313263779.0, "step": 8254 }, { "epoch": 3.023816066684987, "grad_norm": 0.15276324428683144, "learning_rate": 1.6833297204586753e-05, "loss": 0.3977, "num_tokens": 6313994728.0, "step": 8255 }, { "epoch": 3.0241824677109097, "grad_norm": 0.14611683335611633, "learning_rate": 1.6829204487570093e-05, "loss": 0.3983, "num_tokens": 6314666586.0, "step": 8256 }, { "epoch": 3.0245488687368325, "grad_norm": 0.14665366257106877, "learning_rate": 1.6825112061899544e-05, "loss": 0.3914, "num_tokens": 6315592988.0, "step": 8257 }, { "epoch": 3.0249152697627553, "grad_norm": 0.13600078102305335, "learning_rate": 1.682101992780568e-05, "loss": 0.3785, "num_tokens": 6316274446.0, "step": 8258 }, { "epoch": 3.025281670788678, "grad_norm": 0.142170091264543, "learning_rate": 1.6816928085519077e-05, "loss": 0.3865, "num_tokens": 6317108658.0, "step": 8259 }, { "epoch": 3.0256480718146013, "grad_norm": 0.1376005022427823, "learning_rate": 1.6812836535270292e-05, "loss": 0.4164, "num_tokens": 6317925428.0, "step": 8260 }, { "epoch": 3.026014472840524, "grad_norm": 0.13276779726817564, "learning_rate": 1.680874527728985e-05, "loss": 0.4041, "num_tokens": 6318676999.0, "step": 8261 }, { "epoch": 3.026380873866447, "grad_norm": 0.16061978129131163, "learning_rate": 1.680465431180828e-05, "loss": 0.4341, "num_tokens": 6319339065.0, "step": 8262 }, { "epoch": 3.0267472748923696, "grad_norm": 0.162040275677662, "learning_rate": 1.680056363905608e-05, "loss": 0.4383, "num_tokens": 6320072319.0, "step": 8263 }, { "epoch": 3.0271136759182924, "grad_norm": 0.1360517848789664, "learning_rate": 1.679647325926374e-05, "loss": 0.4092, "num_tokens": 6320890256.0, "step": 8264 }, { "epoch": 3.0274800769442156, "grad_norm": 0.13991274081656943, "learning_rate": 1.6792383172661733e-05, "loss": 0.402, "num_tokens": 6321683748.0, "step": 8265 }, { "epoch": 3.0278464779701384, "grad_norm": 0.14244663950912353, "learning_rate": 1.678829337948051e-05, "loss": 0.4109, "num_tokens": 6322391701.0, "step": 8266 }, { "epoch": 3.028212878996061, "grad_norm": 0.1410206429530557, "learning_rate": 1.6784203879950503e-05, "loss": 0.3901, "num_tokens": 6323133381.0, "step": 8267 }, { "epoch": 3.028579280021984, "grad_norm": 0.13856197181012592, "learning_rate": 1.6780114674302147e-05, "loss": 0.3937, "num_tokens": 6323820356.0, "step": 8268 }, { "epoch": 3.0289456810479067, "grad_norm": 0.1476724272760022, "learning_rate": 1.6776025762765832e-05, "loss": 0.3824, "num_tokens": 6324607011.0, "step": 8269 }, { "epoch": 3.02931208207383, "grad_norm": 0.1359064478786903, "learning_rate": 1.6771937145571955e-05, "loss": 0.4103, "num_tokens": 6325408323.0, "step": 8270 }, { "epoch": 3.0296784830997527, "grad_norm": 0.1408822349403385, "learning_rate": 1.6767848822950886e-05, "loss": 0.4106, "num_tokens": 6326095609.0, "step": 8271 }, { "epoch": 3.0300448841256755, "grad_norm": 0.14222754917769073, "learning_rate": 1.6763760795132976e-05, "loss": 0.4199, "num_tokens": 6326754349.0, "step": 8272 }, { "epoch": 3.0304112851515983, "grad_norm": 0.16275574096805612, "learning_rate": 1.6759673062348568e-05, "loss": 0.386, "num_tokens": 6327434094.0, "step": 8273 }, { "epoch": 3.030777686177521, "grad_norm": 0.14204330989160727, "learning_rate": 1.6755585624827984e-05, "loss": 0.3821, "num_tokens": 6328227268.0, "step": 8274 }, { "epoch": 3.0311440872034443, "grad_norm": 0.14026428468929164, "learning_rate": 1.6751498482801514e-05, "loss": 0.4037, "num_tokens": 6328923678.0, "step": 8275 }, { "epoch": 3.031510488229367, "grad_norm": 0.13497546817109085, "learning_rate": 1.674741163649946e-05, "loss": 0.4028, "num_tokens": 6329799075.0, "step": 8276 }, { "epoch": 3.03187688925529, "grad_norm": 0.151742840399724, "learning_rate": 1.67433250861521e-05, "loss": 0.3818, "num_tokens": 6330600607.0, "step": 8277 }, { "epoch": 3.0322432902812126, "grad_norm": 0.15308588105889437, "learning_rate": 1.6739238831989667e-05, "loss": 0.4066, "num_tokens": 6331319344.0, "step": 8278 }, { "epoch": 3.032609691307136, "grad_norm": 0.14060428246949438, "learning_rate": 1.6735152874242428e-05, "loss": 0.4244, "num_tokens": 6331969182.0, "step": 8279 }, { "epoch": 3.0329760923330586, "grad_norm": 0.15897734041777206, "learning_rate": 1.6731067213140577e-05, "loss": 0.3943, "num_tokens": 6332675034.0, "step": 8280 }, { "epoch": 3.0333424933589814, "grad_norm": 0.16674271412174377, "learning_rate": 1.672698184891433e-05, "loss": 0.4152, "num_tokens": 6333412809.0, "step": 8281 }, { "epoch": 3.033708894384904, "grad_norm": 0.13884829688662836, "learning_rate": 1.672289678179388e-05, "loss": 0.3849, "num_tokens": 6334141693.0, "step": 8282 }, { "epoch": 3.034075295410827, "grad_norm": 0.1482445870365783, "learning_rate": 1.6718812012009385e-05, "loss": 0.414, "num_tokens": 6334932867.0, "step": 8283 }, { "epoch": 3.03444169643675, "grad_norm": 0.14604643942767923, "learning_rate": 1.671472753979102e-05, "loss": 0.4263, "num_tokens": 6335635065.0, "step": 8284 }, { "epoch": 3.034808097462673, "grad_norm": 0.16003597539729894, "learning_rate": 1.6710643365368905e-05, "loss": 0.4137, "num_tokens": 6336451724.0, "step": 8285 }, { "epoch": 3.0351744984885958, "grad_norm": 0.14024485215137525, "learning_rate": 1.670655948897316e-05, "loss": 0.3742, "num_tokens": 6337173945.0, "step": 8286 }, { "epoch": 3.0355408995145186, "grad_norm": 0.1417246195818596, "learning_rate": 1.6702475910833905e-05, "loss": 0.4068, "num_tokens": 6337896405.0, "step": 8287 }, { "epoch": 3.0359073005404413, "grad_norm": 0.1453473997185102, "learning_rate": 1.6698392631181208e-05, "loss": 0.418, "num_tokens": 6338771562.0, "step": 8288 }, { "epoch": 3.0362737015663646, "grad_norm": 0.14130358817472013, "learning_rate": 1.669430965024516e-05, "loss": 0.4161, "num_tokens": 6339520039.0, "step": 8289 }, { "epoch": 3.0366401025922873, "grad_norm": 0.14453527283794662, "learning_rate": 1.6690226968255796e-05, "loss": 0.4224, "num_tokens": 6340200618.0, "step": 8290 }, { "epoch": 3.03700650361821, "grad_norm": 0.14853269077456605, "learning_rate": 1.6686144585443162e-05, "loss": 0.3881, "num_tokens": 6340927553.0, "step": 8291 }, { "epoch": 3.037372904644133, "grad_norm": 0.13374605575158785, "learning_rate": 1.6682062502037285e-05, "loss": 0.3816, "num_tokens": 6341764236.0, "step": 8292 }, { "epoch": 3.0377393056700557, "grad_norm": 0.13419513113327, "learning_rate": 1.667798071826815e-05, "loss": 0.3858, "num_tokens": 6342572970.0, "step": 8293 }, { "epoch": 3.038105706695979, "grad_norm": 0.14033657826946927, "learning_rate": 1.6673899234365757e-05, "loss": 0.4104, "num_tokens": 6343308261.0, "step": 8294 }, { "epoch": 3.0384721077219017, "grad_norm": 0.14068081253456882, "learning_rate": 1.666981805056007e-05, "loss": 0.3847, "num_tokens": 6344032595.0, "step": 8295 }, { "epoch": 3.0388385087478245, "grad_norm": 0.14228280280725839, "learning_rate": 1.6665737167081047e-05, "loss": 0.4409, "num_tokens": 6344864992.0, "step": 8296 }, { "epoch": 3.0392049097737472, "grad_norm": 0.14259639158063747, "learning_rate": 1.666165658415862e-05, "loss": 0.4301, "num_tokens": 6345536818.0, "step": 8297 }, { "epoch": 3.03957131079967, "grad_norm": 0.15070672031870536, "learning_rate": 1.66575763020227e-05, "loss": 0.4076, "num_tokens": 6346314889.0, "step": 8298 }, { "epoch": 3.0399377118255932, "grad_norm": 0.15419599376852566, "learning_rate": 1.66534963209032e-05, "loss": 0.3815, "num_tokens": 6347128963.0, "step": 8299 }, { "epoch": 3.040304112851516, "grad_norm": 0.11959468211436425, "learning_rate": 1.6649416641030005e-05, "loss": 0.4096, "num_tokens": 6347969431.0, "step": 8300 }, { "epoch": 3.040670513877439, "grad_norm": 0.1544843162472707, "learning_rate": 1.6645337262632976e-05, "loss": 0.3845, "num_tokens": 6348803597.0, "step": 8301 }, { "epoch": 3.0410369149033616, "grad_norm": 0.13703904338442052, "learning_rate": 1.664125818594196e-05, "loss": 0.4217, "num_tokens": 6349513482.0, "step": 8302 }, { "epoch": 3.041403315929285, "grad_norm": 0.1581914075816971, "learning_rate": 1.66371794111868e-05, "loss": 0.4123, "num_tokens": 6350339865.0, "step": 8303 }, { "epoch": 3.0417697169552076, "grad_norm": 0.13606216844772112, "learning_rate": 1.6633100938597312e-05, "loss": 0.4159, "num_tokens": 6351134388.0, "step": 8304 }, { "epoch": 3.0421361179811304, "grad_norm": 0.13617401318687375, "learning_rate": 1.6629022768403285e-05, "loss": 0.3991, "num_tokens": 6351900557.0, "step": 8305 }, { "epoch": 3.042502519007053, "grad_norm": 0.13898487487386832, "learning_rate": 1.662494490083452e-05, "loss": 0.4221, "num_tokens": 6352660674.0, "step": 8306 }, { "epoch": 3.042868920032976, "grad_norm": 0.1427834746449087, "learning_rate": 1.6620867336120765e-05, "loss": 0.3845, "num_tokens": 6353413208.0, "step": 8307 }, { "epoch": 3.043235321058899, "grad_norm": 0.14007835297378252, "learning_rate": 1.6616790074491776e-05, "loss": 0.3667, "num_tokens": 6354156341.0, "step": 8308 }, { "epoch": 3.043601722084822, "grad_norm": 0.1417560249390339, "learning_rate": 1.6612713116177294e-05, "loss": 0.4039, "num_tokens": 6354910662.0, "step": 8309 }, { "epoch": 3.0439681231107447, "grad_norm": 0.13632831284273955, "learning_rate": 1.6608636461407002e-05, "loss": 0.3918, "num_tokens": 6355611148.0, "step": 8310 }, { "epoch": 3.0443345241366675, "grad_norm": 0.142111786301314, "learning_rate": 1.660456011041064e-05, "loss": 0.4055, "num_tokens": 6356315300.0, "step": 8311 }, { "epoch": 3.0447009251625903, "grad_norm": 0.15339156306360877, "learning_rate": 1.660048406341786e-05, "loss": 0.4216, "num_tokens": 6357054902.0, "step": 8312 }, { "epoch": 3.0450673261885135, "grad_norm": 0.1468556608763991, "learning_rate": 1.6596408320658323e-05, "loss": 0.4216, "num_tokens": 6357831487.0, "step": 8313 }, { "epoch": 3.0454337272144363, "grad_norm": 0.14348736012370442, "learning_rate": 1.6592332882361697e-05, "loss": 0.4259, "num_tokens": 6358523664.0, "step": 8314 }, { "epoch": 3.045800128240359, "grad_norm": 0.1409157329976201, "learning_rate": 1.6588257748757587e-05, "loss": 0.3871, "num_tokens": 6359401417.0, "step": 8315 }, { "epoch": 3.046166529266282, "grad_norm": 0.13597625469657312, "learning_rate": 1.658418292007562e-05, "loss": 0.4173, "num_tokens": 6360161780.0, "step": 8316 }, { "epoch": 3.0465329302922046, "grad_norm": 0.1466156877071606, "learning_rate": 1.6580108396545385e-05, "loss": 0.3941, "num_tokens": 6361066718.0, "step": 8317 }, { "epoch": 3.046899331318128, "grad_norm": 0.13141593646347802, "learning_rate": 1.6576034178396457e-05, "loss": 0.3944, "num_tokens": 6361792970.0, "step": 8318 }, { "epoch": 3.0472657323440506, "grad_norm": 0.14927754837439774, "learning_rate": 1.6571960265858408e-05, "loss": 0.4172, "num_tokens": 6362564546.0, "step": 8319 }, { "epoch": 3.0476321333699734, "grad_norm": 0.145792263789096, "learning_rate": 1.6567886659160758e-05, "loss": 0.4135, "num_tokens": 6363354827.0, "step": 8320 }, { "epoch": 3.047998534395896, "grad_norm": 0.1334136986032153, "learning_rate": 1.656381335853305e-05, "loss": 0.3772, "num_tokens": 6364092270.0, "step": 8321 }, { "epoch": 3.048364935421819, "grad_norm": 0.1449746506226943, "learning_rate": 1.6559740364204787e-05, "loss": 0.4248, "num_tokens": 6364982860.0, "step": 8322 }, { "epoch": 3.048731336447742, "grad_norm": 0.13287294121005877, "learning_rate": 1.655566767640546e-05, "loss": 0.4154, "num_tokens": 6365869506.0, "step": 8323 }, { "epoch": 3.049097737473665, "grad_norm": 0.14301698832810444, "learning_rate": 1.655159529536455e-05, "loss": 0.3952, "num_tokens": 6366609133.0, "step": 8324 }, { "epoch": 3.0494641384995878, "grad_norm": 0.13784477950225038, "learning_rate": 1.65475232213115e-05, "loss": 0.4064, "num_tokens": 6367359169.0, "step": 8325 }, { "epoch": 3.0498305395255105, "grad_norm": 0.1475849687931638, "learning_rate": 1.654345145447576e-05, "loss": 0.4259, "num_tokens": 6368088749.0, "step": 8326 }, { "epoch": 3.0501969405514338, "grad_norm": 0.15232627342319616, "learning_rate": 1.6539379995086746e-05, "loss": 0.4302, "num_tokens": 6368774187.0, "step": 8327 }, { "epoch": 3.0505633415773565, "grad_norm": 0.15007877140302045, "learning_rate": 1.6535308843373873e-05, "loss": 0.4247, "num_tokens": 6369641649.0, "step": 8328 }, { "epoch": 3.0509297426032793, "grad_norm": 0.13511659033003165, "learning_rate": 1.6531237999566508e-05, "loss": 0.4069, "num_tokens": 6370376670.0, "step": 8329 }, { "epoch": 3.051296143629202, "grad_norm": 0.13557399576444326, "learning_rate": 1.652716746389404e-05, "loss": 0.3981, "num_tokens": 6371160350.0, "step": 8330 }, { "epoch": 3.051662544655125, "grad_norm": 0.14646101528112032, "learning_rate": 1.652309723658581e-05, "loss": 0.4326, "num_tokens": 6371891462.0, "step": 8331 }, { "epoch": 3.052028945681048, "grad_norm": 0.14476390385291377, "learning_rate": 1.6519027317871163e-05, "loss": 0.4034, "num_tokens": 6372757654.0, "step": 8332 }, { "epoch": 3.052395346706971, "grad_norm": 0.13844618486238788, "learning_rate": 1.6514957707979403e-05, "loss": 0.4129, "num_tokens": 6373484254.0, "step": 8333 }, { "epoch": 3.0527617477328937, "grad_norm": 0.15423062946465363, "learning_rate": 1.6510888407139836e-05, "loss": 0.3976, "num_tokens": 6374180526.0, "step": 8334 }, { "epoch": 3.0531281487588164, "grad_norm": 0.14655600945273556, "learning_rate": 1.650681941558175e-05, "loss": 0.4088, "num_tokens": 6375030902.0, "step": 8335 }, { "epoch": 3.053494549784739, "grad_norm": 0.14382560337944292, "learning_rate": 1.650275073353442e-05, "loss": 0.4183, "num_tokens": 6375647059.0, "step": 8336 }, { "epoch": 3.0538609508106624, "grad_norm": 0.166600550135643, "learning_rate": 1.6498682361227062e-05, "loss": 0.4176, "num_tokens": 6376312424.0, "step": 8337 }, { "epoch": 3.0542273518365852, "grad_norm": 0.17254867594082543, "learning_rate": 1.6494614298888936e-05, "loss": 0.3832, "num_tokens": 6377071980.0, "step": 8338 }, { "epoch": 3.054593752862508, "grad_norm": 0.13777066778899655, "learning_rate": 1.6490546546749247e-05, "loss": 0.3789, "num_tokens": 6377880772.0, "step": 8339 }, { "epoch": 3.054960153888431, "grad_norm": 0.13972800792571263, "learning_rate": 1.6486479105037183e-05, "loss": 0.4196, "num_tokens": 6378648189.0, "step": 8340 }, { "epoch": 3.0553265549143536, "grad_norm": 0.14400085167691426, "learning_rate": 1.6482411973981936e-05, "loss": 0.3832, "num_tokens": 6379415478.0, "step": 8341 }, { "epoch": 3.055692955940277, "grad_norm": 0.1374163018908436, "learning_rate": 1.647834515381265e-05, "loss": 0.4144, "num_tokens": 6380249606.0, "step": 8342 }, { "epoch": 3.0560593569661996, "grad_norm": 0.1457156538856262, "learning_rate": 1.647427864475848e-05, "loss": 0.3965, "num_tokens": 6381057693.0, "step": 8343 }, { "epoch": 3.0564257579921223, "grad_norm": 0.14728080637447796, "learning_rate": 1.6470212447048553e-05, "loss": 0.3932, "num_tokens": 6381804678.0, "step": 8344 }, { "epoch": 3.056792159018045, "grad_norm": 0.13913405512029084, "learning_rate": 1.6466146560911957e-05, "loss": 0.3979, "num_tokens": 6382656485.0, "step": 8345 }, { "epoch": 3.057158560043968, "grad_norm": 0.14453083993765567, "learning_rate": 1.646208098657782e-05, "loss": 0.4572, "num_tokens": 6383428049.0, "step": 8346 }, { "epoch": 3.057524961069891, "grad_norm": 0.15935097258880174, "learning_rate": 1.6458015724275174e-05, "loss": 0.4109, "num_tokens": 6384111073.0, "step": 8347 }, { "epoch": 3.057891362095814, "grad_norm": 0.14396814916813458, "learning_rate": 1.6453950774233103e-05, "loss": 0.3897, "num_tokens": 6384926855.0, "step": 8348 }, { "epoch": 3.0582577631217367, "grad_norm": 0.13920361829062564, "learning_rate": 1.6449886136680633e-05, "loss": 0.3803, "num_tokens": 6385780025.0, "step": 8349 }, { "epoch": 3.0586241641476595, "grad_norm": 0.14378075726321382, "learning_rate": 1.6445821811846787e-05, "loss": 0.4255, "num_tokens": 6386458319.0, "step": 8350 }, { "epoch": 3.0589905651735827, "grad_norm": 0.14835509892067852, "learning_rate": 1.644175779996056e-05, "loss": 0.4232, "num_tokens": 6387196724.0, "step": 8351 }, { "epoch": 3.0593569661995055, "grad_norm": 0.1542740187457695, "learning_rate": 1.643769410125095e-05, "loss": 0.396, "num_tokens": 6387998341.0, "step": 8352 }, { "epoch": 3.0597233672254283, "grad_norm": 0.14194170369149195, "learning_rate": 1.643363071594691e-05, "loss": 0.4119, "num_tokens": 6388748254.0, "step": 8353 }, { "epoch": 3.060089768251351, "grad_norm": 0.1335960463499764, "learning_rate": 1.642956764427741e-05, "loss": 0.4058, "num_tokens": 6389520763.0, "step": 8354 }, { "epoch": 3.060456169277274, "grad_norm": 0.15179888260602323, "learning_rate": 1.6425504886471362e-05, "loss": 0.4497, "num_tokens": 6390378797.0, "step": 8355 }, { "epoch": 3.060822570303197, "grad_norm": 0.5113428646499333, "learning_rate": 1.642144244275768e-05, "loss": 0.4125, "num_tokens": 6391239493.0, "step": 8356 }, { "epoch": 3.06118897132912, "grad_norm": 0.1514653239050431, "learning_rate": 1.641738031336528e-05, "loss": 0.4124, "num_tokens": 6392042860.0, "step": 8357 }, { "epoch": 3.0615553723550426, "grad_norm": 0.1451890149495232, "learning_rate": 1.641331849852303e-05, "loss": 0.423, "num_tokens": 6392868762.0, "step": 8358 }, { "epoch": 3.0619217733809654, "grad_norm": 0.12994982161234148, "learning_rate": 1.640925699845978e-05, "loss": 0.3956, "num_tokens": 6393722835.0, "step": 8359 }, { "epoch": 3.062288174406888, "grad_norm": 0.13858382159684626, "learning_rate": 1.6405195813404384e-05, "loss": 0.3995, "num_tokens": 6394567697.0, "step": 8360 }, { "epoch": 3.0626545754328114, "grad_norm": 0.1363530705783407, "learning_rate": 1.6401134943585665e-05, "loss": 0.4069, "num_tokens": 6395304900.0, "step": 8361 }, { "epoch": 3.063020976458734, "grad_norm": 0.14307238716864842, "learning_rate": 1.639707438923244e-05, "loss": 0.4055, "num_tokens": 6396079665.0, "step": 8362 }, { "epoch": 3.063387377484657, "grad_norm": 0.13778329839350387, "learning_rate": 1.639301415057349e-05, "loss": 0.3943, "num_tokens": 6396911875.0, "step": 8363 }, { "epoch": 3.0637537785105797, "grad_norm": 0.12787265820094873, "learning_rate": 1.638895422783759e-05, "loss": 0.4099, "num_tokens": 6397696181.0, "step": 8364 }, { "epoch": 3.0641201795365025, "grad_norm": 0.13476524866685163, "learning_rate": 1.638489462125349e-05, "loss": 0.3715, "num_tokens": 6398468332.0, "step": 8365 }, { "epoch": 3.0644865805624257, "grad_norm": 0.13302030098448447, "learning_rate": 1.6380835331049935e-05, "loss": 0.4094, "num_tokens": 6399197444.0, "step": 8366 }, { "epoch": 3.0648529815883485, "grad_norm": 0.14610522582170818, "learning_rate": 1.6376776357455632e-05, "loss": 0.4043, "num_tokens": 6400088671.0, "step": 8367 }, { "epoch": 3.0652193826142713, "grad_norm": 0.1261057589074181, "learning_rate": 1.6372717700699295e-05, "loss": 0.3708, "num_tokens": 6400892660.0, "step": 8368 }, { "epoch": 3.065585783640194, "grad_norm": 0.13048389755594822, "learning_rate": 1.63686593610096e-05, "loss": 0.4254, "num_tokens": 6401733788.0, "step": 8369 }, { "epoch": 3.065952184666117, "grad_norm": 0.15005706526505805, "learning_rate": 1.6364601338615206e-05, "loss": 0.4157, "num_tokens": 6402454462.0, "step": 8370 }, { "epoch": 3.06631858569204, "grad_norm": 0.14440804033709087, "learning_rate": 1.6360543633744777e-05, "loss": 0.4009, "num_tokens": 6403215580.0, "step": 8371 }, { "epoch": 3.066684986717963, "grad_norm": 0.1349755849507614, "learning_rate": 1.635648624662693e-05, "loss": 0.3957, "num_tokens": 6404052585.0, "step": 8372 }, { "epoch": 3.0670513877438856, "grad_norm": 0.14047184073955526, "learning_rate": 1.6352429177490284e-05, "loss": 0.4204, "num_tokens": 6404773221.0, "step": 8373 }, { "epoch": 3.0674177887698084, "grad_norm": 0.14010789315193647, "learning_rate": 1.6348372426563425e-05, "loss": 0.3856, "num_tokens": 6405647496.0, "step": 8374 }, { "epoch": 3.0677841897957316, "grad_norm": 0.13447105366557746, "learning_rate": 1.634431599407493e-05, "loss": 0.3841, "num_tokens": 6406348431.0, "step": 8375 }, { "epoch": 3.0681505908216544, "grad_norm": 0.13473931399680056, "learning_rate": 1.634025988025336e-05, "loss": 0.3888, "num_tokens": 6407153367.0, "step": 8376 }, { "epoch": 3.068516991847577, "grad_norm": 0.14350777761942876, "learning_rate": 1.6336204085327262e-05, "loss": 0.4122, "num_tokens": 6407957768.0, "step": 8377 }, { "epoch": 3.0688833928735, "grad_norm": 0.1405456415534607, "learning_rate": 1.6332148609525136e-05, "loss": 0.411, "num_tokens": 6408670545.0, "step": 8378 }, { "epoch": 3.0692497938994228, "grad_norm": 0.1461489909965737, "learning_rate": 1.6328093453075512e-05, "loss": 0.4091, "num_tokens": 6409487563.0, "step": 8379 }, { "epoch": 3.069616194925346, "grad_norm": 0.1479620262025196, "learning_rate": 1.6324038616206856e-05, "loss": 0.4184, "num_tokens": 6410196242.0, "step": 8380 }, { "epoch": 3.0699825959512688, "grad_norm": 0.1562003641974978, "learning_rate": 1.6319984099147652e-05, "loss": 0.3972, "num_tokens": 6410932252.0, "step": 8381 }, { "epoch": 3.0703489969771915, "grad_norm": 0.139821605122823, "learning_rate": 1.631592990212634e-05, "loss": 0.4204, "num_tokens": 6411696579.0, "step": 8382 }, { "epoch": 3.0707153980031143, "grad_norm": 0.1390679813428866, "learning_rate": 1.6311876025371347e-05, "loss": 0.3941, "num_tokens": 6412545669.0, "step": 8383 }, { "epoch": 3.071081799029037, "grad_norm": 0.140151996315567, "learning_rate": 1.63078224691111e-05, "loss": 0.4263, "num_tokens": 6413217171.0, "step": 8384 }, { "epoch": 3.0714482000549603, "grad_norm": 0.14821708761307892, "learning_rate": 1.6303769233573987e-05, "loss": 0.409, "num_tokens": 6413995155.0, "step": 8385 }, { "epoch": 3.071814601080883, "grad_norm": 0.14957424322156607, "learning_rate": 1.6299716318988383e-05, "loss": 0.4055, "num_tokens": 6414838288.0, "step": 8386 }, { "epoch": 3.072181002106806, "grad_norm": 0.14203612242619595, "learning_rate": 1.6295663725582652e-05, "loss": 0.4145, "num_tokens": 6415609085.0, "step": 8387 }, { "epoch": 3.0725474031327287, "grad_norm": 0.14636073708533595, "learning_rate": 1.6291611453585146e-05, "loss": 0.3972, "num_tokens": 6416271808.0, "step": 8388 }, { "epoch": 3.0729138041586515, "grad_norm": 0.14502652685379488, "learning_rate": 1.6287559503224165e-05, "loss": 0.3933, "num_tokens": 6417043105.0, "step": 8389 }, { "epoch": 3.0732802051845747, "grad_norm": 0.1371443298842775, "learning_rate": 1.6283507874728028e-05, "loss": 0.4084, "num_tokens": 6417800244.0, "step": 8390 }, { "epoch": 3.0736466062104975, "grad_norm": 0.13716566353860643, "learning_rate": 1.627945656832502e-05, "loss": 0.4285, "num_tokens": 6418554166.0, "step": 8391 }, { "epoch": 3.0740130072364202, "grad_norm": 0.14490419875133184, "learning_rate": 1.627540558424342e-05, "loss": 0.4338, "num_tokens": 6419369328.0, "step": 8392 }, { "epoch": 3.074379408262343, "grad_norm": 0.15377684196733216, "learning_rate": 1.6271354922711467e-05, "loss": 0.4149, "num_tokens": 6420078423.0, "step": 8393 }, { "epoch": 3.074745809288266, "grad_norm": 0.13911721093612794, "learning_rate": 1.6267304583957392e-05, "loss": 0.362, "num_tokens": 6420928481.0, "step": 8394 }, { "epoch": 3.075112210314189, "grad_norm": 0.13531227879052982, "learning_rate": 1.626325456820941e-05, "loss": 0.3852, "num_tokens": 6421774623.0, "step": 8395 }, { "epoch": 3.075478611340112, "grad_norm": 0.12035075990363171, "learning_rate": 1.6259204875695732e-05, "loss": 0.3832, "num_tokens": 6422461565.0, "step": 8396 }, { "epoch": 3.0758450123660346, "grad_norm": 0.15136217155420714, "learning_rate": 1.6255155506644514e-05, "loss": 0.3995, "num_tokens": 6423295858.0, "step": 8397 }, { "epoch": 3.0762114133919574, "grad_norm": 0.13161865254709415, "learning_rate": 1.625110646128394e-05, "loss": 0.3695, "num_tokens": 6424115660.0, "step": 8398 }, { "epoch": 3.07657781441788, "grad_norm": 0.1427875716681977, "learning_rate": 1.624705773984213e-05, "loss": 0.4071, "num_tokens": 6424875430.0, "step": 8399 }, { "epoch": 3.0769442154438034, "grad_norm": 0.13871698766353574, "learning_rate": 1.6243009342547217e-05, "loss": 0.4009, "num_tokens": 6425710485.0, "step": 8400 }, { "epoch": 3.077310616469726, "grad_norm": 0.13212050081022336, "learning_rate": 1.623896126962731e-05, "loss": 0.426, "num_tokens": 6426433649.0, "step": 8401 }, { "epoch": 3.077677017495649, "grad_norm": 0.16660088745578527, "learning_rate": 1.623491352131048e-05, "loss": 0.4096, "num_tokens": 6427152582.0, "step": 8402 }, { "epoch": 3.0780434185215717, "grad_norm": 0.15066474350802256, "learning_rate": 1.623086609782482e-05, "loss": 0.4065, "num_tokens": 6427913528.0, "step": 8403 }, { "epoch": 3.078409819547495, "grad_norm": 0.15021895983158767, "learning_rate": 1.6226818999398363e-05, "loss": 0.4231, "num_tokens": 6428714048.0, "step": 8404 }, { "epoch": 3.0787762205734177, "grad_norm": 0.14144801767202653, "learning_rate": 1.622277222625914e-05, "loss": 0.3942, "num_tokens": 6429450634.0, "step": 8405 }, { "epoch": 3.0791426215993405, "grad_norm": 0.14940882935138675, "learning_rate": 1.6218725778635175e-05, "loss": 0.4017, "num_tokens": 6430201853.0, "step": 8406 }, { "epoch": 3.0795090226252633, "grad_norm": 0.143290690561936, "learning_rate": 1.621467965675445e-05, "loss": 0.3866, "num_tokens": 6430888059.0, "step": 8407 }, { "epoch": 3.079875423651186, "grad_norm": 0.13608862849826897, "learning_rate": 1.621063386084495e-05, "loss": 0.378, "num_tokens": 6431694565.0, "step": 8408 }, { "epoch": 3.0802418246771093, "grad_norm": 0.13559692964290265, "learning_rate": 1.620658839113464e-05, "loss": 0.3997, "num_tokens": 6432445532.0, "step": 8409 }, { "epoch": 3.080608225703032, "grad_norm": 0.1484248991661367, "learning_rate": 1.6202543247851443e-05, "loss": 0.4123, "num_tokens": 6433302403.0, "step": 8410 }, { "epoch": 3.080974626728955, "grad_norm": 0.14639654068631874, "learning_rate": 1.6198498431223303e-05, "loss": 0.4063, "num_tokens": 6434012967.0, "step": 8411 }, { "epoch": 3.0813410277548776, "grad_norm": 0.1476874367222645, "learning_rate": 1.6194453941478104e-05, "loss": 0.4151, "num_tokens": 6434866806.0, "step": 8412 }, { "epoch": 3.0817074287808004, "grad_norm": 0.13687240734923317, "learning_rate": 1.6190409778843737e-05, "loss": 0.4238, "num_tokens": 6435739095.0, "step": 8413 }, { "epoch": 3.0820738298067236, "grad_norm": 0.1401596888911859, "learning_rate": 1.6186365943548068e-05, "loss": 0.4151, "num_tokens": 6436549596.0, "step": 8414 }, { "epoch": 3.0824402308326464, "grad_norm": 0.14104036176163548, "learning_rate": 1.6182322435818952e-05, "loss": 0.383, "num_tokens": 6437328725.0, "step": 8415 }, { "epoch": 3.082806631858569, "grad_norm": 0.14288032577613427, "learning_rate": 1.6178279255884208e-05, "loss": 0.3912, "num_tokens": 6438065035.0, "step": 8416 }, { "epoch": 3.083173032884492, "grad_norm": 0.14305918334499806, "learning_rate": 1.6174236403971656e-05, "loss": 0.3803, "num_tokens": 6438818918.0, "step": 8417 }, { "epoch": 3.0835394339104147, "grad_norm": 0.14267160995382952, "learning_rate": 1.617019388030908e-05, "loss": 0.4088, "num_tokens": 6439554588.0, "step": 8418 }, { "epoch": 3.083905834936338, "grad_norm": 0.14144445414535234, "learning_rate": 1.6166151685124263e-05, "loss": 0.3868, "num_tokens": 6440385526.0, "step": 8419 }, { "epoch": 3.0842722359622607, "grad_norm": 0.1390187750390713, "learning_rate": 1.6162109818644964e-05, "loss": 0.4252, "num_tokens": 6441106602.0, "step": 8420 }, { "epoch": 3.0846386369881835, "grad_norm": 0.14121110382256935, "learning_rate": 1.61580682810989e-05, "loss": 0.3851, "num_tokens": 6441906598.0, "step": 8421 }, { "epoch": 3.0850050380141063, "grad_norm": 0.1408979940402629, "learning_rate": 1.6154027072713813e-05, "loss": 0.4207, "num_tokens": 6442678612.0, "step": 8422 }, { "epoch": 3.0853714390400295, "grad_norm": 0.1416897161688206, "learning_rate": 1.614998619371739e-05, "loss": 0.3974, "num_tokens": 6443346214.0, "step": 8423 }, { "epoch": 3.0857378400659523, "grad_norm": 0.14677724591068092, "learning_rate": 1.61459456443373e-05, "loss": 0.3768, "num_tokens": 6444151670.0, "step": 8424 }, { "epoch": 3.086104241091875, "grad_norm": 0.13581709420001972, "learning_rate": 1.614190542480124e-05, "loss": 0.4044, "num_tokens": 6444890276.0, "step": 8425 }, { "epoch": 3.086470642117798, "grad_norm": 0.15007416105759921, "learning_rate": 1.6137865535336828e-05, "loss": 0.4002, "num_tokens": 6445625197.0, "step": 8426 }, { "epoch": 3.0868370431437206, "grad_norm": 0.15129149219042312, "learning_rate": 1.613382597617169e-05, "loss": 0.4304, "num_tokens": 6446298602.0, "step": 8427 }, { "epoch": 3.087203444169644, "grad_norm": 0.15301749422971875, "learning_rate": 1.6129786747533455e-05, "loss": 0.4047, "num_tokens": 6447114142.0, "step": 8428 }, { "epoch": 3.0875698451955667, "grad_norm": 0.1395593452119191, "learning_rate": 1.612574784964968e-05, "loss": 0.4188, "num_tokens": 6447920555.0, "step": 8429 }, { "epoch": 3.0879362462214894, "grad_norm": 0.1393789062606886, "learning_rate": 1.6121709282747963e-05, "loss": 0.4038, "num_tokens": 6448694690.0, "step": 8430 }, { "epoch": 3.088302647247412, "grad_norm": 0.14248805760322503, "learning_rate": 1.6117671047055842e-05, "loss": 0.407, "num_tokens": 6449559575.0, "step": 8431 }, { "epoch": 3.088669048273335, "grad_norm": 0.1386175096081512, "learning_rate": 1.6113633142800842e-05, "loss": 0.3973, "num_tokens": 6450487199.0, "step": 8432 }, { "epoch": 3.089035449299258, "grad_norm": 0.12601215501136367, "learning_rate": 1.6109595570210497e-05, "loss": 0.4188, "num_tokens": 6451173937.0, "step": 8433 }, { "epoch": 3.089401850325181, "grad_norm": 0.1551784275682394, "learning_rate": 1.610555832951229e-05, "loss": 0.4115, "num_tokens": 6451969100.0, "step": 8434 }, { "epoch": 3.089768251351104, "grad_norm": 0.13832609959524297, "learning_rate": 1.610152142093369e-05, "loss": 0.3925, "num_tokens": 6452634749.0, "step": 8435 }, { "epoch": 3.0901346523770266, "grad_norm": 0.1542270195151582, "learning_rate": 1.6097484844702173e-05, "loss": 0.413, "num_tokens": 6453333905.0, "step": 8436 }, { "epoch": 3.0905010534029493, "grad_norm": 0.1544002422875484, "learning_rate": 1.609344860104516e-05, "loss": 0.45, "num_tokens": 6454136303.0, "step": 8437 }, { "epoch": 3.0908674544288726, "grad_norm": 0.13951448985528747, "learning_rate": 1.6089412690190084e-05, "loss": 0.4072, "num_tokens": 6454948431.0, "step": 8438 }, { "epoch": 3.0912338554547953, "grad_norm": 0.14365616989595584, "learning_rate": 1.6085377112364345e-05, "loss": 0.3933, "num_tokens": 6455673834.0, "step": 8439 }, { "epoch": 3.091600256480718, "grad_norm": 0.1356353475014294, "learning_rate": 1.6081341867795317e-05, "loss": 0.3815, "num_tokens": 6456344916.0, "step": 8440 }, { "epoch": 3.091966657506641, "grad_norm": 0.1575964859890774, "learning_rate": 1.607730695671037e-05, "loss": 0.4294, "num_tokens": 6457117485.0, "step": 8441 }, { "epoch": 3.0923330585325637, "grad_norm": 0.1472503019565779, "learning_rate": 1.6073272379336855e-05, "loss": 0.4004, "num_tokens": 6457905574.0, "step": 8442 }, { "epoch": 3.092699459558487, "grad_norm": 0.14251548757642402, "learning_rate": 1.606923813590209e-05, "loss": 0.4026, "num_tokens": 6458647512.0, "step": 8443 }, { "epoch": 3.0930658605844097, "grad_norm": 0.15906741303573665, "learning_rate": 1.6065204226633383e-05, "loss": 0.396, "num_tokens": 6459454712.0, "step": 8444 }, { "epoch": 3.0934322616103325, "grad_norm": 0.13240349655802244, "learning_rate": 1.6061170651758025e-05, "loss": 0.3804, "num_tokens": 6460230215.0, "step": 8445 }, { "epoch": 3.0937986626362552, "grad_norm": 0.1472741317555653, "learning_rate": 1.605713741150329e-05, "loss": 0.4126, "num_tokens": 6460842145.0, "step": 8446 }, { "epoch": 3.094165063662178, "grad_norm": 0.1698138027767308, "learning_rate": 1.6053104506096424e-05, "loss": 0.405, "num_tokens": 6461525417.0, "step": 8447 }, { "epoch": 3.0945314646881013, "grad_norm": 0.14649482113220896, "learning_rate": 1.6049071935764655e-05, "loss": 0.4026, "num_tokens": 6462186567.0, "step": 8448 }, { "epoch": 3.094897865714024, "grad_norm": 0.14767951412131144, "learning_rate": 1.6045039700735205e-05, "loss": 0.4235, "num_tokens": 6463105165.0, "step": 8449 }, { "epoch": 3.095264266739947, "grad_norm": 0.15044369703295685, "learning_rate": 1.6041007801235265e-05, "loss": 0.4095, "num_tokens": 6463927616.0, "step": 8450 }, { "epoch": 3.0956306677658696, "grad_norm": 0.13448916019645843, "learning_rate": 1.603697623749201e-05, "loss": 0.4373, "num_tokens": 6464790948.0, "step": 8451 }, { "epoch": 3.095997068791793, "grad_norm": 0.1390604795184504, "learning_rate": 1.60329450097326e-05, "loss": 0.431, "num_tokens": 6465485656.0, "step": 8452 }, { "epoch": 3.0963634698177156, "grad_norm": 0.15862749326205666, "learning_rate": 1.6028914118184166e-05, "loss": 0.4347, "num_tokens": 6466332955.0, "step": 8453 }, { "epoch": 3.0967298708436384, "grad_norm": 0.14848636524623685, "learning_rate": 1.602488356307383e-05, "loss": 0.3778, "num_tokens": 6467020641.0, "step": 8454 }, { "epoch": 3.097096271869561, "grad_norm": 0.144563794184552, "learning_rate": 1.6020853344628696e-05, "loss": 0.4128, "num_tokens": 6467643638.0, "step": 8455 }, { "epoch": 3.097462672895484, "grad_norm": 0.1553306940925656, "learning_rate": 1.6016823463075835e-05, "loss": 0.3997, "num_tokens": 6468483603.0, "step": 8456 }, { "epoch": 3.097829073921407, "grad_norm": 0.13363337907149098, "learning_rate": 1.6012793918642323e-05, "loss": 0.4417, "num_tokens": 6469311171.0, "step": 8457 }, { "epoch": 3.09819547494733, "grad_norm": 0.15409740222843415, "learning_rate": 1.6008764711555187e-05, "loss": 0.391, "num_tokens": 6469999745.0, "step": 8458 }, { "epoch": 3.0985618759732527, "grad_norm": 0.14171024049603503, "learning_rate": 1.6004735842041462e-05, "loss": 0.3881, "num_tokens": 6470718527.0, "step": 8459 }, { "epoch": 3.0989282769991755, "grad_norm": 0.146414748072218, "learning_rate": 1.6000707310328155e-05, "loss": 0.4091, "num_tokens": 6471388228.0, "step": 8460 }, { "epoch": 3.0992946780250983, "grad_norm": 0.157674967100007, "learning_rate": 1.599667911664224e-05, "loss": 0.4389, "num_tokens": 6472079989.0, "step": 8461 }, { "epoch": 3.0996610790510215, "grad_norm": 0.15959473536416985, "learning_rate": 1.5992651261210686e-05, "loss": 0.4255, "num_tokens": 6472812961.0, "step": 8462 }, { "epoch": 3.1000274800769443, "grad_norm": 0.14546167855801104, "learning_rate": 1.5988623744260455e-05, "loss": 0.408, "num_tokens": 6473556899.0, "step": 8463 }, { "epoch": 3.100393881102867, "grad_norm": 0.15634280185858906, "learning_rate": 1.5984596566018452e-05, "loss": 0.4096, "num_tokens": 6474300290.0, "step": 8464 }, { "epoch": 3.10076028212879, "grad_norm": 0.15161440556946348, "learning_rate": 1.598056972671161e-05, "loss": 0.3806, "num_tokens": 6474974764.0, "step": 8465 }, { "epoch": 3.1011266831547126, "grad_norm": 0.15571613963172257, "learning_rate": 1.5976543226566805e-05, "loss": 0.4283, "num_tokens": 6475677027.0, "step": 8466 }, { "epoch": 3.101493084180636, "grad_norm": 0.15030277745883364, "learning_rate": 1.5972517065810912e-05, "loss": 0.415, "num_tokens": 6476464227.0, "step": 8467 }, { "epoch": 3.1018594852065586, "grad_norm": 0.13627685179312085, "learning_rate": 1.5968491244670793e-05, "loss": 0.3853, "num_tokens": 6477084052.0, "step": 8468 }, { "epoch": 3.1022258862324814, "grad_norm": 0.15087111546699014, "learning_rate": 1.5964465763373262e-05, "loss": 0.43, "num_tokens": 6477822585.0, "step": 8469 }, { "epoch": 3.102592287258404, "grad_norm": 0.14791803501358824, "learning_rate": 1.596044062214514e-05, "loss": 0.4141, "num_tokens": 6478690575.0, "step": 8470 }, { "epoch": 3.1029586882843274, "grad_norm": 0.1362584199957846, "learning_rate": 1.5956415821213228e-05, "loss": 0.4083, "num_tokens": 6479412389.0, "step": 8471 }, { "epoch": 3.10332508931025, "grad_norm": 0.14003272371481706, "learning_rate": 1.5952391360804304e-05, "loss": 0.3968, "num_tokens": 6480215477.0, "step": 8472 }, { "epoch": 3.103691490336173, "grad_norm": 0.1386365699400543, "learning_rate": 1.594836724114511e-05, "loss": 0.415, "num_tokens": 6481028153.0, "step": 8473 }, { "epoch": 3.1040578913620958, "grad_norm": 0.14307933626392594, "learning_rate": 1.5944343462462395e-05, "loss": 0.3848, "num_tokens": 6481713629.0, "step": 8474 }, { "epoch": 3.1044242923880185, "grad_norm": 0.140111077565582, "learning_rate": 1.594032002498287e-05, "loss": 0.3926, "num_tokens": 6482531392.0, "step": 8475 }, { "epoch": 3.1047906934139418, "grad_norm": 0.14083440421818855, "learning_rate": 1.5936296928933238e-05, "loss": 0.4247, "num_tokens": 6483177897.0, "step": 8476 }, { "epoch": 3.1051570944398645, "grad_norm": 0.17017506288463996, "learning_rate": 1.5932274174540184e-05, "loss": 0.4203, "num_tokens": 6484003567.0, "step": 8477 }, { "epoch": 3.1055234954657873, "grad_norm": 0.13364042201087226, "learning_rate": 1.5928251762030352e-05, "loss": 0.4062, "num_tokens": 6484842454.0, "step": 8478 }, { "epoch": 3.10588989649171, "grad_norm": 0.13998792858429243, "learning_rate": 1.5924229691630403e-05, "loss": 0.4075, "num_tokens": 6485567767.0, "step": 8479 }, { "epoch": 3.106256297517633, "grad_norm": 0.1367873718710013, "learning_rate": 1.5920207963566944e-05, "loss": 0.4056, "num_tokens": 6486426556.0, "step": 8480 }, { "epoch": 3.106622698543556, "grad_norm": 0.1411246670541191, "learning_rate": 1.5916186578066573e-05, "loss": 0.4139, "num_tokens": 6487156102.0, "step": 8481 }, { "epoch": 3.106989099569479, "grad_norm": 0.142403516323774, "learning_rate": 1.5912165535355896e-05, "loss": 0.3809, "num_tokens": 6488040376.0, "step": 8482 }, { "epoch": 3.1073555005954017, "grad_norm": 0.13922160067845157, "learning_rate": 1.5908144835661453e-05, "loss": 0.4015, "num_tokens": 6488736305.0, "step": 8483 }, { "epoch": 3.1077219016213244, "grad_norm": 0.14843519198594904, "learning_rate": 1.5904124479209803e-05, "loss": 0.4317, "num_tokens": 6489423231.0, "step": 8484 }, { "epoch": 3.1080883026472472, "grad_norm": 0.15200754481316467, "learning_rate": 1.5900104466227474e-05, "loss": 0.393, "num_tokens": 6490217072.0, "step": 8485 }, { "epoch": 3.1084547036731704, "grad_norm": 0.1470677931219864, "learning_rate": 1.5896084796940945e-05, "loss": 0.4275, "num_tokens": 6490958269.0, "step": 8486 }, { "epoch": 3.1088211046990932, "grad_norm": 0.14855064232584717, "learning_rate": 1.5892065471576738e-05, "loss": 0.4417, "num_tokens": 6491646872.0, "step": 8487 }, { "epoch": 3.109187505725016, "grad_norm": 0.14951324227196014, "learning_rate": 1.58880464903613e-05, "loss": 0.4031, "num_tokens": 6492485116.0, "step": 8488 }, { "epoch": 3.109553906750939, "grad_norm": 0.1477894882132944, "learning_rate": 1.5884027853521084e-05, "loss": 0.3996, "num_tokens": 6493249052.0, "step": 8489 }, { "epoch": 3.1099203077768616, "grad_norm": 0.1327284460315555, "learning_rate": 1.5880009561282515e-05, "loss": 0.386, "num_tokens": 6494087331.0, "step": 8490 }, { "epoch": 3.110286708802785, "grad_norm": 0.13250191011964316, "learning_rate": 1.5875991613872e-05, "loss": 0.3935, "num_tokens": 6494834678.0, "step": 8491 }, { "epoch": 3.1106531098287076, "grad_norm": 0.1484904657401571, "learning_rate": 1.587197401151593e-05, "loss": 0.3729, "num_tokens": 6495566423.0, "step": 8492 }, { "epoch": 3.1110195108546304, "grad_norm": 0.1536218419788846, "learning_rate": 1.5867956754440683e-05, "loss": 0.4065, "num_tokens": 6496228321.0, "step": 8493 }, { "epoch": 3.111385911880553, "grad_norm": 0.1482357354235167, "learning_rate": 1.5863939842872598e-05, "loss": 0.3776, "num_tokens": 6496964719.0, "step": 8494 }, { "epoch": 3.111752312906476, "grad_norm": 0.13618527117517207, "learning_rate": 1.585992327703802e-05, "loss": 0.4214, "num_tokens": 6497729329.0, "step": 8495 }, { "epoch": 3.112118713932399, "grad_norm": 0.1435534303899256, "learning_rate": 1.5855907057163253e-05, "loss": 0.3974, "num_tokens": 6498524790.0, "step": 8496 }, { "epoch": 3.112485114958322, "grad_norm": 0.13939238293838718, "learning_rate": 1.585189118347458e-05, "loss": 0.3863, "num_tokens": 6499340854.0, "step": 8497 }, { "epoch": 3.1128515159842447, "grad_norm": 0.13522629158197524, "learning_rate": 1.5847875656198284e-05, "loss": 0.4, "num_tokens": 6500106114.0, "step": 8498 }, { "epoch": 3.1132179170101675, "grad_norm": 0.14916958747264566, "learning_rate": 1.5843860475560617e-05, "loss": 0.404, "num_tokens": 6500732347.0, "step": 8499 }, { "epoch": 3.1135843180360907, "grad_norm": 0.15518880677968824, "learning_rate": 1.5839845641787808e-05, "loss": 0.411, "num_tokens": 6501541828.0, "step": 8500 }, { "epoch": 3.1139507190620135, "grad_norm": 0.16484391142304974, "learning_rate": 1.5835831155106078e-05, "loss": 0.4343, "num_tokens": 6502341153.0, "step": 8501 }, { "epoch": 3.1143171200879363, "grad_norm": 0.14464044194653758, "learning_rate": 1.5831817015741612e-05, "loss": 0.3973, "num_tokens": 6503047957.0, "step": 8502 }, { "epoch": 3.114683521113859, "grad_norm": 0.15562970991497033, "learning_rate": 1.58278032239206e-05, "loss": 0.4137, "num_tokens": 6503686005.0, "step": 8503 }, { "epoch": 3.115049922139782, "grad_norm": 0.1482586651084228, "learning_rate": 1.582378977986918e-05, "loss": 0.3934, "num_tokens": 6504523862.0, "step": 8504 }, { "epoch": 3.115416323165705, "grad_norm": 0.1501283440740618, "learning_rate": 1.5819776683813498e-05, "loss": 0.394, "num_tokens": 6505242122.0, "step": 8505 }, { "epoch": 3.115782724191628, "grad_norm": 0.147220508311029, "learning_rate": 1.5815763935979664e-05, "loss": 0.4052, "num_tokens": 6506029513.0, "step": 8506 }, { "epoch": 3.1161491252175506, "grad_norm": 0.13802611244740035, "learning_rate": 1.5811751536593786e-05, "loss": 0.4109, "num_tokens": 6506754559.0, "step": 8507 }, { "epoch": 3.1165155262434734, "grad_norm": 0.1625690842506127, "learning_rate": 1.580773948588192e-05, "loss": 0.3992, "num_tokens": 6507499313.0, "step": 8508 }, { "epoch": 3.116881927269396, "grad_norm": 0.17512026974746692, "learning_rate": 1.5803727784070152e-05, "loss": 0.423, "num_tokens": 6508191867.0, "step": 8509 }, { "epoch": 3.1172483282953194, "grad_norm": 0.1421560400276928, "learning_rate": 1.5799716431384492e-05, "loss": 0.4306, "num_tokens": 6508850607.0, "step": 8510 }, { "epoch": 3.117614729321242, "grad_norm": 0.15469758588517443, "learning_rate": 1.5795705428050965e-05, "loss": 0.4275, "num_tokens": 6509644475.0, "step": 8511 }, { "epoch": 3.117981130347165, "grad_norm": 0.15878253559192507, "learning_rate": 1.5791694774295586e-05, "loss": 0.4082, "num_tokens": 6510328749.0, "step": 8512 }, { "epoch": 3.1183475313730877, "grad_norm": 0.1594601453909196, "learning_rate": 1.578768447034431e-05, "loss": 0.441, "num_tokens": 6511034686.0, "step": 8513 }, { "epoch": 3.1187139323990105, "grad_norm": 0.15369254005124464, "learning_rate": 1.5783674516423115e-05, "loss": 0.4155, "num_tokens": 6511764328.0, "step": 8514 }, { "epoch": 3.1190803334249337, "grad_norm": 0.14316079226735146, "learning_rate": 1.5779664912757926e-05, "loss": 0.4272, "num_tokens": 6512490153.0, "step": 8515 }, { "epoch": 3.1194467344508565, "grad_norm": 0.14309669946429615, "learning_rate": 1.577565565957467e-05, "loss": 0.4479, "num_tokens": 6513268434.0, "step": 8516 }, { "epoch": 3.1198131354767793, "grad_norm": 0.15052351238230077, "learning_rate": 1.5771646757099247e-05, "loss": 0.4073, "num_tokens": 6513953026.0, "step": 8517 }, { "epoch": 3.120179536502702, "grad_norm": 0.14451552595172612, "learning_rate": 1.576763820555753e-05, "loss": 0.4019, "num_tokens": 6514793037.0, "step": 8518 }, { "epoch": 3.1205459375286253, "grad_norm": 0.15065405376176805, "learning_rate": 1.5763630005175383e-05, "loss": 0.4118, "num_tokens": 6515520804.0, "step": 8519 }, { "epoch": 3.120912338554548, "grad_norm": 0.14489360261772286, "learning_rate": 1.5759622156178646e-05, "loss": 0.4164, "num_tokens": 6516263479.0, "step": 8520 }, { "epoch": 3.121278739580471, "grad_norm": 0.1354363687120115, "learning_rate": 1.5755614658793136e-05, "loss": 0.4099, "num_tokens": 6517124286.0, "step": 8521 }, { "epoch": 3.1216451406063936, "grad_norm": 0.1454541504087196, "learning_rate": 1.5751607513244672e-05, "loss": 0.4156, "num_tokens": 6517876341.0, "step": 8522 }, { "epoch": 3.1220115416323164, "grad_norm": 0.1455718034259141, "learning_rate": 1.5747600719759008e-05, "loss": 0.3929, "num_tokens": 6518648604.0, "step": 8523 }, { "epoch": 3.1223779426582396, "grad_norm": 0.145589159405785, "learning_rate": 1.5743594278561912e-05, "loss": 0.4184, "num_tokens": 6519427794.0, "step": 8524 }, { "epoch": 3.1227443436841624, "grad_norm": 0.15027119429485883, "learning_rate": 1.5739588189879143e-05, "loss": 0.3976, "num_tokens": 6520123109.0, "step": 8525 }, { "epoch": 3.123110744710085, "grad_norm": 0.13822811033686785, "learning_rate": 1.5735582453936408e-05, "loss": 0.4161, "num_tokens": 6520947813.0, "step": 8526 }, { "epoch": 3.123477145736008, "grad_norm": 0.14811960320880743, "learning_rate": 1.5731577070959403e-05, "loss": 0.3792, "num_tokens": 6521632089.0, "step": 8527 }, { "epoch": 3.1238435467619308, "grad_norm": 0.155055498933487, "learning_rate": 1.572757204117382e-05, "loss": 0.4073, "num_tokens": 6522440981.0, "step": 8528 }, { "epoch": 3.124209947787854, "grad_norm": 0.15149813946921603, "learning_rate": 1.572356736480532e-05, "loss": 0.38, "num_tokens": 6523274239.0, "step": 8529 }, { "epoch": 3.1245763488137768, "grad_norm": 0.1376165742743159, "learning_rate": 1.5719563042079533e-05, "loss": 0.4013, "num_tokens": 6523944990.0, "step": 8530 }, { "epoch": 3.1249427498396996, "grad_norm": 0.1386356628967139, "learning_rate": 1.5715559073222092e-05, "loss": 0.4039, "num_tokens": 6524810274.0, "step": 8531 }, { "epoch": 3.1253091508656223, "grad_norm": 0.15895233403104794, "learning_rate": 1.5711555458458593e-05, "loss": 0.3761, "num_tokens": 6525697574.0, "step": 8532 }, { "epoch": 3.125675551891545, "grad_norm": 0.1365338597819216, "learning_rate": 1.5707552198014626e-05, "loss": 0.4118, "num_tokens": 6526363049.0, "step": 8533 }, { "epoch": 3.1260419529174683, "grad_norm": 0.15444424569306556, "learning_rate": 1.5703549292115755e-05, "loss": 0.4162, "num_tokens": 6527132237.0, "step": 8534 }, { "epoch": 3.126408353943391, "grad_norm": 0.1599291591507782, "learning_rate": 1.5699546740987502e-05, "loss": 0.4081, "num_tokens": 6527782427.0, "step": 8535 }, { "epoch": 3.126774754969314, "grad_norm": 0.15349757920243842, "learning_rate": 1.5695544544855406e-05, "loss": 0.4032, "num_tokens": 6528488157.0, "step": 8536 }, { "epoch": 3.1271411559952367, "grad_norm": 0.14427814828249733, "learning_rate": 1.5691542703944968e-05, "loss": 0.4141, "num_tokens": 6529291826.0, "step": 8537 }, { "epoch": 3.1275075570211595, "grad_norm": 0.14543333983069906, "learning_rate": 1.5687541218481653e-05, "loss": 0.373, "num_tokens": 6530094936.0, "step": 8538 }, { "epoch": 3.1278739580470827, "grad_norm": 0.1423383062848699, "learning_rate": 1.5683540088690952e-05, "loss": 0.4271, "num_tokens": 6530754315.0, "step": 8539 }, { "epoch": 3.1282403590730055, "grad_norm": 0.161442575795512, "learning_rate": 1.567953931479828e-05, "loss": 0.4401, "num_tokens": 6531455176.0, "step": 8540 }, { "epoch": 3.1286067600989282, "grad_norm": 0.14087827518014143, "learning_rate": 1.5675538897029077e-05, "loss": 0.4048, "num_tokens": 6532073921.0, "step": 8541 }, { "epoch": 3.128973161124851, "grad_norm": 0.15826906364595464, "learning_rate": 1.5671538835608737e-05, "loss": 0.3866, "num_tokens": 6532894680.0, "step": 8542 }, { "epoch": 3.129339562150774, "grad_norm": 0.13436390068037807, "learning_rate": 1.5667539130762634e-05, "loss": 0.3936, "num_tokens": 6533639006.0, "step": 8543 }, { "epoch": 3.129705963176697, "grad_norm": 0.14922764414662726, "learning_rate": 1.566353978271615e-05, "loss": 0.3946, "num_tokens": 6534475450.0, "step": 8544 }, { "epoch": 3.13007236420262, "grad_norm": 0.13815586689517692, "learning_rate": 1.5659540791694605e-05, "loss": 0.3977, "num_tokens": 6535254582.0, "step": 8545 }, { "epoch": 3.1304387652285426, "grad_norm": 0.15167708033836877, "learning_rate": 1.5655542157923328e-05, "loss": 0.4301, "num_tokens": 6536047182.0, "step": 8546 }, { "epoch": 3.1308051662544654, "grad_norm": 0.1455987535018969, "learning_rate": 1.565154388162763e-05, "loss": 0.4008, "num_tokens": 6536955675.0, "step": 8547 }, { "epoch": 3.1311715672803886, "grad_norm": 0.12031590087952271, "learning_rate": 1.5647545963032783e-05, "loss": 0.4052, "num_tokens": 6537818990.0, "step": 8548 }, { "epoch": 3.1315379683063114, "grad_norm": 0.13381528323230146, "learning_rate": 1.564354840236404e-05, "loss": 0.4207, "num_tokens": 6538664997.0, "step": 8549 }, { "epoch": 3.131904369332234, "grad_norm": 0.13822664309894125, "learning_rate": 1.5639551199846654e-05, "loss": 0.411, "num_tokens": 6539438617.0, "step": 8550 }, { "epoch": 3.132270770358157, "grad_norm": 0.15169335706177997, "learning_rate": 1.563555435570584e-05, "loss": 0.3962, "num_tokens": 6540139052.0, "step": 8551 }, { "epoch": 3.1326371713840797, "grad_norm": 0.13952245428602153, "learning_rate": 1.5631557870166803e-05, "loss": 0.4254, "num_tokens": 6540837986.0, "step": 8552 }, { "epoch": 3.133003572410003, "grad_norm": 0.15302741705023554, "learning_rate": 1.5627561743454722e-05, "loss": 0.3845, "num_tokens": 6541561102.0, "step": 8553 }, { "epoch": 3.1333699734359257, "grad_norm": 0.13659932248037404, "learning_rate": 1.5623565975794754e-05, "loss": 0.4018, "num_tokens": 6542397003.0, "step": 8554 }, { "epoch": 3.1337363744618485, "grad_norm": 0.14028344150484665, "learning_rate": 1.5619570567412033e-05, "loss": 0.3935, "num_tokens": 6543174411.0, "step": 8555 }, { "epoch": 3.1341027754877713, "grad_norm": 0.1446155440899002, "learning_rate": 1.56155755185317e-05, "loss": 0.4019, "num_tokens": 6543869156.0, "step": 8556 }, { "epoch": 3.134469176513694, "grad_norm": 0.14718991387036254, "learning_rate": 1.5611580829378826e-05, "loss": 0.409, "num_tokens": 6544555168.0, "step": 8557 }, { "epoch": 3.1348355775396173, "grad_norm": 0.16209179779637656, "learning_rate": 1.560758650017851e-05, "loss": 0.4513, "num_tokens": 6545318711.0, "step": 8558 }, { "epoch": 3.13520197856554, "grad_norm": 0.15755812483108525, "learning_rate": 1.5603592531155797e-05, "loss": 0.4059, "num_tokens": 6546088662.0, "step": 8559 }, { "epoch": 3.135568379591463, "grad_norm": 0.1469398427333047, "learning_rate": 1.559959892253574e-05, "loss": 0.4305, "num_tokens": 6546926219.0, "step": 8560 }, { "epoch": 3.1359347806173856, "grad_norm": 0.14768666743216874, "learning_rate": 1.559560567454335e-05, "loss": 0.3874, "num_tokens": 6547559259.0, "step": 8561 }, { "epoch": 3.1363011816433084, "grad_norm": 0.14640809808310867, "learning_rate": 1.559161278740362e-05, "loss": 0.4168, "num_tokens": 6548262174.0, "step": 8562 }, { "epoch": 3.1366675826692316, "grad_norm": 0.15929028540825194, "learning_rate": 1.5587620261341534e-05, "loss": 0.3807, "num_tokens": 6549023870.0, "step": 8563 }, { "epoch": 3.1370339836951544, "grad_norm": 0.15288450780769489, "learning_rate": 1.558362809658205e-05, "loss": 0.411, "num_tokens": 6549711154.0, "step": 8564 }, { "epoch": 3.137400384721077, "grad_norm": 0.15056196282131393, "learning_rate": 1.5579636293350092e-05, "loss": 0.4041, "num_tokens": 6550607900.0, "step": 8565 }, { "epoch": 3.137766785747, "grad_norm": 0.14209824298294438, "learning_rate": 1.5575644851870594e-05, "loss": 0.4144, "num_tokens": 6551359016.0, "step": 8566 }, { "epoch": 3.138133186772923, "grad_norm": 0.14980759696195908, "learning_rate": 1.5571653772368436e-05, "loss": 0.3991, "num_tokens": 6552189412.0, "step": 8567 }, { "epoch": 3.138499587798846, "grad_norm": 0.1453797691012553, "learning_rate": 1.5567663055068508e-05, "loss": 0.3515, "num_tokens": 6553031412.0, "step": 8568 }, { "epoch": 3.1388659888247687, "grad_norm": 0.13731521464948432, "learning_rate": 1.5563672700195657e-05, "loss": 0.3828, "num_tokens": 6553753142.0, "step": 8569 }, { "epoch": 3.1392323898506915, "grad_norm": 0.13735368183578572, "learning_rate": 1.555968270797472e-05, "loss": 0.3851, "num_tokens": 6554508528.0, "step": 8570 }, { "epoch": 3.1395987908766143, "grad_norm": 0.15498589351584904, "learning_rate": 1.5555693078630504e-05, "loss": 0.3984, "num_tokens": 6555297885.0, "step": 8571 }, { "epoch": 3.1399651919025375, "grad_norm": 0.13085696275846395, "learning_rate": 1.555170381238782e-05, "loss": 0.4348, "num_tokens": 6556120328.0, "step": 8572 }, { "epoch": 3.1403315929284603, "grad_norm": 0.155422304913532, "learning_rate": 1.5547714909471417e-05, "loss": 0.4193, "num_tokens": 6556829396.0, "step": 8573 }, { "epoch": 3.140697993954383, "grad_norm": 0.14879377615714856, "learning_rate": 1.5543726370106075e-05, "loss": 0.3829, "num_tokens": 6557717340.0, "step": 8574 }, { "epoch": 3.141064394980306, "grad_norm": 0.12731670296321845, "learning_rate": 1.5539738194516502e-05, "loss": 0.366, "num_tokens": 6558450309.0, "step": 8575 }, { "epoch": 3.1414307960062287, "grad_norm": 0.1392646774511166, "learning_rate": 1.5535750382927425e-05, "loss": 0.4282, "num_tokens": 6559325427.0, "step": 8576 }, { "epoch": 3.141797197032152, "grad_norm": 0.14761141222184854, "learning_rate": 1.5531762935563535e-05, "loss": 0.3948, "num_tokens": 6559999108.0, "step": 8577 }, { "epoch": 3.1421635980580747, "grad_norm": 0.15228727148972415, "learning_rate": 1.552777585264949e-05, "loss": 0.409, "num_tokens": 6560767890.0, "step": 8578 }, { "epoch": 3.1425299990839974, "grad_norm": 0.1514334167682025, "learning_rate": 1.5523789134409958e-05, "loss": 0.4119, "num_tokens": 6561439966.0, "step": 8579 }, { "epoch": 3.14289640010992, "grad_norm": 0.1463320741771718, "learning_rate": 1.5519802781069557e-05, "loss": 0.4257, "num_tokens": 6562119270.0, "step": 8580 }, { "epoch": 3.143262801135843, "grad_norm": 0.14685791724963432, "learning_rate": 1.55158167928529e-05, "loss": 0.4347, "num_tokens": 6562851423.0, "step": 8581 }, { "epoch": 3.1436292021617662, "grad_norm": 0.1510727403956146, "learning_rate": 1.5511831169984582e-05, "loss": 0.425, "num_tokens": 6563691054.0, "step": 8582 }, { "epoch": 3.143995603187689, "grad_norm": 0.14671988789056212, "learning_rate": 1.5507845912689155e-05, "loss": 0.4045, "num_tokens": 6564450835.0, "step": 8583 }, { "epoch": 3.144362004213612, "grad_norm": 0.1386115475157461, "learning_rate": 1.5503861021191174e-05, "loss": 0.3833, "num_tokens": 6565180064.0, "step": 8584 }, { "epoch": 3.1447284052395346, "grad_norm": 0.13574219877333243, "learning_rate": 1.5499876495715175e-05, "loss": 0.4124, "num_tokens": 6565949999.0, "step": 8585 }, { "epoch": 3.1450948062654573, "grad_norm": 0.14246326148921237, "learning_rate": 1.549589233648565e-05, "loss": 0.3987, "num_tokens": 6566705108.0, "step": 8586 }, { "epoch": 3.1454612072913806, "grad_norm": 0.13828359864887058, "learning_rate": 1.54919085437271e-05, "loss": 0.3894, "num_tokens": 6567489844.0, "step": 8587 }, { "epoch": 3.1458276083173033, "grad_norm": 0.134109669381129, "learning_rate": 1.548792511766398e-05, "loss": 0.3877, "num_tokens": 6568240386.0, "step": 8588 }, { "epoch": 3.146194009343226, "grad_norm": 0.1423904942325176, "learning_rate": 1.5483942058520728e-05, "loss": 0.4116, "num_tokens": 6568922011.0, "step": 8589 }, { "epoch": 3.146560410369149, "grad_norm": 0.14368401308436068, "learning_rate": 1.547995936652178e-05, "loss": 0.4203, "num_tokens": 6569729918.0, "step": 8590 }, { "epoch": 3.1469268113950717, "grad_norm": 0.1309511581497415, "learning_rate": 1.5475977041891542e-05, "loss": 0.4393, "num_tokens": 6570524503.0, "step": 8591 }, { "epoch": 3.147293212420995, "grad_norm": 0.15551536552706902, "learning_rate": 1.5471995084854378e-05, "loss": 0.4192, "num_tokens": 6571277188.0, "step": 8592 }, { "epoch": 3.1476596134469177, "grad_norm": 0.15162683685369774, "learning_rate": 1.5468013495634665e-05, "loss": 0.4031, "num_tokens": 6571974928.0, "step": 8593 }, { "epoch": 3.1480260144728405, "grad_norm": 0.1460550501857127, "learning_rate": 1.546403227445674e-05, "loss": 0.4127, "num_tokens": 6572801607.0, "step": 8594 }, { "epoch": 3.1483924154987633, "grad_norm": 0.14101632436146658, "learning_rate": 1.5460051421544914e-05, "loss": 0.4136, "num_tokens": 6573598567.0, "step": 8595 }, { "epoch": 3.1487588165246865, "grad_norm": 0.1339352028214592, "learning_rate": 1.5456070937123508e-05, "loss": 0.3953, "num_tokens": 6574309978.0, "step": 8596 }, { "epoch": 3.1491252175506093, "grad_norm": 0.15248343492945865, "learning_rate": 1.5452090821416774e-05, "loss": 0.4011, "num_tokens": 6575093519.0, "step": 8597 }, { "epoch": 3.149491618576532, "grad_norm": 0.1440353502763484, "learning_rate": 1.5448111074648992e-05, "loss": 0.3914, "num_tokens": 6575768907.0, "step": 8598 }, { "epoch": 3.149858019602455, "grad_norm": 0.1504865277575745, "learning_rate": 1.544413169704439e-05, "loss": 0.3925, "num_tokens": 6576596116.0, "step": 8599 }, { "epoch": 3.1502244206283776, "grad_norm": 0.1353428754436237, "learning_rate": 1.5440152688827176e-05, "loss": 0.4137, "num_tokens": 6577357178.0, "step": 8600 }, { "epoch": 3.150590821654301, "grad_norm": 0.14523136488998076, "learning_rate": 1.543617405022156e-05, "loss": 0.385, "num_tokens": 6578155933.0, "step": 8601 }, { "epoch": 3.1509572226802236, "grad_norm": 0.14274910759954945, "learning_rate": 1.543219578145171e-05, "loss": 0.3719, "num_tokens": 6578978993.0, "step": 8602 }, { "epoch": 3.1513236237061464, "grad_norm": 0.13278273331641668, "learning_rate": 1.5428217882741775e-05, "loss": 0.4439, "num_tokens": 6579704345.0, "step": 8603 }, { "epoch": 3.151690024732069, "grad_norm": 0.14392052316450674, "learning_rate": 1.54242403543159e-05, "loss": 0.3992, "num_tokens": 6580460874.0, "step": 8604 }, { "epoch": 3.152056425757992, "grad_norm": 0.13847067666668644, "learning_rate": 1.5420263196398186e-05, "loss": 0.3845, "num_tokens": 6581197955.0, "step": 8605 }, { "epoch": 3.152422826783915, "grad_norm": 0.1403653231908891, "learning_rate": 1.5416286409212725e-05, "loss": 0.4547, "num_tokens": 6581893451.0, "step": 8606 }, { "epoch": 3.152789227809838, "grad_norm": 0.15343663196749782, "learning_rate": 1.5412309992983595e-05, "loss": 0.3989, "num_tokens": 6582677202.0, "step": 8607 }, { "epoch": 3.1531556288357607, "grad_norm": 0.1398193995305383, "learning_rate": 1.5408333947934835e-05, "loss": 0.4046, "num_tokens": 6583503014.0, "step": 8608 }, { "epoch": 3.1535220298616835, "grad_norm": 0.13218573076087675, "learning_rate": 1.5404358274290484e-05, "loss": 0.3827, "num_tokens": 6584257918.0, "step": 8609 }, { "epoch": 3.1538884308876063, "grad_norm": 0.14507880732558065, "learning_rate": 1.5400382972274544e-05, "loss": 0.4079, "num_tokens": 6584944579.0, "step": 8610 }, { "epoch": 3.1542548319135295, "grad_norm": 0.148712163564874, "learning_rate": 1.5396408042111e-05, "loss": 0.3832, "num_tokens": 6585762496.0, "step": 8611 }, { "epoch": 3.1546212329394523, "grad_norm": 0.12926361861602423, "learning_rate": 1.5392433484023817e-05, "loss": 0.3852, "num_tokens": 6586569687.0, "step": 8612 }, { "epoch": 3.154987633965375, "grad_norm": 0.1329152617948745, "learning_rate": 1.5388459298236948e-05, "loss": 0.4285, "num_tokens": 6587333088.0, "step": 8613 }, { "epoch": 3.155354034991298, "grad_norm": 0.1532766848005202, "learning_rate": 1.5384485484974308e-05, "loss": 0.4151, "num_tokens": 6588163661.0, "step": 8614 }, { "epoch": 3.155720436017221, "grad_norm": 0.12943471857107994, "learning_rate": 1.53805120444598e-05, "loss": 0.4095, "num_tokens": 6588922821.0, "step": 8615 }, { "epoch": 3.156086837043144, "grad_norm": 0.16041628673064412, "learning_rate": 1.5376538976917313e-05, "loss": 0.415, "num_tokens": 6589722086.0, "step": 8616 }, { "epoch": 3.1564532380690666, "grad_norm": 0.15640581962082234, "learning_rate": 1.5372566282570702e-05, "loss": 0.3987, "num_tokens": 6590566334.0, "step": 8617 }, { "epoch": 3.1568196390949894, "grad_norm": 0.13795879541278352, "learning_rate": 1.5368593961643808e-05, "loss": 0.3949, "num_tokens": 6591301796.0, "step": 8618 }, { "epoch": 3.157186040120912, "grad_norm": 0.1363890726366867, "learning_rate": 1.5364622014360446e-05, "loss": 0.4049, "num_tokens": 6592102832.0, "step": 8619 }, { "epoch": 3.1575524411468354, "grad_norm": 0.13443656361875314, "learning_rate": 1.5360650440944424e-05, "loss": 0.4173, "num_tokens": 6592889677.0, "step": 8620 }, { "epoch": 3.157918842172758, "grad_norm": 0.15781972665492228, "learning_rate": 1.5356679241619503e-05, "loss": 0.3805, "num_tokens": 6593529578.0, "step": 8621 }, { "epoch": 3.158285243198681, "grad_norm": 0.15414798728304782, "learning_rate": 1.5352708416609447e-05, "loss": 0.404, "num_tokens": 6594230185.0, "step": 8622 }, { "epoch": 3.1586516442246038, "grad_norm": 0.13565280901136287, "learning_rate": 1.5348737966137996e-05, "loss": 0.3962, "num_tokens": 6594986485.0, "step": 8623 }, { "epoch": 3.1590180452505265, "grad_norm": 0.15711517727958343, "learning_rate": 1.534476789042885e-05, "loss": 0.4003, "num_tokens": 6595890734.0, "step": 8624 }, { "epoch": 3.1593844462764498, "grad_norm": 0.12747174647524276, "learning_rate": 1.5340798189705712e-05, "loss": 0.4033, "num_tokens": 6596728513.0, "step": 8625 }, { "epoch": 3.1597508473023725, "grad_norm": 0.14131241445587903, "learning_rate": 1.5336828864192254e-05, "loss": 0.4192, "num_tokens": 6597544536.0, "step": 8626 }, { "epoch": 3.1601172483282953, "grad_norm": 0.15370142893817196, "learning_rate": 1.5332859914112114e-05, "loss": 0.382, "num_tokens": 6598246557.0, "step": 8627 }, { "epoch": 3.160483649354218, "grad_norm": 0.14505665910122442, "learning_rate": 1.532889133968893e-05, "loss": 0.4309, "num_tokens": 6598940031.0, "step": 8628 }, { "epoch": 3.160850050380141, "grad_norm": 0.14155273947000868, "learning_rate": 1.5324923141146303e-05, "loss": 0.4472, "num_tokens": 6599709933.0, "step": 8629 }, { "epoch": 3.161216451406064, "grad_norm": 0.15706974366900572, "learning_rate": 1.532095531870783e-05, "loss": 0.4225, "num_tokens": 6600447035.0, "step": 8630 }, { "epoch": 3.161582852431987, "grad_norm": 0.14604476249337034, "learning_rate": 1.5316987872597073e-05, "loss": 0.3866, "num_tokens": 6601213089.0, "step": 8631 }, { "epoch": 3.1619492534579097, "grad_norm": 0.1354873184315542, "learning_rate": 1.5313020803037563e-05, "loss": 0.4072, "num_tokens": 6602145367.0, "step": 8632 }, { "epoch": 3.1623156544838324, "grad_norm": 0.12826567247061527, "learning_rate": 1.5309054110252834e-05, "loss": 0.4305, "num_tokens": 6602962162.0, "step": 8633 }, { "epoch": 3.1626820555097552, "grad_norm": 0.14010424178486341, "learning_rate": 1.5305087794466392e-05, "loss": 0.3972, "num_tokens": 6603677084.0, "step": 8634 }, { "epoch": 3.1630484565356785, "grad_norm": 0.1466234619316237, "learning_rate": 1.53011218559017e-05, "loss": 0.4191, "num_tokens": 6604416656.0, "step": 8635 }, { "epoch": 3.1634148575616012, "grad_norm": 0.14530186182093227, "learning_rate": 1.5297156294782244e-05, "loss": 0.3942, "num_tokens": 6605130534.0, "step": 8636 }, { "epoch": 3.163781258587524, "grad_norm": 0.15173376811531106, "learning_rate": 1.529319111133144e-05, "loss": 0.4178, "num_tokens": 6605865601.0, "step": 8637 }, { "epoch": 3.164147659613447, "grad_norm": 0.1467118673739407, "learning_rate": 1.52892263057727e-05, "loss": 0.4381, "num_tokens": 6606502821.0, "step": 8638 }, { "epoch": 3.1645140606393696, "grad_norm": 0.15066151844860082, "learning_rate": 1.5285261878329443e-05, "loss": 0.4254, "num_tokens": 6607308161.0, "step": 8639 }, { "epoch": 3.164880461665293, "grad_norm": 0.1466980496293476, "learning_rate": 1.5281297829225022e-05, "loss": 0.4037, "num_tokens": 6608009545.0, "step": 8640 }, { "epoch": 3.1652468626912156, "grad_norm": 0.15361569065117844, "learning_rate": 1.5277334158682797e-05, "loss": 0.4211, "num_tokens": 6608742343.0, "step": 8641 }, { "epoch": 3.1656132637171384, "grad_norm": 0.14262797196574067, "learning_rate": 1.52733708669261e-05, "loss": 0.417, "num_tokens": 6609521074.0, "step": 8642 }, { "epoch": 3.165979664743061, "grad_norm": 0.14578288782267934, "learning_rate": 1.526940795417824e-05, "loss": 0.4276, "num_tokens": 6610299758.0, "step": 8643 }, { "epoch": 3.166346065768984, "grad_norm": 0.13893977051800355, "learning_rate": 1.5265445420662506e-05, "loss": 0.4403, "num_tokens": 6610957200.0, "step": 8644 }, { "epoch": 3.166712466794907, "grad_norm": 0.14582483044773284, "learning_rate": 1.5261483266602163e-05, "loss": 0.4032, "num_tokens": 6611773174.0, "step": 8645 }, { "epoch": 3.16707886782083, "grad_norm": 0.13688664804873743, "learning_rate": 1.5257521492220458e-05, "loss": 0.4046, "num_tokens": 6612500856.0, "step": 8646 }, { "epoch": 3.1674452688467527, "grad_norm": 0.14655422080110164, "learning_rate": 1.5253560097740615e-05, "loss": 0.3896, "num_tokens": 6613297035.0, "step": 8647 }, { "epoch": 3.1678116698726755, "grad_norm": 0.1346406344006018, "learning_rate": 1.5249599083385848e-05, "loss": 0.3753, "num_tokens": 6614009842.0, "step": 8648 }, { "epoch": 3.1681780708985987, "grad_norm": 0.13957040842371296, "learning_rate": 1.5245638449379314e-05, "loss": 0.3815, "num_tokens": 6614839947.0, "step": 8649 }, { "epoch": 3.1685444719245215, "grad_norm": 0.12976620181453083, "learning_rate": 1.5241678195944194e-05, "loss": 0.3952, "num_tokens": 6615693509.0, "step": 8650 }, { "epoch": 3.1689108729504443, "grad_norm": 0.12842741949610068, "learning_rate": 1.523771832330362e-05, "loss": 0.3734, "num_tokens": 6616408618.0, "step": 8651 }, { "epoch": 3.169277273976367, "grad_norm": 0.14344197242430629, "learning_rate": 1.52337588316807e-05, "loss": 0.445, "num_tokens": 6617195848.0, "step": 8652 }, { "epoch": 3.16964367500229, "grad_norm": 0.13864795357260912, "learning_rate": 1.5229799721298553e-05, "loss": 0.3937, "num_tokens": 6618023345.0, "step": 8653 }, { "epoch": 3.170010076028213, "grad_norm": 0.1340919345557805, "learning_rate": 1.522584099238023e-05, "loss": 0.3993, "num_tokens": 6618792349.0, "step": 8654 }, { "epoch": 3.170376477054136, "grad_norm": 0.14747699858654667, "learning_rate": 1.5221882645148794e-05, "loss": 0.4096, "num_tokens": 6619500486.0, "step": 8655 }, { "epoch": 3.1707428780800586, "grad_norm": 0.13660347605729753, "learning_rate": 1.5217924679827283e-05, "loss": 0.399, "num_tokens": 6620162946.0, "step": 8656 }, { "epoch": 3.1711092791059814, "grad_norm": 0.15144227455970596, "learning_rate": 1.5213967096638684e-05, "loss": 0.4189, "num_tokens": 6620855711.0, "step": 8657 }, { "epoch": 3.171475680131904, "grad_norm": 0.1470685788068978, "learning_rate": 1.5210009895806015e-05, "loss": 0.4069, "num_tokens": 6621532654.0, "step": 8658 }, { "epoch": 3.1718420811578274, "grad_norm": 0.14811889527351368, "learning_rate": 1.520605307755222e-05, "loss": 0.4095, "num_tokens": 6622254151.0, "step": 8659 }, { "epoch": 3.17220848218375, "grad_norm": 0.14071963415712674, "learning_rate": 1.5202096642100248e-05, "loss": 0.4202, "num_tokens": 6623172127.0, "step": 8660 }, { "epoch": 3.172574883209673, "grad_norm": 0.12596588404528902, "learning_rate": 1.5198140589673039e-05, "loss": 0.3992, "num_tokens": 6623919233.0, "step": 8661 }, { "epoch": 3.1729412842355957, "grad_norm": 0.1367940896745543, "learning_rate": 1.5194184920493466e-05, "loss": 0.423, "num_tokens": 6624648484.0, "step": 8662 }, { "epoch": 3.173307685261519, "grad_norm": 0.13806046570491082, "learning_rate": 1.519022963478444e-05, "loss": 0.4032, "num_tokens": 6625458927.0, "step": 8663 }, { "epoch": 3.1736740862874417, "grad_norm": 0.13780233608658396, "learning_rate": 1.51862747327688e-05, "loss": 0.4034, "num_tokens": 6626192774.0, "step": 8664 }, { "epoch": 3.1740404873133645, "grad_norm": 0.13287735625247726, "learning_rate": 1.5182320214669384e-05, "loss": 0.3863, "num_tokens": 6627115205.0, "step": 8665 }, { "epoch": 3.1744068883392873, "grad_norm": 0.14420841923569602, "learning_rate": 1.5178366080709022e-05, "loss": 0.3852, "num_tokens": 6627850806.0, "step": 8666 }, { "epoch": 3.17477328936521, "grad_norm": 0.12178058064592845, "learning_rate": 1.5174412331110494e-05, "loss": 0.3737, "num_tokens": 6628659854.0, "step": 8667 }, { "epoch": 3.1751396903911333, "grad_norm": 0.138048243947935, "learning_rate": 1.5170458966096572e-05, "loss": 0.4023, "num_tokens": 6629405982.0, "step": 8668 }, { "epoch": 3.175506091417056, "grad_norm": 0.14055660426340358, "learning_rate": 1.5166505985890014e-05, "loss": 0.3991, "num_tokens": 6630217450.0, "step": 8669 }, { "epoch": 3.175872492442979, "grad_norm": 0.14042839828112358, "learning_rate": 1.5162553390713543e-05, "loss": 0.4153, "num_tokens": 6630964386.0, "step": 8670 }, { "epoch": 3.1762388934689016, "grad_norm": 0.13389239888135263, "learning_rate": 1.5158601180789876e-05, "loss": 0.4105, "num_tokens": 6631856341.0, "step": 8671 }, { "epoch": 3.1766052944948244, "grad_norm": 0.1347659495471201, "learning_rate": 1.5154649356341687e-05, "loss": 0.4066, "num_tokens": 6632617809.0, "step": 8672 }, { "epoch": 3.1769716955207477, "grad_norm": 0.13926153159951807, "learning_rate": 1.5150697917591638e-05, "loss": 0.4289, "num_tokens": 6633281890.0, "step": 8673 }, { "epoch": 3.1773380965466704, "grad_norm": 0.1493438882079976, "learning_rate": 1.5146746864762385e-05, "loss": 0.4036, "num_tokens": 6634016492.0, "step": 8674 }, { "epoch": 3.177704497572593, "grad_norm": 0.1349605626311374, "learning_rate": 1.514279619807654e-05, "loss": 0.4096, "num_tokens": 6634860302.0, "step": 8675 }, { "epoch": 3.178070898598516, "grad_norm": 0.14283694920947262, "learning_rate": 1.51388459177567e-05, "loss": 0.3896, "num_tokens": 6635651598.0, "step": 8676 }, { "epoch": 3.1784372996244388, "grad_norm": 0.15288730397005, "learning_rate": 1.5134896024025443e-05, "loss": 0.4181, "num_tokens": 6636412878.0, "step": 8677 }, { "epoch": 3.178803700650362, "grad_norm": 0.14351278014777707, "learning_rate": 1.5130946517105325e-05, "loss": 0.3941, "num_tokens": 6637227105.0, "step": 8678 }, { "epoch": 3.1791701016762848, "grad_norm": 0.15711990282301913, "learning_rate": 1.5126997397218878e-05, "loss": 0.4423, "num_tokens": 6637885992.0, "step": 8679 }, { "epoch": 3.1795365027022076, "grad_norm": 0.1513287807335693, "learning_rate": 1.512304866458861e-05, "loss": 0.4089, "num_tokens": 6638562076.0, "step": 8680 }, { "epoch": 3.1799029037281303, "grad_norm": 0.14908806494001697, "learning_rate": 1.5119100319437015e-05, "loss": 0.3848, "num_tokens": 6639331435.0, "step": 8681 }, { "epoch": 3.180269304754053, "grad_norm": 0.1445522390154196, "learning_rate": 1.511515236198657e-05, "loss": 0.3595, "num_tokens": 6640126380.0, "step": 8682 }, { "epoch": 3.1806357057799763, "grad_norm": 0.1347117864329813, "learning_rate": 1.5111204792459707e-05, "loss": 0.3964, "num_tokens": 6640899096.0, "step": 8683 }, { "epoch": 3.181002106805899, "grad_norm": 0.13328603208134823, "learning_rate": 1.5107257611078848e-05, "loss": 0.3651, "num_tokens": 6641734382.0, "step": 8684 }, { "epoch": 3.181368507831822, "grad_norm": 0.13420938230284507, "learning_rate": 1.5103310818066406e-05, "loss": 0.3887, "num_tokens": 6642589164.0, "step": 8685 }, { "epoch": 3.1817349088577447, "grad_norm": 0.13957518952803297, "learning_rate": 1.5099364413644755e-05, "loss": 0.4096, "num_tokens": 6643274887.0, "step": 8686 }, { "epoch": 3.1821013098836675, "grad_norm": 0.14272956458962882, "learning_rate": 1.5095418398036251e-05, "loss": 0.4143, "num_tokens": 6643995835.0, "step": 8687 }, { "epoch": 3.1824677109095907, "grad_norm": 0.14960162153344012, "learning_rate": 1.5091472771463242e-05, "loss": 0.4022, "num_tokens": 6644683255.0, "step": 8688 }, { "epoch": 3.1828341119355135, "grad_norm": 0.1455292871217395, "learning_rate": 1.508752753414803e-05, "loss": 0.4038, "num_tokens": 6645485246.0, "step": 8689 }, { "epoch": 3.1832005129614362, "grad_norm": 0.14799409484240098, "learning_rate": 1.508358268631291e-05, "loss": 0.3928, "num_tokens": 6646310532.0, "step": 8690 }, { "epoch": 3.183566913987359, "grad_norm": 0.14794083874859473, "learning_rate": 1.5079638228180159e-05, "loss": 0.4298, "num_tokens": 6647049500.0, "step": 8691 }, { "epoch": 3.183933315013282, "grad_norm": 0.15218816180307923, "learning_rate": 1.5075694159972019e-05, "loss": 0.405, "num_tokens": 6647849736.0, "step": 8692 }, { "epoch": 3.184299716039205, "grad_norm": 0.13769422906608708, "learning_rate": 1.5071750481910724e-05, "loss": 0.3727, "num_tokens": 6648660376.0, "step": 8693 }, { "epoch": 3.184666117065128, "grad_norm": 0.1407679552880448, "learning_rate": 1.506780719421847e-05, "loss": 0.3882, "num_tokens": 6649410147.0, "step": 8694 }, { "epoch": 3.1850325180910506, "grad_norm": 0.15120797393814991, "learning_rate": 1.5063864297117445e-05, "loss": 0.4318, "num_tokens": 6650082485.0, "step": 8695 }, { "epoch": 3.1853989191169734, "grad_norm": 0.15265584247151656, "learning_rate": 1.505992179082981e-05, "loss": 0.4088, "num_tokens": 6650867597.0, "step": 8696 }, { "epoch": 3.1857653201428966, "grad_norm": 0.14443236068993362, "learning_rate": 1.5055979675577704e-05, "loss": 0.3908, "num_tokens": 6651675039.0, "step": 8697 }, { "epoch": 3.1861317211688194, "grad_norm": 0.1342653558214711, "learning_rate": 1.5052037951583236e-05, "loss": 0.3866, "num_tokens": 6652350651.0, "step": 8698 }, { "epoch": 3.186498122194742, "grad_norm": 0.15532644961289338, "learning_rate": 1.5048096619068507e-05, "loss": 0.4263, "num_tokens": 6653016997.0, "step": 8699 }, { "epoch": 3.186864523220665, "grad_norm": 0.14846243372486345, "learning_rate": 1.5044155678255589e-05, "loss": 0.3891, "num_tokens": 6653754547.0, "step": 8700 }, { "epoch": 3.1872309242465877, "grad_norm": 0.13847365365593334, "learning_rate": 1.504021512936654e-05, "loss": 0.3724, "num_tokens": 6654640783.0, "step": 8701 }, { "epoch": 3.187597325272511, "grad_norm": 0.140006407190707, "learning_rate": 1.5036274972623379e-05, "loss": 0.3897, "num_tokens": 6655423859.0, "step": 8702 }, { "epoch": 3.1879637262984337, "grad_norm": 0.16446853902593567, "learning_rate": 1.5032335208248103e-05, "loss": 0.3808, "num_tokens": 6656178662.0, "step": 8703 }, { "epoch": 3.1883301273243565, "grad_norm": 0.12985476728698667, "learning_rate": 1.5028395836462718e-05, "loss": 0.3985, "num_tokens": 6657054877.0, "step": 8704 }, { "epoch": 3.1886965283502793, "grad_norm": 0.13337721300188599, "learning_rate": 1.502445685748918e-05, "loss": 0.4042, "num_tokens": 6657754089.0, "step": 8705 }, { "epoch": 3.189062929376202, "grad_norm": 0.1545399349110224, "learning_rate": 1.5020518271549414e-05, "loss": 0.3749, "num_tokens": 6658499477.0, "step": 8706 }, { "epoch": 3.1894293304021253, "grad_norm": 0.13992546390666294, "learning_rate": 1.5016580078865355e-05, "loss": 0.4041, "num_tokens": 6659206268.0, "step": 8707 }, { "epoch": 3.189795731428048, "grad_norm": 0.15323741678657096, "learning_rate": 1.5012642279658889e-05, "loss": 0.3936, "num_tokens": 6659915606.0, "step": 8708 }, { "epoch": 3.190162132453971, "grad_norm": 0.15547039776814342, "learning_rate": 1.5008704874151891e-05, "loss": 0.4138, "num_tokens": 6660661322.0, "step": 8709 }, { "epoch": 3.1905285334798936, "grad_norm": 0.13636398449032122, "learning_rate": 1.500476786256622e-05, "loss": 0.3978, "num_tokens": 6661506109.0, "step": 8710 }, { "epoch": 3.190894934505817, "grad_norm": 0.13927648657295355, "learning_rate": 1.5000831245123695e-05, "loss": 0.4056, "num_tokens": 6662341560.0, "step": 8711 }, { "epoch": 3.1912613355317396, "grad_norm": 0.13106463228665527, "learning_rate": 1.4996895022046128e-05, "loss": 0.387, "num_tokens": 6663287316.0, "step": 8712 }, { "epoch": 3.1916277365576624, "grad_norm": 0.1250700925442475, "learning_rate": 1.4992959193555305e-05, "loss": 0.4031, "num_tokens": 6664017106.0, "step": 8713 }, { "epoch": 3.191994137583585, "grad_norm": 0.14338778397543603, "learning_rate": 1.498902375987298e-05, "loss": 0.3968, "num_tokens": 6664764205.0, "step": 8714 }, { "epoch": 3.192360538609508, "grad_norm": 0.14237358526562818, "learning_rate": 1.4985088721220908e-05, "loss": 0.4109, "num_tokens": 6665475361.0, "step": 8715 }, { "epoch": 3.192726939635431, "grad_norm": 0.1385589599375719, "learning_rate": 1.4981154077820792e-05, "loss": 0.4206, "num_tokens": 6666158995.0, "step": 8716 }, { "epoch": 3.193093340661354, "grad_norm": 0.14952237016416725, "learning_rate": 1.4977219829894333e-05, "loss": 0.4143, "num_tokens": 6666789449.0, "step": 8717 }, { "epoch": 3.1934597416872768, "grad_norm": 0.157889951201504, "learning_rate": 1.497328597766321e-05, "loss": 0.434, "num_tokens": 6667432271.0, "step": 8718 }, { "epoch": 3.1938261427131995, "grad_norm": 0.1587689612000579, "learning_rate": 1.4969352521349062e-05, "loss": 0.4182, "num_tokens": 6668152775.0, "step": 8719 }, { "epoch": 3.1941925437391223, "grad_norm": 0.14232777203312175, "learning_rate": 1.4965419461173532e-05, "loss": 0.4208, "num_tokens": 6668808024.0, "step": 8720 }, { "epoch": 3.1945589447650455, "grad_norm": 0.14552686930408684, "learning_rate": 1.4961486797358218e-05, "loss": 0.4179, "num_tokens": 6669507523.0, "step": 8721 }, { "epoch": 3.1949253457909683, "grad_norm": 0.14404841298221907, "learning_rate": 1.4957554530124702e-05, "loss": 0.3809, "num_tokens": 6670330637.0, "step": 8722 }, { "epoch": 3.195291746816891, "grad_norm": 0.13707846575070406, "learning_rate": 1.495362265969456e-05, "loss": 0.419, "num_tokens": 6671120376.0, "step": 8723 }, { "epoch": 3.195658147842814, "grad_norm": 0.1394489105416866, "learning_rate": 1.4949691186289315e-05, "loss": 0.435, "num_tokens": 6671824897.0, "step": 8724 }, { "epoch": 3.1960245488687367, "grad_norm": 0.16403447131403875, "learning_rate": 1.4945760110130488e-05, "loss": 0.41, "num_tokens": 6672628895.0, "step": 8725 }, { "epoch": 3.19639094989466, "grad_norm": 0.13775458472279037, "learning_rate": 1.494182943143958e-05, "loss": 0.4241, "num_tokens": 6673420411.0, "step": 8726 }, { "epoch": 3.1967573509205827, "grad_norm": 0.13846820919630268, "learning_rate": 1.493789915043805e-05, "loss": 0.4103, "num_tokens": 6674162709.0, "step": 8727 }, { "epoch": 3.1971237519465054, "grad_norm": 0.1473753247976465, "learning_rate": 1.4933969267347367e-05, "loss": 0.4051, "num_tokens": 6674953328.0, "step": 8728 }, { "epoch": 3.1974901529724282, "grad_norm": 0.12734085826640032, "learning_rate": 1.4930039782388947e-05, "loss": 0.4115, "num_tokens": 6675810311.0, "step": 8729 }, { "epoch": 3.197856553998351, "grad_norm": 0.14428738274150732, "learning_rate": 1.4926110695784193e-05, "loss": 0.4227, "num_tokens": 6676666339.0, "step": 8730 }, { "epoch": 3.1982229550242742, "grad_norm": 0.12782503008781043, "learning_rate": 1.4922182007754495e-05, "loss": 0.4054, "num_tokens": 6677410097.0, "step": 8731 }, { "epoch": 3.198589356050197, "grad_norm": 0.13911356361214453, "learning_rate": 1.4918253718521212e-05, "loss": 0.4207, "num_tokens": 6678144925.0, "step": 8732 }, { "epoch": 3.19895575707612, "grad_norm": 0.1447105978948742, "learning_rate": 1.4914325828305668e-05, "loss": 0.3985, "num_tokens": 6678847787.0, "step": 8733 }, { "epoch": 3.1993221581020426, "grad_norm": 0.15897762255478728, "learning_rate": 1.4910398337329199e-05, "loss": 0.4021, "num_tokens": 6679574475.0, "step": 8734 }, { "epoch": 3.1996885591279653, "grad_norm": 0.13590168373724784, "learning_rate": 1.4906471245813086e-05, "loss": 0.403, "num_tokens": 6680366712.0, "step": 8735 }, { "epoch": 3.2000549601538886, "grad_norm": 0.14086677456259195, "learning_rate": 1.4902544553978598e-05, "loss": 0.4016, "num_tokens": 6681128787.0, "step": 8736 }, { "epoch": 3.2004213611798114, "grad_norm": 0.1353526030770479, "learning_rate": 1.4898618262046984e-05, "loss": 0.397, "num_tokens": 6681917872.0, "step": 8737 }, { "epoch": 3.200787762205734, "grad_norm": 0.1437998805475778, "learning_rate": 1.489469237023947e-05, "loss": 0.4324, "num_tokens": 6682698220.0, "step": 8738 }, { "epoch": 3.201154163231657, "grad_norm": 0.14818076499545535, "learning_rate": 1.4890766878777264e-05, "loss": 0.4155, "num_tokens": 6683523946.0, "step": 8739 }, { "epoch": 3.2015205642575797, "grad_norm": 0.14083536144279254, "learning_rate": 1.4886841787881543e-05, "loss": 0.39, "num_tokens": 6684320334.0, "step": 8740 }, { "epoch": 3.201886965283503, "grad_norm": 0.14029523859583695, "learning_rate": 1.4882917097773458e-05, "loss": 0.4355, "num_tokens": 6685026412.0, "step": 8741 }, { "epoch": 3.2022533663094257, "grad_norm": 0.1448031786588955, "learning_rate": 1.4878992808674148e-05, "loss": 0.3809, "num_tokens": 6685812041.0, "step": 8742 }, { "epoch": 3.2026197673353485, "grad_norm": 0.15471564135457505, "learning_rate": 1.487506892080473e-05, "loss": 0.3927, "num_tokens": 6686609463.0, "step": 8743 }, { "epoch": 3.2029861683612713, "grad_norm": 0.14261386188936556, "learning_rate": 1.4871145434386285e-05, "loss": 0.4424, "num_tokens": 6687368499.0, "step": 8744 }, { "epoch": 3.2033525693871945, "grad_norm": 0.13953151161469796, "learning_rate": 1.4867222349639892e-05, "loss": 0.3865, "num_tokens": 6688197972.0, "step": 8745 }, { "epoch": 3.2037189704131173, "grad_norm": 0.13939523590713984, "learning_rate": 1.4863299666786577e-05, "loss": 0.3907, "num_tokens": 6688939255.0, "step": 8746 }, { "epoch": 3.20408537143904, "grad_norm": 0.13981627756285595, "learning_rate": 1.4859377386047382e-05, "loss": 0.3946, "num_tokens": 6689735291.0, "step": 8747 }, { "epoch": 3.204451772464963, "grad_norm": 0.1344716137159541, "learning_rate": 1.485545550764329e-05, "loss": 0.4396, "num_tokens": 6690759965.0, "step": 8748 }, { "epoch": 3.2048181734908856, "grad_norm": 0.13118556825361305, "learning_rate": 1.4851534031795285e-05, "loss": 0.4049, "num_tokens": 6691543231.0, "step": 8749 }, { "epoch": 3.205184574516809, "grad_norm": 0.1475789377726577, "learning_rate": 1.484761295872433e-05, "loss": 0.4301, "num_tokens": 6692254232.0, "step": 8750 }, { "epoch": 3.2055509755427316, "grad_norm": 0.15083164705982005, "learning_rate": 1.4843692288651342e-05, "loss": 0.4334, "num_tokens": 6692914346.0, "step": 8751 }, { "epoch": 3.2059173765686544, "grad_norm": 0.1669740979676171, "learning_rate": 1.4839772021797227e-05, "loss": 0.4124, "num_tokens": 6693784424.0, "step": 8752 }, { "epoch": 3.206283777594577, "grad_norm": 0.12976741910531028, "learning_rate": 1.4835852158382884e-05, "loss": 0.3857, "num_tokens": 6694551724.0, "step": 8753 }, { "epoch": 3.2066501786205, "grad_norm": 0.1354958572915793, "learning_rate": 1.4831932698629165e-05, "loss": 0.4224, "num_tokens": 6695317569.0, "step": 8754 }, { "epoch": 3.207016579646423, "grad_norm": 0.14350414322706317, "learning_rate": 1.4828013642756913e-05, "loss": 0.4125, "num_tokens": 6696089668.0, "step": 8755 }, { "epoch": 3.207382980672346, "grad_norm": 0.14926997264344855, "learning_rate": 1.482409499098695e-05, "loss": 0.4367, "num_tokens": 6696884386.0, "step": 8756 }, { "epoch": 3.2077493816982687, "grad_norm": 0.14693745110839304, "learning_rate": 1.482017674354006e-05, "loss": 0.4304, "num_tokens": 6697642207.0, "step": 8757 }, { "epoch": 3.2081157827241915, "grad_norm": 0.14404113648361336, "learning_rate": 1.4816258900637035e-05, "loss": 0.3673, "num_tokens": 6698556203.0, "step": 8758 }, { "epoch": 3.2084821837501147, "grad_norm": 0.13221594872381393, "learning_rate": 1.4812341462498605e-05, "loss": 0.393, "num_tokens": 6699399510.0, "step": 8759 }, { "epoch": 3.2088485847760375, "grad_norm": 0.13549773166868062, "learning_rate": 1.4808424429345496e-05, "loss": 0.3878, "num_tokens": 6700104874.0, "step": 8760 }, { "epoch": 3.2092149858019603, "grad_norm": 0.14313959772148882, "learning_rate": 1.4804507801398425e-05, "loss": 0.3962, "num_tokens": 6700808821.0, "step": 8761 }, { "epoch": 3.209581386827883, "grad_norm": 0.14580453200846677, "learning_rate": 1.4800591578878068e-05, "loss": 0.421, "num_tokens": 6701440638.0, "step": 8762 }, { "epoch": 3.209947787853806, "grad_norm": 0.1601742640489991, "learning_rate": 1.4796675762005075e-05, "loss": 0.4146, "num_tokens": 6702158574.0, "step": 8763 }, { "epoch": 3.210314188879729, "grad_norm": 0.14416913078417903, "learning_rate": 1.4792760351000088e-05, "loss": 0.432, "num_tokens": 6703015977.0, "step": 8764 }, { "epoch": 3.210680589905652, "grad_norm": 0.13407439067471547, "learning_rate": 1.4788845346083713e-05, "loss": 0.408, "num_tokens": 6703795014.0, "step": 8765 }, { "epoch": 3.2110469909315746, "grad_norm": 0.14840291268924083, "learning_rate": 1.4784930747476547e-05, "loss": 0.3838, "num_tokens": 6704634627.0, "step": 8766 }, { "epoch": 3.2114133919574974, "grad_norm": 0.12421156181595153, "learning_rate": 1.478101655539916e-05, "loss": 0.3896, "num_tokens": 6705413623.0, "step": 8767 }, { "epoch": 3.21177979298342, "grad_norm": 0.15395197140071384, "learning_rate": 1.4777102770072076e-05, "loss": 0.4147, "num_tokens": 6706131314.0, "step": 8768 }, { "epoch": 3.2121461940093434, "grad_norm": 0.14573811220211566, "learning_rate": 1.4773189391715832e-05, "loss": 0.4279, "num_tokens": 6706900794.0, "step": 8769 }, { "epoch": 3.212512595035266, "grad_norm": 0.14847147770785551, "learning_rate": 1.4769276420550927e-05, "loss": 0.3977, "num_tokens": 6707643004.0, "step": 8770 }, { "epoch": 3.212878996061189, "grad_norm": 0.1376674377711357, "learning_rate": 1.4765363856797814e-05, "loss": 0.4046, "num_tokens": 6708436510.0, "step": 8771 }, { "epoch": 3.2132453970871118, "grad_norm": 0.13460076924711992, "learning_rate": 1.4761451700676972e-05, "loss": 0.387, "num_tokens": 6709131344.0, "step": 8772 }, { "epoch": 3.2136117981130345, "grad_norm": 0.16389441971021707, "learning_rate": 1.4757539952408819e-05, "loss": 0.4131, "num_tokens": 6709888448.0, "step": 8773 }, { "epoch": 3.2139781991389578, "grad_norm": 0.13471787179075936, "learning_rate": 1.4753628612213753e-05, "loss": 0.4059, "num_tokens": 6710623036.0, "step": 8774 }, { "epoch": 3.2143446001648805, "grad_norm": 0.1478530578204592, "learning_rate": 1.474971768031217e-05, "loss": 0.4133, "num_tokens": 6711396063.0, "step": 8775 }, { "epoch": 3.2147110011908033, "grad_norm": 0.15171244293388006, "learning_rate": 1.474580715692441e-05, "loss": 0.4033, "num_tokens": 6712152232.0, "step": 8776 }, { "epoch": 3.215077402216726, "grad_norm": 0.1478679121811784, "learning_rate": 1.4741897042270834e-05, "loss": 0.3795, "num_tokens": 6712842421.0, "step": 8777 }, { "epoch": 3.215443803242649, "grad_norm": 0.15095113181280867, "learning_rate": 1.4737987336571741e-05, "loss": 0.3934, "num_tokens": 6713559512.0, "step": 8778 }, { "epoch": 3.215810204268572, "grad_norm": 0.15184231361865122, "learning_rate": 1.4734078040047423e-05, "loss": 0.4354, "num_tokens": 6714290977.0, "step": 8779 }, { "epoch": 3.216176605294495, "grad_norm": 0.14953758177432463, "learning_rate": 1.4730169152918157e-05, "loss": 0.4101, "num_tokens": 6715066026.0, "step": 8780 }, { "epoch": 3.2165430063204177, "grad_norm": 0.13429442296985963, "learning_rate": 1.4726260675404174e-05, "loss": 0.4207, "num_tokens": 6715901912.0, "step": 8781 }, { "epoch": 3.2169094073463405, "grad_norm": 0.14194257538971244, "learning_rate": 1.4722352607725694e-05, "loss": 0.3937, "num_tokens": 6716731983.0, "step": 8782 }, { "epoch": 3.2172758083722632, "grad_norm": 0.1385984838047411, "learning_rate": 1.4718444950102928e-05, "loss": 0.4246, "num_tokens": 6717580469.0, "step": 8783 }, { "epoch": 3.2176422093981865, "grad_norm": 0.15157528789160393, "learning_rate": 1.4714537702756042e-05, "loss": 0.4478, "num_tokens": 6718138888.0, "step": 8784 }, { "epoch": 3.2180086104241092, "grad_norm": 0.1588244659333461, "learning_rate": 1.4710630865905202e-05, "loss": 0.3899, "num_tokens": 6718827849.0, "step": 8785 }, { "epoch": 3.218375011450032, "grad_norm": 0.1616279718299762, "learning_rate": 1.4706724439770521e-05, "loss": 0.376, "num_tokens": 6719590396.0, "step": 8786 }, { "epoch": 3.218741412475955, "grad_norm": 0.14030482763036176, "learning_rate": 1.4702818424572107e-05, "loss": 0.4112, "num_tokens": 6720226014.0, "step": 8787 }, { "epoch": 3.2191078135018776, "grad_norm": 0.1437525077583146, "learning_rate": 1.4698912820530047e-05, "loss": 0.4234, "num_tokens": 6721021425.0, "step": 8788 }, { "epoch": 3.219474214527801, "grad_norm": 0.15384632116680913, "learning_rate": 1.4695007627864404e-05, "loss": 0.3919, "num_tokens": 6721810296.0, "step": 8789 }, { "epoch": 3.2198406155537236, "grad_norm": 0.13779290208801245, "learning_rate": 1.4691102846795202e-05, "loss": 0.384, "num_tokens": 6722598783.0, "step": 8790 }, { "epoch": 3.2202070165796464, "grad_norm": 0.1443844564163811, "learning_rate": 1.4687198477542466e-05, "loss": 0.4402, "num_tokens": 6723393317.0, "step": 8791 }, { "epoch": 3.220573417605569, "grad_norm": 0.1436323152030486, "learning_rate": 1.4683294520326179e-05, "loss": 0.3989, "num_tokens": 6724221484.0, "step": 8792 }, { "epoch": 3.2209398186314924, "grad_norm": 0.14123073386444127, "learning_rate": 1.4679390975366313e-05, "loss": 0.4369, "num_tokens": 6725020792.0, "step": 8793 }, { "epoch": 3.221306219657415, "grad_norm": 0.1339641898000612, "learning_rate": 1.467548784288281e-05, "loss": 0.4234, "num_tokens": 6725724272.0, "step": 8794 }, { "epoch": 3.221672620683338, "grad_norm": 0.15091567154296154, "learning_rate": 1.4671585123095584e-05, "loss": 0.4148, "num_tokens": 6726533014.0, "step": 8795 }, { "epoch": 3.2220390217092607, "grad_norm": 0.14750439635788226, "learning_rate": 1.466768281622454e-05, "loss": 0.3921, "num_tokens": 6727295087.0, "step": 8796 }, { "epoch": 3.2224054227351835, "grad_norm": 0.12998899450057622, "learning_rate": 1.4663780922489555e-05, "loss": 0.397, "num_tokens": 6728076571.0, "step": 8797 }, { "epoch": 3.2227718237611067, "grad_norm": 0.14793916045364683, "learning_rate": 1.465987944211046e-05, "loss": 0.4073, "num_tokens": 6728883241.0, "step": 8798 }, { "epoch": 3.2231382247870295, "grad_norm": 0.13461989035126784, "learning_rate": 1.4655978375307105e-05, "loss": 0.3686, "num_tokens": 6729713802.0, "step": 8799 }, { "epoch": 3.2235046258129523, "grad_norm": 0.13742854572659088, "learning_rate": 1.465207772229928e-05, "loss": 0.4378, "num_tokens": 6730409438.0, "step": 8800 }, { "epoch": 3.223871026838875, "grad_norm": 0.15940206944731652, "learning_rate": 1.4648177483306768e-05, "loss": 0.3933, "num_tokens": 6731204882.0, "step": 8801 }, { "epoch": 3.224237427864798, "grad_norm": 0.13301924431077947, "learning_rate": 1.4644277658549335e-05, "loss": 0.4057, "num_tokens": 6731999787.0, "step": 8802 }, { "epoch": 3.224603828890721, "grad_norm": 0.13554810027616482, "learning_rate": 1.4640378248246698e-05, "loss": 0.39, "num_tokens": 6732799955.0, "step": 8803 }, { "epoch": 3.224970229916644, "grad_norm": 0.14142754511628663, "learning_rate": 1.4636479252618578e-05, "loss": 0.3975, "num_tokens": 6733553145.0, "step": 8804 }, { "epoch": 3.2253366309425666, "grad_norm": 0.14895216986524834, "learning_rate": 1.4632580671884667e-05, "loss": 0.4338, "num_tokens": 6734336355.0, "step": 8805 }, { "epoch": 3.2257030319684894, "grad_norm": 0.1373868473684383, "learning_rate": 1.4628682506264618e-05, "loss": 0.3972, "num_tokens": 6735037644.0, "step": 8806 }, { "epoch": 3.226069432994412, "grad_norm": 0.14665114950589486, "learning_rate": 1.4624784755978083e-05, "loss": 0.4098, "num_tokens": 6735862615.0, "step": 8807 }, { "epoch": 3.2264358340203354, "grad_norm": 0.15199604088636742, "learning_rate": 1.4620887421244666e-05, "loss": 0.436, "num_tokens": 6736701321.0, "step": 8808 }, { "epoch": 3.226802235046258, "grad_norm": 0.13256044968709685, "learning_rate": 1.4616990502283965e-05, "loss": 0.3817, "num_tokens": 6737468152.0, "step": 8809 }, { "epoch": 3.227168636072181, "grad_norm": 0.12935135701035402, "learning_rate": 1.461309399931556e-05, "loss": 0.3948, "num_tokens": 6738336773.0, "step": 8810 }, { "epoch": 3.2275350370981037, "grad_norm": 0.12579495413245687, "learning_rate": 1.4609197912558975e-05, "loss": 0.3598, "num_tokens": 6739091621.0, "step": 8811 }, { "epoch": 3.227901438124027, "grad_norm": 0.1431952812958223, "learning_rate": 1.4605302242233763e-05, "loss": 0.3995, "num_tokens": 6739931419.0, "step": 8812 }, { "epoch": 3.2282678391499497, "grad_norm": 0.13741577820442777, "learning_rate": 1.4601406988559403e-05, "loss": 0.3863, "num_tokens": 6740718690.0, "step": 8813 }, { "epoch": 3.2286342401758725, "grad_norm": 0.13918893067969118, "learning_rate": 1.4597512151755373e-05, "loss": 0.4257, "num_tokens": 6741378800.0, "step": 8814 }, { "epoch": 3.2290006412017953, "grad_norm": 0.14840720485160075, "learning_rate": 1.4593617732041137e-05, "loss": 0.4011, "num_tokens": 6742204878.0, "step": 8815 }, { "epoch": 3.229367042227718, "grad_norm": 0.13609332984971734, "learning_rate": 1.4589723729636114e-05, "loss": 0.4178, "num_tokens": 6743013188.0, "step": 8816 }, { "epoch": 3.2297334432536413, "grad_norm": 0.14373618828336268, "learning_rate": 1.4585830144759707e-05, "loss": 0.3965, "num_tokens": 6743816311.0, "step": 8817 }, { "epoch": 3.230099844279564, "grad_norm": 0.1388308287257471, "learning_rate": 1.4581936977631306e-05, "loss": 0.3789, "num_tokens": 6744553895.0, "step": 8818 }, { "epoch": 3.230466245305487, "grad_norm": 0.13863641208071892, "learning_rate": 1.4578044228470277e-05, "loss": 0.3894, "num_tokens": 6745332984.0, "step": 8819 }, { "epoch": 3.2308326463314097, "grad_norm": 0.13371122549062764, "learning_rate": 1.4574151897495932e-05, "loss": 0.3923, "num_tokens": 6746027784.0, "step": 8820 }, { "epoch": 3.2311990473573324, "grad_norm": 0.13914986908686364, "learning_rate": 1.4570259984927603e-05, "loss": 0.3876, "num_tokens": 6746907445.0, "step": 8821 }, { "epoch": 3.2315654483832557, "grad_norm": 0.13769283356377404, "learning_rate": 1.456636849098457e-05, "loss": 0.4015, "num_tokens": 6747643914.0, "step": 8822 }, { "epoch": 3.2319318494091784, "grad_norm": 0.14269831734557833, "learning_rate": 1.45624774158861e-05, "loss": 0.3894, "num_tokens": 6748537516.0, "step": 8823 }, { "epoch": 3.232298250435101, "grad_norm": 0.1292164658918276, "learning_rate": 1.4558586759851436e-05, "loss": 0.3837, "num_tokens": 6749247733.0, "step": 8824 }, { "epoch": 3.232664651461024, "grad_norm": 0.1510676160707179, "learning_rate": 1.4554696523099785e-05, "loss": 0.4263, "num_tokens": 6749883799.0, "step": 8825 }, { "epoch": 3.2330310524869468, "grad_norm": 0.1493819261186499, "learning_rate": 1.4550806705850354e-05, "loss": 0.4469, "num_tokens": 6750552230.0, "step": 8826 }, { "epoch": 3.23339745351287, "grad_norm": 0.15952101976852434, "learning_rate": 1.4546917308322303e-05, "loss": 0.4111, "num_tokens": 6751220422.0, "step": 8827 }, { "epoch": 3.233763854538793, "grad_norm": 0.14920505262369804, "learning_rate": 1.4543028330734776e-05, "loss": 0.3751, "num_tokens": 6751949501.0, "step": 8828 }, { "epoch": 3.2341302555647156, "grad_norm": 0.1417380320666862, "learning_rate": 1.4539139773306911e-05, "loss": 0.3755, "num_tokens": 6752655724.0, "step": 8829 }, { "epoch": 3.2344966565906383, "grad_norm": 0.1320439709681137, "learning_rate": 1.4535251636257793e-05, "loss": 0.3866, "num_tokens": 6753497308.0, "step": 8830 }, { "epoch": 3.234863057616561, "grad_norm": 0.14473106035726363, "learning_rate": 1.4531363919806505e-05, "loss": 0.3763, "num_tokens": 6754280814.0, "step": 8831 }, { "epoch": 3.2352294586424843, "grad_norm": 0.13715844326560142, "learning_rate": 1.4527476624172096e-05, "loss": 0.4211, "num_tokens": 6755119942.0, "step": 8832 }, { "epoch": 3.235595859668407, "grad_norm": 0.14215282848441455, "learning_rate": 1.4523589749573583e-05, "loss": 0.3902, "num_tokens": 6755875517.0, "step": 8833 }, { "epoch": 3.23596226069433, "grad_norm": 0.1355687840728884, "learning_rate": 1.4519703296229994e-05, "loss": 0.4028, "num_tokens": 6756498647.0, "step": 8834 }, { "epoch": 3.2363286617202527, "grad_norm": 0.15241514290633804, "learning_rate": 1.4515817264360288e-05, "loss": 0.3986, "num_tokens": 6757293865.0, "step": 8835 }, { "epoch": 3.2366950627461755, "grad_norm": 0.1494992369670902, "learning_rate": 1.451193165418343e-05, "loss": 0.4161, "num_tokens": 6757955425.0, "step": 8836 }, { "epoch": 3.2370614637720987, "grad_norm": 0.1593789957154523, "learning_rate": 1.450804646591836e-05, "loss": 0.4028, "num_tokens": 6758679226.0, "step": 8837 }, { "epoch": 3.2374278647980215, "grad_norm": 0.1383454386211391, "learning_rate": 1.450416169978397e-05, "loss": 0.4523, "num_tokens": 6759451999.0, "step": 8838 }, { "epoch": 3.2377942658239443, "grad_norm": 0.14840467931683582, "learning_rate": 1.4500277355999153e-05, "loss": 0.3816, "num_tokens": 6760154507.0, "step": 8839 }, { "epoch": 3.238160666849867, "grad_norm": 0.1469623918134148, "learning_rate": 1.4496393434782788e-05, "loss": 0.4113, "num_tokens": 6760967792.0, "step": 8840 }, { "epoch": 3.2385270678757903, "grad_norm": 0.13174447881041476, "learning_rate": 1.449250993635368e-05, "loss": 0.3978, "num_tokens": 6761838158.0, "step": 8841 }, { "epoch": 3.238893468901713, "grad_norm": 0.133828995660042, "learning_rate": 1.4488626860930666e-05, "loss": 0.3992, "num_tokens": 6762716894.0, "step": 8842 }, { "epoch": 3.239259869927636, "grad_norm": 0.12564667508011937, "learning_rate": 1.4484744208732538e-05, "loss": 0.4024, "num_tokens": 6763523823.0, "step": 8843 }, { "epoch": 3.2396262709535586, "grad_norm": 0.14389066765639844, "learning_rate": 1.4480861979978045e-05, "loss": 0.3835, "num_tokens": 6764344093.0, "step": 8844 }, { "epoch": 3.2399926719794814, "grad_norm": 0.12463657710561293, "learning_rate": 1.4476980174885941e-05, "loss": 0.4044, "num_tokens": 6765107508.0, "step": 8845 }, { "epoch": 3.2403590730054046, "grad_norm": 0.13649464057616476, "learning_rate": 1.4473098793674942e-05, "loss": 0.3875, "num_tokens": 6765834354.0, "step": 8846 }, { "epoch": 3.2407254740313274, "grad_norm": 0.1486944023099617, "learning_rate": 1.4469217836563747e-05, "loss": 0.4258, "num_tokens": 6766691248.0, "step": 8847 }, { "epoch": 3.24109187505725, "grad_norm": 0.13757081489479228, "learning_rate": 1.446533730377102e-05, "loss": 0.3987, "num_tokens": 6767484348.0, "step": 8848 }, { "epoch": 3.241458276083173, "grad_norm": 0.13452016605610684, "learning_rate": 1.4461457195515416e-05, "loss": 0.3731, "num_tokens": 6768252734.0, "step": 8849 }, { "epoch": 3.2418246771090957, "grad_norm": 0.134028557081129, "learning_rate": 1.4457577512015547e-05, "loss": 0.3885, "num_tokens": 6768936834.0, "step": 8850 }, { "epoch": 3.242191078135019, "grad_norm": 0.15135209661143745, "learning_rate": 1.445369825349002e-05, "loss": 0.4007, "num_tokens": 6769650922.0, "step": 8851 }, { "epoch": 3.2425574791609417, "grad_norm": 0.13896998981051248, "learning_rate": 1.44498194201574e-05, "loss": 0.389, "num_tokens": 6770378087.0, "step": 8852 }, { "epoch": 3.2429238801868645, "grad_norm": 0.15108614925087951, "learning_rate": 1.4445941012236264e-05, "loss": 0.4294, "num_tokens": 6771098132.0, "step": 8853 }, { "epoch": 3.2432902812127873, "grad_norm": 0.1531870517368026, "learning_rate": 1.4442063029945107e-05, "loss": 0.3976, "num_tokens": 6771877879.0, "step": 8854 }, { "epoch": 3.24365668223871, "grad_norm": 0.142022629635142, "learning_rate": 1.4438185473502447e-05, "loss": 0.4003, "num_tokens": 6772603662.0, "step": 8855 }, { "epoch": 3.2440230832646333, "grad_norm": 0.14616810129178967, "learning_rate": 1.4434308343126762e-05, "loss": 0.4241, "num_tokens": 6773375222.0, "step": 8856 }, { "epoch": 3.244389484290556, "grad_norm": 0.1398417601986597, "learning_rate": 1.4430431639036519e-05, "loss": 0.404, "num_tokens": 6774135229.0, "step": 8857 }, { "epoch": 3.244755885316479, "grad_norm": 0.14564278776226525, "learning_rate": 1.4426555361450118e-05, "loss": 0.3926, "num_tokens": 6774914459.0, "step": 8858 }, { "epoch": 3.2451222863424016, "grad_norm": 0.14234313015157274, "learning_rate": 1.4422679510585998e-05, "loss": 0.4164, "num_tokens": 6775706870.0, "step": 8859 }, { "epoch": 3.245488687368325, "grad_norm": 0.13734140898597327, "learning_rate": 1.4418804086662529e-05, "loss": 0.3859, "num_tokens": 6776513911.0, "step": 8860 }, { "epoch": 3.2458550883942476, "grad_norm": 0.1443730481175037, "learning_rate": 1.4414929089898069e-05, "loss": 0.3821, "num_tokens": 6777325260.0, "step": 8861 }, { "epoch": 3.2462214894201704, "grad_norm": 0.13221787933057105, "learning_rate": 1.4411054520510956e-05, "loss": 0.3887, "num_tokens": 6778259694.0, "step": 8862 }, { "epoch": 3.246587890446093, "grad_norm": 0.1292094573273257, "learning_rate": 1.4407180378719497e-05, "loss": 0.4067, "num_tokens": 6779059391.0, "step": 8863 }, { "epoch": 3.246954291472016, "grad_norm": 0.1306972999480914, "learning_rate": 1.440330666474198e-05, "loss": 0.4041, "num_tokens": 6779817102.0, "step": 8864 }, { "epoch": 3.247320692497939, "grad_norm": 0.14162648133286554, "learning_rate": 1.439943337879667e-05, "loss": 0.421, "num_tokens": 6780656982.0, "step": 8865 }, { "epoch": 3.247687093523862, "grad_norm": 0.1330815208753113, "learning_rate": 1.4395560521101808e-05, "loss": 0.3917, "num_tokens": 6781503519.0, "step": 8866 }, { "epoch": 3.2480534945497848, "grad_norm": 0.13706474543788946, "learning_rate": 1.4391688091875602e-05, "loss": 0.3866, "num_tokens": 6782353285.0, "step": 8867 }, { "epoch": 3.2484198955757075, "grad_norm": 0.13303285418043845, "learning_rate": 1.4387816091336237e-05, "loss": 0.3793, "num_tokens": 6783119090.0, "step": 8868 }, { "epoch": 3.2487862966016303, "grad_norm": 0.1342792309561331, "learning_rate": 1.4383944519701896e-05, "loss": 0.4101, "num_tokens": 6783814563.0, "step": 8869 }, { "epoch": 3.2491526976275535, "grad_norm": 0.1546239610273028, "learning_rate": 1.4380073377190718e-05, "loss": 0.3749, "num_tokens": 6784672401.0, "step": 8870 }, { "epoch": 3.2495190986534763, "grad_norm": 0.14456510514890633, "learning_rate": 1.43762026640208e-05, "loss": 0.4047, "num_tokens": 6785410852.0, "step": 8871 }, { "epoch": 3.249885499679399, "grad_norm": 0.13209441809920933, "learning_rate": 1.4372332380410256e-05, "loss": 0.41, "num_tokens": 6786175986.0, "step": 8872 }, { "epoch": 3.250251900705322, "grad_norm": 0.14821221939534313, "learning_rate": 1.436846252657715e-05, "loss": 0.4191, "num_tokens": 6786891185.0, "step": 8873 }, { "epoch": 3.2506183017312447, "grad_norm": 0.15679170060595568, "learning_rate": 1.436459310273953e-05, "loss": 0.4287, "num_tokens": 6787612123.0, "step": 8874 }, { "epoch": 3.250984702757168, "grad_norm": 0.14263210573618681, "learning_rate": 1.4360724109115411e-05, "loss": 0.4158, "num_tokens": 6788338287.0, "step": 8875 }, { "epoch": 3.2513511037830907, "grad_norm": 0.15338477063136988, "learning_rate": 1.4356855545922792e-05, "loss": 0.4144, "num_tokens": 6789100109.0, "step": 8876 }, { "epoch": 3.2517175048090134, "grad_norm": 0.13950024140669998, "learning_rate": 1.4352987413379648e-05, "loss": 0.3976, "num_tokens": 6789917786.0, "step": 8877 }, { "epoch": 3.2520839058349362, "grad_norm": 0.13920193980560727, "learning_rate": 1.4349119711703923e-05, "loss": 0.3961, "num_tokens": 6790675450.0, "step": 8878 }, { "epoch": 3.252450306860859, "grad_norm": 0.1454782050789695, "learning_rate": 1.4345252441113532e-05, "loss": 0.4278, "num_tokens": 6791395659.0, "step": 8879 }, { "epoch": 3.2528167078867822, "grad_norm": 0.148036342412182, "learning_rate": 1.4341385601826408e-05, "loss": 0.399, "num_tokens": 6792121361.0, "step": 8880 }, { "epoch": 3.253183108912705, "grad_norm": 0.15010214623407886, "learning_rate": 1.4337519194060395e-05, "loss": 0.39, "num_tokens": 6792802428.0, "step": 8881 }, { "epoch": 3.253549509938628, "grad_norm": 0.14202787108570986, "learning_rate": 1.4333653218033341e-05, "loss": 0.4045, "num_tokens": 6793562930.0, "step": 8882 }, { "epoch": 3.2539159109645506, "grad_norm": 0.13699532332114248, "learning_rate": 1.4329787673963095e-05, "loss": 0.3862, "num_tokens": 6794383440.0, "step": 8883 }, { "epoch": 3.2542823119904734, "grad_norm": 0.13553363546440939, "learning_rate": 1.432592256206745e-05, "loss": 0.3964, "num_tokens": 6795102637.0, "step": 8884 }, { "epoch": 3.2546487130163966, "grad_norm": 0.14252407091520478, "learning_rate": 1.4322057882564174e-05, "loss": 0.4291, "num_tokens": 6795895959.0, "step": 8885 }, { "epoch": 3.2550151140423194, "grad_norm": 0.13939625313770604, "learning_rate": 1.431819363567103e-05, "loss": 0.4205, "num_tokens": 6796615687.0, "step": 8886 }, { "epoch": 3.255381515068242, "grad_norm": 0.14532096391340427, "learning_rate": 1.4314329821605752e-05, "loss": 0.4042, "num_tokens": 6797450730.0, "step": 8887 }, { "epoch": 3.255747916094165, "grad_norm": 0.13718136498701597, "learning_rate": 1.4310466440586037e-05, "loss": 0.3969, "num_tokens": 6798176511.0, "step": 8888 }, { "epoch": 3.2561143171200877, "grad_norm": 0.1444146434290958, "learning_rate": 1.4306603492829569e-05, "loss": 0.3881, "num_tokens": 6798964193.0, "step": 8889 }, { "epoch": 3.256480718146011, "grad_norm": 0.1382548075082418, "learning_rate": 1.4302740978554007e-05, "loss": 0.3951, "num_tokens": 6799801229.0, "step": 8890 }, { "epoch": 3.2568471191719337, "grad_norm": 0.1327244621828571, "learning_rate": 1.4298878897976969e-05, "loss": 0.4044, "num_tokens": 6800564977.0, "step": 8891 }, { "epoch": 3.2572135201978565, "grad_norm": 0.14748733754565113, "learning_rate": 1.4295017251316074e-05, "loss": 0.3808, "num_tokens": 6801273227.0, "step": 8892 }, { "epoch": 3.2575799212237793, "grad_norm": 0.14733523685201436, "learning_rate": 1.429115603878891e-05, "loss": 0.4235, "num_tokens": 6802095704.0, "step": 8893 }, { "epoch": 3.2579463222497025, "grad_norm": 0.1378748719955198, "learning_rate": 1.428729526061302e-05, "loss": 0.4179, "num_tokens": 6802822929.0, "step": 8894 }, { "epoch": 3.2583127232756253, "grad_norm": 0.14532168219783828, "learning_rate": 1.4283434917005945e-05, "loss": 0.4036, "num_tokens": 6803492319.0, "step": 8895 }, { "epoch": 3.258679124301548, "grad_norm": 0.15160882035204923, "learning_rate": 1.4279575008185189e-05, "loss": 0.4171, "num_tokens": 6804218043.0, "step": 8896 }, { "epoch": 3.259045525327471, "grad_norm": 0.15028938997369345, "learning_rate": 1.4275715534368257e-05, "loss": 0.4266, "num_tokens": 6804927243.0, "step": 8897 }, { "epoch": 3.2594119263533936, "grad_norm": 0.1443640124906424, "learning_rate": 1.427185649577258e-05, "loss": 0.3965, "num_tokens": 6805920588.0, "step": 8898 }, { "epoch": 3.259778327379317, "grad_norm": 0.11994201739271458, "learning_rate": 1.426799789261561e-05, "loss": 0.3897, "num_tokens": 6806721455.0, "step": 8899 }, { "epoch": 3.2601447284052396, "grad_norm": 0.13904701939148925, "learning_rate": 1.426413972511477e-05, "loss": 0.3792, "num_tokens": 6807515420.0, "step": 8900 }, { "epoch": 3.2605111294311624, "grad_norm": 0.1365683130937209, "learning_rate": 1.4260281993487422e-05, "loss": 0.386, "num_tokens": 6808241928.0, "step": 8901 }, { "epoch": 3.260877530457085, "grad_norm": 0.1409289517818287, "learning_rate": 1.4256424697950939e-05, "loss": 0.3853, "num_tokens": 6809100746.0, "step": 8902 }, { "epoch": 3.2612439314830084, "grad_norm": 0.1272457055420359, "learning_rate": 1.4252567838722659e-05, "loss": 0.3931, "num_tokens": 6809797345.0, "step": 8903 }, { "epoch": 3.261610332508931, "grad_norm": 0.1368672656492847, "learning_rate": 1.4248711416019903e-05, "loss": 0.3669, "num_tokens": 6810587939.0, "step": 8904 }, { "epoch": 3.261976733534854, "grad_norm": 0.14599868680502281, "learning_rate": 1.4244855430059947e-05, "loss": 0.4229, "num_tokens": 6811525950.0, "step": 8905 }, { "epoch": 3.2623431345607767, "grad_norm": 0.14277940452014098, "learning_rate": 1.4240999881060055e-05, "loss": 0.3998, "num_tokens": 6812206064.0, "step": 8906 }, { "epoch": 3.2627095355866995, "grad_norm": 0.13780695065730375, "learning_rate": 1.4237144769237472e-05, "loss": 0.398, "num_tokens": 6812973909.0, "step": 8907 }, { "epoch": 3.2630759366126227, "grad_norm": 0.14737724777241554, "learning_rate": 1.4233290094809413e-05, "loss": 0.4173, "num_tokens": 6813660023.0, "step": 8908 }, { "epoch": 3.2634423376385455, "grad_norm": 0.15082657417732995, "learning_rate": 1.4229435857993058e-05, "loss": 0.3938, "num_tokens": 6814504730.0, "step": 8909 }, { "epoch": 3.2638087386644683, "grad_norm": 0.14443051162037512, "learning_rate": 1.422558205900559e-05, "loss": 0.3848, "num_tokens": 6815292283.0, "step": 8910 }, { "epoch": 3.264175139690391, "grad_norm": 0.1329071825899723, "learning_rate": 1.4221728698064134e-05, "loss": 0.423, "num_tokens": 6816038718.0, "step": 8911 }, { "epoch": 3.264541540716314, "grad_norm": 0.15306121792839525, "learning_rate": 1.4217875775385808e-05, "loss": 0.417, "num_tokens": 6816756736.0, "step": 8912 }, { "epoch": 3.264907941742237, "grad_norm": 0.1393195729242914, "learning_rate": 1.4214023291187706e-05, "loss": 0.4439, "num_tokens": 6817492348.0, "step": 8913 }, { "epoch": 3.26527434276816, "grad_norm": 0.15011638997246823, "learning_rate": 1.4210171245686901e-05, "loss": 0.4044, "num_tokens": 6818276758.0, "step": 8914 }, { "epoch": 3.2656407437940826, "grad_norm": 0.13076342286145556, "learning_rate": 1.4206319639100423e-05, "loss": 0.3972, "num_tokens": 6819169543.0, "step": 8915 }, { "epoch": 3.2660071448200054, "grad_norm": 0.1345485341970622, "learning_rate": 1.4202468471645298e-05, "loss": 0.3861, "num_tokens": 6819991849.0, "step": 8916 }, { "epoch": 3.266373545845928, "grad_norm": 0.12573038490810995, "learning_rate": 1.419861774353852e-05, "loss": 0.4398, "num_tokens": 6820679716.0, "step": 8917 }, { "epoch": 3.2667399468718514, "grad_norm": 0.15303987646300096, "learning_rate": 1.4194767454997042e-05, "loss": 0.4305, "num_tokens": 6821416666.0, "step": 8918 }, { "epoch": 3.267106347897774, "grad_norm": 0.14284287470091503, "learning_rate": 1.4190917606237821e-05, "loss": 0.4395, "num_tokens": 6822151093.0, "step": 8919 }, { "epoch": 3.267472748923697, "grad_norm": 0.15054542706845148, "learning_rate": 1.4187068197477767e-05, "loss": 0.4134, "num_tokens": 6822899250.0, "step": 8920 }, { "epoch": 3.2678391499496198, "grad_norm": 0.13121277258744318, "learning_rate": 1.418321922893378e-05, "loss": 0.3875, "num_tokens": 6823640779.0, "step": 8921 }, { "epoch": 3.2682055509755426, "grad_norm": 0.14192674338370595, "learning_rate": 1.4179370700822719e-05, "loss": 0.385, "num_tokens": 6824321700.0, "step": 8922 }, { "epoch": 3.2685719520014658, "grad_norm": 0.13775601327390793, "learning_rate": 1.4175522613361436e-05, "loss": 0.3995, "num_tokens": 6825130479.0, "step": 8923 }, { "epoch": 3.2689383530273886, "grad_norm": 0.13611026307044227, "learning_rate": 1.417167496676675e-05, "loss": 0.4041, "num_tokens": 6826021330.0, "step": 8924 }, { "epoch": 3.2693047540533113, "grad_norm": 0.12976681280840083, "learning_rate": 1.416782776125544e-05, "loss": 0.3693, "num_tokens": 6826741351.0, "step": 8925 }, { "epoch": 3.269671155079234, "grad_norm": 0.1401685321351334, "learning_rate": 1.4163980997044294e-05, "loss": 0.392, "num_tokens": 6827471299.0, "step": 8926 }, { "epoch": 3.270037556105157, "grad_norm": 0.13805610489816778, "learning_rate": 1.416013467435006e-05, "loss": 0.4463, "num_tokens": 6828173074.0, "step": 8927 }, { "epoch": 3.27040395713108, "grad_norm": 0.14499028783355833, "learning_rate": 1.4156288793389425e-05, "loss": 0.3764, "num_tokens": 6828960080.0, "step": 8928 }, { "epoch": 3.270770358157003, "grad_norm": 0.13912545097344184, "learning_rate": 1.4152443354379118e-05, "loss": 0.3974, "num_tokens": 6829766372.0, "step": 8929 }, { "epoch": 3.2711367591829257, "grad_norm": 0.14042635935131642, "learning_rate": 1.4148598357535792e-05, "loss": 0.3985, "num_tokens": 6830477158.0, "step": 8930 }, { "epoch": 3.2715031602088485, "grad_norm": 0.1542433981184061, "learning_rate": 1.4144753803076095e-05, "loss": 0.4103, "num_tokens": 6831174390.0, "step": 8931 }, { "epoch": 3.2718695612347712, "grad_norm": 0.1586031965697964, "learning_rate": 1.4140909691216647e-05, "loss": 0.3833, "num_tokens": 6831793241.0, "step": 8932 }, { "epoch": 3.2722359622606945, "grad_norm": 0.15233085450014225, "learning_rate": 1.413706602217404e-05, "loss": 0.4324, "num_tokens": 6832498693.0, "step": 8933 }, { "epoch": 3.2726023632866172, "grad_norm": 0.1589079228806715, "learning_rate": 1.4133222796164847e-05, "loss": 0.4109, "num_tokens": 6833334930.0, "step": 8934 }, { "epoch": 3.27296876431254, "grad_norm": 0.14663574159317674, "learning_rate": 1.4129380013405612e-05, "loss": 0.4315, "num_tokens": 6834058645.0, "step": 8935 }, { "epoch": 3.273335165338463, "grad_norm": 0.13504162753547264, "learning_rate": 1.4125537674112848e-05, "loss": 0.4125, "num_tokens": 6834787212.0, "step": 8936 }, { "epoch": 3.2737015663643856, "grad_norm": 0.14357301728420932, "learning_rate": 1.4121695778503068e-05, "loss": 0.3696, "num_tokens": 6835517721.0, "step": 8937 }, { "epoch": 3.274067967390309, "grad_norm": 0.13913402523765875, "learning_rate": 1.4117854326792726e-05, "loss": 0.4115, "num_tokens": 6836188001.0, "step": 8938 }, { "epoch": 3.2744343684162316, "grad_norm": 0.14538849669358844, "learning_rate": 1.411401331919826e-05, "loss": 0.4082, "num_tokens": 6836946477.0, "step": 8939 }, { "epoch": 3.2748007694421544, "grad_norm": 0.14882840017155952, "learning_rate": 1.4110172755936118e-05, "loss": 0.4027, "num_tokens": 6837702790.0, "step": 8940 }, { "epoch": 3.275167170468077, "grad_norm": 0.1309828256762293, "learning_rate": 1.4106332637222671e-05, "loss": 0.3925, "num_tokens": 6838505360.0, "step": 8941 }, { "epoch": 3.2755335714940004, "grad_norm": 0.14015392049703929, "learning_rate": 1.4102492963274288e-05, "loss": 0.3953, "num_tokens": 6839201116.0, "step": 8942 }, { "epoch": 3.275899972519923, "grad_norm": 0.14848448638785594, "learning_rate": 1.409865373430733e-05, "loss": 0.4026, "num_tokens": 6839851752.0, "step": 8943 }, { "epoch": 3.276266373545846, "grad_norm": 0.15182057998621934, "learning_rate": 1.4094814950538101e-05, "loss": 0.4193, "num_tokens": 6840595719.0, "step": 8944 }, { "epoch": 3.2766327745717687, "grad_norm": 0.1459115537327289, "learning_rate": 1.4090976612182905e-05, "loss": 0.3856, "num_tokens": 6841311745.0, "step": 8945 }, { "epoch": 3.2769991755976915, "grad_norm": 0.13981295813679603, "learning_rate": 1.4087138719458012e-05, "loss": 0.4013, "num_tokens": 6842099377.0, "step": 8946 }, { "epoch": 3.2773655766236147, "grad_norm": 0.13843161847251756, "learning_rate": 1.4083301272579658e-05, "loss": 0.4148, "num_tokens": 6842882107.0, "step": 8947 }, { "epoch": 3.2777319776495375, "grad_norm": 0.14010053217378712, "learning_rate": 1.4079464271764071e-05, "loss": 0.4268, "num_tokens": 6843581330.0, "step": 8948 }, { "epoch": 3.2780983786754603, "grad_norm": 0.15913110506116995, "learning_rate": 1.4075627717227435e-05, "loss": 0.4112, "num_tokens": 6844350682.0, "step": 8949 }, { "epoch": 3.278464779701383, "grad_norm": 0.1402879705560301, "learning_rate": 1.407179160918593e-05, "loss": 0.4257, "num_tokens": 6845282472.0, "step": 8950 }, { "epoch": 3.2788311807273063, "grad_norm": 0.1393904073527968, "learning_rate": 1.406795594785569e-05, "loss": 0.4033, "num_tokens": 6846050750.0, "step": 8951 }, { "epoch": 3.279197581753229, "grad_norm": 0.14416210730434667, "learning_rate": 1.4064120733452837e-05, "loss": 0.394, "num_tokens": 6846770334.0, "step": 8952 }, { "epoch": 3.279563982779152, "grad_norm": 0.1675753926779235, "learning_rate": 1.406028596619347e-05, "loss": 0.4294, "num_tokens": 6847419777.0, "step": 8953 }, { "epoch": 3.2799303838050746, "grad_norm": 0.15615643074146798, "learning_rate": 1.4056451646293661e-05, "loss": 0.3901, "num_tokens": 6848328254.0, "step": 8954 }, { "epoch": 3.2802967848309974, "grad_norm": 0.12764281349318524, "learning_rate": 1.4052617773969428e-05, "loss": 0.4108, "num_tokens": 6849152935.0, "step": 8955 }, { "epoch": 3.2806631858569206, "grad_norm": 0.14781700996843666, "learning_rate": 1.4048784349436817e-05, "loss": 0.4341, "num_tokens": 6849963910.0, "step": 8956 }, { "epoch": 3.2810295868828434, "grad_norm": 0.14677137675512056, "learning_rate": 1.4044951372911814e-05, "loss": 0.4249, "num_tokens": 6850673471.0, "step": 8957 }, { "epoch": 3.281395987908766, "grad_norm": 0.1540714458954005, "learning_rate": 1.4041118844610365e-05, "loss": 0.4039, "num_tokens": 6851354828.0, "step": 8958 }, { "epoch": 3.281762388934689, "grad_norm": 0.14394201668536477, "learning_rate": 1.4037286764748438e-05, "loss": 0.4127, "num_tokens": 6852108691.0, "step": 8959 }, { "epoch": 3.2821287899606117, "grad_norm": 0.1407550655915986, "learning_rate": 1.4033455133541943e-05, "loss": 0.4247, "num_tokens": 6852930742.0, "step": 8960 }, { "epoch": 3.282495190986535, "grad_norm": 0.13786749355254527, "learning_rate": 1.4029623951206763e-05, "loss": 0.4181, "num_tokens": 6853654415.0, "step": 8961 }, { "epoch": 3.2828615920124578, "grad_norm": 0.16060931291736383, "learning_rate": 1.402579321795877e-05, "loss": 0.4391, "num_tokens": 6854329531.0, "step": 8962 }, { "epoch": 3.2832279930383805, "grad_norm": 0.15886901978382395, "learning_rate": 1.402196293401381e-05, "loss": 0.3958, "num_tokens": 6855149698.0, "step": 8963 }, { "epoch": 3.2835943940643033, "grad_norm": 0.14546844618187274, "learning_rate": 1.4018133099587692e-05, "loss": 0.4076, "num_tokens": 6855858984.0, "step": 8964 }, { "epoch": 3.283960795090226, "grad_norm": 0.1479811201916461, "learning_rate": 1.4014303714896204e-05, "loss": 0.3704, "num_tokens": 6856711680.0, "step": 8965 }, { "epoch": 3.2843271961161493, "grad_norm": 0.13059505368375196, "learning_rate": 1.4010474780155115e-05, "loss": 0.4021, "num_tokens": 6857624027.0, "step": 8966 }, { "epoch": 3.284693597142072, "grad_norm": 0.12761816992598318, "learning_rate": 1.4006646295580173e-05, "loss": 0.4453, "num_tokens": 6858487230.0, "step": 8967 }, { "epoch": 3.285059998167995, "grad_norm": 0.15113256619262447, "learning_rate": 1.4002818261387075e-05, "loss": 0.4319, "num_tokens": 6859222698.0, "step": 8968 }, { "epoch": 3.2854263991939177, "grad_norm": 0.15282264024641914, "learning_rate": 1.3998990677791513e-05, "loss": 0.4318, "num_tokens": 6860062596.0, "step": 8969 }, { "epoch": 3.2857928002198404, "grad_norm": 0.13172143258895758, "learning_rate": 1.3995163545009165e-05, "loss": 0.4176, "num_tokens": 6860744723.0, "step": 8970 }, { "epoch": 3.2861592012457637, "grad_norm": 0.1579398764158494, "learning_rate": 1.3991336863255657e-05, "loss": 0.4274, "num_tokens": 6861503097.0, "step": 8971 }, { "epoch": 3.2865256022716864, "grad_norm": 0.1443293233947614, "learning_rate": 1.3987510632746605e-05, "loss": 0.3956, "num_tokens": 6862292367.0, "step": 8972 }, { "epoch": 3.286892003297609, "grad_norm": 0.13505508666976546, "learning_rate": 1.3983684853697598e-05, "loss": 0.4179, "num_tokens": 6862993698.0, "step": 8973 }, { "epoch": 3.287258404323532, "grad_norm": 0.1522492176565954, "learning_rate": 1.3979859526324196e-05, "loss": 0.4182, "num_tokens": 6863774845.0, "step": 8974 }, { "epoch": 3.287624805349455, "grad_norm": 0.15194546718969545, "learning_rate": 1.3976034650841932e-05, "loss": 0.4241, "num_tokens": 6864424394.0, "step": 8975 }, { "epoch": 3.287991206375378, "grad_norm": 0.14046662058871023, "learning_rate": 1.3972210227466323e-05, "loss": 0.4522, "num_tokens": 6865244162.0, "step": 8976 }, { "epoch": 3.288357607401301, "grad_norm": 0.14578416559884805, "learning_rate": 1.3968386256412846e-05, "loss": 0.4012, "num_tokens": 6865975894.0, "step": 8977 }, { "epoch": 3.2887240084272236, "grad_norm": 0.13924763810150698, "learning_rate": 1.3964562737896968e-05, "loss": 0.3848, "num_tokens": 6866783739.0, "step": 8978 }, { "epoch": 3.2890904094531463, "grad_norm": 0.13867009516108286, "learning_rate": 1.3960739672134123e-05, "loss": 0.3876, "num_tokens": 6867414167.0, "step": 8979 }, { "epoch": 3.289456810479069, "grad_norm": 0.15139450513411934, "learning_rate": 1.3956917059339716e-05, "loss": 0.4058, "num_tokens": 6868172045.0, "step": 8980 }, { "epoch": 3.2898232115049924, "grad_norm": 0.13591831014234826, "learning_rate": 1.395309489972913e-05, "loss": 0.4103, "num_tokens": 6868946009.0, "step": 8981 }, { "epoch": 3.290189612530915, "grad_norm": 0.14640199842805326, "learning_rate": 1.3949273193517723e-05, "loss": 0.4415, "num_tokens": 6869668931.0, "step": 8982 }, { "epoch": 3.290556013556838, "grad_norm": 0.15443612988332736, "learning_rate": 1.3945451940920837e-05, "loss": 0.4176, "num_tokens": 6870357739.0, "step": 8983 }, { "epoch": 3.2909224145827607, "grad_norm": 0.14644216396490684, "learning_rate": 1.3941631142153776e-05, "loss": 0.3924, "num_tokens": 6871299954.0, "step": 8984 }, { "epoch": 3.2912888156086835, "grad_norm": 0.130316930563019, "learning_rate": 1.3937810797431804e-05, "loss": 0.4025, "num_tokens": 6872096646.0, "step": 8985 }, { "epoch": 3.2916552166346067, "grad_norm": 0.13404331921572657, "learning_rate": 1.3933990906970195e-05, "loss": 0.4162, "num_tokens": 6872861876.0, "step": 8986 }, { "epoch": 3.2920216176605295, "grad_norm": 0.14398478689371877, "learning_rate": 1.3930171470984173e-05, "loss": 0.4269, "num_tokens": 6873602161.0, "step": 8987 }, { "epoch": 3.2923880186864523, "grad_norm": 0.13542360669743864, "learning_rate": 1.3926352489688944e-05, "loss": 0.4248, "num_tokens": 6874446250.0, "step": 8988 }, { "epoch": 3.292754419712375, "grad_norm": 0.14121633419595978, "learning_rate": 1.3922533963299683e-05, "loss": 0.4154, "num_tokens": 6875190362.0, "step": 8989 }, { "epoch": 3.2931208207382983, "grad_norm": 0.130718935460752, "learning_rate": 1.3918715892031544e-05, "loss": 0.4131, "num_tokens": 6876026058.0, "step": 8990 }, { "epoch": 3.293487221764221, "grad_norm": 0.14271604768270918, "learning_rate": 1.391489827609966e-05, "loss": 0.4034, "num_tokens": 6876898510.0, "step": 8991 }, { "epoch": 3.293853622790144, "grad_norm": 0.1339572657781764, "learning_rate": 1.3911081115719127e-05, "loss": 0.3979, "num_tokens": 6877537792.0, "step": 8992 }, { "epoch": 3.2942200238160666, "grad_norm": 0.1660687897538706, "learning_rate": 1.3907264411105015e-05, "loss": 0.397, "num_tokens": 6878212076.0, "step": 8993 }, { "epoch": 3.2945864248419894, "grad_norm": 0.14585489184333758, "learning_rate": 1.3903448162472398e-05, "loss": 0.4152, "num_tokens": 6878948463.0, "step": 8994 }, { "epoch": 3.2949528258679126, "grad_norm": 0.13485445155600062, "learning_rate": 1.3899632370036276e-05, "loss": 0.3982, "num_tokens": 6879706322.0, "step": 8995 }, { "epoch": 3.2953192268938354, "grad_norm": 0.15413907239279762, "learning_rate": 1.3895817034011652e-05, "loss": 0.3892, "num_tokens": 6880433369.0, "step": 8996 }, { "epoch": 3.295685627919758, "grad_norm": 0.14862183314585253, "learning_rate": 1.3892002154613517e-05, "loss": 0.4104, "num_tokens": 6881154628.0, "step": 8997 }, { "epoch": 3.296052028945681, "grad_norm": 0.1395484233986543, "learning_rate": 1.38881877320568e-05, "loss": 0.3877, "num_tokens": 6881919044.0, "step": 8998 }, { "epoch": 3.296418429971604, "grad_norm": 0.14253785911684042, "learning_rate": 1.388437376655642e-05, "loss": 0.4231, "num_tokens": 6882745536.0, "step": 8999 }, { "epoch": 3.296784830997527, "grad_norm": 0.1401415045463096, "learning_rate": 1.3880560258327291e-05, "loss": 0.4392, "num_tokens": 6883528319.0, "step": 9000 }, { "epoch": 3.2971512320234497, "grad_norm": 0.14523900010745797, "learning_rate": 1.3876747207584276e-05, "loss": 0.4073, "num_tokens": 6884334627.0, "step": 9001 }, { "epoch": 3.2975176330493725, "grad_norm": 0.14330731800176041, "learning_rate": 1.3872934614542216e-05, "loss": 0.4245, "num_tokens": 6885081509.0, "step": 9002 }, { "epoch": 3.2978840340752953, "grad_norm": 0.14150882258719988, "learning_rate": 1.3869122479415933e-05, "loss": 0.4297, "num_tokens": 6885847426.0, "step": 9003 }, { "epoch": 3.2982504351012185, "grad_norm": 0.14274002839832767, "learning_rate": 1.3865310802420218e-05, "loss": 0.4037, "num_tokens": 6886604512.0, "step": 9004 }, { "epoch": 3.2986168361271413, "grad_norm": 0.15159166382968825, "learning_rate": 1.386149958376984e-05, "loss": 0.4094, "num_tokens": 6887363693.0, "step": 9005 }, { "epoch": 3.298983237153064, "grad_norm": 0.14103085346020205, "learning_rate": 1.3857688823679536e-05, "loss": 0.4081, "num_tokens": 6888113633.0, "step": 9006 }, { "epoch": 3.299349638178987, "grad_norm": 0.15014862822595237, "learning_rate": 1.3853878522364029e-05, "loss": 0.4102, "num_tokens": 6888809482.0, "step": 9007 }, { "epoch": 3.2997160392049096, "grad_norm": 0.14445237467551827, "learning_rate": 1.3850068680038002e-05, "loss": 0.3867, "num_tokens": 6889619972.0, "step": 9008 }, { "epoch": 3.300082440230833, "grad_norm": 0.13402920158878648, "learning_rate": 1.3846259296916117e-05, "loss": 0.4078, "num_tokens": 6890355100.0, "step": 9009 }, { "epoch": 3.3004488412567556, "grad_norm": 0.1434625096810054, "learning_rate": 1.3842450373213021e-05, "loss": 0.378, "num_tokens": 6891190309.0, "step": 9010 }, { "epoch": 3.3008152422826784, "grad_norm": 0.13566726187149455, "learning_rate": 1.3838641909143331e-05, "loss": 0.3947, "num_tokens": 6891990553.0, "step": 9011 }, { "epoch": 3.301181643308601, "grad_norm": 0.13854961309157163, "learning_rate": 1.383483390492161e-05, "loss": 0.4489, "num_tokens": 6892756568.0, "step": 9012 }, { "epoch": 3.301548044334524, "grad_norm": 0.14057701918513152, "learning_rate": 1.3831026360762438e-05, "loss": 0.3938, "num_tokens": 6893676309.0, "step": 9013 }, { "epoch": 3.301914445360447, "grad_norm": 0.14138138097427336, "learning_rate": 1.382721927688035e-05, "loss": 0.4329, "num_tokens": 6894457014.0, "step": 9014 }, { "epoch": 3.30228084638637, "grad_norm": 0.4015156564785973, "learning_rate": 1.3823412653489832e-05, "loss": 0.4487, "num_tokens": 6895162563.0, "step": 9015 }, { "epoch": 3.3026472474122928, "grad_norm": 0.15261715357943764, "learning_rate": 1.3819606490805393e-05, "loss": 0.4144, "num_tokens": 6895971211.0, "step": 9016 }, { "epoch": 3.3030136484382155, "grad_norm": 0.13845857166978395, "learning_rate": 1.3815800789041474e-05, "loss": 0.406, "num_tokens": 6896643944.0, "step": 9017 }, { "epoch": 3.3033800494641383, "grad_norm": 0.14882631734974022, "learning_rate": 1.3811995548412514e-05, "loss": 0.4034, "num_tokens": 6897463864.0, "step": 9018 }, { "epoch": 3.3037464504900615, "grad_norm": 0.15516579391028087, "learning_rate": 1.3808190769132911e-05, "loss": 0.3739, "num_tokens": 6898257352.0, "step": 9019 }, { "epoch": 3.3041128515159843, "grad_norm": 0.13748556397982692, "learning_rate": 1.380438645141704e-05, "loss": 0.3819, "num_tokens": 6898933449.0, "step": 9020 }, { "epoch": 3.304479252541907, "grad_norm": 0.1595207542038367, "learning_rate": 1.3800582595479277e-05, "loss": 0.3892, "num_tokens": 6899826966.0, "step": 9021 }, { "epoch": 3.30484565356783, "grad_norm": 0.1328926692666182, "learning_rate": 1.3796779201533921e-05, "loss": 0.4121, "num_tokens": 6900779891.0, "step": 9022 }, { "epoch": 3.3052120545937527, "grad_norm": 0.13666848780371693, "learning_rate": 1.3792976269795275e-05, "loss": 0.4246, "num_tokens": 6901599229.0, "step": 9023 }, { "epoch": 3.305578455619676, "grad_norm": 0.13526378845817405, "learning_rate": 1.3789173800477636e-05, "loss": 0.4067, "num_tokens": 6902452642.0, "step": 9024 }, { "epoch": 3.3059448566455987, "grad_norm": 0.13979952103129895, "learning_rate": 1.378537179379523e-05, "loss": 0.4109, "num_tokens": 6903124722.0, "step": 9025 }, { "epoch": 3.3063112576715215, "grad_norm": 0.1589383607330465, "learning_rate": 1.3781570249962282e-05, "loss": 0.393, "num_tokens": 6903841975.0, "step": 9026 }, { "epoch": 3.3066776586974442, "grad_norm": 0.1444531989833573, "learning_rate": 1.3777769169193e-05, "loss": 0.4087, "num_tokens": 6904644506.0, "step": 9027 }, { "epoch": 3.307044059723367, "grad_norm": 0.13918209740953127, "learning_rate": 1.3773968551701547e-05, "loss": 0.4488, "num_tokens": 6905439786.0, "step": 9028 }, { "epoch": 3.3074104607492902, "grad_norm": 0.14547836436253725, "learning_rate": 1.3770168397702067e-05, "loss": 0.3894, "num_tokens": 6906209678.0, "step": 9029 }, { "epoch": 3.307776861775213, "grad_norm": 0.14789665567956625, "learning_rate": 1.376636870740868e-05, "loss": 0.4056, "num_tokens": 6906972730.0, "step": 9030 }, { "epoch": 3.308143262801136, "grad_norm": 0.13613154353002743, "learning_rate": 1.376256948103547e-05, "loss": 0.4194, "num_tokens": 6907752419.0, "step": 9031 }, { "epoch": 3.3085096638270586, "grad_norm": 0.13978412380109057, "learning_rate": 1.3758770718796515e-05, "loss": 0.3893, "num_tokens": 6908579882.0, "step": 9032 }, { "epoch": 3.3088760648529814, "grad_norm": 0.1476803713362284, "learning_rate": 1.3754972420905848e-05, "loss": 0.4385, "num_tokens": 6909323425.0, "step": 9033 }, { "epoch": 3.3092424658789046, "grad_norm": 0.1466808286234264, "learning_rate": 1.375117458757748e-05, "loss": 0.4164, "num_tokens": 6910228419.0, "step": 9034 }, { "epoch": 3.3096088669048274, "grad_norm": 0.1340561619130328, "learning_rate": 1.3747377219025402e-05, "loss": 0.3951, "num_tokens": 6911082922.0, "step": 9035 }, { "epoch": 3.30997526793075, "grad_norm": 0.13131106092153055, "learning_rate": 1.3743580315463567e-05, "loss": 0.4175, "num_tokens": 6911756964.0, "step": 9036 }, { "epoch": 3.310341668956673, "grad_norm": 0.15580233043934935, "learning_rate": 1.3739783877105932e-05, "loss": 0.4282, "num_tokens": 6912449085.0, "step": 9037 }, { "epoch": 3.3107080699825957, "grad_norm": 0.15069522545047231, "learning_rate": 1.373598790416638e-05, "loss": 0.4081, "num_tokens": 6913213935.0, "step": 9038 }, { "epoch": 3.311074471008519, "grad_norm": 0.14431162562201208, "learning_rate": 1.3732192396858799e-05, "loss": 0.4018, "num_tokens": 6914057584.0, "step": 9039 }, { "epoch": 3.3114408720344417, "grad_norm": 0.1381637334219751, "learning_rate": 1.3728397355397056e-05, "loss": 0.4201, "num_tokens": 6914867055.0, "step": 9040 }, { "epoch": 3.3118072730603645, "grad_norm": 0.13510050352962222, "learning_rate": 1.3724602779994982e-05, "loss": 0.4082, "num_tokens": 6915716175.0, "step": 9041 }, { "epoch": 3.3121736740862873, "grad_norm": 0.1376354800876153, "learning_rate": 1.3720808670866358e-05, "loss": 0.4343, "num_tokens": 6916519611.0, "step": 9042 }, { "epoch": 3.3125400751122105, "grad_norm": 0.15672269485967233, "learning_rate": 1.3717015028224985e-05, "loss": 0.4231, "num_tokens": 6917319131.0, "step": 9043 }, { "epoch": 3.3129064761381333, "grad_norm": 0.13612326701219057, "learning_rate": 1.3713221852284604e-05, "loss": 0.4329, "num_tokens": 6918008493.0, "step": 9044 }, { "epoch": 3.313272877164056, "grad_norm": 0.15004514271956768, "learning_rate": 1.3709429143258944e-05, "loss": 0.4093, "num_tokens": 6918821983.0, "step": 9045 }, { "epoch": 3.313639278189979, "grad_norm": 0.13818431492462013, "learning_rate": 1.3705636901361698e-05, "loss": 0.4249, "num_tokens": 6919532829.0, "step": 9046 }, { "epoch": 3.3140056792159016, "grad_norm": 0.15750203868052154, "learning_rate": 1.370184512680654e-05, "loss": 0.4492, "num_tokens": 6920187971.0, "step": 9047 }, { "epoch": 3.314372080241825, "grad_norm": 0.14616199083531423, "learning_rate": 1.369805381980712e-05, "loss": 0.4107, "num_tokens": 6921065461.0, "step": 9048 }, { "epoch": 3.3147384812677476, "grad_norm": 0.1397769679987355, "learning_rate": 1.3694262980577056e-05, "loss": 0.4267, "num_tokens": 6921902843.0, "step": 9049 }, { "epoch": 3.3151048822936704, "grad_norm": 0.1351768684510621, "learning_rate": 1.3690472609329927e-05, "loss": 0.3767, "num_tokens": 6922622158.0, "step": 9050 }, { "epoch": 3.315471283319593, "grad_norm": 0.145125492366468, "learning_rate": 1.3686682706279329e-05, "loss": 0.425, "num_tokens": 6923317102.0, "step": 9051 }, { "epoch": 3.3158376843455164, "grad_norm": 0.13715457462538128, "learning_rate": 1.3682893271638775e-05, "loss": 0.3991, "num_tokens": 6924148377.0, "step": 9052 }, { "epoch": 3.316204085371439, "grad_norm": 0.13388307407570826, "learning_rate": 1.3679104305621786e-05, "loss": 0.3903, "num_tokens": 6924896033.0, "step": 9053 }, { "epoch": 3.316570486397362, "grad_norm": 0.1451589323228443, "learning_rate": 1.3675315808441864e-05, "loss": 0.4448, "num_tokens": 6925670739.0, "step": 9054 }, { "epoch": 3.3169368874232847, "grad_norm": 0.15146630470359393, "learning_rate": 1.3671527780312448e-05, "loss": 0.415, "num_tokens": 6926477342.0, "step": 9055 }, { "epoch": 3.3173032884492075, "grad_norm": 0.14949832560137025, "learning_rate": 1.3667740221446988e-05, "loss": 0.4097, "num_tokens": 6927291360.0, "step": 9056 }, { "epoch": 3.3176696894751307, "grad_norm": 0.13913628209045223, "learning_rate": 1.366395313205889e-05, "loss": 0.4084, "num_tokens": 6928036584.0, "step": 9057 }, { "epoch": 3.3180360905010535, "grad_norm": 0.14525339334744722, "learning_rate": 1.3660166512361533e-05, "loss": 0.3795, "num_tokens": 6928792408.0, "step": 9058 }, { "epoch": 3.3184024915269763, "grad_norm": 0.13998311652226123, "learning_rate": 1.3656380362568276e-05, "loss": 0.4159, "num_tokens": 6929591347.0, "step": 9059 }, { "epoch": 3.318768892552899, "grad_norm": 0.14277228891348123, "learning_rate": 1.3652594682892442e-05, "loss": 0.3957, "num_tokens": 6930428435.0, "step": 9060 }, { "epoch": 3.319135293578822, "grad_norm": 0.1365575209183788, "learning_rate": 1.3648809473547337e-05, "loss": 0.3867, "num_tokens": 6931148985.0, "step": 9061 }, { "epoch": 3.319501694604745, "grad_norm": 0.14447343472143861, "learning_rate": 1.3645024734746234e-05, "loss": 0.4306, "num_tokens": 6931962013.0, "step": 9062 }, { "epoch": 3.319868095630668, "grad_norm": 0.14236685937311952, "learning_rate": 1.3641240466702388e-05, "loss": 0.437, "num_tokens": 6932647080.0, "step": 9063 }, { "epoch": 3.3202344966565907, "grad_norm": 0.1600839508026975, "learning_rate": 1.3637456669629017e-05, "loss": 0.4056, "num_tokens": 6933406766.0, "step": 9064 }, { "epoch": 3.3206008976825134, "grad_norm": 0.14758570751126696, "learning_rate": 1.363367334373932e-05, "loss": 0.4243, "num_tokens": 6934154747.0, "step": 9065 }, { "epoch": 3.320967298708436, "grad_norm": 0.1464409242965838, "learning_rate": 1.362989048924646e-05, "loss": 0.4311, "num_tokens": 6934886077.0, "step": 9066 }, { "epoch": 3.3213336997343594, "grad_norm": 0.14997002138526003, "learning_rate": 1.3626108106363587e-05, "loss": 0.3854, "num_tokens": 6935622138.0, "step": 9067 }, { "epoch": 3.321700100760282, "grad_norm": 0.1434387664300549, "learning_rate": 1.3622326195303828e-05, "loss": 0.4091, "num_tokens": 6936455923.0, "step": 9068 }, { "epoch": 3.322066501786205, "grad_norm": 0.13485272787751715, "learning_rate": 1.3618544756280247e-05, "loss": 0.3889, "num_tokens": 6937217038.0, "step": 9069 }, { "epoch": 3.3224329028121278, "grad_norm": 0.14352865186956396, "learning_rate": 1.361476378950593e-05, "loss": 0.3839, "num_tokens": 6937945742.0, "step": 9070 }, { "epoch": 3.3227993038380506, "grad_norm": 0.1404997640098156, "learning_rate": 1.3610983295193909e-05, "loss": 0.4335, "num_tokens": 6938618540.0, "step": 9071 }, { "epoch": 3.323165704863974, "grad_norm": 0.16448177886228346, "learning_rate": 1.3607203273557179e-05, "loss": 0.3951, "num_tokens": 6939359831.0, "step": 9072 }, { "epoch": 3.3235321058898966, "grad_norm": 0.1356396081226688, "learning_rate": 1.3603423724808745e-05, "loss": 0.3944, "num_tokens": 6940117440.0, "step": 9073 }, { "epoch": 3.3238985069158193, "grad_norm": 0.14595449015463524, "learning_rate": 1.3599644649161553e-05, "loss": 0.3764, "num_tokens": 6940905263.0, "step": 9074 }, { "epoch": 3.324264907941742, "grad_norm": 0.13966016600914796, "learning_rate": 1.3595866046828532e-05, "loss": 0.3862, "num_tokens": 6941642527.0, "step": 9075 }, { "epoch": 3.324631308967665, "grad_norm": 0.139291361491479, "learning_rate": 1.3592087918022591e-05, "loss": 0.4056, "num_tokens": 6942294062.0, "step": 9076 }, { "epoch": 3.324997709993588, "grad_norm": 0.14510955971291623, "learning_rate": 1.3588310262956595e-05, "loss": 0.3971, "num_tokens": 6943022108.0, "step": 9077 }, { "epoch": 3.325364111019511, "grad_norm": 0.14493793083713566, "learning_rate": 1.3584533081843422e-05, "loss": 0.3925, "num_tokens": 6943775190.0, "step": 9078 }, { "epoch": 3.3257305120454337, "grad_norm": 0.14811508791455646, "learning_rate": 1.358075637489587e-05, "loss": 0.4233, "num_tokens": 6944585384.0, "step": 9079 }, { "epoch": 3.3260969130713565, "grad_norm": 0.13305626310661295, "learning_rate": 1.357698014232673e-05, "loss": 0.4027, "num_tokens": 6945222202.0, "step": 9080 }, { "epoch": 3.3264633140972792, "grad_norm": 0.14734783551483474, "learning_rate": 1.3573204384348805e-05, "loss": 0.4254, "num_tokens": 6945937256.0, "step": 9081 }, { "epoch": 3.3268297151232025, "grad_norm": 0.13469541073869568, "learning_rate": 1.356942910117481e-05, "loss": 0.397, "num_tokens": 6946822050.0, "step": 9082 }, { "epoch": 3.3271961161491252, "grad_norm": 0.14014285810614166, "learning_rate": 1.356565429301746e-05, "loss": 0.4247, "num_tokens": 6947564386.0, "step": 9083 }, { "epoch": 3.327562517175048, "grad_norm": 0.13792175410137428, "learning_rate": 1.3561879960089464e-05, "loss": 0.4015, "num_tokens": 6948310566.0, "step": 9084 }, { "epoch": 3.327928918200971, "grad_norm": 0.1446666407007102, "learning_rate": 1.3558106102603477e-05, "loss": 0.4314, "num_tokens": 6949149444.0, "step": 9085 }, { "epoch": 3.3282953192268936, "grad_norm": 0.13413070798814417, "learning_rate": 1.3554332720772132e-05, "loss": 0.4348, "num_tokens": 6950008345.0, "step": 9086 }, { "epoch": 3.328661720252817, "grad_norm": 0.13043937601560504, "learning_rate": 1.3550559814808043e-05, "loss": 0.3947, "num_tokens": 6950801862.0, "step": 9087 }, { "epoch": 3.3290281212787396, "grad_norm": 0.13383039606800679, "learning_rate": 1.3546787384923787e-05, "loss": 0.4123, "num_tokens": 6951589169.0, "step": 9088 }, { "epoch": 3.3293945223046624, "grad_norm": 0.14567307592681336, "learning_rate": 1.3543015431331928e-05, "loss": 0.3826, "num_tokens": 6952346835.0, "step": 9089 }, { "epoch": 3.329760923330585, "grad_norm": 0.13217749881194948, "learning_rate": 1.3539243954244987e-05, "loss": 0.439, "num_tokens": 6953029162.0, "step": 9090 }, { "epoch": 3.3301273243565084, "grad_norm": 0.15263735552840382, "learning_rate": 1.3535472953875469e-05, "loss": 0.3841, "num_tokens": 6953686655.0, "step": 9091 }, { "epoch": 3.330493725382431, "grad_norm": 0.16119855900825394, "learning_rate": 1.3531702430435854e-05, "loss": 0.4128, "num_tokens": 6954421073.0, "step": 9092 }, { "epoch": 3.330860126408354, "grad_norm": 0.1296572041678807, "learning_rate": 1.3527932384138573e-05, "loss": 0.4044, "num_tokens": 6955189092.0, "step": 9093 }, { "epoch": 3.3312265274342767, "grad_norm": 0.1331797949525266, "learning_rate": 1.3524162815196075e-05, "loss": 0.4011, "num_tokens": 6955984420.0, "step": 9094 }, { "epoch": 3.3315929284601995, "grad_norm": 0.14691328707047452, "learning_rate": 1.3520393723820737e-05, "loss": 0.4259, "num_tokens": 6956663117.0, "step": 9095 }, { "epoch": 3.3319593294861227, "grad_norm": 0.14442623966348564, "learning_rate": 1.3516625110224921e-05, "loss": 0.4179, "num_tokens": 6957349621.0, "step": 9096 }, { "epoch": 3.3323257305120455, "grad_norm": 0.14846544543321905, "learning_rate": 1.3512856974620983e-05, "loss": 0.4123, "num_tokens": 6958122108.0, "step": 9097 }, { "epoch": 3.3326921315379683, "grad_norm": 0.13645845080296917, "learning_rate": 1.3509089317221237e-05, "loss": 0.4019, "num_tokens": 6958945938.0, "step": 9098 }, { "epoch": 3.333058532563891, "grad_norm": 0.14918493994186824, "learning_rate": 1.3505322138237953e-05, "loss": 0.3846, "num_tokens": 6959735375.0, "step": 9099 }, { "epoch": 3.3334249335898143, "grad_norm": 0.13323447041265218, "learning_rate": 1.3501555437883408e-05, "loss": 0.4104, "num_tokens": 6960473336.0, "step": 9100 }, { "epoch": 3.333791334615737, "grad_norm": 0.14564928035144295, "learning_rate": 1.3497789216369826e-05, "loss": 0.407, "num_tokens": 6961303135.0, "step": 9101 }, { "epoch": 3.33415773564166, "grad_norm": 0.14146052297920392, "learning_rate": 1.3494023473909414e-05, "loss": 0.3939, "num_tokens": 6962047911.0, "step": 9102 }, { "epoch": 3.3345241366675826, "grad_norm": 0.1438308062500259, "learning_rate": 1.3490258210714355e-05, "loss": 0.4344, "num_tokens": 6963048748.0, "step": 9103 }, { "epoch": 3.3348905376935054, "grad_norm": 0.1310923506201898, "learning_rate": 1.3486493426996798e-05, "loss": 0.4018, "num_tokens": 6963849777.0, "step": 9104 }, { "epoch": 3.3352569387194286, "grad_norm": 0.14635325975021085, "learning_rate": 1.3482729122968872e-05, "loss": 0.4261, "num_tokens": 6964665705.0, "step": 9105 }, { "epoch": 3.3356233397453514, "grad_norm": 0.1394088799364929, "learning_rate": 1.3478965298842668e-05, "loss": 0.4259, "num_tokens": 6965491338.0, "step": 9106 }, { "epoch": 3.335989740771274, "grad_norm": 0.14925972528531262, "learning_rate": 1.3475201954830252e-05, "loss": 0.4314, "num_tokens": 6966250930.0, "step": 9107 }, { "epoch": 3.336356141797197, "grad_norm": 0.1367036066525222, "learning_rate": 1.3471439091143694e-05, "loss": 0.4395, "num_tokens": 6967160567.0, "step": 9108 }, { "epoch": 3.3367225428231198, "grad_norm": 0.14652605084647227, "learning_rate": 1.3467676707994983e-05, "loss": 0.4095, "num_tokens": 6967944031.0, "step": 9109 }, { "epoch": 3.337088943849043, "grad_norm": 0.14880744523070213, "learning_rate": 1.3463914805596114e-05, "loss": 0.4007, "num_tokens": 6968757610.0, "step": 9110 }, { "epoch": 3.3374553448749658, "grad_norm": 0.13357154615898933, "learning_rate": 1.346015338415907e-05, "loss": 0.3937, "num_tokens": 6969599995.0, "step": 9111 }, { "epoch": 3.3378217459008885, "grad_norm": 0.1426602843795838, "learning_rate": 1.3456392443895756e-05, "loss": 0.4179, "num_tokens": 6970400467.0, "step": 9112 }, { "epoch": 3.3381881469268113, "grad_norm": 0.14135080334678268, "learning_rate": 1.3452631985018101e-05, "loss": 0.4019, "num_tokens": 6971067427.0, "step": 9113 }, { "epoch": 3.338554547952734, "grad_norm": 0.14960947183338255, "learning_rate": 1.344887200773798e-05, "loss": 0.4098, "num_tokens": 6971870109.0, "step": 9114 }, { "epoch": 3.3389209489786573, "grad_norm": 0.12475547475926871, "learning_rate": 1.3445112512267255e-05, "loss": 0.3869, "num_tokens": 6972562191.0, "step": 9115 }, { "epoch": 3.33928735000458, "grad_norm": 0.15406224131416293, "learning_rate": 1.344135349881774e-05, "loss": 0.3832, "num_tokens": 6973353219.0, "step": 9116 }, { "epoch": 3.339653751030503, "grad_norm": 0.12126876187414498, "learning_rate": 1.3437594967601242e-05, "loss": 0.4115, "num_tokens": 6974174050.0, "step": 9117 }, { "epoch": 3.3400201520564257, "grad_norm": 0.14126466415666297, "learning_rate": 1.3433836918829535e-05, "loss": 0.3765, "num_tokens": 6974897829.0, "step": 9118 }, { "epoch": 3.3403865530823484, "grad_norm": 0.13996388433449622, "learning_rate": 1.343007935271436e-05, "loss": 0.4217, "num_tokens": 6975751156.0, "step": 9119 }, { "epoch": 3.3407529541082717, "grad_norm": 0.13417996760299, "learning_rate": 1.3426322269467443e-05, "loss": 0.39, "num_tokens": 6976527268.0, "step": 9120 }, { "epoch": 3.3411193551341944, "grad_norm": 0.14665109681819083, "learning_rate": 1.3422565669300465e-05, "loss": 0.4492, "num_tokens": 6977152931.0, "step": 9121 }, { "epoch": 3.3414857561601172, "grad_norm": 0.16384220166185018, "learning_rate": 1.3418809552425094e-05, "loss": 0.4057, "num_tokens": 6977821996.0, "step": 9122 }, { "epoch": 3.34185215718604, "grad_norm": 0.15339464516313786, "learning_rate": 1.3415053919052963e-05, "loss": 0.3902, "num_tokens": 6978450241.0, "step": 9123 }, { "epoch": 3.342218558211963, "grad_norm": 0.14633275635292184, "learning_rate": 1.341129876939569e-05, "loss": 0.3885, "num_tokens": 6979247347.0, "step": 9124 }, { "epoch": 3.342584959237886, "grad_norm": 0.13511277394610652, "learning_rate": 1.3407544103664866e-05, "loss": 0.4137, "num_tokens": 6980060420.0, "step": 9125 }, { "epoch": 3.342951360263809, "grad_norm": 0.13961668105040517, "learning_rate": 1.3403789922072013e-05, "loss": 0.3892, "num_tokens": 6980796151.0, "step": 9126 }, { "epoch": 3.3433177612897316, "grad_norm": 0.14189994394065159, "learning_rate": 1.3400036224828688e-05, "loss": 0.3912, "num_tokens": 6981527442.0, "step": 9127 }, { "epoch": 3.3436841623156544, "grad_norm": 0.145660285139423, "learning_rate": 1.3396283012146389e-05, "loss": 0.4089, "num_tokens": 6982183566.0, "step": 9128 }, { "epoch": 3.344050563341577, "grad_norm": 0.1435325100698491, "learning_rate": 1.3392530284236569e-05, "loss": 0.411, "num_tokens": 6982954900.0, "step": 9129 }, { "epoch": 3.3444169643675004, "grad_norm": 0.14387104980345738, "learning_rate": 1.3388778041310694e-05, "loss": 0.3848, "num_tokens": 6983734369.0, "step": 9130 }, { "epoch": 3.344783365393423, "grad_norm": 0.1469312027114729, "learning_rate": 1.3385026283580174e-05, "loss": 0.4262, "num_tokens": 6984334397.0, "step": 9131 }, { "epoch": 3.345149766419346, "grad_norm": 0.17031604133816236, "learning_rate": 1.3381275011256402e-05, "loss": 0.4328, "num_tokens": 6985108143.0, "step": 9132 }, { "epoch": 3.3455161674452687, "grad_norm": 0.1392231844884434, "learning_rate": 1.337752422455074e-05, "loss": 0.3776, "num_tokens": 6985840099.0, "step": 9133 }, { "epoch": 3.3458825684711915, "grad_norm": 0.14001373788438956, "learning_rate": 1.3373773923674518e-05, "loss": 0.3861, "num_tokens": 6986479957.0, "step": 9134 }, { "epoch": 3.3462489694971147, "grad_norm": 0.16102460347897907, "learning_rate": 1.3370024108839068e-05, "loss": 0.4236, "num_tokens": 6987173347.0, "step": 9135 }, { "epoch": 3.3466153705230375, "grad_norm": 0.13794140610701203, "learning_rate": 1.3366274780255648e-05, "loss": 0.411, "num_tokens": 6987986399.0, "step": 9136 }, { "epoch": 3.3469817715489603, "grad_norm": 0.13328964425790615, "learning_rate": 1.3362525938135513e-05, "loss": 0.415, "num_tokens": 6988815328.0, "step": 9137 }, { "epoch": 3.347348172574883, "grad_norm": 0.1370063714621064, "learning_rate": 1.3358777582689913e-05, "loss": 0.4224, "num_tokens": 6989541077.0, "step": 9138 }, { "epoch": 3.3477145736008063, "grad_norm": 0.15599176201035014, "learning_rate": 1.3355029714130016e-05, "loss": 0.4259, "num_tokens": 6990270479.0, "step": 9139 }, { "epoch": 3.348080974626729, "grad_norm": 0.14615221887955565, "learning_rate": 1.3351282332667018e-05, "loss": 0.3985, "num_tokens": 6991111449.0, "step": 9140 }, { "epoch": 3.348447375652652, "grad_norm": 0.13648595820809337, "learning_rate": 1.334753543851205e-05, "loss": 0.4123, "num_tokens": 6992002487.0, "step": 9141 }, { "epoch": 3.3488137766785746, "grad_norm": 0.13897375122167777, "learning_rate": 1.3343789031876236e-05, "loss": 0.4329, "num_tokens": 6992771514.0, "step": 9142 }, { "epoch": 3.3491801777044974, "grad_norm": 0.1507686435896566, "learning_rate": 1.3340043112970665e-05, "loss": 0.4086, "num_tokens": 6993642967.0, "step": 9143 }, { "epoch": 3.3495465787304206, "grad_norm": 0.14218996587148766, "learning_rate": 1.3336297682006399e-05, "loss": 0.4176, "num_tokens": 6994383993.0, "step": 9144 }, { "epoch": 3.3499129797563434, "grad_norm": 0.1337488459795402, "learning_rate": 1.3332552739194466e-05, "loss": 0.4074, "num_tokens": 6995116043.0, "step": 9145 }, { "epoch": 3.350279380782266, "grad_norm": 0.14036150650954127, "learning_rate": 1.332880828474588e-05, "loss": 0.4024, "num_tokens": 6995963936.0, "step": 9146 }, { "epoch": 3.350645781808189, "grad_norm": 0.1316360371829451, "learning_rate": 1.3325064318871618e-05, "loss": 0.3956, "num_tokens": 6996747501.0, "step": 9147 }, { "epoch": 3.351012182834112, "grad_norm": 0.1321462407352996, "learning_rate": 1.3321320841782636e-05, "loss": 0.4067, "num_tokens": 6997463105.0, "step": 9148 }, { "epoch": 3.351378583860035, "grad_norm": 0.14531551049130806, "learning_rate": 1.3317577853689852e-05, "loss": 0.427, "num_tokens": 6998189402.0, "step": 9149 }, { "epoch": 3.3517449848859577, "grad_norm": 0.15123930194831312, "learning_rate": 1.3313835354804162e-05, "loss": 0.4196, "num_tokens": 6998973378.0, "step": 9150 }, { "epoch": 3.3521113859118805, "grad_norm": 0.15507338232409212, "learning_rate": 1.3310093345336443e-05, "loss": 0.4084, "num_tokens": 6999684418.0, "step": 9151 }, { "epoch": 3.3524777869378033, "grad_norm": 0.13240741836091252, "learning_rate": 1.3306351825497544e-05, "loss": 0.3953, "num_tokens": 7000594647.0, "step": 9152 }, { "epoch": 3.3528441879637265, "grad_norm": 0.1378273727049724, "learning_rate": 1.3302610795498253e-05, "loss": 0.3791, "num_tokens": 7001350408.0, "step": 9153 }, { "epoch": 3.3532105889896493, "grad_norm": 0.12953727395063613, "learning_rate": 1.3298870255549378e-05, "loss": 0.3712, "num_tokens": 7002165400.0, "step": 9154 }, { "epoch": 3.353576990015572, "grad_norm": 0.14005983603475533, "learning_rate": 1.329513020586168e-05, "loss": 0.3814, "num_tokens": 7002893137.0, "step": 9155 }, { "epoch": 3.353943391041495, "grad_norm": 0.1416047241942212, "learning_rate": 1.3291390646645867e-05, "loss": 0.4289, "num_tokens": 7003699263.0, "step": 9156 }, { "epoch": 3.3543097920674176, "grad_norm": 0.14344521246143158, "learning_rate": 1.328765157811267e-05, "loss": 0.4043, "num_tokens": 7004475763.0, "step": 9157 }, { "epoch": 3.354676193093341, "grad_norm": 0.13662032569672483, "learning_rate": 1.328391300047275e-05, "loss": 0.4282, "num_tokens": 7005171645.0, "step": 9158 }, { "epoch": 3.3550425941192636, "grad_norm": 0.1503933019053967, "learning_rate": 1.328017491393676e-05, "loss": 0.39, "num_tokens": 7005984529.0, "step": 9159 }, { "epoch": 3.3554089951451864, "grad_norm": 0.12559645032043137, "learning_rate": 1.327643731871532e-05, "loss": 0.4001, "num_tokens": 7006773290.0, "step": 9160 }, { "epoch": 3.355775396171109, "grad_norm": 0.13568264002829858, "learning_rate": 1.3272700215019021e-05, "loss": 0.4227, "num_tokens": 7007567984.0, "step": 9161 }, { "epoch": 3.356141797197032, "grad_norm": 0.15350842925486496, "learning_rate": 1.326896360305843e-05, "loss": 0.3886, "num_tokens": 7008332647.0, "step": 9162 }, { "epoch": 3.356508198222955, "grad_norm": 0.13584242150372766, "learning_rate": 1.326522748304409e-05, "loss": 0.4245, "num_tokens": 7009110767.0, "step": 9163 }, { "epoch": 3.356874599248878, "grad_norm": 0.14047435037981199, "learning_rate": 1.3261491855186495e-05, "loss": 0.401, "num_tokens": 7009856103.0, "step": 9164 }, { "epoch": 3.3572410002748008, "grad_norm": 0.12761350520782178, "learning_rate": 1.3257756719696152e-05, "loss": 0.4295, "num_tokens": 7010636105.0, "step": 9165 }, { "epoch": 3.3576074013007235, "grad_norm": 0.14182120565599218, "learning_rate": 1.3254022076783493e-05, "loss": 0.4048, "num_tokens": 7011469816.0, "step": 9166 }, { "epoch": 3.3579738023266463, "grad_norm": 0.13916517952095722, "learning_rate": 1.3250287926658949e-05, "loss": 0.406, "num_tokens": 7012231357.0, "step": 9167 }, { "epoch": 3.3583402033525696, "grad_norm": 0.14932370222955876, "learning_rate": 1.3246554269532937e-05, "loss": 0.4151, "num_tokens": 7012965413.0, "step": 9168 }, { "epoch": 3.3587066043784923, "grad_norm": 0.13366968948848637, "learning_rate": 1.3242821105615802e-05, "loss": 0.3816, "num_tokens": 7013730248.0, "step": 9169 }, { "epoch": 3.359073005404415, "grad_norm": 0.13799491395001354, "learning_rate": 1.3239088435117902e-05, "loss": 0.3532, "num_tokens": 7014455857.0, "step": 9170 }, { "epoch": 3.359439406430338, "grad_norm": 0.1360769709968905, "learning_rate": 1.3235356258249553e-05, "loss": 0.389, "num_tokens": 7015262409.0, "step": 9171 }, { "epoch": 3.3598058074562607, "grad_norm": 0.14324170135728065, "learning_rate": 1.3231624575221043e-05, "loss": 0.4098, "num_tokens": 7016200598.0, "step": 9172 }, { "epoch": 3.360172208482184, "grad_norm": 0.13137266242048118, "learning_rate": 1.3227893386242628e-05, "loss": 0.4243, "num_tokens": 7016872608.0, "step": 9173 }, { "epoch": 3.3605386095081067, "grad_norm": 0.1407833551023627, "learning_rate": 1.3224162691524538e-05, "loss": 0.3971, "num_tokens": 7017679254.0, "step": 9174 }, { "epoch": 3.3609050105340295, "grad_norm": 0.1390188753879502, "learning_rate": 1.3220432491276984e-05, "loss": 0.4083, "num_tokens": 7018578252.0, "step": 9175 }, { "epoch": 3.3612714115599522, "grad_norm": 0.12472562554055719, "learning_rate": 1.3216702785710136e-05, "loss": 0.4028, "num_tokens": 7019399461.0, "step": 9176 }, { "epoch": 3.361637812585875, "grad_norm": 0.13576773961600336, "learning_rate": 1.3212973575034142e-05, "loss": 0.4139, "num_tokens": 7020215398.0, "step": 9177 }, { "epoch": 3.3620042136117982, "grad_norm": 0.14288032844349782, "learning_rate": 1.3209244859459138e-05, "loss": 0.3954, "num_tokens": 7020849542.0, "step": 9178 }, { "epoch": 3.362370614637721, "grad_norm": 0.14635884555746526, "learning_rate": 1.3205516639195199e-05, "loss": 0.3996, "num_tokens": 7021594538.0, "step": 9179 }, { "epoch": 3.362737015663644, "grad_norm": 0.14493367329712284, "learning_rate": 1.3201788914452389e-05, "loss": 0.4088, "num_tokens": 7022410111.0, "step": 9180 }, { "epoch": 3.3631034166895666, "grad_norm": 0.1486259778397481, "learning_rate": 1.319806168544076e-05, "loss": 0.3884, "num_tokens": 7023124778.0, "step": 9181 }, { "epoch": 3.3634698177154894, "grad_norm": 0.1505496293277971, "learning_rate": 1.319433495237032e-05, "loss": 0.4211, "num_tokens": 7023932953.0, "step": 9182 }, { "epoch": 3.3638362187414126, "grad_norm": 0.13298501513388267, "learning_rate": 1.3190608715451028e-05, "loss": 0.4019, "num_tokens": 7024743287.0, "step": 9183 }, { "epoch": 3.3642026197673354, "grad_norm": 0.1437034929734255, "learning_rate": 1.3186882974892858e-05, "loss": 0.416, "num_tokens": 7025457818.0, "step": 9184 }, { "epoch": 3.364569020793258, "grad_norm": 0.13401320224510596, "learning_rate": 1.3183157730905738e-05, "loss": 0.4017, "num_tokens": 7026138278.0, "step": 9185 }, { "epoch": 3.364935421819181, "grad_norm": 0.15156304089600475, "learning_rate": 1.3179432983699542e-05, "loss": 0.4198, "num_tokens": 7026740180.0, "step": 9186 }, { "epoch": 3.365301822845104, "grad_norm": 0.161989394123548, "learning_rate": 1.3175708733484162e-05, "loss": 0.3791, "num_tokens": 7027415924.0, "step": 9187 }, { "epoch": 3.365668223871027, "grad_norm": 0.14180269268957252, "learning_rate": 1.3171984980469425e-05, "loss": 0.4216, "num_tokens": 7028187340.0, "step": 9188 }, { "epoch": 3.3660346248969497, "grad_norm": 0.1428617925810538, "learning_rate": 1.3168261724865157e-05, "loss": 0.4017, "num_tokens": 7028958211.0, "step": 9189 }, { "epoch": 3.3664010259228725, "grad_norm": 0.13099995682500798, "learning_rate": 1.3164538966881133e-05, "loss": 0.4274, "num_tokens": 7029676062.0, "step": 9190 }, { "epoch": 3.3667674269487953, "grad_norm": 0.1568951213188786, "learning_rate": 1.3160816706727105e-05, "loss": 0.4, "num_tokens": 7030412102.0, "step": 9191 }, { "epoch": 3.3671338279747185, "grad_norm": 0.13314935038593215, "learning_rate": 1.3157094944612827e-05, "loss": 0.3824, "num_tokens": 7031098794.0, "step": 9192 }, { "epoch": 3.3675002290006413, "grad_norm": 0.14464522442392397, "learning_rate": 1.3153373680747974e-05, "loss": 0.3874, "num_tokens": 7031807454.0, "step": 9193 }, { "epoch": 3.367866630026564, "grad_norm": 0.13593213816763905, "learning_rate": 1.3149652915342222e-05, "loss": 0.4522, "num_tokens": 7032461285.0, "step": 9194 }, { "epoch": 3.368233031052487, "grad_norm": 0.15508413466153664, "learning_rate": 1.3145932648605235e-05, "loss": 0.3855, "num_tokens": 7033361473.0, "step": 9195 }, { "epoch": 3.36859943207841, "grad_norm": 0.1260020274346794, "learning_rate": 1.3142212880746604e-05, "loss": 0.417, "num_tokens": 7034119138.0, "step": 9196 }, { "epoch": 3.368965833104333, "grad_norm": 0.1417069181061094, "learning_rate": 1.3138493611975935e-05, "loss": 0.3693, "num_tokens": 7034857576.0, "step": 9197 }, { "epoch": 3.3693322341302556, "grad_norm": 0.13410131153790505, "learning_rate": 1.3134774842502789e-05, "loss": 0.4057, "num_tokens": 7035653682.0, "step": 9198 }, { "epoch": 3.3696986351561784, "grad_norm": 0.14623529254627934, "learning_rate": 1.3131056572536686e-05, "loss": 0.4086, "num_tokens": 7036516877.0, "step": 9199 }, { "epoch": 3.370065036182101, "grad_norm": 0.12802752359487485, "learning_rate": 1.3127338802287144e-05, "loss": 0.4024, "num_tokens": 7037300149.0, "step": 9200 }, { "epoch": 3.3704314372080244, "grad_norm": 0.13437355426080147, "learning_rate": 1.3123621531963628e-05, "loss": 0.4185, "num_tokens": 7038013220.0, "step": 9201 }, { "epoch": 3.370797838233947, "grad_norm": 0.14360380665305822, "learning_rate": 1.311990476177559e-05, "loss": 0.4016, "num_tokens": 7038841619.0, "step": 9202 }, { "epoch": 3.37116423925987, "grad_norm": 0.13309566903016376, "learning_rate": 1.3116188491932453e-05, "loss": 0.4319, "num_tokens": 7039598740.0, "step": 9203 }, { "epoch": 3.3715306402857927, "grad_norm": 0.14062673135482315, "learning_rate": 1.3112472722643603e-05, "loss": 0.3852, "num_tokens": 7040316518.0, "step": 9204 }, { "epoch": 3.3718970413117155, "grad_norm": 0.14646236818460873, "learning_rate": 1.3108757454118407e-05, "loss": 0.4001, "num_tokens": 7040982040.0, "step": 9205 }, { "epoch": 3.3722634423376388, "grad_norm": 0.14336305643764036, "learning_rate": 1.31050426865662e-05, "loss": 0.404, "num_tokens": 7041735631.0, "step": 9206 }, { "epoch": 3.3726298433635615, "grad_norm": 0.14431054222254588, "learning_rate": 1.3101328420196278e-05, "loss": 0.3953, "num_tokens": 7042444718.0, "step": 9207 }, { "epoch": 3.3729962443894843, "grad_norm": 0.13310430328208184, "learning_rate": 1.3097614655217937e-05, "loss": 0.3712, "num_tokens": 7043236568.0, "step": 9208 }, { "epoch": 3.373362645415407, "grad_norm": 0.13883634386699997, "learning_rate": 1.3093901391840428e-05, "loss": 0.4093, "num_tokens": 7043977318.0, "step": 9209 }, { "epoch": 3.37372904644133, "grad_norm": 0.13942522978594177, "learning_rate": 1.3090188630272951e-05, "loss": 0.3843, "num_tokens": 7044781949.0, "step": 9210 }, { "epoch": 3.374095447467253, "grad_norm": 0.14490420633723503, "learning_rate": 1.3086476370724718e-05, "loss": 0.3831, "num_tokens": 7045599794.0, "step": 9211 }, { "epoch": 3.374461848493176, "grad_norm": 0.12018976747245412, "learning_rate": 1.3082764613404902e-05, "loss": 0.3987, "num_tokens": 7046332431.0, "step": 9212 }, { "epoch": 3.3748282495190987, "grad_norm": 0.14306501966164648, "learning_rate": 1.307905335852261e-05, "loss": 0.3994, "num_tokens": 7047147623.0, "step": 9213 }, { "epoch": 3.3751946505450214, "grad_norm": 0.13466716986969954, "learning_rate": 1.3075342606286983e-05, "loss": 0.3837, "num_tokens": 7048023957.0, "step": 9214 }, { "epoch": 3.375561051570944, "grad_norm": 0.12449797151156423, "learning_rate": 1.3071632356907085e-05, "loss": 0.3725, "num_tokens": 7048879474.0, "step": 9215 }, { "epoch": 3.3759274525968674, "grad_norm": 0.1284440002504288, "learning_rate": 1.306792261059197e-05, "loss": 0.3617, "num_tokens": 7049592353.0, "step": 9216 }, { "epoch": 3.37629385362279, "grad_norm": 0.14059754283512926, "learning_rate": 1.3064213367550664e-05, "loss": 0.3992, "num_tokens": 7050377351.0, "step": 9217 }, { "epoch": 3.376660254648713, "grad_norm": 0.13526601252018394, "learning_rate": 1.3060504627992162e-05, "loss": 0.4097, "num_tokens": 7051357463.0, "step": 9218 }, { "epoch": 3.377026655674636, "grad_norm": 0.13939810464074381, "learning_rate": 1.305679639212543e-05, "loss": 0.4315, "num_tokens": 7052044415.0, "step": 9219 }, { "epoch": 3.3773930567005586, "grad_norm": 0.1457948426735801, "learning_rate": 1.305308866015941e-05, "loss": 0.3987, "num_tokens": 7052760809.0, "step": 9220 }, { "epoch": 3.377759457726482, "grad_norm": 0.14715747885061567, "learning_rate": 1.3049381432303006e-05, "loss": 0.3915, "num_tokens": 7053570651.0, "step": 9221 }, { "epoch": 3.3781258587524046, "grad_norm": 0.12571129938705816, "learning_rate": 1.3045674708765115e-05, "loss": 0.3742, "num_tokens": 7054325608.0, "step": 9222 }, { "epoch": 3.3784922597783273, "grad_norm": 0.14155312373001855, "learning_rate": 1.3041968489754578e-05, "loss": 0.4572, "num_tokens": 7054981510.0, "step": 9223 }, { "epoch": 3.37885866080425, "grad_norm": 0.15812941209279266, "learning_rate": 1.3038262775480211e-05, "loss": 0.3854, "num_tokens": 7055759131.0, "step": 9224 }, { "epoch": 3.379225061830173, "grad_norm": 0.14026533376312442, "learning_rate": 1.3034557566150835e-05, "loss": 0.3974, "num_tokens": 7056492837.0, "step": 9225 }, { "epoch": 3.379591462856096, "grad_norm": 0.14231772267059214, "learning_rate": 1.3030852861975194e-05, "loss": 0.4326, "num_tokens": 7057204564.0, "step": 9226 }, { "epoch": 3.379957863882019, "grad_norm": 0.15007453895175796, "learning_rate": 1.3027148663162046e-05, "loss": 0.384, "num_tokens": 7057925004.0, "step": 9227 }, { "epoch": 3.3803242649079417, "grad_norm": 0.12773646851223056, "learning_rate": 1.3023444969920096e-05, "loss": 0.415, "num_tokens": 7058736234.0, "step": 9228 }, { "epoch": 3.3806906659338645, "grad_norm": 0.1497459278000897, "learning_rate": 1.3019741782458029e-05, "loss": 0.4039, "num_tokens": 7059469481.0, "step": 9229 }, { "epoch": 3.3810570669597872, "grad_norm": 0.13697469216271124, "learning_rate": 1.3016039100984495e-05, "loss": 0.3947, "num_tokens": 7060239091.0, "step": 9230 }, { "epoch": 3.3814234679857105, "grad_norm": 0.13912897615296618, "learning_rate": 1.3012336925708123e-05, "loss": 0.4064, "num_tokens": 7061009218.0, "step": 9231 }, { "epoch": 3.3817898690116333, "grad_norm": 0.13794313841606567, "learning_rate": 1.3008635256837507e-05, "loss": 0.3934, "num_tokens": 7061784204.0, "step": 9232 }, { "epoch": 3.382156270037556, "grad_norm": 0.13343879382152385, "learning_rate": 1.3004934094581221e-05, "loss": 0.3817, "num_tokens": 7062563430.0, "step": 9233 }, { "epoch": 3.382522671063479, "grad_norm": 0.14424500036300486, "learning_rate": 1.3001233439147796e-05, "loss": 0.4033, "num_tokens": 7063401846.0, "step": 9234 }, { "epoch": 3.382889072089402, "grad_norm": 0.13304718097480767, "learning_rate": 1.299753329074576e-05, "loss": 0.416, "num_tokens": 7064195305.0, "step": 9235 }, { "epoch": 3.383255473115325, "grad_norm": 0.14135414869078702, "learning_rate": 1.2993833649583583e-05, "loss": 0.4085, "num_tokens": 7065019956.0, "step": 9236 }, { "epoch": 3.3836218741412476, "grad_norm": 0.1286579564538328, "learning_rate": 1.2990134515869715e-05, "loss": 0.3843, "num_tokens": 7065780535.0, "step": 9237 }, { "epoch": 3.3839882751671704, "grad_norm": 0.14820257850018453, "learning_rate": 1.29864358898126e-05, "loss": 0.4096, "num_tokens": 7066433241.0, "step": 9238 }, { "epoch": 3.384354676193093, "grad_norm": 0.13598953010548423, "learning_rate": 1.2982737771620628e-05, "loss": 0.3739, "num_tokens": 7067234058.0, "step": 9239 }, { "epoch": 3.3847210772190164, "grad_norm": 0.13784564986125025, "learning_rate": 1.2979040161502154e-05, "loss": 0.4111, "num_tokens": 7068012699.0, "step": 9240 }, { "epoch": 3.385087478244939, "grad_norm": 0.13812756650090782, "learning_rate": 1.2975343059665535e-05, "loss": 0.403, "num_tokens": 7068770010.0, "step": 9241 }, { "epoch": 3.385453879270862, "grad_norm": 0.14692205969979028, "learning_rate": 1.2971646466319083e-05, "loss": 0.4217, "num_tokens": 7069369996.0, "step": 9242 }, { "epoch": 3.3858202802967847, "grad_norm": 0.15539324477407454, "learning_rate": 1.2967950381671062e-05, "loss": 0.3999, "num_tokens": 7070206163.0, "step": 9243 }, { "epoch": 3.386186681322708, "grad_norm": 0.12913121463438004, "learning_rate": 1.2964254805929747e-05, "loss": 0.413, "num_tokens": 7070966394.0, "step": 9244 }, { "epoch": 3.3865530823486307, "grad_norm": 0.14332232939543704, "learning_rate": 1.2960559739303355e-05, "loss": 0.4323, "num_tokens": 7071796489.0, "step": 9245 }, { "epoch": 3.3869194833745535, "grad_norm": 0.1458384025115424, "learning_rate": 1.2956865182000083e-05, "loss": 0.4038, "num_tokens": 7072531407.0, "step": 9246 }, { "epoch": 3.3872858844004763, "grad_norm": 0.14664944387891157, "learning_rate": 1.2953171134228097e-05, "loss": 0.419, "num_tokens": 7073335020.0, "step": 9247 }, { "epoch": 3.387652285426399, "grad_norm": 0.14581533635697258, "learning_rate": 1.2949477596195535e-05, "loss": 0.3915, "num_tokens": 7074053652.0, "step": 9248 }, { "epoch": 3.3880186864523223, "grad_norm": 0.13837916459031865, "learning_rate": 1.2945784568110524e-05, "loss": 0.3849, "num_tokens": 7074777246.0, "step": 9249 }, { "epoch": 3.388385087478245, "grad_norm": 0.1388269801515393, "learning_rate": 1.2942092050181128e-05, "loss": 0.4144, "num_tokens": 7075490098.0, "step": 9250 }, { "epoch": 3.388751488504168, "grad_norm": 0.14952824867807457, "learning_rate": 1.2938400042615396e-05, "loss": 0.398, "num_tokens": 7076229514.0, "step": 9251 }, { "epoch": 3.3891178895300906, "grad_norm": 0.13791801742331006, "learning_rate": 1.2934708545621379e-05, "loss": 0.4245, "num_tokens": 7077027681.0, "step": 9252 }, { "epoch": 3.3894842905560134, "grad_norm": 0.13195992647033586, "learning_rate": 1.2931017559407042e-05, "loss": 0.4153, "num_tokens": 7077856762.0, "step": 9253 }, { "epoch": 3.3898506915819366, "grad_norm": 0.13844697723771637, "learning_rate": 1.2927327084180372e-05, "loss": 0.4466, "num_tokens": 7078590820.0, "step": 9254 }, { "epoch": 3.3902170926078594, "grad_norm": 0.13724885633220524, "learning_rate": 1.2923637120149302e-05, "loss": 0.4022, "num_tokens": 7079362476.0, "step": 9255 }, { "epoch": 3.390583493633782, "grad_norm": 0.14106430487367908, "learning_rate": 1.2919947667521738e-05, "loss": 0.4213, "num_tokens": 7080123610.0, "step": 9256 }, { "epoch": 3.390949894659705, "grad_norm": 0.14602350316677262, "learning_rate": 1.2916258726505562e-05, "loss": 0.3889, "num_tokens": 7080757168.0, "step": 9257 }, { "epoch": 3.3913162956856278, "grad_norm": 0.1472936724077328, "learning_rate": 1.291257029730863e-05, "loss": 0.3841, "num_tokens": 7081542178.0, "step": 9258 }, { "epoch": 3.391682696711551, "grad_norm": 0.13609302897168682, "learning_rate": 1.2908882380138761e-05, "loss": 0.4145, "num_tokens": 7082314414.0, "step": 9259 }, { "epoch": 3.3920490977374738, "grad_norm": 0.14445412249735465, "learning_rate": 1.2905194975203745e-05, "loss": 0.3767, "num_tokens": 7082990781.0, "step": 9260 }, { "epoch": 3.3924154987633965, "grad_norm": 0.14439534888856753, "learning_rate": 1.2901508082711351e-05, "loss": 0.3771, "num_tokens": 7083923947.0, "step": 9261 }, { "epoch": 3.3927818997893193, "grad_norm": 0.12839197814847553, "learning_rate": 1.2897821702869325e-05, "loss": 0.4226, "num_tokens": 7084659305.0, "step": 9262 }, { "epoch": 3.393148300815242, "grad_norm": 0.14678879999782082, "learning_rate": 1.289413583588536e-05, "loss": 0.4109, "num_tokens": 7085352175.0, "step": 9263 }, { "epoch": 3.3935147018411653, "grad_norm": 0.1420285267284778, "learning_rate": 1.2890450481967134e-05, "loss": 0.4011, "num_tokens": 7086063394.0, "step": 9264 }, { "epoch": 3.393881102867088, "grad_norm": 0.14289946364225034, "learning_rate": 1.288676564132231e-05, "loss": 0.4309, "num_tokens": 7086778622.0, "step": 9265 }, { "epoch": 3.394247503893011, "grad_norm": 0.14314357132663702, "learning_rate": 1.2883081314158503e-05, "loss": 0.4154, "num_tokens": 7087541895.0, "step": 9266 }, { "epoch": 3.3946139049189337, "grad_norm": 0.1368807283941579, "learning_rate": 1.287939750068329e-05, "loss": 0.4565, "num_tokens": 7088351382.0, "step": 9267 }, { "epoch": 3.3949803059448564, "grad_norm": 0.1381948470610712, "learning_rate": 1.2875714201104257e-05, "loss": 0.42, "num_tokens": 7089192462.0, "step": 9268 }, { "epoch": 3.3953467069707797, "grad_norm": 0.1333165623716984, "learning_rate": 1.2872031415628935e-05, "loss": 0.3935, "num_tokens": 7090080388.0, "step": 9269 }, { "epoch": 3.3957131079967025, "grad_norm": 0.13264426645369587, "learning_rate": 1.2868349144464802e-05, "loss": 0.4018, "num_tokens": 7090909106.0, "step": 9270 }, { "epoch": 3.3960795090226252, "grad_norm": 0.12769839132716584, "learning_rate": 1.2864667387819363e-05, "loss": 0.3906, "num_tokens": 7091605870.0, "step": 9271 }, { "epoch": 3.396445910048548, "grad_norm": 0.1444533513673324, "learning_rate": 1.2860986145900052e-05, "loss": 0.4099, "num_tokens": 7092332045.0, "step": 9272 }, { "epoch": 3.396812311074471, "grad_norm": 0.14089627912848002, "learning_rate": 1.2857305418914291e-05, "loss": 0.4202, "num_tokens": 7092962604.0, "step": 9273 }, { "epoch": 3.397178712100394, "grad_norm": 0.15494472453732475, "learning_rate": 1.285362520706947e-05, "loss": 0.3904, "num_tokens": 7093813490.0, "step": 9274 }, { "epoch": 3.397545113126317, "grad_norm": 0.1329948293346592, "learning_rate": 1.2849945510572945e-05, "loss": 0.4296, "num_tokens": 7094595048.0, "step": 9275 }, { "epoch": 3.3979115141522396, "grad_norm": 0.1421747275255893, "learning_rate": 1.284626632963205e-05, "loss": 0.3886, "num_tokens": 7095367389.0, "step": 9276 }, { "epoch": 3.3982779151781624, "grad_norm": 0.13541753582317095, "learning_rate": 1.2842587664454083e-05, "loss": 0.412, "num_tokens": 7096108699.0, "step": 9277 }, { "epoch": 3.398644316204085, "grad_norm": 0.14281855258938855, "learning_rate": 1.2838909515246311e-05, "loss": 0.3753, "num_tokens": 7096923797.0, "step": 9278 }, { "epoch": 3.3990107172300084, "grad_norm": 0.14155207135776104, "learning_rate": 1.2835231882216004e-05, "loss": 0.395, "num_tokens": 7097677982.0, "step": 9279 }, { "epoch": 3.399377118255931, "grad_norm": 0.1402625690303551, "learning_rate": 1.283155476557034e-05, "loss": 0.4263, "num_tokens": 7098430282.0, "step": 9280 }, { "epoch": 3.399743519281854, "grad_norm": 0.13717494262589708, "learning_rate": 1.2827878165516527e-05, "loss": 0.4053, "num_tokens": 7099095987.0, "step": 9281 }, { "epoch": 3.4001099203077767, "grad_norm": 0.15084481385754897, "learning_rate": 1.2824202082261721e-05, "loss": 0.3913, "num_tokens": 7099872502.0, "step": 9282 }, { "epoch": 3.4004763213337, "grad_norm": 0.14020217773024754, "learning_rate": 1.2820526516013044e-05, "loss": 0.3954, "num_tokens": 7100733013.0, "step": 9283 }, { "epoch": 3.4008427223596227, "grad_norm": 0.1361896876630558, "learning_rate": 1.2816851466977595e-05, "loss": 0.4198, "num_tokens": 7101637949.0, "step": 9284 }, { "epoch": 3.4012091233855455, "grad_norm": 0.1477004476725234, "learning_rate": 1.2813176935362441e-05, "loss": 0.4486, "num_tokens": 7102341041.0, "step": 9285 }, { "epoch": 3.4015755244114683, "grad_norm": 0.1446461500537415, "learning_rate": 1.280950292137463e-05, "loss": 0.4083, "num_tokens": 7102970745.0, "step": 9286 }, { "epoch": 3.401941925437391, "grad_norm": 0.145769554744689, "learning_rate": 1.2805829425221164e-05, "loss": 0.3878, "num_tokens": 7103876378.0, "step": 9287 }, { "epoch": 3.4023083264633143, "grad_norm": 0.12790606035581184, "learning_rate": 1.2802156447109022e-05, "loss": 0.418, "num_tokens": 7104663076.0, "step": 9288 }, { "epoch": 3.402674727489237, "grad_norm": 0.14690116111033225, "learning_rate": 1.2798483987245165e-05, "loss": 0.4642, "num_tokens": 7105333409.0, "step": 9289 }, { "epoch": 3.40304112851516, "grad_norm": 0.1460208794300773, "learning_rate": 1.2794812045836511e-05, "loss": 0.4022, "num_tokens": 7106187766.0, "step": 9290 }, { "epoch": 3.4034075295410826, "grad_norm": 0.13351789893995675, "learning_rate": 1.2791140623089948e-05, "loss": 0.4172, "num_tokens": 7106881335.0, "step": 9291 }, { "epoch": 3.403773930567006, "grad_norm": 0.14622033674295704, "learning_rate": 1.2787469719212365e-05, "loss": 0.3842, "num_tokens": 7107800738.0, "step": 9292 }, { "epoch": 3.4041403315929286, "grad_norm": 0.1350580093137346, "learning_rate": 1.2783799334410571e-05, "loss": 0.4224, "num_tokens": 7108513227.0, "step": 9293 }, { "epoch": 3.4045067326188514, "grad_norm": 0.14597755807246443, "learning_rate": 1.2780129468891374e-05, "loss": 0.4331, "num_tokens": 7109262385.0, "step": 9294 }, { "epoch": 3.404873133644774, "grad_norm": 0.13786146910201372, "learning_rate": 1.2776460122861564e-05, "loss": 0.4371, "num_tokens": 7109954062.0, "step": 9295 }, { "epoch": 3.405239534670697, "grad_norm": 0.14644709665592898, "learning_rate": 1.2772791296527893e-05, "loss": 0.3758, "num_tokens": 7110704330.0, "step": 9296 }, { "epoch": 3.40560593569662, "grad_norm": 0.13915356471832832, "learning_rate": 1.2769122990097052e-05, "loss": 0.4336, "num_tokens": 7111443909.0, "step": 9297 }, { "epoch": 3.405972336722543, "grad_norm": 0.14282879359068562, "learning_rate": 1.2765455203775756e-05, "loss": 0.3887, "num_tokens": 7112208936.0, "step": 9298 }, { "epoch": 3.4063387377484657, "grad_norm": 0.14256787156412445, "learning_rate": 1.2761787937770657e-05, "loss": 0.4013, "num_tokens": 7112901446.0, "step": 9299 }, { "epoch": 3.4067051387743885, "grad_norm": 0.14053015422554802, "learning_rate": 1.2758121192288381e-05, "loss": 0.4117, "num_tokens": 7113699577.0, "step": 9300 }, { "epoch": 3.4070715398003113, "grad_norm": 0.142261993479142, "learning_rate": 1.2754454967535534e-05, "loss": 0.4324, "num_tokens": 7114366934.0, "step": 9301 }, { "epoch": 3.4074379408262345, "grad_norm": 0.14274555543401204, "learning_rate": 1.2750789263718688e-05, "loss": 0.39, "num_tokens": 7115218087.0, "step": 9302 }, { "epoch": 3.4078043418521573, "grad_norm": 0.13463232190316027, "learning_rate": 1.2747124081044383e-05, "loss": 0.4342, "num_tokens": 7116108251.0, "step": 9303 }, { "epoch": 3.40817074287808, "grad_norm": 0.13249985855442295, "learning_rate": 1.2743459419719129e-05, "loss": 0.3857, "num_tokens": 7116908293.0, "step": 9304 }, { "epoch": 3.408537143904003, "grad_norm": 0.12674168032990918, "learning_rate": 1.273979527994941e-05, "loss": 0.3627, "num_tokens": 7117723752.0, "step": 9305 }, { "epoch": 3.4089035449299256, "grad_norm": 0.12348167397953856, "learning_rate": 1.2736131661941698e-05, "loss": 0.3947, "num_tokens": 7118468188.0, "step": 9306 }, { "epoch": 3.409269945955849, "grad_norm": 0.13816600365115742, "learning_rate": 1.2732468565902397e-05, "loss": 0.4335, "num_tokens": 7119256457.0, "step": 9307 }, { "epoch": 3.4096363469817716, "grad_norm": 0.14547454182604944, "learning_rate": 1.2728805992037901e-05, "loss": 0.3984, "num_tokens": 7119988380.0, "step": 9308 }, { "epoch": 3.4100027480076944, "grad_norm": 0.13823964708711772, "learning_rate": 1.2725143940554594e-05, "loss": 0.3972, "num_tokens": 7120823762.0, "step": 9309 }, { "epoch": 3.410369149033617, "grad_norm": 0.13535907487711218, "learning_rate": 1.2721482411658792e-05, "loss": 0.3764, "num_tokens": 7121502557.0, "step": 9310 }, { "epoch": 3.41073555005954, "grad_norm": 0.13764846931575142, "learning_rate": 1.271782140555682e-05, "loss": 0.4001, "num_tokens": 7122261923.0, "step": 9311 }, { "epoch": 3.411101951085463, "grad_norm": 0.14377895180796388, "learning_rate": 1.2714160922454948e-05, "loss": 0.4399, "num_tokens": 7122985749.0, "step": 9312 }, { "epoch": 3.411468352111386, "grad_norm": 0.1416498705817951, "learning_rate": 1.2710500962559427e-05, "loss": 0.4088, "num_tokens": 7123685487.0, "step": 9313 }, { "epoch": 3.4118347531373088, "grad_norm": 0.14489920513113175, "learning_rate": 1.2706841526076472e-05, "loss": 0.4232, "num_tokens": 7124459596.0, "step": 9314 }, { "epoch": 3.4122011541632316, "grad_norm": 0.14569468669382143, "learning_rate": 1.270318261321227e-05, "loss": 0.4056, "num_tokens": 7125244862.0, "step": 9315 }, { "epoch": 3.4125675551891543, "grad_norm": 0.13259076972695527, "learning_rate": 1.269952422417299e-05, "loss": 0.3856, "num_tokens": 7126022576.0, "step": 9316 }, { "epoch": 3.4129339562150776, "grad_norm": 0.1367065234634088, "learning_rate": 1.2695866359164755e-05, "loss": 0.4078, "num_tokens": 7126708509.0, "step": 9317 }, { "epoch": 3.4133003572410003, "grad_norm": 0.15039636186460992, "learning_rate": 1.2692209018393659e-05, "loss": 0.4069, "num_tokens": 7127575098.0, "step": 9318 }, { "epoch": 3.413666758266923, "grad_norm": 0.1263996853586427, "learning_rate": 1.2688552202065796e-05, "loss": 0.4191, "num_tokens": 7128367076.0, "step": 9319 }, { "epoch": 3.414033159292846, "grad_norm": 0.1481271805656004, "learning_rate": 1.2684895910387187e-05, "loss": 0.4518, "num_tokens": 7129152179.0, "step": 9320 }, { "epoch": 3.4143995603187687, "grad_norm": 0.1459753633768724, "learning_rate": 1.2681240143563844e-05, "loss": 0.3874, "num_tokens": 7130047295.0, "step": 9321 }, { "epoch": 3.414765961344692, "grad_norm": 0.12736846142854924, "learning_rate": 1.2677584901801762e-05, "loss": 0.416, "num_tokens": 7130790901.0, "step": 9322 }, { "epoch": 3.4151323623706147, "grad_norm": 0.14757070690002821, "learning_rate": 1.2673930185306895e-05, "loss": 0.394, "num_tokens": 7131479726.0, "step": 9323 }, { "epoch": 3.4154987633965375, "grad_norm": 0.140824618045437, "learning_rate": 1.2670275994285146e-05, "loss": 0.4107, "num_tokens": 7132218188.0, "step": 9324 }, { "epoch": 3.4158651644224602, "grad_norm": 0.14256607803779117, "learning_rate": 1.2666622328942428e-05, "loss": 0.4199, "num_tokens": 7132948242.0, "step": 9325 }, { "epoch": 3.416231565448383, "grad_norm": 0.13990248040662384, "learning_rate": 1.2662969189484607e-05, "loss": 0.4297, "num_tokens": 7133687663.0, "step": 9326 }, { "epoch": 3.4165979664743062, "grad_norm": 0.14700286673364932, "learning_rate": 1.2659316576117494e-05, "loss": 0.4236, "num_tokens": 7134483904.0, "step": 9327 }, { "epoch": 3.416964367500229, "grad_norm": 0.1432002335206705, "learning_rate": 1.265566448904692e-05, "loss": 0.39, "num_tokens": 7135184799.0, "step": 9328 }, { "epoch": 3.417330768526152, "grad_norm": 0.1561045356880725, "learning_rate": 1.2652012928478643e-05, "loss": 0.415, "num_tokens": 7135836975.0, "step": 9329 }, { "epoch": 3.4176971695520746, "grad_norm": 0.14515125904516726, "learning_rate": 1.2648361894618422e-05, "loss": 0.3988, "num_tokens": 7136608322.0, "step": 9330 }, { "epoch": 3.418063570577998, "grad_norm": 0.14085465836595867, "learning_rate": 1.2644711387671962e-05, "loss": 0.3643, "num_tokens": 7137380214.0, "step": 9331 }, { "epoch": 3.4184299716039206, "grad_norm": 0.13571650689186895, "learning_rate": 1.2641061407844953e-05, "loss": 0.4306, "num_tokens": 7138099799.0, "step": 9332 }, { "epoch": 3.4187963726298434, "grad_norm": 0.1500991633643184, "learning_rate": 1.2637411955343052e-05, "loss": 0.3974, "num_tokens": 7138866668.0, "step": 9333 }, { "epoch": 3.419162773655766, "grad_norm": 0.14197164726354242, "learning_rate": 1.2633763030371884e-05, "loss": 0.4006, "num_tokens": 7139583505.0, "step": 9334 }, { "epoch": 3.419529174681689, "grad_norm": 0.1356083222632878, "learning_rate": 1.2630114633137041e-05, "loss": 0.3905, "num_tokens": 7140377149.0, "step": 9335 }, { "epoch": 3.419895575707612, "grad_norm": 0.13506589105219097, "learning_rate": 1.2626466763844109e-05, "loss": 0.4049, "num_tokens": 7141027976.0, "step": 9336 }, { "epoch": 3.420261976733535, "grad_norm": 0.14797791757002654, "learning_rate": 1.2622819422698601e-05, "loss": 0.3884, "num_tokens": 7141846961.0, "step": 9337 }, { "epoch": 3.4206283777594577, "grad_norm": 0.1496777353634806, "learning_rate": 1.2619172609906043e-05, "loss": 0.3914, "num_tokens": 7142574735.0, "step": 9338 }, { "epoch": 3.4209947787853805, "grad_norm": 0.13583406764995176, "learning_rate": 1.2615526325671907e-05, "loss": 0.4212, "num_tokens": 7143374708.0, "step": 9339 }, { "epoch": 3.4213611798113037, "grad_norm": 0.14079916695992453, "learning_rate": 1.261188057020164e-05, "loss": 0.4099, "num_tokens": 7144090671.0, "step": 9340 }, { "epoch": 3.4217275808372265, "grad_norm": 0.14914137189740712, "learning_rate": 1.2608235343700664e-05, "loss": 0.3912, "num_tokens": 7144803348.0, "step": 9341 }, { "epoch": 3.4220939818631493, "grad_norm": 0.1369129920459626, "learning_rate": 1.260459064637436e-05, "loss": 0.4052, "num_tokens": 7145527426.0, "step": 9342 }, { "epoch": 3.422460382889072, "grad_norm": 0.14044045124879576, "learning_rate": 1.2600946478428097e-05, "loss": 0.3958, "num_tokens": 7146325691.0, "step": 9343 }, { "epoch": 3.422826783914995, "grad_norm": 0.14833771635947948, "learning_rate": 1.2597302840067198e-05, "loss": 0.3854, "num_tokens": 7146998520.0, "step": 9344 }, { "epoch": 3.423193184940918, "grad_norm": 0.14076677872723345, "learning_rate": 1.259365973149696e-05, "loss": 0.4178, "num_tokens": 7147866635.0, "step": 9345 }, { "epoch": 3.423559585966841, "grad_norm": 0.1271342625996975, "learning_rate": 1.2590017152922655e-05, "loss": 0.4154, "num_tokens": 7148732470.0, "step": 9346 }, { "epoch": 3.4239259869927636, "grad_norm": 0.14625040756231245, "learning_rate": 1.2586375104549524e-05, "loss": 0.3829, "num_tokens": 7149485934.0, "step": 9347 }, { "epoch": 3.4242923880186864, "grad_norm": 0.13943077063792406, "learning_rate": 1.258273358658277e-05, "loss": 0.3869, "num_tokens": 7150209068.0, "step": 9348 }, { "epoch": 3.424658789044609, "grad_norm": 0.14435880159688358, "learning_rate": 1.257909259922759e-05, "loss": 0.4106, "num_tokens": 7151003069.0, "step": 9349 }, { "epoch": 3.4250251900705324, "grad_norm": 0.1402213169822631, "learning_rate": 1.257545214268911e-05, "loss": 0.4125, "num_tokens": 7151804651.0, "step": 9350 }, { "epoch": 3.425391591096455, "grad_norm": 0.1463103420774103, "learning_rate": 1.2571812217172461e-05, "loss": 0.4409, "num_tokens": 7152728076.0, "step": 9351 }, { "epoch": 3.425757992122378, "grad_norm": 0.13505942091784157, "learning_rate": 1.2568172822882738e-05, "loss": 0.3972, "num_tokens": 7153513644.0, "step": 9352 }, { "epoch": 3.4261243931483008, "grad_norm": 0.13423661355092376, "learning_rate": 1.2564533960025003e-05, "loss": 0.3872, "num_tokens": 7154305582.0, "step": 9353 }, { "epoch": 3.4264907941742235, "grad_norm": 0.1445998070702291, "learning_rate": 1.2560895628804266e-05, "loss": 0.4288, "num_tokens": 7155056118.0, "step": 9354 }, { "epoch": 3.4268571952001468, "grad_norm": 0.14007293707249582, "learning_rate": 1.2557257829425546e-05, "loss": 0.3972, "num_tokens": 7155842073.0, "step": 9355 }, { "epoch": 3.4272235962260695, "grad_norm": 0.13979500344107396, "learning_rate": 1.255362056209381e-05, "loss": 0.4399, "num_tokens": 7156559270.0, "step": 9356 }, { "epoch": 3.4275899972519923, "grad_norm": 0.14579848722376584, "learning_rate": 1.2549983827013996e-05, "loss": 0.4174, "num_tokens": 7157244850.0, "step": 9357 }, { "epoch": 3.427956398277915, "grad_norm": 0.14191229522260806, "learning_rate": 1.254634762439101e-05, "loss": 0.4021, "num_tokens": 7158096769.0, "step": 9358 }, { "epoch": 3.428322799303838, "grad_norm": 0.1528096731534533, "learning_rate": 1.254271195442974e-05, "loss": 0.3862, "num_tokens": 7158894170.0, "step": 9359 }, { "epoch": 3.428689200329761, "grad_norm": 0.1360600140836139, "learning_rate": 1.2539076817335031e-05, "loss": 0.4101, "num_tokens": 7159598286.0, "step": 9360 }, { "epoch": 3.429055601355684, "grad_norm": 0.13836454617074345, "learning_rate": 1.2535442213311704e-05, "loss": 0.4279, "num_tokens": 7160436112.0, "step": 9361 }, { "epoch": 3.4294220023816067, "grad_norm": 0.14772663751233642, "learning_rate": 1.2531808142564543e-05, "loss": 0.4051, "num_tokens": 7161116156.0, "step": 9362 }, { "epoch": 3.4297884034075294, "grad_norm": 0.1475678247804277, "learning_rate": 1.2528174605298328e-05, "loss": 0.4004, "num_tokens": 7161792750.0, "step": 9363 }, { "epoch": 3.430154804433452, "grad_norm": 0.14019154695194913, "learning_rate": 1.252454160171777e-05, "loss": 0.418, "num_tokens": 7162632092.0, "step": 9364 }, { "epoch": 3.4305212054593754, "grad_norm": 0.13325672574471012, "learning_rate": 1.2520909132027566e-05, "loss": 0.3983, "num_tokens": 7163524517.0, "step": 9365 }, { "epoch": 3.4308876064852982, "grad_norm": 0.12937730101425188, "learning_rate": 1.2517277196432409e-05, "loss": 0.3952, "num_tokens": 7164359929.0, "step": 9366 }, { "epoch": 3.431254007511221, "grad_norm": 0.12847600273662624, "learning_rate": 1.251364579513691e-05, "loss": 0.4079, "num_tokens": 7165158926.0, "step": 9367 }, { "epoch": 3.431620408537144, "grad_norm": 0.14140457328127207, "learning_rate": 1.2510014928345702e-05, "loss": 0.4205, "num_tokens": 7165928268.0, "step": 9368 }, { "epoch": 3.4319868095630666, "grad_norm": 0.13509594150344684, "learning_rate": 1.2506384596263352e-05, "loss": 0.4143, "num_tokens": 7166691717.0, "step": 9369 }, { "epoch": 3.43235321058899, "grad_norm": 0.14552657809491049, "learning_rate": 1.2502754799094414e-05, "loss": 0.3709, "num_tokens": 7167367463.0, "step": 9370 }, { "epoch": 3.4327196116149126, "grad_norm": 0.13654219389634945, "learning_rate": 1.2499125537043404e-05, "loss": 0.4349, "num_tokens": 7168157603.0, "step": 9371 }, { "epoch": 3.4330860126408353, "grad_norm": 0.1295597417555844, "learning_rate": 1.2495496810314813e-05, "loss": 0.4256, "num_tokens": 7168987518.0, "step": 9372 }, { "epoch": 3.433452413666758, "grad_norm": 0.1362905049468401, "learning_rate": 1.2491868619113104e-05, "loss": 0.43, "num_tokens": 7169793486.0, "step": 9373 }, { "epoch": 3.433818814692681, "grad_norm": 0.13865243889788847, "learning_rate": 1.2488240963642699e-05, "loss": 0.3779, "num_tokens": 7170575863.0, "step": 9374 }, { "epoch": 3.434185215718604, "grad_norm": 0.13250507576449083, "learning_rate": 1.2484613844107991e-05, "loss": 0.4187, "num_tokens": 7171285816.0, "step": 9375 }, { "epoch": 3.434551616744527, "grad_norm": 0.14598353326110078, "learning_rate": 1.2480987260713372e-05, "loss": 0.3744, "num_tokens": 7172038661.0, "step": 9376 }, { "epoch": 3.4349180177704497, "grad_norm": 0.13666975716948002, "learning_rate": 1.247736121366316e-05, "loss": 0.4184, "num_tokens": 7172744785.0, "step": 9377 }, { "epoch": 3.4352844187963725, "grad_norm": 0.14558982899732442, "learning_rate": 1.2473735703161658e-05, "loss": 0.437, "num_tokens": 7173425427.0, "step": 9378 }, { "epoch": 3.4356508198222953, "grad_norm": 0.15395070349823142, "learning_rate": 1.2470110729413161e-05, "loss": 0.3882, "num_tokens": 7174139869.0, "step": 9379 }, { "epoch": 3.4360172208482185, "grad_norm": 0.12831986944724413, "learning_rate": 1.2466486292621918e-05, "loss": 0.3954, "num_tokens": 7174967401.0, "step": 9380 }, { "epoch": 3.4363836218741413, "grad_norm": 0.13132671045385774, "learning_rate": 1.2462862392992122e-05, "loss": 0.3933, "num_tokens": 7175783218.0, "step": 9381 }, { "epoch": 3.436750022900064, "grad_norm": 0.14010611075131402, "learning_rate": 1.2459239030727981e-05, "loss": 0.402, "num_tokens": 7176634228.0, "step": 9382 }, { "epoch": 3.437116423925987, "grad_norm": 0.1261248455862378, "learning_rate": 1.2455616206033648e-05, "loss": 0.4275, "num_tokens": 7177419610.0, "step": 9383 }, { "epoch": 3.43748282495191, "grad_norm": 0.14300477349480228, "learning_rate": 1.2451993919113248e-05, "loss": 0.4183, "num_tokens": 7178114329.0, "step": 9384 }, { "epoch": 3.437849225977833, "grad_norm": 0.13927287998794852, "learning_rate": 1.244837217017087e-05, "loss": 0.403, "num_tokens": 7178893052.0, "step": 9385 }, { "epoch": 3.4382156270037556, "grad_norm": 0.14040172169228657, "learning_rate": 1.2444750959410594e-05, "loss": 0.4032, "num_tokens": 7179596385.0, "step": 9386 }, { "epoch": 3.4385820280296784, "grad_norm": 0.14640781049662682, "learning_rate": 1.2441130287036444e-05, "loss": 0.3872, "num_tokens": 7180348442.0, "step": 9387 }, { "epoch": 3.438948429055601, "grad_norm": 0.13480537425232184, "learning_rate": 1.2437510153252431e-05, "loss": 0.4198, "num_tokens": 7181044917.0, "step": 9388 }, { "epoch": 3.4393148300815244, "grad_norm": 0.14377546643433436, "learning_rate": 1.2433890558262523e-05, "loss": 0.384, "num_tokens": 7181858270.0, "step": 9389 }, { "epoch": 3.439681231107447, "grad_norm": 0.1416356782787078, "learning_rate": 1.2430271502270674e-05, "loss": 0.4103, "num_tokens": 7182571989.0, "step": 9390 }, { "epoch": 3.44004763213337, "grad_norm": 0.13978291797083414, "learning_rate": 1.2426652985480789e-05, "loss": 0.3967, "num_tokens": 7183375643.0, "step": 9391 }, { "epoch": 3.4404140331592927, "grad_norm": 0.13462779657569424, "learning_rate": 1.2423035008096753e-05, "loss": 0.3669, "num_tokens": 7184236941.0, "step": 9392 }, { "epoch": 3.440780434185216, "grad_norm": 0.12561901587210086, "learning_rate": 1.2419417570322433e-05, "loss": 0.3872, "num_tokens": 7185070915.0, "step": 9393 }, { "epoch": 3.4411468352111387, "grad_norm": 0.14051646917034188, "learning_rate": 1.2415800672361629e-05, "loss": 0.3977, "num_tokens": 7185873232.0, "step": 9394 }, { "epoch": 3.4415132362370615, "grad_norm": 0.12956305895075765, "learning_rate": 1.241218431441815e-05, "loss": 0.4148, "num_tokens": 7186689694.0, "step": 9395 }, { "epoch": 3.4418796372629843, "grad_norm": 0.14534153896509525, "learning_rate": 1.2408568496695754e-05, "loss": 0.3827, "num_tokens": 7187441526.0, "step": 9396 }, { "epoch": 3.442246038288907, "grad_norm": 0.13562424459284658, "learning_rate": 1.2404953219398173e-05, "loss": 0.4081, "num_tokens": 7188238937.0, "step": 9397 }, { "epoch": 3.4426124393148303, "grad_norm": 0.13778694044912507, "learning_rate": 1.2401338482729108e-05, "loss": 0.4259, "num_tokens": 7188971023.0, "step": 9398 }, { "epoch": 3.442978840340753, "grad_norm": 0.1478327599130751, "learning_rate": 1.2397724286892225e-05, "loss": 0.402, "num_tokens": 7189741218.0, "step": 9399 }, { "epoch": 3.443345241366676, "grad_norm": 0.14530162323822332, "learning_rate": 1.2394110632091172e-05, "loss": 0.3794, "num_tokens": 7190476702.0, "step": 9400 }, { "epoch": 3.4437116423925986, "grad_norm": 0.12465712022815123, "learning_rate": 1.2390497518529551e-05, "loss": 0.3919, "num_tokens": 7191322027.0, "step": 9401 }, { "epoch": 3.4440780434185214, "grad_norm": 0.1371300291354744, "learning_rate": 1.2386884946410942e-05, "loss": 0.4107, "num_tokens": 7192121824.0, "step": 9402 }, { "epoch": 3.4444444444444446, "grad_norm": 0.14291130571124377, "learning_rate": 1.2383272915938909e-05, "loss": 0.4363, "num_tokens": 7192787870.0, "step": 9403 }, { "epoch": 3.4448108454703674, "grad_norm": 0.14438538114649835, "learning_rate": 1.2379661427316949e-05, "loss": 0.4288, "num_tokens": 7193434104.0, "step": 9404 }, { "epoch": 3.44517724649629, "grad_norm": 0.146131364941363, "learning_rate": 1.2376050480748553e-05, "loss": 0.411, "num_tokens": 7194318948.0, "step": 9405 }, { "epoch": 3.445543647522213, "grad_norm": 0.12971840232917664, "learning_rate": 1.2372440076437198e-05, "loss": 0.4001, "num_tokens": 7195077000.0, "step": 9406 }, { "epoch": 3.4459100485481358, "grad_norm": 0.14632476995310287, "learning_rate": 1.236883021458629e-05, "loss": 0.3881, "num_tokens": 7195805863.0, "step": 9407 }, { "epoch": 3.446276449574059, "grad_norm": 0.13237616832209637, "learning_rate": 1.2365220895399221e-05, "loss": 0.3949, "num_tokens": 7196670824.0, "step": 9408 }, { "epoch": 3.4466428505999818, "grad_norm": 0.13046462757724475, "learning_rate": 1.2361612119079378e-05, "loss": 0.4203, "num_tokens": 7197491976.0, "step": 9409 }, { "epoch": 3.4470092516259045, "grad_norm": 0.14013238005277012, "learning_rate": 1.2358003885830088e-05, "loss": 0.4042, "num_tokens": 7198271126.0, "step": 9410 }, { "epoch": 3.4473756526518273, "grad_norm": 0.13606945918339414, "learning_rate": 1.2354396195854638e-05, "loss": 0.4173, "num_tokens": 7199066729.0, "step": 9411 }, { "epoch": 3.44774205367775, "grad_norm": 0.1507291576293089, "learning_rate": 1.2350789049356324e-05, "loss": 0.4232, "num_tokens": 7199723725.0, "step": 9412 }, { "epoch": 3.4481084547036733, "grad_norm": 0.1545172816429433, "learning_rate": 1.2347182446538382e-05, "loss": 0.4043, "num_tokens": 7200384388.0, "step": 9413 }, { "epoch": 3.448474855729596, "grad_norm": 0.14885080563153924, "learning_rate": 1.234357638760402e-05, "loss": 0.3781, "num_tokens": 7201168385.0, "step": 9414 }, { "epoch": 3.448841256755519, "grad_norm": 0.13015285757752426, "learning_rate": 1.2339970872756423e-05, "loss": 0.3814, "num_tokens": 7201971684.0, "step": 9415 }, { "epoch": 3.4492076577814417, "grad_norm": 0.13217703567966294, "learning_rate": 1.2336365902198743e-05, "loss": 0.4088, "num_tokens": 7202730559.0, "step": 9416 }, { "epoch": 3.4495740588073645, "grad_norm": 0.14628138200737825, "learning_rate": 1.2332761476134096e-05, "loss": 0.4042, "num_tokens": 7203545218.0, "step": 9417 }, { "epoch": 3.4499404598332877, "grad_norm": 0.1457907148930772, "learning_rate": 1.232915759476558e-05, "loss": 0.3952, "num_tokens": 7204381035.0, "step": 9418 }, { "epoch": 3.4503068608592105, "grad_norm": 0.1306659469537003, "learning_rate": 1.232555425829624e-05, "loss": 0.4176, "num_tokens": 7205248082.0, "step": 9419 }, { "epoch": 3.4506732618851332, "grad_norm": 0.13349040954890748, "learning_rate": 1.2321951466929126e-05, "loss": 0.3967, "num_tokens": 7205917660.0, "step": 9420 }, { "epoch": 3.451039662911056, "grad_norm": 0.14747081332667156, "learning_rate": 1.231834922086721e-05, "loss": 0.4149, "num_tokens": 7206563388.0, "step": 9421 }, { "epoch": 3.451406063936979, "grad_norm": 0.15250247757863783, "learning_rate": 1.2314747520313477e-05, "loss": 0.4115, "num_tokens": 7207410056.0, "step": 9422 }, { "epoch": 3.451772464962902, "grad_norm": 0.12930511867797692, "learning_rate": 1.2311146365470863e-05, "loss": 0.4152, "num_tokens": 7208217695.0, "step": 9423 }, { "epoch": 3.452138865988825, "grad_norm": 0.13294348814001114, "learning_rate": 1.2307545756542258e-05, "loss": 0.3758, "num_tokens": 7209094763.0, "step": 9424 }, { "epoch": 3.4525052670147476, "grad_norm": 0.13132785468200145, "learning_rate": 1.2303945693730552e-05, "loss": 0.4119, "num_tokens": 7209854840.0, "step": 9425 }, { "epoch": 3.4528716680406704, "grad_norm": 0.15137528474655232, "learning_rate": 1.2300346177238585e-05, "loss": 0.4438, "num_tokens": 7210582484.0, "step": 9426 }, { "epoch": 3.453238069066593, "grad_norm": 0.14087112692297554, "learning_rate": 1.2296747207269171e-05, "loss": 0.4475, "num_tokens": 7211302228.0, "step": 9427 }, { "epoch": 3.4536044700925164, "grad_norm": 0.15482838762396012, "learning_rate": 1.2293148784025086e-05, "loss": 0.4301, "num_tokens": 7211991300.0, "step": 9428 }, { "epoch": 3.453970871118439, "grad_norm": 0.14628263362600216, "learning_rate": 1.2289550907709087e-05, "loss": 0.4041, "num_tokens": 7212678676.0, "step": 9429 }, { "epoch": 3.454337272144362, "grad_norm": 0.1352739050365795, "learning_rate": 1.2285953578523895e-05, "loss": 0.4142, "num_tokens": 7213358291.0, "step": 9430 }, { "epoch": 3.4547036731702847, "grad_norm": 0.15451192643823738, "learning_rate": 1.2282356796672198e-05, "loss": 0.427, "num_tokens": 7213984898.0, "step": 9431 }, { "epoch": 3.455070074196208, "grad_norm": 0.1610534713494072, "learning_rate": 1.2278760562356648e-05, "loss": 0.3892, "num_tokens": 7214815008.0, "step": 9432 }, { "epoch": 3.4554364752221307, "grad_norm": 0.12940666529991052, "learning_rate": 1.2275164875779894e-05, "loss": 0.4022, "num_tokens": 7215658330.0, "step": 9433 }, { "epoch": 3.4558028762480535, "grad_norm": 0.14659874948536983, "learning_rate": 1.2271569737144513e-05, "loss": 0.3842, "num_tokens": 7216454378.0, "step": 9434 }, { "epoch": 3.4561692772739763, "grad_norm": 0.13587107397743936, "learning_rate": 1.2267975146653072e-05, "loss": 0.417, "num_tokens": 7217301023.0, "step": 9435 }, { "epoch": 3.456535678299899, "grad_norm": 0.13824260642924116, "learning_rate": 1.2264381104508121e-05, "loss": 0.3883, "num_tokens": 7218087696.0, "step": 9436 }, { "epoch": 3.4569020793258223, "grad_norm": 0.13214129738241595, "learning_rate": 1.2260787610912161e-05, "loss": 0.4315, "num_tokens": 7218862791.0, "step": 9437 }, { "epoch": 3.457268480351745, "grad_norm": 0.14250482834906605, "learning_rate": 1.2257194666067652e-05, "loss": 0.3925, "num_tokens": 7219712033.0, "step": 9438 }, { "epoch": 3.457634881377668, "grad_norm": 0.12697079518163443, "learning_rate": 1.2253602270177045e-05, "loss": 0.4169, "num_tokens": 7220578080.0, "step": 9439 }, { "epoch": 3.4580012824035906, "grad_norm": 0.1450780909081255, "learning_rate": 1.225001042344276e-05, "loss": 0.4057, "num_tokens": 7221295563.0, "step": 9440 }, { "epoch": 3.458367683429514, "grad_norm": 0.14019222271636925, "learning_rate": 1.2246419126067167e-05, "loss": 0.3813, "num_tokens": 7222060300.0, "step": 9441 }, { "epoch": 3.4587340844554366, "grad_norm": 0.13134235965715957, "learning_rate": 1.2242828378252623e-05, "loss": 0.4186, "num_tokens": 7222873747.0, "step": 9442 }, { "epoch": 3.4591004854813594, "grad_norm": 0.1370205201292804, "learning_rate": 1.2239238180201442e-05, "loss": 0.4048, "num_tokens": 7223682895.0, "step": 9443 }, { "epoch": 3.459466886507282, "grad_norm": 0.13860049427529467, "learning_rate": 1.2235648532115913e-05, "loss": 0.395, "num_tokens": 7224481142.0, "step": 9444 }, { "epoch": 3.459833287533205, "grad_norm": 0.13811687275253462, "learning_rate": 1.2232059434198295e-05, "loss": 0.4225, "num_tokens": 7225294274.0, "step": 9445 }, { "epoch": 3.460199688559128, "grad_norm": 0.12920627931423087, "learning_rate": 1.2228470886650816e-05, "loss": 0.4164, "num_tokens": 7226090184.0, "step": 9446 }, { "epoch": 3.460566089585051, "grad_norm": 0.1450328092072009, "learning_rate": 1.2224882889675667e-05, "loss": 0.423, "num_tokens": 7226896382.0, "step": 9447 }, { "epoch": 3.4609324906109737, "grad_norm": 0.13754503492338238, "learning_rate": 1.2221295443475012e-05, "loss": 0.3902, "num_tokens": 7227716254.0, "step": 9448 }, { "epoch": 3.4612988916368965, "grad_norm": 0.12914125032166338, "learning_rate": 1.2217708548250978e-05, "loss": 0.422, "num_tokens": 7228579151.0, "step": 9449 }, { "epoch": 3.4616652926628193, "grad_norm": 0.1352163181544492, "learning_rate": 1.221412220420569e-05, "loss": 0.4138, "num_tokens": 7229309357.0, "step": 9450 }, { "epoch": 3.4620316936887425, "grad_norm": 0.14445637798759092, "learning_rate": 1.2210536411541187e-05, "loss": 0.4087, "num_tokens": 7230036721.0, "step": 9451 }, { "epoch": 3.4623980947146653, "grad_norm": 0.133275537393742, "learning_rate": 1.2206951170459534e-05, "loss": 0.3912, "num_tokens": 7230759444.0, "step": 9452 }, { "epoch": 3.462764495740588, "grad_norm": 0.13702682730190344, "learning_rate": 1.220336648116273e-05, "loss": 0.4229, "num_tokens": 7231483684.0, "step": 9453 }, { "epoch": 3.463130896766511, "grad_norm": 0.14718586406538034, "learning_rate": 1.2199782343852756e-05, "loss": 0.386, "num_tokens": 7232273330.0, "step": 9454 }, { "epoch": 3.4634972977924336, "grad_norm": 0.13426398658133698, "learning_rate": 1.2196198758731553e-05, "loss": 0.3945, "num_tokens": 7233005900.0, "step": 9455 }, { "epoch": 3.463863698818357, "grad_norm": 0.13733591806816497, "learning_rate": 1.2192615726001038e-05, "loss": 0.4378, "num_tokens": 7233801755.0, "step": 9456 }, { "epoch": 3.4642300998442797, "grad_norm": 0.1410194414355361, "learning_rate": 1.21890332458631e-05, "loss": 0.4194, "num_tokens": 7234538216.0, "step": 9457 }, { "epoch": 3.4645965008702024, "grad_norm": 0.13745672956485344, "learning_rate": 1.218545131851959e-05, "loss": 0.4206, "num_tokens": 7235426980.0, "step": 9458 }, { "epoch": 3.464962901896125, "grad_norm": 0.12884884927738782, "learning_rate": 1.218186994417232e-05, "loss": 0.396, "num_tokens": 7236234228.0, "step": 9459 }, { "epoch": 3.465329302922048, "grad_norm": 0.1368316707427407, "learning_rate": 1.2178289123023106e-05, "loss": 0.4188, "num_tokens": 7236943309.0, "step": 9460 }, { "epoch": 3.465695703947971, "grad_norm": 0.14144716433153273, "learning_rate": 1.2174708855273682e-05, "loss": 0.4231, "num_tokens": 7237647019.0, "step": 9461 }, { "epoch": 3.466062104973894, "grad_norm": 0.1399755045120059, "learning_rate": 1.2171129141125781e-05, "loss": 0.4216, "num_tokens": 7238376855.0, "step": 9462 }, { "epoch": 3.466428505999817, "grad_norm": 0.1559400808750072, "learning_rate": 1.2167549980781119e-05, "loss": 0.4025, "num_tokens": 7239072647.0, "step": 9463 }, { "epoch": 3.4667949070257396, "grad_norm": 0.1350887794170471, "learning_rate": 1.2163971374441341e-05, "loss": 0.3854, "num_tokens": 7239827146.0, "step": 9464 }, { "epoch": 3.4671613080516623, "grad_norm": 0.14350868029595573, "learning_rate": 1.2160393322308087e-05, "loss": 0.3941, "num_tokens": 7240501731.0, "step": 9465 }, { "epoch": 3.4675277090775856, "grad_norm": 0.15024469591391834, "learning_rate": 1.2156815824582969e-05, "loss": 0.4261, "num_tokens": 7241168356.0, "step": 9466 }, { "epoch": 3.4678941101035083, "grad_norm": 0.14249856844502096, "learning_rate": 1.2153238881467557e-05, "loss": 0.3989, "num_tokens": 7241849006.0, "step": 9467 }, { "epoch": 3.468260511129431, "grad_norm": 0.14142515977324743, "learning_rate": 1.214966249316338e-05, "loss": 0.3946, "num_tokens": 7242646362.0, "step": 9468 }, { "epoch": 3.468626912155354, "grad_norm": 0.13552131867315576, "learning_rate": 1.2146086659871958e-05, "loss": 0.3839, "num_tokens": 7243381856.0, "step": 9469 }, { "epoch": 3.4689933131812767, "grad_norm": 0.13666826182499123, "learning_rate": 1.2142511381794773e-05, "loss": 0.4249, "num_tokens": 7244151521.0, "step": 9470 }, { "epoch": 3.4693597142072, "grad_norm": 0.13840278928500419, "learning_rate": 1.2138936659133269e-05, "loss": 0.4087, "num_tokens": 7244961986.0, "step": 9471 }, { "epoch": 3.4697261152331227, "grad_norm": 0.13494362758634237, "learning_rate": 1.213536249208886e-05, "loss": 0.3977, "num_tokens": 7245755842.0, "step": 9472 }, { "epoch": 3.4700925162590455, "grad_norm": 0.14400272236724562, "learning_rate": 1.2131788880862932e-05, "loss": 0.4238, "num_tokens": 7246493679.0, "step": 9473 }, { "epoch": 3.4704589172849682, "grad_norm": 0.135004259632485, "learning_rate": 1.2128215825656837e-05, "loss": 0.3964, "num_tokens": 7247306131.0, "step": 9474 }, { "epoch": 3.470825318310891, "grad_norm": 0.13170450697200328, "learning_rate": 1.2124643326671902e-05, "loss": 0.4452, "num_tokens": 7248060044.0, "step": 9475 }, { "epoch": 3.4711917193368143, "grad_norm": 0.1514723506169517, "learning_rate": 1.2121071384109406e-05, "loss": 0.3861, "num_tokens": 7248734282.0, "step": 9476 }, { "epoch": 3.471558120362737, "grad_norm": 0.14230533383038652, "learning_rate": 1.2117499998170631e-05, "loss": 0.4094, "num_tokens": 7249475653.0, "step": 9477 }, { "epoch": 3.47192452138866, "grad_norm": 0.14327910312353423, "learning_rate": 1.2113929169056777e-05, "loss": 0.4022, "num_tokens": 7250322651.0, "step": 9478 }, { "epoch": 3.4722909224145826, "grad_norm": 0.14890425351769015, "learning_rate": 1.2110358896969063e-05, "loss": 0.416, "num_tokens": 7251109251.0, "step": 9479 }, { "epoch": 3.472657323440506, "grad_norm": 0.1460029403728897, "learning_rate": 1.2106789182108654e-05, "loss": 0.3978, "num_tokens": 7251842095.0, "step": 9480 }, { "epoch": 3.4730237244664286, "grad_norm": 0.13888496669118072, "learning_rate": 1.210322002467666e-05, "loss": 0.3908, "num_tokens": 7252628805.0, "step": 9481 }, { "epoch": 3.4733901254923514, "grad_norm": 0.14023402112245043, "learning_rate": 1.2099651424874206e-05, "loss": 0.4019, "num_tokens": 7253384772.0, "step": 9482 }, { "epoch": 3.473756526518274, "grad_norm": 0.13881387675255832, "learning_rate": 1.209608338290236e-05, "loss": 0.4204, "num_tokens": 7254166717.0, "step": 9483 }, { "epoch": 3.474122927544197, "grad_norm": 0.13608269807595533, "learning_rate": 1.2092515898962156e-05, "loss": 0.3899, "num_tokens": 7254826466.0, "step": 9484 }, { "epoch": 3.47448932857012, "grad_norm": 0.14928182322360237, "learning_rate": 1.2088948973254605e-05, "loss": 0.4468, "num_tokens": 7255538495.0, "step": 9485 }, { "epoch": 3.474855729596043, "grad_norm": 0.14774973833279154, "learning_rate": 1.2085382605980684e-05, "loss": 0.3981, "num_tokens": 7256429246.0, "step": 9486 }, { "epoch": 3.4752221306219657, "grad_norm": 0.12874557654768753, "learning_rate": 1.208181679734134e-05, "loss": 0.4173, "num_tokens": 7257192356.0, "step": 9487 }, { "epoch": 3.4755885316478885, "grad_norm": 0.13837073858541302, "learning_rate": 1.2078251547537483e-05, "loss": 0.4236, "num_tokens": 7258010892.0, "step": 9488 }, { "epoch": 3.4759549326738117, "grad_norm": 0.13596358564429023, "learning_rate": 1.2074686856769989e-05, "loss": 0.4068, "num_tokens": 7258794681.0, "step": 9489 }, { "epoch": 3.4763213336997345, "grad_norm": 0.1317855445720199, "learning_rate": 1.2071122725239732e-05, "loss": 0.4453, "num_tokens": 7259586759.0, "step": 9490 }, { "epoch": 3.4766877347256573, "grad_norm": 0.141251604998719, "learning_rate": 1.206755915314751e-05, "loss": 0.3819, "num_tokens": 7260307419.0, "step": 9491 }, { "epoch": 3.47705413575158, "grad_norm": 0.14550575634070845, "learning_rate": 1.206399614069411e-05, "loss": 0.4653, "num_tokens": 7260977145.0, "step": 9492 }, { "epoch": 3.477420536777503, "grad_norm": 0.15537743384822503, "learning_rate": 1.2060433688080302e-05, "loss": 0.3853, "num_tokens": 7261669057.0, "step": 9493 }, { "epoch": 3.477786937803426, "grad_norm": 0.13920325233519737, "learning_rate": 1.2056871795506809e-05, "loss": 0.416, "num_tokens": 7262433753.0, "step": 9494 }, { "epoch": 3.478153338829349, "grad_norm": 0.13760028700606536, "learning_rate": 1.205331046317431e-05, "loss": 0.4125, "num_tokens": 7263114123.0, "step": 9495 }, { "epoch": 3.4785197398552716, "grad_norm": 0.15368303180418802, "learning_rate": 1.2049749691283482e-05, "loss": 0.3768, "num_tokens": 7263774304.0, "step": 9496 }, { "epoch": 3.4788861408811944, "grad_norm": 0.15083981336552216, "learning_rate": 1.2046189480034947e-05, "loss": 0.4168, "num_tokens": 7264485291.0, "step": 9497 }, { "epoch": 3.479252541907117, "grad_norm": 0.14989351567139836, "learning_rate": 1.2042629829629307e-05, "loss": 0.407, "num_tokens": 7265243638.0, "step": 9498 }, { "epoch": 3.4796189429330404, "grad_norm": 0.1313843144578646, "learning_rate": 1.2039070740267127e-05, "loss": 0.3915, "num_tokens": 7265997130.0, "step": 9499 }, { "epoch": 3.479985343958963, "grad_norm": 0.13833354964906025, "learning_rate": 1.203551221214894e-05, "loss": 0.4317, "num_tokens": 7266839087.0, "step": 9500 }, { "epoch": 3.480351744984886, "grad_norm": 0.14187935780399719, "learning_rate": 1.2031954245475258e-05, "loss": 0.4188, "num_tokens": 7267703402.0, "step": 9501 }, { "epoch": 3.4807181460108088, "grad_norm": 0.13227113172243865, "learning_rate": 1.2028396840446543e-05, "loss": 0.3966, "num_tokens": 7268373072.0, "step": 9502 }, { "epoch": 3.4810845470367315, "grad_norm": 0.1551758624807168, "learning_rate": 1.2024839997263245e-05, "loss": 0.4254, "num_tokens": 7269154792.0, "step": 9503 }, { "epoch": 3.4814509480626548, "grad_norm": 0.14131318507420706, "learning_rate": 1.202128371612576e-05, "loss": 0.3894, "num_tokens": 7269964705.0, "step": 9504 }, { "epoch": 3.4818173490885775, "grad_norm": 0.1427201224471467, "learning_rate": 1.2017727997234471e-05, "loss": 0.4247, "num_tokens": 7270700053.0, "step": 9505 }, { "epoch": 3.4821837501145003, "grad_norm": 0.15499733482236455, "learning_rate": 1.2014172840789731e-05, "loss": 0.4031, "num_tokens": 7271385808.0, "step": 9506 }, { "epoch": 3.482550151140423, "grad_norm": 0.13657736328698222, "learning_rate": 1.2010618246991854e-05, "loss": 0.374, "num_tokens": 7272075392.0, "step": 9507 }, { "epoch": 3.482916552166346, "grad_norm": 0.1596639178414705, "learning_rate": 1.2007064216041102e-05, "loss": 0.4193, "num_tokens": 7272818178.0, "step": 9508 }, { "epoch": 3.483282953192269, "grad_norm": 0.13977049458200355, "learning_rate": 1.2003510748137746e-05, "loss": 0.4201, "num_tokens": 7273596792.0, "step": 9509 }, { "epoch": 3.483649354218192, "grad_norm": 0.1340703037939469, "learning_rate": 1.1999957843481993e-05, "loss": 0.4051, "num_tokens": 7274368853.0, "step": 9510 }, { "epoch": 3.4840157552441147, "grad_norm": 0.1457892137874536, "learning_rate": 1.199640550227404e-05, "loss": 0.4441, "num_tokens": 7275023009.0, "step": 9511 }, { "epoch": 3.4843821562700374, "grad_norm": 0.14803545214020938, "learning_rate": 1.1992853724714034e-05, "loss": 0.4084, "num_tokens": 7275858884.0, "step": 9512 }, { "epoch": 3.4847485572959602, "grad_norm": 0.14191082136464456, "learning_rate": 1.1989302511002098e-05, "loss": 0.3824, "num_tokens": 7276664807.0, "step": 9513 }, { "epoch": 3.4851149583218834, "grad_norm": 0.1350186936023306, "learning_rate": 1.1985751861338328e-05, "loss": 0.4079, "num_tokens": 7277364679.0, "step": 9514 }, { "epoch": 3.4854813593478062, "grad_norm": 0.14336429386980756, "learning_rate": 1.1982201775922781e-05, "loss": 0.3754, "num_tokens": 7278111852.0, "step": 9515 }, { "epoch": 3.485847760373729, "grad_norm": 0.12987642839173769, "learning_rate": 1.1978652254955479e-05, "loss": 0.3984, "num_tokens": 7278935394.0, "step": 9516 }, { "epoch": 3.486214161399652, "grad_norm": 0.1306675871479428, "learning_rate": 1.197510329863644e-05, "loss": 0.3934, "num_tokens": 7279701302.0, "step": 9517 }, { "epoch": 3.4865805624255746, "grad_norm": 0.13965740136243587, "learning_rate": 1.1971554907165603e-05, "loss": 0.4321, "num_tokens": 7280518678.0, "step": 9518 }, { "epoch": 3.486946963451498, "grad_norm": 0.13343956555252248, "learning_rate": 1.1968007080742905e-05, "loss": 0.4235, "num_tokens": 7281246680.0, "step": 9519 }, { "epoch": 3.4873133644774206, "grad_norm": 0.13809607412593072, "learning_rate": 1.1964459819568266e-05, "loss": 0.3879, "num_tokens": 7282093670.0, "step": 9520 }, { "epoch": 3.4876797655033434, "grad_norm": 0.1407091751008291, "learning_rate": 1.1960913123841533e-05, "loss": 0.4348, "num_tokens": 7282814040.0, "step": 9521 }, { "epoch": 3.488046166529266, "grad_norm": 0.14027095687764995, "learning_rate": 1.1957366993762544e-05, "loss": 0.4101, "num_tokens": 7283592123.0, "step": 9522 }, { "epoch": 3.488412567555189, "grad_norm": 0.14392134723373376, "learning_rate": 1.195382142953112e-05, "loss": 0.4161, "num_tokens": 7284381346.0, "step": 9523 }, { "epoch": 3.488778968581112, "grad_norm": 0.1328003848949614, "learning_rate": 1.1950276431347025e-05, "loss": 0.3966, "num_tokens": 7285102692.0, "step": 9524 }, { "epoch": 3.489145369607035, "grad_norm": 0.13377201495023025, "learning_rate": 1.1946731999409997e-05, "loss": 0.3986, "num_tokens": 7285981013.0, "step": 9525 }, { "epoch": 3.4895117706329577, "grad_norm": 0.1330589433879243, "learning_rate": 1.1943188133919753e-05, "loss": 0.4138, "num_tokens": 7286734347.0, "step": 9526 }, { "epoch": 3.4898781716588805, "grad_norm": 0.1552990261763418, "learning_rate": 1.1939644835075963e-05, "loss": 0.4218, "num_tokens": 7287485544.0, "step": 9527 }, { "epoch": 3.4902445726848037, "grad_norm": 0.13288492358872064, "learning_rate": 1.1936102103078278e-05, "loss": 0.3972, "num_tokens": 7288185577.0, "step": 9528 }, { "epoch": 3.4906109737107265, "grad_norm": 0.14530912670411203, "learning_rate": 1.1932559938126309e-05, "loss": 0.4079, "num_tokens": 7288833274.0, "step": 9529 }, { "epoch": 3.4909773747366493, "grad_norm": 0.14339034542415727, "learning_rate": 1.1929018340419636e-05, "loss": 0.4078, "num_tokens": 7289713112.0, "step": 9530 }, { "epoch": 3.491343775762572, "grad_norm": 0.12527514950496904, "learning_rate": 1.1925477310157814e-05, "loss": 0.3802, "num_tokens": 7290464038.0, "step": 9531 }, { "epoch": 3.491710176788495, "grad_norm": 0.13760918667286118, "learning_rate": 1.1921936847540356e-05, "loss": 0.4334, "num_tokens": 7291277857.0, "step": 9532 }, { "epoch": 3.492076577814418, "grad_norm": 0.13921419442389707, "learning_rate": 1.1918396952766742e-05, "loss": 0.4295, "num_tokens": 7292082705.0, "step": 9533 }, { "epoch": 3.492442978840341, "grad_norm": 0.1371504243549322, "learning_rate": 1.1914857626036446e-05, "loss": 0.4118, "num_tokens": 7292908004.0, "step": 9534 }, { "epoch": 3.4928093798662636, "grad_norm": 0.13278524439571784, "learning_rate": 1.1911318867548866e-05, "loss": 0.3931, "num_tokens": 7293692341.0, "step": 9535 }, { "epoch": 3.4931757808921864, "grad_norm": 0.14709076392719303, "learning_rate": 1.190778067750341e-05, "loss": 0.4247, "num_tokens": 7294546344.0, "step": 9536 }, { "epoch": 3.4935421819181096, "grad_norm": 0.12905191288507098, "learning_rate": 1.1904243056099429e-05, "loss": 0.3962, "num_tokens": 7295280645.0, "step": 9537 }, { "epoch": 3.4939085829440324, "grad_norm": 0.1459827436909872, "learning_rate": 1.190070600353624e-05, "loss": 0.3749, "num_tokens": 7296021736.0, "step": 9538 }, { "epoch": 3.494274983969955, "grad_norm": 0.13055063418279536, "learning_rate": 1.1897169520013149e-05, "loss": 0.4407, "num_tokens": 7296844869.0, "step": 9539 }, { "epoch": 3.494641384995878, "grad_norm": 0.1394044645164557, "learning_rate": 1.1893633605729411e-05, "loss": 0.3869, "num_tokens": 7297546958.0, "step": 9540 }, { "epoch": 3.4950077860218007, "grad_norm": 0.1351232086775021, "learning_rate": 1.1890098260884262e-05, "loss": 0.4184, "num_tokens": 7298371602.0, "step": 9541 }, { "epoch": 3.495374187047724, "grad_norm": 0.141983644477259, "learning_rate": 1.1886563485676892e-05, "loss": 0.3917, "num_tokens": 7299136977.0, "step": 9542 }, { "epoch": 3.4957405880736467, "grad_norm": 0.13230782607031147, "learning_rate": 1.1883029280306464e-05, "loss": 0.4104, "num_tokens": 7299772512.0, "step": 9543 }, { "epoch": 3.4961069890995695, "grad_norm": 0.14547514558892735, "learning_rate": 1.1879495644972128e-05, "loss": 0.4177, "num_tokens": 7300541020.0, "step": 9544 }, { "epoch": 3.4964733901254923, "grad_norm": 0.15359623930409108, "learning_rate": 1.1875962579872966e-05, "loss": 0.4042, "num_tokens": 7301478363.0, "step": 9545 }, { "epoch": 3.496839791151415, "grad_norm": 0.12802030481151525, "learning_rate": 1.187243008520805e-05, "loss": 0.4072, "num_tokens": 7302322107.0, "step": 9546 }, { "epoch": 3.4972061921773383, "grad_norm": 0.13837278148080492, "learning_rate": 1.1868898161176435e-05, "loss": 0.4107, "num_tokens": 7303074347.0, "step": 9547 }, { "epoch": 3.497572593203261, "grad_norm": 0.13749381457075283, "learning_rate": 1.1865366807977105e-05, "loss": 0.3803, "num_tokens": 7303810756.0, "step": 9548 }, { "epoch": 3.497938994229184, "grad_norm": 0.1419495589254219, "learning_rate": 1.186183602580903e-05, "loss": 0.4178, "num_tokens": 7304603527.0, "step": 9549 }, { "epoch": 3.4983053952551066, "grad_norm": 0.14702667483207812, "learning_rate": 1.1858305814871169e-05, "loss": 0.3821, "num_tokens": 7305390373.0, "step": 9550 }, { "epoch": 3.4986717962810294, "grad_norm": 0.136845123155919, "learning_rate": 1.1854776175362423e-05, "loss": 0.4161, "num_tokens": 7306122020.0, "step": 9551 }, { "epoch": 3.4990381973069526, "grad_norm": 0.13287296132261847, "learning_rate": 1.1851247107481659e-05, "loss": 0.423, "num_tokens": 7306918061.0, "step": 9552 }, { "epoch": 3.4994045983328754, "grad_norm": 0.13628231747053443, "learning_rate": 1.184771861142773e-05, "loss": 0.3964, "num_tokens": 7307594374.0, "step": 9553 }, { "epoch": 3.499770999358798, "grad_norm": 0.14011467902349647, "learning_rate": 1.1844190687399441e-05, "loss": 0.4279, "num_tokens": 7308383896.0, "step": 9554 }, { "epoch": 3.500137400384721, "grad_norm": 0.14279946751897887, "learning_rate": 1.1840663335595584e-05, "loss": 0.399, "num_tokens": 7309238185.0, "step": 9555 }, { "epoch": 3.5005038014106438, "grad_norm": 0.12776208466548827, "learning_rate": 1.183713655621489e-05, "loss": 0.4116, "num_tokens": 7310037924.0, "step": 9556 }, { "epoch": 3.500870202436567, "grad_norm": 0.14415243849417947, "learning_rate": 1.1833610349456082e-05, "loss": 0.4215, "num_tokens": 7310718635.0, "step": 9557 }, { "epoch": 3.5012366034624898, "grad_norm": 0.14556751154452147, "learning_rate": 1.1830084715517845e-05, "loss": 0.4124, "num_tokens": 7311471434.0, "step": 9558 }, { "epoch": 3.5016030044884126, "grad_norm": 0.135180736723353, "learning_rate": 1.1826559654598824e-05, "loss": 0.4359, "num_tokens": 7312206833.0, "step": 9559 }, { "epoch": 3.5019694055143353, "grad_norm": 0.14531136944574594, "learning_rate": 1.1823035166897628e-05, "loss": 0.4106, "num_tokens": 7312927323.0, "step": 9560 }, { "epoch": 3.502335806540258, "grad_norm": 0.1445490206620376, "learning_rate": 1.181951125261287e-05, "loss": 0.4021, "num_tokens": 7313715804.0, "step": 9561 }, { "epoch": 3.5027022075661813, "grad_norm": 0.13643654854199103, "learning_rate": 1.1815987911943072e-05, "loss": 0.4049, "num_tokens": 7314582958.0, "step": 9562 }, { "epoch": 3.503068608592104, "grad_norm": 0.1243846880835244, "learning_rate": 1.1812465145086778e-05, "loss": 0.4095, "num_tokens": 7315464157.0, "step": 9563 }, { "epoch": 3.503435009618027, "grad_norm": 0.1433648544598796, "learning_rate": 1.1808942952242472e-05, "loss": 0.443, "num_tokens": 7316152631.0, "step": 9564 }, { "epoch": 3.5038014106439497, "grad_norm": 0.15475331209034504, "learning_rate": 1.1805421333608598e-05, "loss": 0.3951, "num_tokens": 7316911528.0, "step": 9565 }, { "epoch": 3.5041678116698725, "grad_norm": 0.1309783685190265, "learning_rate": 1.180190028938359e-05, "loss": 0.4172, "num_tokens": 7317617965.0, "step": 9566 }, { "epoch": 3.5045342126957957, "grad_norm": 0.1446639806857363, "learning_rate": 1.179837981976584e-05, "loss": 0.4371, "num_tokens": 7318319069.0, "step": 9567 }, { "epoch": 3.5049006137217185, "grad_norm": 0.1464225760335597, "learning_rate": 1.1794859924953706e-05, "loss": 0.4085, "num_tokens": 7319072558.0, "step": 9568 }, { "epoch": 3.5052670147476412, "grad_norm": 0.1328115920129284, "learning_rate": 1.1791340605145516e-05, "loss": 0.3545, "num_tokens": 7319763052.0, "step": 9569 }, { "epoch": 3.505633415773564, "grad_norm": 0.14043051145025826, "learning_rate": 1.1787821860539558e-05, "loss": 0.3965, "num_tokens": 7320471178.0, "step": 9570 }, { "epoch": 3.505999816799487, "grad_norm": 0.1420483560564994, "learning_rate": 1.1784303691334102e-05, "loss": 0.4192, "num_tokens": 7321189566.0, "step": 9571 }, { "epoch": 3.50636621782541, "grad_norm": 0.1525763338698647, "learning_rate": 1.1780786097727374e-05, "loss": 0.39, "num_tokens": 7322114037.0, "step": 9572 }, { "epoch": 3.506732618851333, "grad_norm": 0.12945760272756737, "learning_rate": 1.1777269079917565e-05, "loss": 0.4061, "num_tokens": 7322778135.0, "step": 9573 }, { "epoch": 3.5070990198772556, "grad_norm": 0.15206782903990865, "learning_rate": 1.177375263810286e-05, "loss": 0.4381, "num_tokens": 7323452673.0, "step": 9574 }, { "epoch": 3.5074654209031784, "grad_norm": 0.15054948106716382, "learning_rate": 1.177023677248137e-05, "loss": 0.4003, "num_tokens": 7324146982.0, "step": 9575 }, { "epoch": 3.507831821929101, "grad_norm": 0.14074007655281684, "learning_rate": 1.1766721483251198e-05, "loss": 0.4064, "num_tokens": 7324940180.0, "step": 9576 }, { "epoch": 3.5081982229550244, "grad_norm": 0.13319196315256945, "learning_rate": 1.1763206770610427e-05, "loss": 0.4146, "num_tokens": 7325664109.0, "step": 9577 }, { "epoch": 3.508564623980947, "grad_norm": 0.13936940581436488, "learning_rate": 1.1759692634757077e-05, "loss": 0.3916, "num_tokens": 7326610282.0, "step": 9578 }, { "epoch": 3.50893102500687, "grad_norm": 0.14728827460173177, "learning_rate": 1.1756179075889143e-05, "loss": 0.3839, "num_tokens": 7327425057.0, "step": 9579 }, { "epoch": 3.509297426032793, "grad_norm": 0.13251181428501047, "learning_rate": 1.1752666094204616e-05, "loss": 0.4151, "num_tokens": 7328242895.0, "step": 9580 }, { "epoch": 3.5096638270587155, "grad_norm": 0.13366463625367886, "learning_rate": 1.1749153689901423e-05, "loss": 0.3923, "num_tokens": 7329028032.0, "step": 9581 }, { "epoch": 3.5100302280846387, "grad_norm": 0.13382244763897208, "learning_rate": 1.1745641863177468e-05, "loss": 0.3917, "num_tokens": 7329768100.0, "step": 9582 }, { "epoch": 3.5103966291105615, "grad_norm": 0.1357669087212582, "learning_rate": 1.1742130614230625e-05, "loss": 0.3994, "num_tokens": 7330577343.0, "step": 9583 }, { "epoch": 3.5107630301364843, "grad_norm": 0.1381615853299365, "learning_rate": 1.1738619943258731e-05, "loss": 0.3814, "num_tokens": 7331408955.0, "step": 9584 }, { "epoch": 3.5111294311624075, "grad_norm": 0.12904010165309884, "learning_rate": 1.1735109850459599e-05, "loss": 0.3896, "num_tokens": 7332055764.0, "step": 9585 }, { "epoch": 3.5114958321883303, "grad_norm": 0.14987532597595374, "learning_rate": 1.1731600336030997e-05, "loss": 0.395, "num_tokens": 7332817067.0, "step": 9586 }, { "epoch": 3.511862233214253, "grad_norm": 0.1432213939101429, "learning_rate": 1.1728091400170671e-05, "loss": 0.3983, "num_tokens": 7333595012.0, "step": 9587 }, { "epoch": 3.512228634240176, "grad_norm": 0.13928630182780666, "learning_rate": 1.172458304307633e-05, "loss": 0.3803, "num_tokens": 7334237242.0, "step": 9588 }, { "epoch": 3.5125950352660986, "grad_norm": 0.14817633864194363, "learning_rate": 1.172107526494565e-05, "loss": 0.4026, "num_tokens": 7334923649.0, "step": 9589 }, { "epoch": 3.512961436292022, "grad_norm": 0.14201147701102382, "learning_rate": 1.1717568065976266e-05, "loss": 0.4036, "num_tokens": 7335759470.0, "step": 9590 }, { "epoch": 3.5133278373179446, "grad_norm": 0.13341602217615184, "learning_rate": 1.1714061446365816e-05, "loss": 0.3684, "num_tokens": 7336539647.0, "step": 9591 }, { "epoch": 3.5136942383438674, "grad_norm": 0.14018944069846329, "learning_rate": 1.1710555406311845e-05, "loss": 0.4017, "num_tokens": 7337358122.0, "step": 9592 }, { "epoch": 3.51406063936979, "grad_norm": 0.1316701131610485, "learning_rate": 1.1707049946011924e-05, "loss": 0.4077, "num_tokens": 7338129007.0, "step": 9593 }, { "epoch": 3.514427040395713, "grad_norm": 0.1388096958833129, "learning_rate": 1.1703545065663565e-05, "loss": 0.3867, "num_tokens": 7338879075.0, "step": 9594 }, { "epoch": 3.514793441421636, "grad_norm": 0.14102667315894601, "learning_rate": 1.1700040765464226e-05, "loss": 0.3981, "num_tokens": 7339554632.0, "step": 9595 }, { "epoch": 3.515159842447559, "grad_norm": 0.1465595715850181, "learning_rate": 1.1696537045611383e-05, "loss": 0.4244, "num_tokens": 7340340782.0, "step": 9596 }, { "epoch": 3.5155262434734817, "grad_norm": 0.14390497929477322, "learning_rate": 1.1693033906302436e-05, "loss": 0.3933, "num_tokens": 7341151366.0, "step": 9597 }, { "epoch": 3.5158926444994045, "grad_norm": 0.12999489007809487, "learning_rate": 1.168953134773477e-05, "loss": 0.4058, "num_tokens": 7342007508.0, "step": 9598 }, { "epoch": 3.5162590455253273, "grad_norm": 0.1451137381960313, "learning_rate": 1.1686029370105741e-05, "loss": 0.4146, "num_tokens": 7342857032.0, "step": 9599 }, { "epoch": 3.5166254465512505, "grad_norm": 0.1343235887459526, "learning_rate": 1.1682527973612654e-05, "loss": 0.412, "num_tokens": 7343652011.0, "step": 9600 }, { "epoch": 3.5169918475771733, "grad_norm": 0.15144517399697194, "learning_rate": 1.1679027158452815e-05, "loss": 0.4257, "num_tokens": 7344363622.0, "step": 9601 }, { "epoch": 3.517358248603096, "grad_norm": 0.15400000277784234, "learning_rate": 1.1675526924823455e-05, "loss": 0.3759, "num_tokens": 7345171752.0, "step": 9602 }, { "epoch": 3.517724649629019, "grad_norm": 0.12727431950814835, "learning_rate": 1.1672027272921795e-05, "loss": 0.422, "num_tokens": 7345991950.0, "step": 9603 }, { "epoch": 3.5180910506549417, "grad_norm": 0.13151984422124408, "learning_rate": 1.166852820294504e-05, "loss": 0.371, "num_tokens": 7346905656.0, "step": 9604 }, { "epoch": 3.518457451680865, "grad_norm": 0.12821203263013223, "learning_rate": 1.1665029715090324e-05, "loss": 0.4086, "num_tokens": 7347711243.0, "step": 9605 }, { "epoch": 3.5188238527067877, "grad_norm": 0.13307433522969833, "learning_rate": 1.1661531809554765e-05, "loss": 0.4013, "num_tokens": 7348453601.0, "step": 9606 }, { "epoch": 3.5191902537327104, "grad_norm": 0.14404512484310616, "learning_rate": 1.1658034486535468e-05, "loss": 0.3587, "num_tokens": 7349255960.0, "step": 9607 }, { "epoch": 3.519556654758633, "grad_norm": 0.13280600385160604, "learning_rate": 1.165453774622948e-05, "loss": 0.428, "num_tokens": 7349928465.0, "step": 9608 }, { "epoch": 3.519923055784556, "grad_norm": 0.15350402454778136, "learning_rate": 1.1651041588833825e-05, "loss": 0.4032, "num_tokens": 7350679968.0, "step": 9609 }, { "epoch": 3.5202894568104792, "grad_norm": 0.1391660453883397, "learning_rate": 1.1647546014545488e-05, "loss": 0.3752, "num_tokens": 7351472729.0, "step": 9610 }, { "epoch": 3.520655857836402, "grad_norm": 0.14411527150779413, "learning_rate": 1.1644051023561426e-05, "loss": 0.4151, "num_tokens": 7352169194.0, "step": 9611 }, { "epoch": 3.521022258862325, "grad_norm": 0.1531944496772649, "learning_rate": 1.1640556616078568e-05, "loss": 0.4201, "num_tokens": 7353075864.0, "step": 9612 }, { "epoch": 3.5213886598882476, "grad_norm": 0.13322428633154787, "learning_rate": 1.16370627922938e-05, "loss": 0.4141, "num_tokens": 7353780363.0, "step": 9613 }, { "epoch": 3.5217550609141703, "grad_norm": 0.15154684438748797, "learning_rate": 1.1633569552403983e-05, "loss": 0.4041, "num_tokens": 7354502038.0, "step": 9614 }, { "epoch": 3.5221214619400936, "grad_norm": 0.1418187276498127, "learning_rate": 1.1630076896605942e-05, "loss": 0.4137, "num_tokens": 7355314764.0, "step": 9615 }, { "epoch": 3.5224878629660163, "grad_norm": 0.1397398125376307, "learning_rate": 1.1626584825096465e-05, "loss": 0.3845, "num_tokens": 7356117393.0, "step": 9616 }, { "epoch": 3.522854263991939, "grad_norm": 0.12776001537853235, "learning_rate": 1.1623093338072306e-05, "loss": 0.3825, "num_tokens": 7356914968.0, "step": 9617 }, { "epoch": 3.523220665017862, "grad_norm": 0.13124825191802728, "learning_rate": 1.1619602435730215e-05, "loss": 0.3569, "num_tokens": 7357688036.0, "step": 9618 }, { "epoch": 3.5235870660437847, "grad_norm": 0.1493797741901947, "learning_rate": 1.1616112118266853e-05, "loss": 0.4305, "num_tokens": 7358488845.0, "step": 9619 }, { "epoch": 3.523953467069708, "grad_norm": 0.14538247133072657, "learning_rate": 1.1612622385878907e-05, "loss": 0.4127, "num_tokens": 7359104334.0, "step": 9620 }, { "epoch": 3.5243198680956307, "grad_norm": 0.15249107534075165, "learning_rate": 1.1609133238762995e-05, "loss": 0.387, "num_tokens": 7359928642.0, "step": 9621 }, { "epoch": 3.5246862691215535, "grad_norm": 0.13071424824394645, "learning_rate": 1.1605644677115702e-05, "loss": 0.3739, "num_tokens": 7360689311.0, "step": 9622 }, { "epoch": 3.5250526701474763, "grad_norm": 0.14645404436860213, "learning_rate": 1.1602156701133603e-05, "loss": 0.4298, "num_tokens": 7361374947.0, "step": 9623 }, { "epoch": 3.525419071173399, "grad_norm": 0.15084902697427993, "learning_rate": 1.159866931101322e-05, "loss": 0.3934, "num_tokens": 7362091254.0, "step": 9624 }, { "epoch": 3.5257854721993223, "grad_norm": 0.13753207593258915, "learning_rate": 1.159518250695105e-05, "loss": 0.4049, "num_tokens": 7362857142.0, "step": 9625 }, { "epoch": 3.526151873225245, "grad_norm": 0.14245604246587085, "learning_rate": 1.1591696289143557e-05, "loss": 0.4339, "num_tokens": 7363717867.0, "step": 9626 }, { "epoch": 3.526518274251168, "grad_norm": 0.13245789703779143, "learning_rate": 1.1588210657787165e-05, "loss": 0.3913, "num_tokens": 7364449577.0, "step": 9627 }, { "epoch": 3.526884675277091, "grad_norm": 0.16315116783461847, "learning_rate": 1.1584725613078276e-05, "loss": 0.3787, "num_tokens": 7365200768.0, "step": 9628 }, { "epoch": 3.5272510763030134, "grad_norm": 0.13568887420712708, "learning_rate": 1.158124115521325e-05, "loss": 0.4157, "num_tokens": 7366032963.0, "step": 9629 }, { "epoch": 3.5276174773289366, "grad_norm": 0.14235710503045654, "learning_rate": 1.1577757284388414e-05, "loss": 0.3986, "num_tokens": 7366706492.0, "step": 9630 }, { "epoch": 3.5279838783548594, "grad_norm": 0.14038441578961258, "learning_rate": 1.1574274000800076e-05, "loss": 0.3898, "num_tokens": 7367515109.0, "step": 9631 }, { "epoch": 3.528350279380782, "grad_norm": 0.13359798710695714, "learning_rate": 1.1570791304644491e-05, "loss": 0.3775, "num_tokens": 7368305696.0, "step": 9632 }, { "epoch": 3.5287166804067054, "grad_norm": 0.14034358764475485, "learning_rate": 1.1567309196117887e-05, "loss": 0.4087, "num_tokens": 7369109354.0, "step": 9633 }, { "epoch": 3.529083081432628, "grad_norm": 0.13890449817894943, "learning_rate": 1.1563827675416474e-05, "loss": 0.3986, "num_tokens": 7369901558.0, "step": 9634 }, { "epoch": 3.529449482458551, "grad_norm": 0.12736703594248072, "learning_rate": 1.1560346742736411e-05, "loss": 0.4033, "num_tokens": 7370718317.0, "step": 9635 }, { "epoch": 3.5298158834844737, "grad_norm": 0.13693642710971698, "learning_rate": 1.1556866398273814e-05, "loss": 0.4041, "num_tokens": 7371519689.0, "step": 9636 }, { "epoch": 3.5301822845103965, "grad_norm": 0.1297764921916108, "learning_rate": 1.1553386642224806e-05, "loss": 0.425, "num_tokens": 7372367064.0, "step": 9637 }, { "epoch": 3.5305486855363197, "grad_norm": 0.14914283859256686, "learning_rate": 1.154990747478544e-05, "loss": 0.4227, "num_tokens": 7373077868.0, "step": 9638 }, { "epoch": 3.5309150865622425, "grad_norm": 0.14686178378251516, "learning_rate": 1.1546428896151753e-05, "loss": 0.4273, "num_tokens": 7373801773.0, "step": 9639 }, { "epoch": 3.5312814875881653, "grad_norm": 0.14748551065753768, "learning_rate": 1.1542950906519738e-05, "loss": 0.4112, "num_tokens": 7374613663.0, "step": 9640 }, { "epoch": 3.531647888614088, "grad_norm": 0.13578684978316416, "learning_rate": 1.1539473506085364e-05, "loss": 0.4228, "num_tokens": 7375472527.0, "step": 9641 }, { "epoch": 3.532014289640011, "grad_norm": 0.14032832133386083, "learning_rate": 1.1535996695044562e-05, "loss": 0.4264, "num_tokens": 7376243987.0, "step": 9642 }, { "epoch": 3.532380690665934, "grad_norm": 0.1367522888451849, "learning_rate": 1.1532520473593234e-05, "loss": 0.402, "num_tokens": 7377099471.0, "step": 9643 }, { "epoch": 3.532747091691857, "grad_norm": 0.12867218312048068, "learning_rate": 1.1529044841927244e-05, "loss": 0.4099, "num_tokens": 7377810062.0, "step": 9644 }, { "epoch": 3.5331134927177796, "grad_norm": 0.1497362396941554, "learning_rate": 1.1525569800242424e-05, "loss": 0.4452, "num_tokens": 7378530594.0, "step": 9645 }, { "epoch": 3.5334798937437024, "grad_norm": 0.1545352368813206, "learning_rate": 1.1522095348734571e-05, "loss": 0.3999, "num_tokens": 7379300447.0, "step": 9646 }, { "epoch": 3.533846294769625, "grad_norm": 0.13887712037482008, "learning_rate": 1.1518621487599462e-05, "loss": 0.3737, "num_tokens": 7380055016.0, "step": 9647 }, { "epoch": 3.5342126957955484, "grad_norm": 0.1450755782367128, "learning_rate": 1.1515148217032828e-05, "loss": 0.4379, "num_tokens": 7380801582.0, "step": 9648 }, { "epoch": 3.534579096821471, "grad_norm": 0.1437383866154573, "learning_rate": 1.1511675537230354e-05, "loss": 0.3846, "num_tokens": 7381552846.0, "step": 9649 }, { "epoch": 3.534945497847394, "grad_norm": 0.13417530842724157, "learning_rate": 1.150820344838772e-05, "loss": 0.4111, "num_tokens": 7382213292.0, "step": 9650 }, { "epoch": 3.5353118988733168, "grad_norm": 0.1528253593683538, "learning_rate": 1.1504731950700564e-05, "loss": 0.4046, "num_tokens": 7382855513.0, "step": 9651 }, { "epoch": 3.5356782998992395, "grad_norm": 0.15966128366195698, "learning_rate": 1.1501261044364465e-05, "loss": 0.4026, "num_tokens": 7383651460.0, "step": 9652 }, { "epoch": 3.5360447009251628, "grad_norm": 0.13517773021191373, "learning_rate": 1.1497790729575012e-05, "loss": 0.3895, "num_tokens": 7384469149.0, "step": 9653 }, { "epoch": 3.5364111019510855, "grad_norm": 0.1412707444100156, "learning_rate": 1.1494321006527724e-05, "loss": 0.4104, "num_tokens": 7385273161.0, "step": 9654 }, { "epoch": 3.5367775029770083, "grad_norm": 0.1377464213185553, "learning_rate": 1.149085187541811e-05, "loss": 0.4068, "num_tokens": 7385974689.0, "step": 9655 }, { "epoch": 3.537143904002931, "grad_norm": 0.15081804803282786, "learning_rate": 1.1487383336441629e-05, "loss": 0.387, "num_tokens": 7386665775.0, "step": 9656 }, { "epoch": 3.537510305028854, "grad_norm": 0.14782174106314508, "learning_rate": 1.1483915389793717e-05, "loss": 0.4185, "num_tokens": 7387384881.0, "step": 9657 }, { "epoch": 3.537876706054777, "grad_norm": 0.1391151256818803, "learning_rate": 1.148044803566978e-05, "loss": 0.4173, "num_tokens": 7388040385.0, "step": 9658 }, { "epoch": 3.5382431070807, "grad_norm": 0.1599733937692208, "learning_rate": 1.1476981274265177e-05, "loss": 0.401, "num_tokens": 7388710561.0, "step": 9659 }, { "epoch": 3.5386095081066227, "grad_norm": 0.1505809185447164, "learning_rate": 1.1473515105775235e-05, "loss": 0.395, "num_tokens": 7389420322.0, "step": 9660 }, { "epoch": 3.5389759091325454, "grad_norm": 0.14850099104240033, "learning_rate": 1.1470049530395278e-05, "loss": 0.4185, "num_tokens": 7390267207.0, "step": 9661 }, { "epoch": 3.5393423101584682, "grad_norm": 0.14253933287034118, "learning_rate": 1.1466584548320547e-05, "loss": 0.3884, "num_tokens": 7391047529.0, "step": 9662 }, { "epoch": 3.5397087111843915, "grad_norm": 0.13393918145324366, "learning_rate": 1.1463120159746279e-05, "loss": 0.4122, "num_tokens": 7391794921.0, "step": 9663 }, { "epoch": 3.5400751122103142, "grad_norm": 0.13639582317402252, "learning_rate": 1.1459656364867681e-05, "loss": 0.4053, "num_tokens": 7392517666.0, "step": 9664 }, { "epoch": 3.540441513236237, "grad_norm": 0.1346833354510837, "learning_rate": 1.1456193163879923e-05, "loss": 0.396, "num_tokens": 7393269571.0, "step": 9665 }, { "epoch": 3.54080791426216, "grad_norm": 0.14353388035717643, "learning_rate": 1.1452730556978127e-05, "loss": 0.4191, "num_tokens": 7394127776.0, "step": 9666 }, { "epoch": 3.5411743152880826, "grad_norm": 0.14685200371984328, "learning_rate": 1.1449268544357395e-05, "loss": 0.4019, "num_tokens": 7394826497.0, "step": 9667 }, { "epoch": 3.541540716314006, "grad_norm": 0.15152325222130184, "learning_rate": 1.1445807126212795e-05, "loss": 0.3991, "num_tokens": 7395648027.0, "step": 9668 }, { "epoch": 3.5419071173399286, "grad_norm": 0.1286695712029774, "learning_rate": 1.1442346302739358e-05, "loss": 0.419, "num_tokens": 7396362330.0, "step": 9669 }, { "epoch": 3.5422735183658514, "grad_norm": 0.14784585587549826, "learning_rate": 1.1438886074132078e-05, "loss": 0.3792, "num_tokens": 7397030044.0, "step": 9670 }, { "epoch": 3.542639919391774, "grad_norm": 0.14994364513478695, "learning_rate": 1.1435426440585929e-05, "loss": 0.4326, "num_tokens": 7397796398.0, "step": 9671 }, { "epoch": 3.543006320417697, "grad_norm": 0.1389573591152376, "learning_rate": 1.1431967402295831e-05, "loss": 0.397, "num_tokens": 7398572811.0, "step": 9672 }, { "epoch": 3.54337272144362, "grad_norm": 0.14876733995360314, "learning_rate": 1.1428508959456694e-05, "loss": 0.4036, "num_tokens": 7399398240.0, "step": 9673 }, { "epoch": 3.543739122469543, "grad_norm": 0.1416258621150854, "learning_rate": 1.1425051112263366e-05, "loss": 0.397, "num_tokens": 7400076096.0, "step": 9674 }, { "epoch": 3.5441055234954657, "grad_norm": 0.14359994572502546, "learning_rate": 1.1421593860910706e-05, "loss": 0.3834, "num_tokens": 7400759916.0, "step": 9675 }, { "epoch": 3.544471924521389, "grad_norm": 0.14304269135666758, "learning_rate": 1.1418137205593476e-05, "loss": 0.3926, "num_tokens": 7401581924.0, "step": 9676 }, { "epoch": 3.5448383255473113, "grad_norm": 0.1406385306090974, "learning_rate": 1.1414681146506465e-05, "loss": 0.4342, "num_tokens": 7402454988.0, "step": 9677 }, { "epoch": 3.5452047265732345, "grad_norm": 0.12899579603561714, "learning_rate": 1.1411225683844403e-05, "loss": 0.4066, "num_tokens": 7403219014.0, "step": 9678 }, { "epoch": 3.5455711275991573, "grad_norm": 0.14600885787003612, "learning_rate": 1.1407770817801966e-05, "loss": 0.4089, "num_tokens": 7403972992.0, "step": 9679 }, { "epoch": 3.54593752862508, "grad_norm": 0.1601350445961423, "learning_rate": 1.1404316548573835e-05, "loss": 0.3872, "num_tokens": 7404702200.0, "step": 9680 }, { "epoch": 3.5463039296510033, "grad_norm": 0.14004603055023182, "learning_rate": 1.1400862876354635e-05, "loss": 0.4269, "num_tokens": 7405427137.0, "step": 9681 }, { "epoch": 3.546670330676926, "grad_norm": 0.14015791501417837, "learning_rate": 1.139740980133896e-05, "loss": 0.4135, "num_tokens": 7406158481.0, "step": 9682 }, { "epoch": 3.547036731702849, "grad_norm": 0.1450544182179326, "learning_rate": 1.1393957323721371e-05, "loss": 0.3972, "num_tokens": 7406867911.0, "step": 9683 }, { "epoch": 3.5474031327287716, "grad_norm": 0.14065779932358904, "learning_rate": 1.1390505443696403e-05, "loss": 0.3739, "num_tokens": 7407540617.0, "step": 9684 }, { "epoch": 3.5477695337546944, "grad_norm": 0.13321360050363068, "learning_rate": 1.138705416145854e-05, "loss": 0.384, "num_tokens": 7408428499.0, "step": 9685 }, { "epoch": 3.5481359347806176, "grad_norm": 0.1259141618456871, "learning_rate": 1.1383603477202249e-05, "loss": 0.3981, "num_tokens": 7409271668.0, "step": 9686 }, { "epoch": 3.5485023358065404, "grad_norm": 0.1333519425627701, "learning_rate": 1.138015339112195e-05, "loss": 0.4109, "num_tokens": 7409989882.0, "step": 9687 }, { "epoch": 3.548868736832463, "grad_norm": 0.14902350797961283, "learning_rate": 1.1376703903412057e-05, "loss": 0.4252, "num_tokens": 7410698881.0, "step": 9688 }, { "epoch": 3.549235137858386, "grad_norm": 0.14051264674285577, "learning_rate": 1.137325501426691e-05, "loss": 0.4137, "num_tokens": 7411470563.0, "step": 9689 }, { "epoch": 3.5496015388843087, "grad_norm": 0.14192872481343788, "learning_rate": 1.1369806723880834e-05, "loss": 0.379, "num_tokens": 7412213958.0, "step": 9690 }, { "epoch": 3.549967939910232, "grad_norm": 0.1368383164734539, "learning_rate": 1.1366359032448133e-05, "loss": 0.4067, "num_tokens": 7412938742.0, "step": 9691 }, { "epoch": 3.5503343409361547, "grad_norm": 0.1397044054275098, "learning_rate": 1.1362911940163069e-05, "loss": 0.397, "num_tokens": 7413617242.0, "step": 9692 }, { "epoch": 3.5507007419620775, "grad_norm": 0.1465046226277778, "learning_rate": 1.1359465447219841e-05, "loss": 0.3994, "num_tokens": 7414482604.0, "step": 9693 }, { "epoch": 3.5510671429880003, "grad_norm": 0.12781265502740105, "learning_rate": 1.135601955381267e-05, "loss": 0.4069, "num_tokens": 7415268489.0, "step": 9694 }, { "epoch": 3.551433544013923, "grad_norm": 0.1399705576313159, "learning_rate": 1.1352574260135698e-05, "loss": 0.3892, "num_tokens": 7415977964.0, "step": 9695 }, { "epoch": 3.5517999450398463, "grad_norm": 0.1368098007923274, "learning_rate": 1.1349129566383055e-05, "loss": 0.4205, "num_tokens": 7416692611.0, "step": 9696 }, { "epoch": 3.552166346065769, "grad_norm": 0.14773737872534465, "learning_rate": 1.1345685472748821e-05, "loss": 0.4062, "num_tokens": 7417403288.0, "step": 9697 }, { "epoch": 3.552532747091692, "grad_norm": 0.14359478778386242, "learning_rate": 1.1342241979427062e-05, "loss": 0.4193, "num_tokens": 7418188235.0, "step": 9698 }, { "epoch": 3.5528991481176146, "grad_norm": 0.14567075289087397, "learning_rate": 1.1338799086611797e-05, "loss": 0.3854, "num_tokens": 7419008783.0, "step": 9699 }, { "epoch": 3.5532655491435374, "grad_norm": 0.14259198425594452, "learning_rate": 1.133535679449701e-05, "loss": 0.3932, "num_tokens": 7419794259.0, "step": 9700 }, { "epoch": 3.5536319501694607, "grad_norm": 0.13978156118751814, "learning_rate": 1.133191510327666e-05, "loss": 0.4076, "num_tokens": 7420447596.0, "step": 9701 }, { "epoch": 3.5539983511953834, "grad_norm": 0.15159598443555333, "learning_rate": 1.1328474013144666e-05, "loss": 0.4194, "num_tokens": 7421267689.0, "step": 9702 }, { "epoch": 3.554364752221306, "grad_norm": 0.14915317547307858, "learning_rate": 1.1325033524294908e-05, "loss": 0.3865, "num_tokens": 7422109503.0, "step": 9703 }, { "epoch": 3.554731153247229, "grad_norm": 0.14685221963154715, "learning_rate": 1.1321593636921255e-05, "loss": 0.4232, "num_tokens": 7422806063.0, "step": 9704 }, { "epoch": 3.5550975542731518, "grad_norm": 0.15149821387154327, "learning_rate": 1.1318154351217521e-05, "loss": 0.428, "num_tokens": 7423514506.0, "step": 9705 }, { "epoch": 3.555463955299075, "grad_norm": 0.1435512739530399, "learning_rate": 1.1314715667377474e-05, "loss": 0.4065, "num_tokens": 7424271348.0, "step": 9706 }, { "epoch": 3.5558303563249978, "grad_norm": 0.1432622555313435, "learning_rate": 1.1311277585594881e-05, "loss": 0.4071, "num_tokens": 7425059471.0, "step": 9707 }, { "epoch": 3.5561967573509206, "grad_norm": 0.1348402713764852, "learning_rate": 1.1307840106063463e-05, "loss": 0.4129, "num_tokens": 7425711576.0, "step": 9708 }, { "epoch": 3.5565631583768433, "grad_norm": 0.15601670877434123, "learning_rate": 1.1304403228976886e-05, "loss": 0.3921, "num_tokens": 7426403974.0, "step": 9709 }, { "epoch": 3.556929559402766, "grad_norm": 0.1510889946162152, "learning_rate": 1.1300966954528815e-05, "loss": 0.4061, "num_tokens": 7427160411.0, "step": 9710 }, { "epoch": 3.5572959604286893, "grad_norm": 0.13194963764914636, "learning_rate": 1.1297531282912856e-05, "loss": 0.4109, "num_tokens": 7427963927.0, "step": 9711 }, { "epoch": 3.557662361454612, "grad_norm": 0.13550651282954448, "learning_rate": 1.1294096214322598e-05, "loss": 0.3903, "num_tokens": 7428591114.0, "step": 9712 }, { "epoch": 3.558028762480535, "grad_norm": 0.15874533048926603, "learning_rate": 1.1290661748951582e-05, "loss": 0.4045, "num_tokens": 7429269298.0, "step": 9713 }, { "epoch": 3.5583951635064577, "grad_norm": 0.15986637684163252, "learning_rate": 1.1287227886993319e-05, "loss": 0.4384, "num_tokens": 7429857198.0, "step": 9714 }, { "epoch": 3.5587615645323805, "grad_norm": 0.15677983284698496, "learning_rate": 1.1283794628641305e-05, "loss": 0.426, "num_tokens": 7430599374.0, "step": 9715 }, { "epoch": 3.5591279655583037, "grad_norm": 0.13624149230734994, "learning_rate": 1.1280361974088963e-05, "loss": 0.4031, "num_tokens": 7431456572.0, "step": 9716 }, { "epoch": 3.5594943665842265, "grad_norm": 0.13320101810675292, "learning_rate": 1.1276929923529712e-05, "loss": 0.3946, "num_tokens": 7432350958.0, "step": 9717 }, { "epoch": 3.5598607676101492, "grad_norm": 0.14267378579729034, "learning_rate": 1.1273498477156943e-05, "loss": 0.3939, "num_tokens": 7433227200.0, "step": 9718 }, { "epoch": 3.560227168636072, "grad_norm": 0.13351502573370827, "learning_rate": 1.1270067635163983e-05, "loss": 0.3943, "num_tokens": 7434053430.0, "step": 9719 }, { "epoch": 3.560593569661995, "grad_norm": 0.1510591071882408, "learning_rate": 1.126663739774414e-05, "loss": 0.4058, "num_tokens": 7434680108.0, "step": 9720 }, { "epoch": 3.560959970687918, "grad_norm": 0.13925954364835177, "learning_rate": 1.1263207765090702e-05, "loss": 0.3975, "num_tokens": 7435438416.0, "step": 9721 }, { "epoch": 3.561326371713841, "grad_norm": 0.1587440175404787, "learning_rate": 1.1259778737396902e-05, "loss": 0.388, "num_tokens": 7436200091.0, "step": 9722 }, { "epoch": 3.5616927727397636, "grad_norm": 0.13422902096421935, "learning_rate": 1.1256350314855953e-05, "loss": 0.4131, "num_tokens": 7436857419.0, "step": 9723 }, { "epoch": 3.562059173765687, "grad_norm": 0.15017139883113903, "learning_rate": 1.1252922497661019e-05, "loss": 0.4127, "num_tokens": 7437690416.0, "step": 9724 }, { "epoch": 3.562425574791609, "grad_norm": 0.1281737976979788, "learning_rate": 1.1249495286005244e-05, "loss": 0.4025, "num_tokens": 7438456908.0, "step": 9725 }, { "epoch": 3.5627919758175324, "grad_norm": 0.13668858459342376, "learning_rate": 1.1246068680081735e-05, "loss": 0.4285, "num_tokens": 7439196591.0, "step": 9726 }, { "epoch": 3.563158376843455, "grad_norm": 0.14869531010875814, "learning_rate": 1.1242642680083558e-05, "loss": 0.4058, "num_tokens": 7440035568.0, "step": 9727 }, { "epoch": 3.563524777869378, "grad_norm": 0.13204596608718006, "learning_rate": 1.123921728620375e-05, "loss": 0.4096, "num_tokens": 7440744860.0, "step": 9728 }, { "epoch": 3.563891178895301, "grad_norm": 0.14637715673350155, "learning_rate": 1.1235792498635313e-05, "loss": 0.4238, "num_tokens": 7441467582.0, "step": 9729 }, { "epoch": 3.564257579921224, "grad_norm": 0.144830240823298, "learning_rate": 1.1232368317571212e-05, "loss": 0.408, "num_tokens": 7442271868.0, "step": 9730 }, { "epoch": 3.5646239809471467, "grad_norm": 0.1296046875232202, "learning_rate": 1.1228944743204391e-05, "loss": 0.3885, "num_tokens": 7443027763.0, "step": 9731 }, { "epoch": 3.5649903819730695, "grad_norm": 0.14661579256159366, "learning_rate": 1.1225521775727752e-05, "loss": 0.3856, "num_tokens": 7443844618.0, "step": 9732 }, { "epoch": 3.5653567829989923, "grad_norm": 0.1334879432403011, "learning_rate": 1.1222099415334138e-05, "loss": 0.4226, "num_tokens": 7444623869.0, "step": 9733 }, { "epoch": 3.5657231840249155, "grad_norm": 0.1456024589624981, "learning_rate": 1.1218677662216401e-05, "loss": 0.4196, "num_tokens": 7445393365.0, "step": 9734 }, { "epoch": 3.5660895850508383, "grad_norm": 0.13489921253723203, "learning_rate": 1.1215256516567336e-05, "loss": 0.4559, "num_tokens": 7446145950.0, "step": 9735 }, { "epoch": 3.566455986076761, "grad_norm": 0.13892486018268882, "learning_rate": 1.1211835978579696e-05, "loss": 0.4146, "num_tokens": 7447029438.0, "step": 9736 }, { "epoch": 3.566822387102684, "grad_norm": 0.15089828069798905, "learning_rate": 1.1208416048446215e-05, "loss": 0.4412, "num_tokens": 7447751174.0, "step": 9737 }, { "epoch": 3.5671887881286066, "grad_norm": 0.150226408429261, "learning_rate": 1.1204996726359591e-05, "loss": 0.4038, "num_tokens": 7448424873.0, "step": 9738 }, { "epoch": 3.56755518915453, "grad_norm": 0.13369178067814919, "learning_rate": 1.120157801251248e-05, "loss": 0.3857, "num_tokens": 7449120678.0, "step": 9739 }, { "epoch": 3.5679215901804526, "grad_norm": 0.14566784525348517, "learning_rate": 1.119815990709751e-05, "loss": 0.4088, "num_tokens": 7449906563.0, "step": 9740 }, { "epoch": 3.5682879912063754, "grad_norm": 0.13177244011287795, "learning_rate": 1.119474241030727e-05, "loss": 0.3928, "num_tokens": 7450669271.0, "step": 9741 }, { "epoch": 3.568654392232298, "grad_norm": 0.1449870628171674, "learning_rate": 1.119132552233432e-05, "loss": 0.4173, "num_tokens": 7451373323.0, "step": 9742 }, { "epoch": 3.569020793258221, "grad_norm": 0.14721502887845483, "learning_rate": 1.1187909243371182e-05, "loss": 0.4168, "num_tokens": 7452170554.0, "step": 9743 }, { "epoch": 3.569387194284144, "grad_norm": 0.145806442921028, "learning_rate": 1.1184493573610336e-05, "loss": 0.4355, "num_tokens": 7452836174.0, "step": 9744 }, { "epoch": 3.569753595310067, "grad_norm": 0.15115235155803922, "learning_rate": 1.1181078513244256e-05, "loss": 0.3828, "num_tokens": 7453540643.0, "step": 9745 }, { "epoch": 3.5701199963359898, "grad_norm": 0.14328573534731717, "learning_rate": 1.117766406246535e-05, "loss": 0.4158, "num_tokens": 7454373237.0, "step": 9746 }, { "epoch": 3.5704863973619125, "grad_norm": 0.14422933278411454, "learning_rate": 1.1174250221465994e-05, "loss": 0.404, "num_tokens": 7455078915.0, "step": 9747 }, { "epoch": 3.5708527983878353, "grad_norm": 0.13490040615466367, "learning_rate": 1.1170836990438556e-05, "loss": 0.4051, "num_tokens": 7455786624.0, "step": 9748 }, { "epoch": 3.5712191994137585, "grad_norm": 0.14274897349655172, "learning_rate": 1.1167424369575349e-05, "loss": 0.3974, "num_tokens": 7456522818.0, "step": 9749 }, { "epoch": 3.5715856004396813, "grad_norm": 0.13355406261950312, "learning_rate": 1.116401235906865e-05, "loss": 0.394, "num_tokens": 7457370857.0, "step": 9750 }, { "epoch": 3.571952001465604, "grad_norm": 0.13929238049518677, "learning_rate": 1.1160600959110712e-05, "loss": 0.3887, "num_tokens": 7458153769.0, "step": 9751 }, { "epoch": 3.572318402491527, "grad_norm": 0.1320437375064165, "learning_rate": 1.1157190169893748e-05, "loss": 0.4316, "num_tokens": 7458885743.0, "step": 9752 }, { "epoch": 3.5726848035174497, "grad_norm": 0.14982075018181162, "learning_rate": 1.1153779991609937e-05, "loss": 0.4618, "num_tokens": 7459584575.0, "step": 9753 }, { "epoch": 3.573051204543373, "grad_norm": 0.15415148650837515, "learning_rate": 1.1150370424451422e-05, "loss": 0.4018, "num_tokens": 7460287092.0, "step": 9754 }, { "epoch": 3.5734176055692957, "grad_norm": 0.13978662784033957, "learning_rate": 1.1146961468610316e-05, "loss": 0.3803, "num_tokens": 7461094913.0, "step": 9755 }, { "epoch": 3.5737840065952184, "grad_norm": 0.13684993321316885, "learning_rate": 1.1143553124278693e-05, "loss": 0.4188, "num_tokens": 7461821851.0, "step": 9756 }, { "epoch": 3.5741504076211412, "grad_norm": 0.14032293462653733, "learning_rate": 1.1140145391648595e-05, "loss": 0.3736, "num_tokens": 7462730856.0, "step": 9757 }, { "epoch": 3.574516808647064, "grad_norm": 0.12414021612658072, "learning_rate": 1.1136738270912031e-05, "loss": 0.3873, "num_tokens": 7463678350.0, "step": 9758 }, { "epoch": 3.5748832096729872, "grad_norm": 0.12183716077760828, "learning_rate": 1.1133331762260972e-05, "loss": 0.4149, "num_tokens": 7464518695.0, "step": 9759 }, { "epoch": 3.57524961069891, "grad_norm": 0.14215384033007125, "learning_rate": 1.1129925865887348e-05, "loss": 0.3896, "num_tokens": 7465270372.0, "step": 9760 }, { "epoch": 3.575616011724833, "grad_norm": 0.1371257791342927, "learning_rate": 1.1126520581983081e-05, "loss": 0.4228, "num_tokens": 7466037039.0, "step": 9761 }, { "epoch": 3.5759824127507556, "grad_norm": 0.1378330557217701, "learning_rate": 1.1123115910740036e-05, "loss": 0.3886, "num_tokens": 7466762837.0, "step": 9762 }, { "epoch": 3.5763488137766783, "grad_norm": 0.14027438140659468, "learning_rate": 1.1119711852350025e-05, "loss": 0.3781, "num_tokens": 7467490702.0, "step": 9763 }, { "epoch": 3.5767152148026016, "grad_norm": 0.14177049924681898, "learning_rate": 1.1116308407004878e-05, "loss": 0.3998, "num_tokens": 7468290339.0, "step": 9764 }, { "epoch": 3.5770816158285244, "grad_norm": 0.12729592753387006, "learning_rate": 1.111290557489635e-05, "loss": 0.4163, "num_tokens": 7469134536.0, "step": 9765 }, { "epoch": 3.577448016854447, "grad_norm": 0.1308147799484523, "learning_rate": 1.1109503356216158e-05, "loss": 0.4208, "num_tokens": 7469953065.0, "step": 9766 }, { "epoch": 3.57781441788037, "grad_norm": 0.13556183085153592, "learning_rate": 1.110610175115602e-05, "loss": 0.3939, "num_tokens": 7470686030.0, "step": 9767 }, { "epoch": 3.5781808189062927, "grad_norm": 0.13795158040680586, "learning_rate": 1.1102700759907583e-05, "loss": 0.4121, "num_tokens": 7471436368.0, "step": 9768 }, { "epoch": 3.578547219932216, "grad_norm": 0.1386083848208285, "learning_rate": 1.1099300382662481e-05, "loss": 0.3993, "num_tokens": 7472312240.0, "step": 9769 }, { "epoch": 3.5789136209581387, "grad_norm": 0.13386864477737617, "learning_rate": 1.1095900619612304e-05, "loss": 0.4321, "num_tokens": 7473176277.0, "step": 9770 }, { "epoch": 3.5792800219840615, "grad_norm": 0.13451026809785424, "learning_rate": 1.1092501470948606e-05, "loss": 0.3925, "num_tokens": 7474020835.0, "step": 9771 }, { "epoch": 3.5796464230099847, "grad_norm": 0.1225076799578049, "learning_rate": 1.108910293686293e-05, "loss": 0.4292, "num_tokens": 7474773434.0, "step": 9772 }, { "epoch": 3.580012824035907, "grad_norm": 0.14181723426953258, "learning_rate": 1.1085705017546741e-05, "loss": 0.4066, "num_tokens": 7475478050.0, "step": 9773 }, { "epoch": 3.5803792250618303, "grad_norm": 0.13756214263548594, "learning_rate": 1.1082307713191497e-05, "loss": 0.3629, "num_tokens": 7476173381.0, "step": 9774 }, { "epoch": 3.580745626087753, "grad_norm": 0.135279364712115, "learning_rate": 1.1078911023988637e-05, "loss": 0.3924, "num_tokens": 7476954366.0, "step": 9775 }, { "epoch": 3.581112027113676, "grad_norm": 0.1290853215640624, "learning_rate": 1.1075514950129524e-05, "loss": 0.4188, "num_tokens": 7477758514.0, "step": 9776 }, { "epoch": 3.581478428139599, "grad_norm": 0.1495586830131787, "learning_rate": 1.1072119491805514e-05, "loss": 0.3964, "num_tokens": 7478569274.0, "step": 9777 }, { "epoch": 3.581844829165522, "grad_norm": 0.1333083103290281, "learning_rate": 1.1068724649207927e-05, "loss": 0.3945, "num_tokens": 7479258130.0, "step": 9778 }, { "epoch": 3.5822112301914446, "grad_norm": 0.16199483662583963, "learning_rate": 1.1065330422528043e-05, "loss": 0.4252, "num_tokens": 7480025469.0, "step": 9779 }, { "epoch": 3.5825776312173674, "grad_norm": 0.13102944360346186, "learning_rate": 1.1061936811957108e-05, "loss": 0.416, "num_tokens": 7480906675.0, "step": 9780 }, { "epoch": 3.58294403224329, "grad_norm": 0.13121843883047746, "learning_rate": 1.105854381768633e-05, "loss": 0.385, "num_tokens": 7481657766.0, "step": 9781 }, { "epoch": 3.5833104332692134, "grad_norm": 0.13779939380148837, "learning_rate": 1.1055151439906888e-05, "loss": 0.369, "num_tokens": 7482390467.0, "step": 9782 }, { "epoch": 3.583676834295136, "grad_norm": 0.13840526874363057, "learning_rate": 1.1051759678809928e-05, "loss": 0.3892, "num_tokens": 7483121145.0, "step": 9783 }, { "epoch": 3.584043235321059, "grad_norm": 0.14347934914836039, "learning_rate": 1.104836853458655e-05, "loss": 0.4095, "num_tokens": 7483959280.0, "step": 9784 }, { "epoch": 3.5844096363469817, "grad_norm": 0.13919454771759468, "learning_rate": 1.1044978007427832e-05, "loss": 0.425, "num_tokens": 7484616238.0, "step": 9785 }, { "epoch": 3.5847760373729045, "grad_norm": 0.1519231503427436, "learning_rate": 1.1041588097524807e-05, "loss": 0.4168, "num_tokens": 7485361711.0, "step": 9786 }, { "epoch": 3.5851424383988277, "grad_norm": 0.14209233764265505, "learning_rate": 1.1038198805068472e-05, "loss": 0.4215, "num_tokens": 7486211466.0, "step": 9787 }, { "epoch": 3.5855088394247505, "grad_norm": 0.12603028491885812, "learning_rate": 1.1034810130249814e-05, "loss": 0.3944, "num_tokens": 7486944169.0, "step": 9788 }, { "epoch": 3.5858752404506733, "grad_norm": 0.13596105758169055, "learning_rate": 1.103142207325976e-05, "loss": 0.413, "num_tokens": 7487698072.0, "step": 9789 }, { "epoch": 3.586241641476596, "grad_norm": 0.14572887300126885, "learning_rate": 1.1028034634289192e-05, "loss": 0.4121, "num_tokens": 7488451876.0, "step": 9790 }, { "epoch": 3.586608042502519, "grad_norm": 0.13954537934051084, "learning_rate": 1.1024647813528992e-05, "loss": 0.3942, "num_tokens": 7489206630.0, "step": 9791 }, { "epoch": 3.586974443528442, "grad_norm": 0.140514735068651, "learning_rate": 1.1021261611169991e-05, "loss": 0.4121, "num_tokens": 7489919836.0, "step": 9792 }, { "epoch": 3.587340844554365, "grad_norm": 0.14005777295087277, "learning_rate": 1.101787602740296e-05, "loss": 0.4276, "num_tokens": 7490627970.0, "step": 9793 }, { "epoch": 3.5877072455802876, "grad_norm": 0.15373069919850335, "learning_rate": 1.101449106241868e-05, "loss": 0.3856, "num_tokens": 7491498083.0, "step": 9794 }, { "epoch": 3.5880736466062104, "grad_norm": 0.13068748171389555, "learning_rate": 1.1011106716407868e-05, "loss": 0.4018, "num_tokens": 7492250434.0, "step": 9795 }, { "epoch": 3.588440047632133, "grad_norm": 0.13756919935489664, "learning_rate": 1.1007722989561216e-05, "loss": 0.4204, "num_tokens": 7492981888.0, "step": 9796 }, { "epoch": 3.5888064486580564, "grad_norm": 0.14394949809604557, "learning_rate": 1.1004339882069375e-05, "loss": 0.3921, "num_tokens": 7493686572.0, "step": 9797 }, { "epoch": 3.589172849683979, "grad_norm": 0.13888894880112104, "learning_rate": 1.1000957394122964e-05, "loss": 0.4116, "num_tokens": 7494576929.0, "step": 9798 }, { "epoch": 3.589539250709902, "grad_norm": 0.12736790903809053, "learning_rate": 1.0997575525912572e-05, "loss": 0.4016, "num_tokens": 7495235383.0, "step": 9799 }, { "epoch": 3.5899056517358248, "grad_norm": 0.1637164114827434, "learning_rate": 1.0994194277628747e-05, "loss": 0.455, "num_tokens": 7495914626.0, "step": 9800 }, { "epoch": 3.5902720527617475, "grad_norm": 0.15132815770435085, "learning_rate": 1.0990813649461996e-05, "loss": 0.449, "num_tokens": 7496533369.0, "step": 9801 }, { "epoch": 3.5906384537876708, "grad_norm": 0.15636168275192777, "learning_rate": 1.0987433641602819e-05, "loss": 0.4151, "num_tokens": 7497240106.0, "step": 9802 }, { "epoch": 3.5910048548135935, "grad_norm": 0.15085029219127624, "learning_rate": 1.0984054254241638e-05, "loss": 0.3983, "num_tokens": 7498010172.0, "step": 9803 }, { "epoch": 3.5913712558395163, "grad_norm": 0.13407344083725553, "learning_rate": 1.0980675487568873e-05, "loss": 0.424, "num_tokens": 7498804017.0, "step": 9804 }, { "epoch": 3.591737656865439, "grad_norm": 0.13787213556717445, "learning_rate": 1.0977297341774903e-05, "loss": 0.4115, "num_tokens": 7499645858.0, "step": 9805 }, { "epoch": 3.592104057891362, "grad_norm": 0.13765515992285102, "learning_rate": 1.0973919817050063e-05, "loss": 0.365, "num_tokens": 7500422427.0, "step": 9806 }, { "epoch": 3.592470458917285, "grad_norm": 0.13949009067528853, "learning_rate": 1.0970542913584662e-05, "loss": 0.4188, "num_tokens": 7501132291.0, "step": 9807 }, { "epoch": 3.592836859943208, "grad_norm": 0.14532590341358875, "learning_rate": 1.0967166631568968e-05, "loss": 0.4109, "num_tokens": 7501926356.0, "step": 9808 }, { "epoch": 3.5932032609691307, "grad_norm": 0.1440142189706848, "learning_rate": 1.0963790971193213e-05, "loss": 0.3859, "num_tokens": 7502682474.0, "step": 9809 }, { "epoch": 3.5935696619950535, "grad_norm": 0.14113785694993122, "learning_rate": 1.0960415932647606e-05, "loss": 0.4084, "num_tokens": 7503486564.0, "step": 9810 }, { "epoch": 3.5939360630209762, "grad_norm": 0.13209322953087865, "learning_rate": 1.09570415161223e-05, "loss": 0.4061, "num_tokens": 7504239214.0, "step": 9811 }, { "epoch": 3.5943024640468995, "grad_norm": 0.1379893335501351, "learning_rate": 1.095366772180743e-05, "loss": 0.4134, "num_tokens": 7505065673.0, "step": 9812 }, { "epoch": 3.5946688650728222, "grad_norm": 0.13594041487764377, "learning_rate": 1.0950294549893094e-05, "loss": 0.4153, "num_tokens": 7505770834.0, "step": 9813 }, { "epoch": 3.595035266098745, "grad_norm": 0.14969694110267273, "learning_rate": 1.094692200056935e-05, "loss": 0.3981, "num_tokens": 7506605721.0, "step": 9814 }, { "epoch": 3.595401667124668, "grad_norm": 0.13224443447201942, "learning_rate": 1.0943550074026222e-05, "loss": 0.3918, "num_tokens": 7507474503.0, "step": 9815 }, { "epoch": 3.5957680681505906, "grad_norm": 0.13270932767692353, "learning_rate": 1.0940178770453699e-05, "loss": 0.415, "num_tokens": 7508285663.0, "step": 9816 }, { "epoch": 3.596134469176514, "grad_norm": 0.146155775886993, "learning_rate": 1.0936808090041729e-05, "loss": 0.3935, "num_tokens": 7508949346.0, "step": 9817 }, { "epoch": 3.5965008702024366, "grad_norm": 0.14374297364570315, "learning_rate": 1.0933438032980246e-05, "loss": 0.3959, "num_tokens": 7509674344.0, "step": 9818 }, { "epoch": 3.5968672712283594, "grad_norm": 0.14623207405003286, "learning_rate": 1.0930068599459137e-05, "loss": 0.3887, "num_tokens": 7510430099.0, "step": 9819 }, { "epoch": 3.5972336722542826, "grad_norm": 0.1305512320279946, "learning_rate": 1.0926699789668222e-05, "loss": 0.4023, "num_tokens": 7511190827.0, "step": 9820 }, { "epoch": 3.597600073280205, "grad_norm": 0.13263598082714506, "learning_rate": 1.0923331603797344e-05, "loss": 0.3971, "num_tokens": 7511977945.0, "step": 9821 }, { "epoch": 3.597966474306128, "grad_norm": 0.13720465170890614, "learning_rate": 1.0919964042036274e-05, "loss": 0.3946, "num_tokens": 7512673011.0, "step": 9822 }, { "epoch": 3.598332875332051, "grad_norm": 0.15603244840802857, "learning_rate": 1.091659710457475e-05, "loss": 0.409, "num_tokens": 7513413112.0, "step": 9823 }, { "epoch": 3.5986992763579737, "grad_norm": 0.15264005361544636, "learning_rate": 1.0913230791602487e-05, "loss": 0.3773, "num_tokens": 7514131300.0, "step": 9824 }, { "epoch": 3.599065677383897, "grad_norm": 0.136958266206366, "learning_rate": 1.0909865103309156e-05, "loss": 0.3898, "num_tokens": 7514850894.0, "step": 9825 }, { "epoch": 3.5994320784098197, "grad_norm": 0.13745770614138283, "learning_rate": 1.0906500039884396e-05, "loss": 0.3846, "num_tokens": 7515594782.0, "step": 9826 }, { "epoch": 3.5997984794357425, "grad_norm": 0.13459298517848084, "learning_rate": 1.0903135601517806e-05, "loss": 0.4202, "num_tokens": 7516310140.0, "step": 9827 }, { "epoch": 3.6001648804616653, "grad_norm": 0.1484702923350416, "learning_rate": 1.0899771788398954e-05, "loss": 0.4192, "num_tokens": 7517130934.0, "step": 9828 }, { "epoch": 3.600531281487588, "grad_norm": 0.13670280040866345, "learning_rate": 1.0896408600717385e-05, "loss": 0.4285, "num_tokens": 7517819792.0, "step": 9829 }, { "epoch": 3.6008976825135113, "grad_norm": 0.1509227837822022, "learning_rate": 1.0893046038662583e-05, "loss": 0.4121, "num_tokens": 7518621780.0, "step": 9830 }, { "epoch": 3.601264083539434, "grad_norm": 0.1340544218723338, "learning_rate": 1.0889684102424005e-05, "loss": 0.4038, "num_tokens": 7519416127.0, "step": 9831 }, { "epoch": 3.601630484565357, "grad_norm": 0.12765052423726894, "learning_rate": 1.0886322792191099e-05, "loss": 0.3619, "num_tokens": 7520173974.0, "step": 9832 }, { "epoch": 3.6019968855912796, "grad_norm": 0.13056305898079867, "learning_rate": 1.088296210815324e-05, "loss": 0.393, "num_tokens": 7520977305.0, "step": 9833 }, { "epoch": 3.6023632866172024, "grad_norm": 0.15006138803661506, "learning_rate": 1.0879602050499778e-05, "loss": 0.4497, "num_tokens": 7521624249.0, "step": 9834 }, { "epoch": 3.6027296876431256, "grad_norm": 0.15400813718274933, "learning_rate": 1.0876242619420052e-05, "loss": 0.3949, "num_tokens": 7522445705.0, "step": 9835 }, { "epoch": 3.6030960886690484, "grad_norm": 0.137264201303475, "learning_rate": 1.0872883815103341e-05, "loss": 0.4169, "num_tokens": 7523193703.0, "step": 9836 }, { "epoch": 3.603462489694971, "grad_norm": 0.14720788963957343, "learning_rate": 1.0869525637738892e-05, "loss": 0.4611, "num_tokens": 7523864207.0, "step": 9837 }, { "epoch": 3.603828890720894, "grad_norm": 0.14643111483012092, "learning_rate": 1.0866168087515922e-05, "loss": 0.4011, "num_tokens": 7524581209.0, "step": 9838 }, { "epoch": 3.6041952917468167, "grad_norm": 0.1490007050150142, "learning_rate": 1.086281116462361e-05, "loss": 0.4131, "num_tokens": 7525458147.0, "step": 9839 }, { "epoch": 3.60456169277274, "grad_norm": 0.12361002773335725, "learning_rate": 1.0859454869251103e-05, "loss": 0.4071, "num_tokens": 7526270081.0, "step": 9840 }, { "epoch": 3.6049280937986627, "grad_norm": 0.1501742569203299, "learning_rate": 1.0856099201587504e-05, "loss": 0.4196, "num_tokens": 7526950560.0, "step": 9841 }, { "epoch": 3.6052944948245855, "grad_norm": 0.12574495240413666, "learning_rate": 1.0852744161821893e-05, "loss": 0.3972, "num_tokens": 7527778972.0, "step": 9842 }, { "epoch": 3.6056608958505083, "grad_norm": 0.14082031168987116, "learning_rate": 1.0849389750143305e-05, "loss": 0.4223, "num_tokens": 7528507324.0, "step": 9843 }, { "epoch": 3.606027296876431, "grad_norm": 0.13330287660836476, "learning_rate": 1.0846035966740734e-05, "loss": 0.4281, "num_tokens": 7529344874.0, "step": 9844 }, { "epoch": 3.6063936979023543, "grad_norm": 0.13620560058858427, "learning_rate": 1.0842682811803168e-05, "loss": 0.4134, "num_tokens": 7530206084.0, "step": 9845 }, { "epoch": 3.606760098928277, "grad_norm": 0.1333915137828688, "learning_rate": 1.083933028551953e-05, "loss": 0.4259, "num_tokens": 7530928338.0, "step": 9846 }, { "epoch": 3.6071264999542, "grad_norm": 0.14056288502612, "learning_rate": 1.0835978388078701e-05, "loss": 0.4016, "num_tokens": 7531758210.0, "step": 9847 }, { "epoch": 3.6074929009801227, "grad_norm": 0.13205904768110222, "learning_rate": 1.0832627119669563e-05, "loss": 0.4007, "num_tokens": 7532542471.0, "step": 9848 }, { "epoch": 3.6078593020060454, "grad_norm": 0.1392259709241309, "learning_rate": 1.082927648048094e-05, "loss": 0.4014, "num_tokens": 7533286304.0, "step": 9849 }, { "epoch": 3.6082257030319687, "grad_norm": 0.1325208728191317, "learning_rate": 1.0825926470701601e-05, "loss": 0.4168, "num_tokens": 7534181170.0, "step": 9850 }, { "epoch": 3.6085921040578914, "grad_norm": 0.13615598662656006, "learning_rate": 1.0822577090520325e-05, "loss": 0.4327, "num_tokens": 7534950732.0, "step": 9851 }, { "epoch": 3.608958505083814, "grad_norm": 0.13220626031493615, "learning_rate": 1.0819228340125822e-05, "loss": 0.4105, "num_tokens": 7535783759.0, "step": 9852 }, { "epoch": 3.609324906109737, "grad_norm": 0.13433529613693548, "learning_rate": 1.0815880219706777e-05, "loss": 0.4054, "num_tokens": 7536681234.0, "step": 9853 }, { "epoch": 3.6096913071356598, "grad_norm": 0.13080455158005933, "learning_rate": 1.0812532729451836e-05, "loss": 0.4224, "num_tokens": 7537477042.0, "step": 9854 }, { "epoch": 3.610057708161583, "grad_norm": 0.13729227627891893, "learning_rate": 1.0809185869549613e-05, "loss": 0.4302, "num_tokens": 7538200732.0, "step": 9855 }, { "epoch": 3.610424109187506, "grad_norm": 0.148103210752074, "learning_rate": 1.0805839640188685e-05, "loss": 0.382, "num_tokens": 7538990263.0, "step": 9856 }, { "epoch": 3.6107905102134286, "grad_norm": 0.13385644074807684, "learning_rate": 1.0802494041557594e-05, "loss": 0.4284, "num_tokens": 7539752140.0, "step": 9857 }, { "epoch": 3.6111569112393513, "grad_norm": 0.13324885556452637, "learning_rate": 1.0799149073844841e-05, "loss": 0.3846, "num_tokens": 7540541219.0, "step": 9858 }, { "epoch": 3.611523312265274, "grad_norm": 0.13770408977491294, "learning_rate": 1.0795804737238916e-05, "loss": 0.4148, "num_tokens": 7541317091.0, "step": 9859 }, { "epoch": 3.6118897132911973, "grad_norm": 0.14719510440487285, "learning_rate": 1.0792461031928231e-05, "loss": 0.4173, "num_tokens": 7541980376.0, "step": 9860 }, { "epoch": 3.61225611431712, "grad_norm": 0.15342591249075546, "learning_rate": 1.0789117958101189e-05, "loss": 0.4135, "num_tokens": 7542672486.0, "step": 9861 }, { "epoch": 3.612622515343043, "grad_norm": 0.1428518300258249, "learning_rate": 1.0785775515946168e-05, "loss": 0.3877, "num_tokens": 7543372009.0, "step": 9862 }, { "epoch": 3.6129889163689657, "grad_norm": 0.1355206153372365, "learning_rate": 1.0782433705651485e-05, "loss": 0.4032, "num_tokens": 7544152309.0, "step": 9863 }, { "epoch": 3.6133553173948885, "grad_norm": 0.1394494010853701, "learning_rate": 1.0779092527405437e-05, "loss": 0.3826, "num_tokens": 7545004109.0, "step": 9864 }, { "epoch": 3.6137217184208117, "grad_norm": 0.13548064481914585, "learning_rate": 1.0775751981396281e-05, "loss": 0.4003, "num_tokens": 7545739438.0, "step": 9865 }, { "epoch": 3.6140881194467345, "grad_norm": 0.14043440894842005, "learning_rate": 1.0772412067812236e-05, "loss": 0.4259, "num_tokens": 7546380850.0, "step": 9866 }, { "epoch": 3.6144545204726573, "grad_norm": 0.14775449108730226, "learning_rate": 1.076907278684149e-05, "loss": 0.4032, "num_tokens": 7547261438.0, "step": 9867 }, { "epoch": 3.61482092149858, "grad_norm": 0.12959992341223467, "learning_rate": 1.0765734138672194e-05, "loss": 0.3885, "num_tokens": 7548045134.0, "step": 9868 }, { "epoch": 3.615187322524503, "grad_norm": 0.1424171058118352, "learning_rate": 1.076239612349246e-05, "loss": 0.4327, "num_tokens": 7548765006.0, "step": 9869 }, { "epoch": 3.615553723550426, "grad_norm": 0.15351443202946885, "learning_rate": 1.0759058741490365e-05, "loss": 0.3911, "num_tokens": 7549581320.0, "step": 9870 }, { "epoch": 3.615920124576349, "grad_norm": 0.13141669227496258, "learning_rate": 1.0755721992853955e-05, "loss": 0.4195, "num_tokens": 7550372568.0, "step": 9871 }, { "epoch": 3.6162865256022716, "grad_norm": 0.1511118076590076, "learning_rate": 1.0752385877771246e-05, "loss": 0.4204, "num_tokens": 7551081037.0, "step": 9872 }, { "epoch": 3.616652926628195, "grad_norm": 0.1362359644705067, "learning_rate": 1.0749050396430198e-05, "loss": 0.4157, "num_tokens": 7551909185.0, "step": 9873 }, { "epoch": 3.617019327654117, "grad_norm": 0.14131408724428748, "learning_rate": 1.0745715549018746e-05, "loss": 0.4019, "num_tokens": 7552673580.0, "step": 9874 }, { "epoch": 3.6173857286800404, "grad_norm": 0.1395083572692029, "learning_rate": 1.0742381335724799e-05, "loss": 0.4159, "num_tokens": 7553379715.0, "step": 9875 }, { "epoch": 3.617752129705963, "grad_norm": 0.13886412763171185, "learning_rate": 1.0739047756736225e-05, "loss": 0.4166, "num_tokens": 7554212334.0, "step": 9876 }, { "epoch": 3.618118530731886, "grad_norm": 0.1390571864069615, "learning_rate": 1.0735714812240835e-05, "loss": 0.404, "num_tokens": 7554953050.0, "step": 9877 }, { "epoch": 3.618484931757809, "grad_norm": 0.13422311412512333, "learning_rate": 1.0732382502426439e-05, "loss": 0.418, "num_tokens": 7555740206.0, "step": 9878 }, { "epoch": 3.618851332783732, "grad_norm": 0.13727093494446566, "learning_rate": 1.0729050827480788e-05, "loss": 0.4106, "num_tokens": 7556552357.0, "step": 9879 }, { "epoch": 3.6192177338096547, "grad_norm": 0.14387938363978547, "learning_rate": 1.0725719787591604e-05, "loss": 0.4113, "num_tokens": 7557256727.0, "step": 9880 }, { "epoch": 3.6195841348355775, "grad_norm": 0.1424704265654143, "learning_rate": 1.0722389382946576e-05, "loss": 0.3747, "num_tokens": 7558071589.0, "step": 9881 }, { "epoch": 3.6199505358615003, "grad_norm": 0.13851630071330792, "learning_rate": 1.071905961373335e-05, "loss": 0.418, "num_tokens": 7558811508.0, "step": 9882 }, { "epoch": 3.6203169368874235, "grad_norm": 0.13001360712448182, "learning_rate": 1.071573048013954e-05, "loss": 0.4096, "num_tokens": 7559613950.0, "step": 9883 }, { "epoch": 3.6206833379133463, "grad_norm": 0.13677990512617624, "learning_rate": 1.071240198235273e-05, "loss": 0.4243, "num_tokens": 7560438691.0, "step": 9884 }, { "epoch": 3.621049738939269, "grad_norm": 0.13395850335914328, "learning_rate": 1.0709074120560454e-05, "loss": 0.4455, "num_tokens": 7561125922.0, "step": 9885 }, { "epoch": 3.621416139965192, "grad_norm": 0.14876941015391332, "learning_rate": 1.0705746894950235e-05, "loss": 0.4181, "num_tokens": 7561899793.0, "step": 9886 }, { "epoch": 3.6217825409911146, "grad_norm": 0.1391053854958443, "learning_rate": 1.0702420305709524e-05, "loss": 0.4353, "num_tokens": 7562635428.0, "step": 9887 }, { "epoch": 3.622148942017038, "grad_norm": 0.14512862882044414, "learning_rate": 1.0699094353025762e-05, "loss": 0.4088, "num_tokens": 7563244903.0, "step": 9888 }, { "epoch": 3.6225153430429606, "grad_norm": 0.15218283987323666, "learning_rate": 1.0695769037086364e-05, "loss": 0.3878, "num_tokens": 7564047270.0, "step": 9889 }, { "epoch": 3.6228817440688834, "grad_norm": 0.12516633972861002, "learning_rate": 1.0692444358078667e-05, "loss": 0.3621, "num_tokens": 7564818938.0, "step": 9890 }, { "epoch": 3.623248145094806, "grad_norm": 0.1347253376510909, "learning_rate": 1.0689120316190019e-05, "loss": 0.4025, "num_tokens": 7565552295.0, "step": 9891 }, { "epoch": 3.623614546120729, "grad_norm": 0.13266762249998146, "learning_rate": 1.0685796911607703e-05, "loss": 0.4206, "num_tokens": 7566343337.0, "step": 9892 }, { "epoch": 3.623980947146652, "grad_norm": 0.14516678322653134, "learning_rate": 1.0682474144518978e-05, "loss": 0.3988, "num_tokens": 7567084156.0, "step": 9893 }, { "epoch": 3.624347348172575, "grad_norm": 0.14332372046170327, "learning_rate": 1.0679152015111066e-05, "loss": 0.3939, "num_tokens": 7567843003.0, "step": 9894 }, { "epoch": 3.6247137491984978, "grad_norm": 0.13916668571799376, "learning_rate": 1.0675830523571142e-05, "loss": 0.38, "num_tokens": 7568647586.0, "step": 9895 }, { "epoch": 3.6250801502244205, "grad_norm": 0.12604305857414091, "learning_rate": 1.067250967008636e-05, "loss": 0.417, "num_tokens": 7569485523.0, "step": 9896 }, { "epoch": 3.6254465512503433, "grad_norm": 0.14950913787999995, "learning_rate": 1.0669189454843836e-05, "loss": 0.4281, "num_tokens": 7570207232.0, "step": 9897 }, { "epoch": 3.6258129522762665, "grad_norm": 0.14636946583318128, "learning_rate": 1.0665869878030637e-05, "loss": 0.398, "num_tokens": 7570929905.0, "step": 9898 }, { "epoch": 3.6261793533021893, "grad_norm": 0.13326474947823397, "learning_rate": 1.0662550939833806e-05, "loss": 0.4151, "num_tokens": 7571689753.0, "step": 9899 }, { "epoch": 3.626545754328112, "grad_norm": 0.14033544983506321, "learning_rate": 1.0659232640440353e-05, "loss": 0.4055, "num_tokens": 7572505491.0, "step": 9900 }, { "epoch": 3.626912155354035, "grad_norm": 0.14188400196552634, "learning_rate": 1.0655914980037233e-05, "loss": 0.3925, "num_tokens": 7573273020.0, "step": 9901 }, { "epoch": 3.6272785563799577, "grad_norm": 0.12473966808974356, "learning_rate": 1.065259795881139e-05, "loss": 0.394, "num_tokens": 7574089510.0, "step": 9902 }, { "epoch": 3.627644957405881, "grad_norm": 0.13007398545522503, "learning_rate": 1.0649281576949725e-05, "loss": 0.3897, "num_tokens": 7574895128.0, "step": 9903 }, { "epoch": 3.6280113584318037, "grad_norm": 0.14166400741679475, "learning_rate": 1.0645965834639078e-05, "loss": 0.4191, "num_tokens": 7575708392.0, "step": 9904 }, { "epoch": 3.6283777594577264, "grad_norm": 0.13463343722941046, "learning_rate": 1.0642650732066289e-05, "loss": 0.3903, "num_tokens": 7576543712.0, "step": 9905 }, { "epoch": 3.6287441604836492, "grad_norm": 0.13746710600541143, "learning_rate": 1.0639336269418148e-05, "loss": 0.3849, "num_tokens": 7577233605.0, "step": 9906 }, { "epoch": 3.629110561509572, "grad_norm": 0.151804326994981, "learning_rate": 1.0636022446881391e-05, "loss": 0.3947, "num_tokens": 7578021561.0, "step": 9907 }, { "epoch": 3.6294769625354952, "grad_norm": 0.14024878364061577, "learning_rate": 1.0632709264642744e-05, "loss": 0.4102, "num_tokens": 7578691512.0, "step": 9908 }, { "epoch": 3.629843363561418, "grad_norm": 0.1481875373662978, "learning_rate": 1.0629396722888887e-05, "loss": 0.4112, "num_tokens": 7579414662.0, "step": 9909 }, { "epoch": 3.630209764587341, "grad_norm": 0.14396255951424367, "learning_rate": 1.0626084821806466e-05, "loss": 0.4228, "num_tokens": 7580112681.0, "step": 9910 }, { "epoch": 3.6305761656132636, "grad_norm": 0.14905093492131283, "learning_rate": 1.0622773561582082e-05, "loss": 0.4072, "num_tokens": 7580889515.0, "step": 9911 }, { "epoch": 3.6309425666391864, "grad_norm": 0.13693271190259224, "learning_rate": 1.0619462942402313e-05, "loss": 0.4243, "num_tokens": 7581675174.0, "step": 9912 }, { "epoch": 3.6313089676651096, "grad_norm": 0.15798801973807453, "learning_rate": 1.0616152964453685e-05, "loss": 0.3931, "num_tokens": 7582498631.0, "step": 9913 }, { "epoch": 3.6316753686910324, "grad_norm": 0.13762623195375706, "learning_rate": 1.0612843627922707e-05, "loss": 0.4204, "num_tokens": 7583271530.0, "step": 9914 }, { "epoch": 3.632041769716955, "grad_norm": 0.13341472485889144, "learning_rate": 1.0609534932995831e-05, "loss": 0.3875, "num_tokens": 7584053630.0, "step": 9915 }, { "epoch": 3.632408170742878, "grad_norm": 0.14326931069248192, "learning_rate": 1.0606226879859502e-05, "loss": 0.4301, "num_tokens": 7584705762.0, "step": 9916 }, { "epoch": 3.6327745717688007, "grad_norm": 0.15412189879528415, "learning_rate": 1.0602919468700094e-05, "loss": 0.4103, "num_tokens": 7585438604.0, "step": 9917 }, { "epoch": 3.633140972794724, "grad_norm": 0.1416263554027217, "learning_rate": 1.0599612699703962e-05, "loss": 0.3991, "num_tokens": 7586262093.0, "step": 9918 }, { "epoch": 3.6335073738206467, "grad_norm": 0.1259662019354412, "learning_rate": 1.0596306573057435e-05, "loss": 0.396, "num_tokens": 7587110042.0, "step": 9919 }, { "epoch": 3.6338737748465695, "grad_norm": 0.13331554676787516, "learning_rate": 1.059300108894679e-05, "loss": 0.3804, "num_tokens": 7587874349.0, "step": 9920 }, { "epoch": 3.6342401758724927, "grad_norm": 0.13375862936608005, "learning_rate": 1.0589696247558274e-05, "loss": 0.4117, "num_tokens": 7588630607.0, "step": 9921 }, { "epoch": 3.634606576898415, "grad_norm": 0.14492918893227477, "learning_rate": 1.0586392049078096e-05, "loss": 0.4148, "num_tokens": 7589406096.0, "step": 9922 }, { "epoch": 3.6349729779243383, "grad_norm": 0.12867782283889012, "learning_rate": 1.0583088493692426e-05, "loss": 0.3722, "num_tokens": 7590332847.0, "step": 9923 }, { "epoch": 3.635339378950261, "grad_norm": 0.1183484473259966, "learning_rate": 1.0579785581587408e-05, "loss": 0.4069, "num_tokens": 7591161595.0, "step": 9924 }, { "epoch": 3.635705779976184, "grad_norm": 0.13295381336051854, "learning_rate": 1.0576483312949138e-05, "loss": 0.3909, "num_tokens": 7591966752.0, "step": 9925 }, { "epoch": 3.636072181002107, "grad_norm": 0.14411923919986563, "learning_rate": 1.057318168796368e-05, "loss": 0.423, "num_tokens": 7592631547.0, "step": 9926 }, { "epoch": 3.63643858202803, "grad_norm": 0.14455575513307187, "learning_rate": 1.0569880706817069e-05, "loss": 0.4082, "num_tokens": 7593435751.0, "step": 9927 }, { "epoch": 3.6368049830539526, "grad_norm": 0.13155982071068997, "learning_rate": 1.0566580369695284e-05, "loss": 0.3666, "num_tokens": 7594154097.0, "step": 9928 }, { "epoch": 3.6371713840798754, "grad_norm": 0.14775695448463225, "learning_rate": 1.0563280676784302e-05, "loss": 0.4535, "num_tokens": 7594782596.0, "step": 9929 }, { "epoch": 3.637537785105798, "grad_norm": 0.13706090729650763, "learning_rate": 1.0559981628270026e-05, "loss": 0.4013, "num_tokens": 7595583216.0, "step": 9930 }, { "epoch": 3.6379041861317214, "grad_norm": 0.1509484731591869, "learning_rate": 1.0556683224338338e-05, "loss": 0.4448, "num_tokens": 7596331179.0, "step": 9931 }, { "epoch": 3.638270587157644, "grad_norm": 0.13900442158190865, "learning_rate": 1.0553385465175096e-05, "loss": 0.405, "num_tokens": 7597130694.0, "step": 9932 }, { "epoch": 3.638636988183567, "grad_norm": 0.13868988383216757, "learning_rate": 1.0550088350966114e-05, "loss": 0.4085, "num_tokens": 7597891826.0, "step": 9933 }, { "epoch": 3.6390033892094897, "grad_norm": 0.13355531020737085, "learning_rate": 1.0546791881897144e-05, "loss": 0.399, "num_tokens": 7598628897.0, "step": 9934 }, { "epoch": 3.6393697902354125, "grad_norm": 0.1416290197557866, "learning_rate": 1.0543496058153947e-05, "loss": 0.3936, "num_tokens": 7599419148.0, "step": 9935 }, { "epoch": 3.6397361912613357, "grad_norm": 0.13187299236550445, "learning_rate": 1.0540200879922216e-05, "loss": 0.4345, "num_tokens": 7600208284.0, "step": 9936 }, { "epoch": 3.6401025922872585, "grad_norm": 0.1364840528348996, "learning_rate": 1.0536906347387616e-05, "loss": 0.4265, "num_tokens": 7601092324.0, "step": 9937 }, { "epoch": 3.6404689933131813, "grad_norm": 0.13254231855677925, "learning_rate": 1.0533612460735775e-05, "loss": 0.3998, "num_tokens": 7601818334.0, "step": 9938 }, { "epoch": 3.640835394339104, "grad_norm": 0.14090176195509327, "learning_rate": 1.053031922015229e-05, "loss": 0.407, "num_tokens": 7602464024.0, "step": 9939 }, { "epoch": 3.641201795365027, "grad_norm": 0.15320437625439534, "learning_rate": 1.052702662582271e-05, "loss": 0.4281, "num_tokens": 7603271187.0, "step": 9940 }, { "epoch": 3.64156819639095, "grad_norm": 0.13472242103191662, "learning_rate": 1.052373467793256e-05, "loss": 0.3824, "num_tokens": 7603994952.0, "step": 9941 }, { "epoch": 3.641934597416873, "grad_norm": 0.13464471509039536, "learning_rate": 1.0520443376667314e-05, "loss": 0.4092, "num_tokens": 7604945271.0, "step": 9942 }, { "epoch": 3.6423009984427956, "grad_norm": 0.12983187228599877, "learning_rate": 1.0517152722212443e-05, "loss": 0.4117, "num_tokens": 7605609310.0, "step": 9943 }, { "epoch": 3.6426673994687184, "grad_norm": 0.15322151046709467, "learning_rate": 1.0513862714753334e-05, "loss": 0.4415, "num_tokens": 7606416195.0, "step": 9944 }, { "epoch": 3.643033800494641, "grad_norm": 0.13497714064291153, "learning_rate": 1.0510573354475362e-05, "loss": 0.4233, "num_tokens": 7607262203.0, "step": 9945 }, { "epoch": 3.6434002015205644, "grad_norm": 0.13848126862237664, "learning_rate": 1.0507284641563879e-05, "loss": 0.4015, "num_tokens": 7608075855.0, "step": 9946 }, { "epoch": 3.643766602546487, "grad_norm": 0.1387854412100903, "learning_rate": 1.0503996576204171e-05, "loss": 0.4069, "num_tokens": 7608827621.0, "step": 9947 }, { "epoch": 3.64413300357241, "grad_norm": 0.14384070463277923, "learning_rate": 1.0500709158581514e-05, "loss": 0.3864, "num_tokens": 7609614538.0, "step": 9948 }, { "epoch": 3.6444994045983328, "grad_norm": 0.1339558916410282, "learning_rate": 1.0497422388881131e-05, "loss": 0.3759, "num_tokens": 7610346934.0, "step": 9949 }, { "epoch": 3.6448658056242556, "grad_norm": 0.14925032515534098, "learning_rate": 1.0494136267288213e-05, "loss": 0.4105, "num_tokens": 7611079341.0, "step": 9950 }, { "epoch": 3.6452322066501788, "grad_norm": 0.13500196127931208, "learning_rate": 1.0490850793987915e-05, "loss": 0.4038, "num_tokens": 7611887768.0, "step": 9951 }, { "epoch": 3.6455986076761016, "grad_norm": 0.13379804212572655, "learning_rate": 1.048756596916536e-05, "loss": 0.3811, "num_tokens": 7612675067.0, "step": 9952 }, { "epoch": 3.6459650087020243, "grad_norm": 0.13843904846383331, "learning_rate": 1.0484281793005624e-05, "loss": 0.4377, "num_tokens": 7613453026.0, "step": 9953 }, { "epoch": 3.646331409727947, "grad_norm": 0.13216102485607656, "learning_rate": 1.0480998265693754e-05, "loss": 0.4005, "num_tokens": 7614230614.0, "step": 9954 }, { "epoch": 3.64669781075387, "grad_norm": 0.1417598542236096, "learning_rate": 1.047771538741476e-05, "loss": 0.4085, "num_tokens": 7614940878.0, "step": 9955 }, { "epoch": 3.647064211779793, "grad_norm": 0.1522479655345181, "learning_rate": 1.0474433158353614e-05, "loss": 0.4122, "num_tokens": 7615808391.0, "step": 9956 }, { "epoch": 3.647430612805716, "grad_norm": 0.1284219739553891, "learning_rate": 1.047115157869525e-05, "loss": 0.3809, "num_tokens": 7616532312.0, "step": 9957 }, { "epoch": 3.6477970138316387, "grad_norm": 0.14268451999260254, "learning_rate": 1.0467870648624564e-05, "loss": 0.4061, "num_tokens": 7617284333.0, "step": 9958 }, { "epoch": 3.6481634148575615, "grad_norm": 0.1369861159618987, "learning_rate": 1.0464590368326433e-05, "loss": 0.4556, "num_tokens": 7618093584.0, "step": 9959 }, { "epoch": 3.6485298158834842, "grad_norm": 0.13848073412864412, "learning_rate": 1.0461310737985673e-05, "loss": 0.424, "num_tokens": 7618933876.0, "step": 9960 }, { "epoch": 3.6488962169094075, "grad_norm": 0.1388291999549002, "learning_rate": 1.0458031757787066e-05, "loss": 0.3797, "num_tokens": 7619598412.0, "step": 9961 }, { "epoch": 3.6492626179353302, "grad_norm": 0.14358567508296058, "learning_rate": 1.0454753427915377e-05, "loss": 0.3939, "num_tokens": 7620438581.0, "step": 9962 }, { "epoch": 3.649629018961253, "grad_norm": 0.1387255910420058, "learning_rate": 1.045147574855532e-05, "loss": 0.404, "num_tokens": 7621193812.0, "step": 9963 }, { "epoch": 3.649995419987176, "grad_norm": 0.14417715769506476, "learning_rate": 1.0448198719891561e-05, "loss": 0.3967, "num_tokens": 7621956450.0, "step": 9964 }, { "epoch": 3.6503618210130986, "grad_norm": 0.136960689515783, "learning_rate": 1.0444922342108756e-05, "loss": 0.4291, "num_tokens": 7622823728.0, "step": 9965 }, { "epoch": 3.650728222039022, "grad_norm": 0.13887649565704857, "learning_rate": 1.0441646615391513e-05, "loss": 0.3844, "num_tokens": 7623624856.0, "step": 9966 }, { "epoch": 3.6510946230649446, "grad_norm": 0.1255767380327711, "learning_rate": 1.0438371539924395e-05, "loss": 0.3759, "num_tokens": 7624464042.0, "step": 9967 }, { "epoch": 3.6514610240908674, "grad_norm": 0.13957307737338756, "learning_rate": 1.0435097115891932e-05, "loss": 0.3867, "num_tokens": 7625301646.0, "step": 9968 }, { "epoch": 3.6518274251167906, "grad_norm": 0.12527650375312535, "learning_rate": 1.0431823343478626e-05, "loss": 0.3838, "num_tokens": 7626139119.0, "step": 9969 }, { "epoch": 3.652193826142713, "grad_norm": 0.13267154247110852, "learning_rate": 1.0428550222868936e-05, "loss": 0.3753, "num_tokens": 7626858213.0, "step": 9970 }, { "epoch": 3.652560227168636, "grad_norm": 0.13438481263325647, "learning_rate": 1.0425277754247283e-05, "loss": 0.3996, "num_tokens": 7627579792.0, "step": 9971 }, { "epoch": 3.652926628194559, "grad_norm": 0.14693522548671809, "learning_rate": 1.0422005937798043e-05, "loss": 0.4278, "num_tokens": 7628387204.0, "step": 9972 }, { "epoch": 3.6532930292204817, "grad_norm": 0.13196430806698348, "learning_rate": 1.0418734773705588e-05, "loss": 0.3719, "num_tokens": 7629163745.0, "step": 9973 }, { "epoch": 3.653659430246405, "grad_norm": 0.13255768834105683, "learning_rate": 1.0415464262154202e-05, "loss": 0.4004, "num_tokens": 7629956814.0, "step": 9974 }, { "epoch": 3.6540258312723277, "grad_norm": 0.1289664361901076, "learning_rate": 1.0412194403328184e-05, "loss": 0.4194, "num_tokens": 7630820537.0, "step": 9975 }, { "epoch": 3.6543922322982505, "grad_norm": 0.1297735315212879, "learning_rate": 1.0408925197411763e-05, "loss": 0.4177, "num_tokens": 7631583188.0, "step": 9976 }, { "epoch": 3.6547586333241733, "grad_norm": 0.13734679157141028, "learning_rate": 1.0405656644589141e-05, "loss": 0.4142, "num_tokens": 7632321296.0, "step": 9977 }, { "epoch": 3.655125034350096, "grad_norm": 0.14074589240703123, "learning_rate": 1.0402388745044484e-05, "loss": 0.4091, "num_tokens": 7633179526.0, "step": 9978 }, { "epoch": 3.6554914353760193, "grad_norm": 0.13392726635383204, "learning_rate": 1.0399121498961921e-05, "loss": 0.4147, "num_tokens": 7633854848.0, "step": 9979 }, { "epoch": 3.655857836401942, "grad_norm": 0.14087763150110452, "learning_rate": 1.039585490652554e-05, "loss": 0.3968, "num_tokens": 7634621820.0, "step": 9980 }, { "epoch": 3.656224237427865, "grad_norm": 0.14452934408459994, "learning_rate": 1.03925889679194e-05, "loss": 0.3947, "num_tokens": 7635243451.0, "step": 9981 }, { "epoch": 3.6565906384537876, "grad_norm": 0.15447852959020092, "learning_rate": 1.0389323683327518e-05, "loss": 0.4157, "num_tokens": 7636062685.0, "step": 9982 }, { "epoch": 3.6569570394797104, "grad_norm": 0.13731197699222147, "learning_rate": 1.0386059052933874e-05, "loss": 0.4184, "num_tokens": 7636772907.0, "step": 9983 }, { "epoch": 3.6573234405056336, "grad_norm": 0.14779840184052662, "learning_rate": 1.0382795076922409e-05, "loss": 0.4403, "num_tokens": 7637494020.0, "step": 9984 }, { "epoch": 3.6576898415315564, "grad_norm": 0.14562653839099807, "learning_rate": 1.0379531755477028e-05, "loss": 0.419, "num_tokens": 7638305038.0, "step": 9985 }, { "epoch": 3.658056242557479, "grad_norm": 0.13364181978242634, "learning_rate": 1.037626908878162e-05, "loss": 0.438, "num_tokens": 7639034813.0, "step": 9986 }, { "epoch": 3.658422643583402, "grad_norm": 0.1534050637643367, "learning_rate": 1.0373007077019999e-05, "loss": 0.412, "num_tokens": 7639757238.0, "step": 9987 }, { "epoch": 3.6587890446093247, "grad_norm": 0.13337561368930798, "learning_rate": 1.0369745720375961e-05, "loss": 0.3887, "num_tokens": 7640446010.0, "step": 9988 }, { "epoch": 3.659155445635248, "grad_norm": 0.14604120805717025, "learning_rate": 1.036648501903328e-05, "loss": 0.3881, "num_tokens": 7641198404.0, "step": 9989 }, { "epoch": 3.6595218466611708, "grad_norm": 0.1354433134186903, "learning_rate": 1.0363224973175674e-05, "loss": 0.4366, "num_tokens": 7641964662.0, "step": 9990 }, { "epoch": 3.6598882476870935, "grad_norm": 0.15154346742750197, "learning_rate": 1.0359965582986814e-05, "loss": 0.4095, "num_tokens": 7642777908.0, "step": 9991 }, { "epoch": 3.6602546487130163, "grad_norm": 0.14932274875362403, "learning_rate": 1.0356706848650369e-05, "loss": 0.4206, "num_tokens": 7643536375.0, "step": 9992 }, { "epoch": 3.660621049738939, "grad_norm": 0.1296746438836288, "learning_rate": 1.035344877034994e-05, "loss": 0.3871, "num_tokens": 7644260885.0, "step": 9993 }, { "epoch": 3.6609874507648623, "grad_norm": 0.14353929290546247, "learning_rate": 1.0350191348269107e-05, "loss": 0.4179, "num_tokens": 7645028822.0, "step": 9994 }, { "epoch": 3.661353851790785, "grad_norm": 0.1472867378254871, "learning_rate": 1.0346934582591403e-05, "loss": 0.4124, "num_tokens": 7645798112.0, "step": 9995 }, { "epoch": 3.661720252816708, "grad_norm": 0.13092352733952065, "learning_rate": 1.0343678473500334e-05, "loss": 0.3772, "num_tokens": 7646587523.0, "step": 9996 }, { "epoch": 3.6620866538426307, "grad_norm": 0.14284415730044522, "learning_rate": 1.0340423021179356e-05, "loss": 0.4269, "num_tokens": 7647404981.0, "step": 9997 }, { "epoch": 3.6624530548685534, "grad_norm": 0.14101388664669232, "learning_rate": 1.0337168225811907e-05, "loss": 0.3961, "num_tokens": 7648163113.0, "step": 9998 }, { "epoch": 3.6628194558944767, "grad_norm": 0.1365564605404179, "learning_rate": 1.0333914087581363e-05, "loss": 0.4208, "num_tokens": 7648964032.0, "step": 9999 }, { "epoch": 3.6631858569203994, "grad_norm": 0.14748433321894766, "learning_rate": 1.0330660606671097e-05, "loss": 0.3875, "num_tokens": 7649614034.0, "step": 10000 }, { "epoch": 3.663552257946322, "grad_norm": 0.15319487997801415, "learning_rate": 1.0327407783264407e-05, "loss": 0.4141, "num_tokens": 7650430149.0, "step": 10001 }, { "epoch": 3.663918658972245, "grad_norm": 0.13899531379239335, "learning_rate": 1.0324155617544571e-05, "loss": 0.429, "num_tokens": 7651217051.0, "step": 10002 }, { "epoch": 3.664285059998168, "grad_norm": 0.1413771114204553, "learning_rate": 1.0320904109694853e-05, "loss": 0.4044, "num_tokens": 7652043568.0, "step": 10003 }, { "epoch": 3.664651461024091, "grad_norm": 0.13716094470164047, "learning_rate": 1.0317653259898429e-05, "loss": 0.4261, "num_tokens": 7652937181.0, "step": 10004 }, { "epoch": 3.665017862050014, "grad_norm": 0.1278750099832301, "learning_rate": 1.0314403068338485e-05, "loss": 0.4317, "num_tokens": 7653702814.0, "step": 10005 }, { "epoch": 3.6653842630759366, "grad_norm": 0.13506859496170076, "learning_rate": 1.0311153535198146e-05, "loss": 0.4091, "num_tokens": 7654531127.0, "step": 10006 }, { "epoch": 3.6657506641018593, "grad_norm": 0.13234451501626635, "learning_rate": 1.0307904660660507e-05, "loss": 0.4013, "num_tokens": 7655383898.0, "step": 10007 }, { "epoch": 3.666117065127782, "grad_norm": 0.14529602516961415, "learning_rate": 1.0304656444908626e-05, "loss": 0.4212, "num_tokens": 7656134028.0, "step": 10008 }, { "epoch": 3.6664834661537054, "grad_norm": 0.14182530432154203, "learning_rate": 1.0301408888125519e-05, "loss": 0.4229, "num_tokens": 7656868471.0, "step": 10009 }, { "epoch": 3.666849867179628, "grad_norm": 0.15136518189510415, "learning_rate": 1.0298161990494164e-05, "loss": 0.4362, "num_tokens": 7657547692.0, "step": 10010 }, { "epoch": 3.667216268205551, "grad_norm": 0.15751110394182874, "learning_rate": 1.0294915752197517e-05, "loss": 0.4059, "num_tokens": 7658231903.0, "step": 10011 }, { "epoch": 3.6675826692314737, "grad_norm": 0.14795834943450742, "learning_rate": 1.029167017341847e-05, "loss": 0.4206, "num_tokens": 7658895702.0, "step": 10012 }, { "epoch": 3.6679490702573965, "grad_norm": 0.14820181423167922, "learning_rate": 1.0288425254339916e-05, "loss": 0.3949, "num_tokens": 7659636091.0, "step": 10013 }, { "epoch": 3.6683154712833197, "grad_norm": 0.14173371018497177, "learning_rate": 1.0285180995144666e-05, "loss": 0.4187, "num_tokens": 7660466198.0, "step": 10014 }, { "epoch": 3.6686818723092425, "grad_norm": 0.136076808219452, "learning_rate": 1.0281937396015525e-05, "loss": 0.3723, "num_tokens": 7661328174.0, "step": 10015 }, { "epoch": 3.6690482733351653, "grad_norm": 0.12584445562838958, "learning_rate": 1.0278694457135256e-05, "loss": 0.4062, "num_tokens": 7662053196.0, "step": 10016 }, { "epoch": 3.6694146743610885, "grad_norm": 0.141997306169779, "learning_rate": 1.0275452178686585e-05, "loss": 0.4014, "num_tokens": 7662838800.0, "step": 10017 }, { "epoch": 3.669781075387011, "grad_norm": 0.13368189064696043, "learning_rate": 1.0272210560852179e-05, "loss": 0.4151, "num_tokens": 7663602067.0, "step": 10018 }, { "epoch": 3.670147476412934, "grad_norm": 0.13430982237957503, "learning_rate": 1.0268969603814702e-05, "loss": 0.3891, "num_tokens": 7664366530.0, "step": 10019 }, { "epoch": 3.670513877438857, "grad_norm": 0.1501016985790424, "learning_rate": 1.0265729307756765e-05, "loss": 0.4271, "num_tokens": 7665222345.0, "step": 10020 }, { "epoch": 3.6708802784647796, "grad_norm": 0.12496209557336271, "learning_rate": 1.026248967286092e-05, "loss": 0.3882, "num_tokens": 7666060030.0, "step": 10021 }, { "epoch": 3.671246679490703, "grad_norm": 0.12963758043310447, "learning_rate": 1.0259250699309722e-05, "loss": 0.4072, "num_tokens": 7666992103.0, "step": 10022 }, { "epoch": 3.6716130805166256, "grad_norm": 0.11681918276737702, "learning_rate": 1.0256012387285667e-05, "loss": 0.3855, "num_tokens": 7667866761.0, "step": 10023 }, { "epoch": 3.6719794815425484, "grad_norm": 0.13910404388833994, "learning_rate": 1.0252774736971212e-05, "loss": 0.4094, "num_tokens": 7668636497.0, "step": 10024 }, { "epoch": 3.672345882568471, "grad_norm": 0.14550482869474682, "learning_rate": 1.0249537748548786e-05, "loss": 0.3962, "num_tokens": 7669343510.0, "step": 10025 }, { "epoch": 3.672712283594394, "grad_norm": 0.14634252569626818, "learning_rate": 1.0246301422200764e-05, "loss": 0.4041, "num_tokens": 7670048670.0, "step": 10026 }, { "epoch": 3.673078684620317, "grad_norm": 0.1361434058855768, "learning_rate": 1.0243065758109515e-05, "loss": 0.4138, "num_tokens": 7670811095.0, "step": 10027 }, { "epoch": 3.67344508564624, "grad_norm": 0.13613480552289653, "learning_rate": 1.0239830756457336e-05, "loss": 0.4277, "num_tokens": 7671504008.0, "step": 10028 }, { "epoch": 3.6738114866721627, "grad_norm": 0.1479660305349619, "learning_rate": 1.0236596417426497e-05, "loss": 0.4045, "num_tokens": 7672270867.0, "step": 10029 }, { "epoch": 3.6741778876980855, "grad_norm": 0.1605455027558832, "learning_rate": 1.0233362741199257e-05, "loss": 0.455, "num_tokens": 7672892807.0, "step": 10030 }, { "epoch": 3.6745442887240083, "grad_norm": 0.15133972376788513, "learning_rate": 1.0230129727957792e-05, "loss": 0.4085, "num_tokens": 7673656283.0, "step": 10031 }, { "epoch": 3.6749106897499315, "grad_norm": 0.13108211384084165, "learning_rate": 1.022689737788428e-05, "loss": 0.4095, "num_tokens": 7674432687.0, "step": 10032 }, { "epoch": 3.6752770907758543, "grad_norm": 0.13858234147017265, "learning_rate": 1.022366569116084e-05, "loss": 0.4143, "num_tokens": 7675230875.0, "step": 10033 }, { "epoch": 3.675643491801777, "grad_norm": 0.12457229220073734, "learning_rate": 1.0220434667969563e-05, "loss": 0.3814, "num_tokens": 7676143861.0, "step": 10034 }, { "epoch": 3.6760098928277, "grad_norm": 0.13338895455853916, "learning_rate": 1.0217204308492498e-05, "loss": 0.4166, "num_tokens": 7676907764.0, "step": 10035 }, { "epoch": 3.6763762938536226, "grad_norm": 0.1440827556389478, "learning_rate": 1.021397461291166e-05, "loss": 0.415, "num_tokens": 7677601078.0, "step": 10036 }, { "epoch": 3.676742694879546, "grad_norm": 0.14437790894346758, "learning_rate": 1.021074558140902e-05, "loss": 0.4272, "num_tokens": 7678395989.0, "step": 10037 }, { "epoch": 3.6771090959054686, "grad_norm": 0.13513108077533373, "learning_rate": 1.0207517214166523e-05, "loss": 0.3972, "num_tokens": 7679078173.0, "step": 10038 }, { "epoch": 3.6774754969313914, "grad_norm": 0.14272967870070533, "learning_rate": 1.0204289511366064e-05, "loss": 0.3848, "num_tokens": 7679938778.0, "step": 10039 }, { "epoch": 3.677841897957314, "grad_norm": 0.13569697802580974, "learning_rate": 1.0201062473189507e-05, "loss": 0.3934, "num_tokens": 7680819105.0, "step": 10040 }, { "epoch": 3.678208298983237, "grad_norm": 0.12247862721200835, "learning_rate": 1.0197836099818682e-05, "loss": 0.407, "num_tokens": 7681646821.0, "step": 10041 }, { "epoch": 3.67857470000916, "grad_norm": 0.1223116299272058, "learning_rate": 1.0194610391435366e-05, "loss": 0.3944, "num_tokens": 7682571083.0, "step": 10042 }, { "epoch": 3.678941101035083, "grad_norm": 0.13315350355379962, "learning_rate": 1.0191385348221334e-05, "loss": 0.4032, "num_tokens": 7683311882.0, "step": 10043 }, { "epoch": 3.6793075020610058, "grad_norm": 0.13641600638672646, "learning_rate": 1.018816097035828e-05, "loss": 0.4058, "num_tokens": 7684179130.0, "step": 10044 }, { "epoch": 3.6796739030869285, "grad_norm": 0.13411471487720367, "learning_rate": 1.0184937258027877e-05, "loss": 0.3726, "num_tokens": 7685028458.0, "step": 10045 }, { "epoch": 3.6800403041128513, "grad_norm": 0.12794734098485488, "learning_rate": 1.0181714211411774e-05, "loss": 0.43, "num_tokens": 7685789287.0, "step": 10046 }, { "epoch": 3.6804067051387745, "grad_norm": 0.12718398631844943, "learning_rate": 1.017849183069158e-05, "loss": 0.3994, "num_tokens": 7686578870.0, "step": 10047 }, { "epoch": 3.6807731061646973, "grad_norm": 0.13735338434210714, "learning_rate": 1.0175270116048837e-05, "loss": 0.44, "num_tokens": 7687282706.0, "step": 10048 }, { "epoch": 3.68113950719062, "grad_norm": 0.14709486396887067, "learning_rate": 1.0172049067665082e-05, "loss": 0.4038, "num_tokens": 7688055873.0, "step": 10049 }, { "epoch": 3.681505908216543, "grad_norm": 0.13842346241374565, "learning_rate": 1.0168828685721807e-05, "loss": 0.4192, "num_tokens": 7688863194.0, "step": 10050 }, { "epoch": 3.6818723092424657, "grad_norm": 0.1275981734912242, "learning_rate": 1.0165608970400462e-05, "loss": 0.3877, "num_tokens": 7689590426.0, "step": 10051 }, { "epoch": 3.682238710268389, "grad_norm": 0.1412396234413337, "learning_rate": 1.0162389921882456e-05, "loss": 0.4216, "num_tokens": 7690323361.0, "step": 10052 }, { "epoch": 3.6826051112943117, "grad_norm": 0.1548803041275391, "learning_rate": 1.0159171540349165e-05, "loss": 0.4242, "num_tokens": 7691069307.0, "step": 10053 }, { "epoch": 3.6829715123202345, "grad_norm": 0.13939864710183308, "learning_rate": 1.0155953825981931e-05, "loss": 0.4175, "num_tokens": 7691865227.0, "step": 10054 }, { "epoch": 3.6833379133461572, "grad_norm": 0.14249282870187593, "learning_rate": 1.0152736778962053e-05, "loss": 0.3813, "num_tokens": 7692553395.0, "step": 10055 }, { "epoch": 3.68370431437208, "grad_norm": 0.14282547132078655, "learning_rate": 1.0149520399470787e-05, "loss": 0.4362, "num_tokens": 7693422579.0, "step": 10056 }, { "epoch": 3.6840707153980032, "grad_norm": 0.13722137953899674, "learning_rate": 1.0146304687689374e-05, "loss": 0.3957, "num_tokens": 7694201061.0, "step": 10057 }, { "epoch": 3.684437116423926, "grad_norm": 0.13186323109557585, "learning_rate": 1.0143089643798992e-05, "loss": 0.3728, "num_tokens": 7694867979.0, "step": 10058 }, { "epoch": 3.684803517449849, "grad_norm": 0.16512125717346907, "learning_rate": 1.0139875267980783e-05, "loss": 0.4276, "num_tokens": 7695577647.0, "step": 10059 }, { "epoch": 3.6851699184757716, "grad_norm": 0.1439336454193817, "learning_rate": 1.0136661560415879e-05, "loss": 0.391, "num_tokens": 7696342402.0, "step": 10060 }, { "epoch": 3.6855363195016944, "grad_norm": 0.13999777628209828, "learning_rate": 1.0133448521285336e-05, "loss": 0.3946, "num_tokens": 7697219754.0, "step": 10061 }, { "epoch": 3.6859027205276176, "grad_norm": 0.13046128159987463, "learning_rate": 1.0130236150770205e-05, "loss": 0.4351, "num_tokens": 7697961355.0, "step": 10062 }, { "epoch": 3.6862691215535404, "grad_norm": 0.13424765956267634, "learning_rate": 1.0127024449051478e-05, "loss": 0.3999, "num_tokens": 7698875402.0, "step": 10063 }, { "epoch": 3.686635522579463, "grad_norm": 0.13724795862062808, "learning_rate": 1.0123813416310122e-05, "loss": 0.395, "num_tokens": 7699563353.0, "step": 10064 }, { "epoch": 3.6870019236053864, "grad_norm": 0.1474904164423302, "learning_rate": 1.0120603052727058e-05, "loss": 0.3744, "num_tokens": 7700216839.0, "step": 10065 }, { "epoch": 3.6873683246313087, "grad_norm": 0.15083191738145196, "learning_rate": 1.0117393358483171e-05, "loss": 0.4082, "num_tokens": 7700865675.0, "step": 10066 }, { "epoch": 3.687734725657232, "grad_norm": 0.1411418113285079, "learning_rate": 1.0114184333759311e-05, "loss": 0.3861, "num_tokens": 7701614070.0, "step": 10067 }, { "epoch": 3.6881011266831547, "grad_norm": 0.1377203000995119, "learning_rate": 1.0110975978736293e-05, "loss": 0.4016, "num_tokens": 7702458975.0, "step": 10068 }, { "epoch": 3.6884675277090775, "grad_norm": 0.1393857580151807, "learning_rate": 1.0107768293594878e-05, "loss": 0.3916, "num_tokens": 7703307726.0, "step": 10069 }, { "epoch": 3.6888339287350007, "grad_norm": 0.12239826496197088, "learning_rate": 1.0104561278515824e-05, "loss": 0.3902, "num_tokens": 7704023568.0, "step": 10070 }, { "epoch": 3.6892003297609235, "grad_norm": 0.13719595006464816, "learning_rate": 1.0101354933679809e-05, "loss": 0.3983, "num_tokens": 7704785546.0, "step": 10071 }, { "epoch": 3.6895667307868463, "grad_norm": 0.14224917390590108, "learning_rate": 1.0098149259267491e-05, "loss": 0.4084, "num_tokens": 7705584292.0, "step": 10072 }, { "epoch": 3.689933131812769, "grad_norm": 0.13473529130504802, "learning_rate": 1.0094944255459507e-05, "loss": 0.3934, "num_tokens": 7706412539.0, "step": 10073 }, { "epoch": 3.690299532838692, "grad_norm": 0.1287134845568592, "learning_rate": 1.0091739922436439e-05, "loss": 0.4606, "num_tokens": 7707105687.0, "step": 10074 }, { "epoch": 3.690665933864615, "grad_norm": 0.15263882661019634, "learning_rate": 1.0088536260378823e-05, "loss": 0.4139, "num_tokens": 7707797945.0, "step": 10075 }, { "epoch": 3.691032334890538, "grad_norm": 0.1458701377023267, "learning_rate": 1.0085333269467176e-05, "loss": 0.4262, "num_tokens": 7708630025.0, "step": 10076 }, { "epoch": 3.6913987359164606, "grad_norm": 0.14295340763072903, "learning_rate": 1.008213094988197e-05, "loss": 0.4232, "num_tokens": 7709499526.0, "step": 10077 }, { "epoch": 3.6917651369423834, "grad_norm": 0.12161601988091383, "learning_rate": 1.0078929301803635e-05, "loss": 0.3842, "num_tokens": 7710316408.0, "step": 10078 }, { "epoch": 3.692131537968306, "grad_norm": 0.1485695652152018, "learning_rate": 1.0075728325412566e-05, "loss": 0.3972, "num_tokens": 7711092775.0, "step": 10079 }, { "epoch": 3.6924979389942294, "grad_norm": 0.13831101779856078, "learning_rate": 1.007252802088912e-05, "loss": 0.3683, "num_tokens": 7711762439.0, "step": 10080 }, { "epoch": 3.692864340020152, "grad_norm": 0.1310831176368394, "learning_rate": 1.0069328388413621e-05, "loss": 0.4101, "num_tokens": 7712621539.0, "step": 10081 }, { "epoch": 3.693230741046075, "grad_norm": 0.1354313699521314, "learning_rate": 1.0066129428166352e-05, "loss": 0.3997, "num_tokens": 7713340631.0, "step": 10082 }, { "epoch": 3.6935971420719977, "grad_norm": 0.1499043418921145, "learning_rate": 1.0062931140327541e-05, "loss": 0.3965, "num_tokens": 7714168653.0, "step": 10083 }, { "epoch": 3.6939635430979205, "grad_norm": 0.13393820033433423, "learning_rate": 1.0059733525077421e-05, "loss": 0.4289, "num_tokens": 7714879972.0, "step": 10084 }, { "epoch": 3.6943299441238437, "grad_norm": 0.1516957377074407, "learning_rate": 1.0056536582596139e-05, "loss": 0.4071, "num_tokens": 7715664901.0, "step": 10085 }, { "epoch": 3.6946963451497665, "grad_norm": 0.12962794410779435, "learning_rate": 1.0053340313063825e-05, "loss": 0.3834, "num_tokens": 7716412378.0, "step": 10086 }, { "epoch": 3.6950627461756893, "grad_norm": 0.14821965353481703, "learning_rate": 1.005014471666059e-05, "loss": 0.4004, "num_tokens": 7717193941.0, "step": 10087 }, { "epoch": 3.695429147201612, "grad_norm": 0.13179080727035572, "learning_rate": 1.0046949793566465e-05, "loss": 0.4106, "num_tokens": 7718042971.0, "step": 10088 }, { "epoch": 3.695795548227535, "grad_norm": 0.13308576780919695, "learning_rate": 1.0043755543961485e-05, "loss": 0.4168, "num_tokens": 7718796018.0, "step": 10089 }, { "epoch": 3.696161949253458, "grad_norm": 0.1500724489811713, "learning_rate": 1.0040561968025617e-05, "loss": 0.4477, "num_tokens": 7719646824.0, "step": 10090 }, { "epoch": 3.696528350279381, "grad_norm": 0.13552510071967877, "learning_rate": 1.003736906593881e-05, "loss": 0.424, "num_tokens": 7720301961.0, "step": 10091 }, { "epoch": 3.6968947513053037, "grad_norm": 0.15046778995444735, "learning_rate": 1.0034176837880962e-05, "loss": 0.4002, "num_tokens": 7721008218.0, "step": 10092 }, { "epoch": 3.6972611523312264, "grad_norm": 0.1406461929354607, "learning_rate": 1.0030985284031938e-05, "loss": 0.3788, "num_tokens": 7721848896.0, "step": 10093 }, { "epoch": 3.697627553357149, "grad_norm": 0.13296594098432818, "learning_rate": 1.0027794404571565e-05, "loss": 0.3995, "num_tokens": 7722553542.0, "step": 10094 }, { "epoch": 3.6979939543830724, "grad_norm": 0.1427000337299515, "learning_rate": 1.0024604199679633e-05, "loss": 0.3888, "num_tokens": 7723296447.0, "step": 10095 }, { "epoch": 3.698360355408995, "grad_norm": 0.14026647963176275, "learning_rate": 1.0021414669535882e-05, "loss": 0.3907, "num_tokens": 7724102356.0, "step": 10096 }, { "epoch": 3.698726756434918, "grad_norm": 0.13451398534797912, "learning_rate": 1.0018225814320045e-05, "loss": 0.4108, "num_tokens": 7724934485.0, "step": 10097 }, { "epoch": 3.6990931574608408, "grad_norm": 0.1341520625507008, "learning_rate": 1.0015037634211783e-05, "loss": 0.4212, "num_tokens": 7725716624.0, "step": 10098 }, { "epoch": 3.6994595584867636, "grad_norm": 0.14123727086420249, "learning_rate": 1.0011850129390728e-05, "loss": 0.4118, "num_tokens": 7726456846.0, "step": 10099 }, { "epoch": 3.699825959512687, "grad_norm": 0.14854722329635373, "learning_rate": 1.0008663300036492e-05, "loss": 0.4492, "num_tokens": 7727162002.0, "step": 10100 }, { "epoch": 3.7001923605386096, "grad_norm": 0.1564098198549993, "learning_rate": 1.0005477146328631e-05, "loss": 0.4092, "num_tokens": 7727766915.0, "step": 10101 }, { "epoch": 3.7005587615645323, "grad_norm": 0.14782774386718525, "learning_rate": 1.0002291668446655e-05, "loss": 0.4216, "num_tokens": 7728533920.0, "step": 10102 }, { "epoch": 3.700925162590455, "grad_norm": 0.14256728347601072, "learning_rate": 9.999106866570066e-06, "loss": 0.4344, "num_tokens": 7729242897.0, "step": 10103 }, { "epoch": 3.701291563616378, "grad_norm": 0.1410270946255847, "learning_rate": 9.995922740878308e-06, "loss": 0.3881, "num_tokens": 7730053333.0, "step": 10104 }, { "epoch": 3.701657964642301, "grad_norm": 0.14620244101556018, "learning_rate": 9.992739291550772e-06, "loss": 0.4215, "num_tokens": 7730706231.0, "step": 10105 }, { "epoch": 3.702024365668224, "grad_norm": 0.14447266418739627, "learning_rate": 9.989556518766846e-06, "loss": 0.3926, "num_tokens": 7731445586.0, "step": 10106 }, { "epoch": 3.7023907666941467, "grad_norm": 0.1471866971259857, "learning_rate": 9.986374422705855e-06, "loss": 0.3946, "num_tokens": 7732229443.0, "step": 10107 }, { "epoch": 3.7027571677200695, "grad_norm": 0.1444366315214341, "learning_rate": 9.983193003547093e-06, "loss": 0.4257, "num_tokens": 7732957077.0, "step": 10108 }, { "epoch": 3.7031235687459922, "grad_norm": 0.1431235232830592, "learning_rate": 9.980012261469817e-06, "loss": 0.4294, "num_tokens": 7733746522.0, "step": 10109 }, { "epoch": 3.7034899697719155, "grad_norm": 0.13550481310800166, "learning_rate": 9.97683219665324e-06, "loss": 0.4079, "num_tokens": 7734485323.0, "step": 10110 }, { "epoch": 3.7038563707978382, "grad_norm": 0.1417971303381716, "learning_rate": 9.973652809276548e-06, "loss": 0.3857, "num_tokens": 7735125924.0, "step": 10111 }, { "epoch": 3.704222771823761, "grad_norm": 0.15003769642286754, "learning_rate": 9.97047409951888e-06, "loss": 0.3939, "num_tokens": 7735916692.0, "step": 10112 }, { "epoch": 3.7045891728496843, "grad_norm": 0.13316392015717454, "learning_rate": 9.967296067559329e-06, "loss": 0.4034, "num_tokens": 7736723120.0, "step": 10113 }, { "epoch": 3.7049555738756066, "grad_norm": 0.13850968296640742, "learning_rate": 9.964118713576982e-06, "loss": 0.4245, "num_tokens": 7737450953.0, "step": 10114 }, { "epoch": 3.70532197490153, "grad_norm": 0.14601261462736342, "learning_rate": 9.96094203775084e-06, "loss": 0.3948, "num_tokens": 7738159554.0, "step": 10115 }, { "epoch": 3.7056883759274526, "grad_norm": 0.13208620042790792, "learning_rate": 9.95776604025991e-06, "loss": 0.3892, "num_tokens": 7739028078.0, "step": 10116 }, { "epoch": 3.7060547769533754, "grad_norm": 0.14446120519098749, "learning_rate": 9.95459072128314e-06, "loss": 0.3762, "num_tokens": 7739759234.0, "step": 10117 }, { "epoch": 3.7064211779792986, "grad_norm": 0.1372188243228264, "learning_rate": 9.951416080999428e-06, "loss": 0.4097, "num_tokens": 7740625270.0, "step": 10118 }, { "epoch": 3.7067875790052214, "grad_norm": 0.1474848765114376, "learning_rate": 9.94824211958766e-06, "loss": 0.4245, "num_tokens": 7741311163.0, "step": 10119 }, { "epoch": 3.707153980031144, "grad_norm": 0.16233038965697819, "learning_rate": 9.94506883722667e-06, "loss": 0.4015, "num_tokens": 7742054037.0, "step": 10120 }, { "epoch": 3.707520381057067, "grad_norm": 0.14279533954014603, "learning_rate": 9.941896234095252e-06, "loss": 0.3823, "num_tokens": 7742837476.0, "step": 10121 }, { "epoch": 3.7078867820829897, "grad_norm": 0.1364481942419487, "learning_rate": 9.93872431037217e-06, "loss": 0.4042, "num_tokens": 7743591511.0, "step": 10122 }, { "epoch": 3.708253183108913, "grad_norm": 0.14406572034737317, "learning_rate": 9.93555306623614e-06, "loss": 0.4207, "num_tokens": 7744348487.0, "step": 10123 }, { "epoch": 3.7086195841348357, "grad_norm": 0.15818059080504368, "learning_rate": 9.932382501865845e-06, "loss": 0.4009, "num_tokens": 7745113512.0, "step": 10124 }, { "epoch": 3.7089859851607585, "grad_norm": 0.13709579952218623, "learning_rate": 9.929212617439928e-06, "loss": 0.407, "num_tokens": 7745906811.0, "step": 10125 }, { "epoch": 3.7093523861866813, "grad_norm": 0.13390618309657362, "learning_rate": 9.926043413136992e-06, "loss": 0.4047, "num_tokens": 7746620836.0, "step": 10126 }, { "epoch": 3.709718787212604, "grad_norm": 0.15701637594900927, "learning_rate": 9.922874889135621e-06, "loss": 0.4225, "num_tokens": 7747238243.0, "step": 10127 }, { "epoch": 3.7100851882385273, "grad_norm": 0.15424823424864834, "learning_rate": 9.919707045614323e-06, "loss": 0.4323, "num_tokens": 7748059656.0, "step": 10128 }, { "epoch": 3.71045158926445, "grad_norm": 0.13705649128687994, "learning_rate": 9.916539882751594e-06, "loss": 0.4536, "num_tokens": 7748831430.0, "step": 10129 }, { "epoch": 3.710817990290373, "grad_norm": 0.14096625015191677, "learning_rate": 9.913373400725894e-06, "loss": 0.4007, "num_tokens": 7749721576.0, "step": 10130 }, { "epoch": 3.7111843913162956, "grad_norm": 0.129775590636127, "learning_rate": 9.910207599715638e-06, "loss": 0.4019, "num_tokens": 7750437509.0, "step": 10131 }, { "epoch": 3.7115507923422184, "grad_norm": 0.1372470587151423, "learning_rate": 9.907042479899185e-06, "loss": 0.4237, "num_tokens": 7751294790.0, "step": 10132 }, { "epoch": 3.7119171933681416, "grad_norm": 0.14133458741310817, "learning_rate": 9.90387804145489e-06, "loss": 0.4081, "num_tokens": 7751985504.0, "step": 10133 }, { "epoch": 3.7122835943940644, "grad_norm": 0.14982498299822355, "learning_rate": 9.900714284561045e-06, "loss": 0.4043, "num_tokens": 7752697646.0, "step": 10134 }, { "epoch": 3.712649995419987, "grad_norm": 0.1354656495246162, "learning_rate": 9.897551209395909e-06, "loss": 0.408, "num_tokens": 7753476711.0, "step": 10135 }, { "epoch": 3.71301639644591, "grad_norm": 0.1388639994280521, "learning_rate": 9.894388816137707e-06, "loss": 0.4099, "num_tokens": 7754222284.0, "step": 10136 }, { "epoch": 3.7133827974718328, "grad_norm": 0.14028051719550963, "learning_rate": 9.891227104964621e-06, "loss": 0.4187, "num_tokens": 7754980388.0, "step": 10137 }, { "epoch": 3.713749198497756, "grad_norm": 0.14737824417211087, "learning_rate": 9.888066076054795e-06, "loss": 0.4309, "num_tokens": 7755721258.0, "step": 10138 }, { "epoch": 3.7141155995236788, "grad_norm": 0.141380104847259, "learning_rate": 9.88490572958634e-06, "loss": 0.405, "num_tokens": 7756405157.0, "step": 10139 }, { "epoch": 3.7144820005496015, "grad_norm": 0.14288886375125226, "learning_rate": 9.881746065737314e-06, "loss": 0.3751, "num_tokens": 7757200683.0, "step": 10140 }, { "epoch": 3.7148484015755243, "grad_norm": 0.12529648407946223, "learning_rate": 9.878587084685766e-06, "loss": 0.4346, "num_tokens": 7758005098.0, "step": 10141 }, { "epoch": 3.715214802601447, "grad_norm": 0.14725872939670123, "learning_rate": 9.87542878660967e-06, "loss": 0.3924, "num_tokens": 7758756762.0, "step": 10142 }, { "epoch": 3.7155812036273703, "grad_norm": 0.14512063690881996, "learning_rate": 9.872271171686978e-06, "loss": 0.4308, "num_tokens": 7759562106.0, "step": 10143 }, { "epoch": 3.715947604653293, "grad_norm": 0.14161116488584988, "learning_rate": 9.869114240095622e-06, "loss": 0.4162, "num_tokens": 7760323731.0, "step": 10144 }, { "epoch": 3.716314005679216, "grad_norm": 0.1252358552801112, "learning_rate": 9.865957992013457e-06, "loss": 0.3734, "num_tokens": 7761115738.0, "step": 10145 }, { "epoch": 3.7166804067051387, "grad_norm": 0.13328997667872022, "learning_rate": 9.862802427618334e-06, "loss": 0.4219, "num_tokens": 7761897022.0, "step": 10146 }, { "epoch": 3.7170468077310614, "grad_norm": 0.13837714246184818, "learning_rate": 9.859647547088048e-06, "loss": 0.4351, "num_tokens": 7762712484.0, "step": 10147 }, { "epoch": 3.7174132087569847, "grad_norm": 0.15396400248211517, "learning_rate": 9.85649335060036e-06, "loss": 0.4145, "num_tokens": 7763544156.0, "step": 10148 }, { "epoch": 3.7177796097829074, "grad_norm": 0.12405476225346879, "learning_rate": 9.85333983833299e-06, "loss": 0.3874, "num_tokens": 7764340406.0, "step": 10149 }, { "epoch": 3.7181460108088302, "grad_norm": 0.14216274895111128, "learning_rate": 9.850187010463624e-06, "loss": 0.4147, "num_tokens": 7764973630.0, "step": 10150 }, { "epoch": 3.718512411834753, "grad_norm": 0.144366442571864, "learning_rate": 9.847034867169902e-06, "loss": 0.4112, "num_tokens": 7765721700.0, "step": 10151 }, { "epoch": 3.718878812860676, "grad_norm": 0.14359207648475622, "learning_rate": 9.843883408629435e-06, "loss": 0.4127, "num_tokens": 7766463972.0, "step": 10152 }, { "epoch": 3.719245213886599, "grad_norm": 0.13855595297562945, "learning_rate": 9.840732635019782e-06, "loss": 0.3561, "num_tokens": 7767211109.0, "step": 10153 }, { "epoch": 3.719611614912522, "grad_norm": 0.13884429930670988, "learning_rate": 9.83758254651849e-06, "loss": 0.3952, "num_tokens": 7768027833.0, "step": 10154 }, { "epoch": 3.7199780159384446, "grad_norm": 0.14061482720479265, "learning_rate": 9.83443314330303e-06, "loss": 0.3943, "num_tokens": 7768804234.0, "step": 10155 }, { "epoch": 3.7203444169643674, "grad_norm": 0.14192181869016324, "learning_rate": 9.831284425550855e-06, "loss": 0.41, "num_tokens": 7769503779.0, "step": 10156 }, { "epoch": 3.72071081799029, "grad_norm": 0.13561516431509615, "learning_rate": 9.828136393439393e-06, "loss": 0.4313, "num_tokens": 7770259777.0, "step": 10157 }, { "epoch": 3.7210772190162134, "grad_norm": 0.14383133258845196, "learning_rate": 9.824989047146015e-06, "loss": 0.4397, "num_tokens": 7771034999.0, "step": 10158 }, { "epoch": 3.721443620042136, "grad_norm": 0.14267278805550415, "learning_rate": 9.821842386848037e-06, "loss": 0.4148, "num_tokens": 7771784012.0, "step": 10159 }, { "epoch": 3.721810021068059, "grad_norm": 0.13510858255677555, "learning_rate": 9.81869641272278e-06, "loss": 0.3837, "num_tokens": 7772571987.0, "step": 10160 }, { "epoch": 3.722176422093982, "grad_norm": 0.1357563423072169, "learning_rate": 9.815551124947496e-06, "loss": 0.4015, "num_tokens": 7773315781.0, "step": 10161 }, { "epoch": 3.7225428231199045, "grad_norm": 0.13569436778702004, "learning_rate": 9.812406523699391e-06, "loss": 0.4117, "num_tokens": 7774002356.0, "step": 10162 }, { "epoch": 3.7229092241458277, "grad_norm": 0.13859960725360593, "learning_rate": 9.809262609155662e-06, "loss": 0.3985, "num_tokens": 7774830934.0, "step": 10163 }, { "epoch": 3.7232756251717505, "grad_norm": 0.13759384177344255, "learning_rate": 9.806119381493449e-06, "loss": 0.3952, "num_tokens": 7775427402.0, "step": 10164 }, { "epoch": 3.7236420261976733, "grad_norm": 0.15224409942385075, "learning_rate": 9.802976840889851e-06, "loss": 0.4035, "num_tokens": 7776105835.0, "step": 10165 }, { "epoch": 3.7240084272235965, "grad_norm": 0.14435153865377193, "learning_rate": 9.799834987521937e-06, "loss": 0.4451, "num_tokens": 7776887181.0, "step": 10166 }, { "epoch": 3.7243748282495193, "grad_norm": 0.14178685786244696, "learning_rate": 9.79669382156673e-06, "loss": 0.3921, "num_tokens": 7777623580.0, "step": 10167 }, { "epoch": 3.724741229275442, "grad_norm": 0.13970288710079293, "learning_rate": 9.793553343201218e-06, "loss": 0.4265, "num_tokens": 7778430296.0, "step": 10168 }, { "epoch": 3.725107630301365, "grad_norm": 0.13432259087377374, "learning_rate": 9.790413552602353e-06, "loss": 0.4076, "num_tokens": 7779247183.0, "step": 10169 }, { "epoch": 3.7254740313272876, "grad_norm": 0.13644539699890296, "learning_rate": 9.787274449947035e-06, "loss": 0.4339, "num_tokens": 7780002640.0, "step": 10170 }, { "epoch": 3.725840432353211, "grad_norm": 0.14601504544933244, "learning_rate": 9.784136035412156e-06, "loss": 0.3924, "num_tokens": 7780663874.0, "step": 10171 }, { "epoch": 3.7262068333791336, "grad_norm": 0.1468023786022361, "learning_rate": 9.780998309174522e-06, "loss": 0.4178, "num_tokens": 7781333973.0, "step": 10172 }, { "epoch": 3.7265732344050564, "grad_norm": 0.14788405395894882, "learning_rate": 9.777861271410947e-06, "loss": 0.4174, "num_tokens": 7782027830.0, "step": 10173 }, { "epoch": 3.726939635430979, "grad_norm": 0.14134296835485513, "learning_rate": 9.774724922298188e-06, "loss": 0.3807, "num_tokens": 7782858615.0, "step": 10174 }, { "epoch": 3.727306036456902, "grad_norm": 0.13933071001948713, "learning_rate": 9.77158926201294e-06, "loss": 0.3942, "num_tokens": 7783685069.0, "step": 10175 }, { "epoch": 3.727672437482825, "grad_norm": 0.12700149770861952, "learning_rate": 9.768454290731896e-06, "loss": 0.3945, "num_tokens": 7784463943.0, "step": 10176 }, { "epoch": 3.728038838508748, "grad_norm": 0.14356993891156605, "learning_rate": 9.765320008631692e-06, "loss": 0.4294, "num_tokens": 7785281383.0, "step": 10177 }, { "epoch": 3.7284052395346707, "grad_norm": 0.13487421338203429, "learning_rate": 9.76218641588893e-06, "loss": 0.3906, "num_tokens": 7785977721.0, "step": 10178 }, { "epoch": 3.7287716405605935, "grad_norm": 0.13252032432889838, "learning_rate": 9.759053512680165e-06, "loss": 0.4046, "num_tokens": 7786902221.0, "step": 10179 }, { "epoch": 3.7291380415865163, "grad_norm": 0.13057289587659981, "learning_rate": 9.755921299181924e-06, "loss": 0.4135, "num_tokens": 7787663627.0, "step": 10180 }, { "epoch": 3.7295044426124395, "grad_norm": 0.13709682127568418, "learning_rate": 9.752789775570686e-06, "loss": 0.3943, "num_tokens": 7788529096.0, "step": 10181 }, { "epoch": 3.7298708436383623, "grad_norm": 0.13371705870214734, "learning_rate": 9.749658942022897e-06, "loss": 0.4019, "num_tokens": 7789350376.0, "step": 10182 }, { "epoch": 3.730237244664285, "grad_norm": 0.13411648208453758, "learning_rate": 9.74652879871496e-06, "loss": 0.4065, "num_tokens": 7790117967.0, "step": 10183 }, { "epoch": 3.730603645690208, "grad_norm": 0.13082734390186332, "learning_rate": 9.743399345823255e-06, "loss": 0.3791, "num_tokens": 7790953518.0, "step": 10184 }, { "epoch": 3.7309700467161306, "grad_norm": 0.12921855523739278, "learning_rate": 9.740270583524093e-06, "loss": 0.4192, "num_tokens": 7791812222.0, "step": 10185 }, { "epoch": 3.731336447742054, "grad_norm": 0.13565801510607953, "learning_rate": 9.73714251199376e-06, "loss": 0.402, "num_tokens": 7792625579.0, "step": 10186 }, { "epoch": 3.7317028487679766, "grad_norm": 0.1315440067142211, "learning_rate": 9.734015131408522e-06, "loss": 0.4322, "num_tokens": 7793431905.0, "step": 10187 }, { "epoch": 3.7320692497938994, "grad_norm": 0.12563719882873498, "learning_rate": 9.730888441944588e-06, "loss": 0.3843, "num_tokens": 7794272176.0, "step": 10188 }, { "epoch": 3.732435650819822, "grad_norm": 0.1308146834902628, "learning_rate": 9.727762443778111e-06, "loss": 0.3833, "num_tokens": 7795033111.0, "step": 10189 }, { "epoch": 3.732802051845745, "grad_norm": 0.13456220550768955, "learning_rate": 9.724637137085243e-06, "loss": 0.3686, "num_tokens": 7795555736.0, "step": 10190 }, { "epoch": 3.733168452871668, "grad_norm": 0.15704949153649655, "learning_rate": 9.721512522042073e-06, "loss": 0.4149, "num_tokens": 7796305780.0, "step": 10191 }, { "epoch": 3.733534853897591, "grad_norm": 0.14532813647428466, "learning_rate": 9.718388598824654e-06, "loss": 0.416, "num_tokens": 7796992488.0, "step": 10192 }, { "epoch": 3.7339012549235138, "grad_norm": 0.14818275349614027, "learning_rate": 9.715265367609005e-06, "loss": 0.418, "num_tokens": 7797605993.0, "step": 10193 }, { "epoch": 3.7342676559494365, "grad_norm": 0.1490965672660049, "learning_rate": 9.712142828571098e-06, "loss": 0.4013, "num_tokens": 7798395666.0, "step": 10194 }, { "epoch": 3.7346340569753593, "grad_norm": 0.13946669047681914, "learning_rate": 9.709020981886873e-06, "loss": 0.4258, "num_tokens": 7799101712.0, "step": 10195 }, { "epoch": 3.7350004580012826, "grad_norm": 0.15487849454632918, "learning_rate": 9.705899827732233e-06, "loss": 0.4212, "num_tokens": 7799883054.0, "step": 10196 }, { "epoch": 3.7353668590272053, "grad_norm": 0.14099520133248095, "learning_rate": 9.702779366283029e-06, "loss": 0.3905, "num_tokens": 7800622722.0, "step": 10197 }, { "epoch": 3.735733260053128, "grad_norm": 0.14048496103392966, "learning_rate": 9.699659597715097e-06, "loss": 0.4193, "num_tokens": 7801406051.0, "step": 10198 }, { "epoch": 3.736099661079051, "grad_norm": 0.133151545171989, "learning_rate": 9.6965405222042e-06, "loss": 0.393, "num_tokens": 7802305110.0, "step": 10199 }, { "epoch": 3.7364660621049737, "grad_norm": 0.12211945882485405, "learning_rate": 9.693422139926097e-06, "loss": 0.368, "num_tokens": 7803111234.0, "step": 10200 }, { "epoch": 3.736832463130897, "grad_norm": 0.13435790897812064, "learning_rate": 9.69030445105649e-06, "loss": 0.425, "num_tokens": 7803951085.0, "step": 10201 }, { "epoch": 3.7371988641568197, "grad_norm": 0.14483072601442804, "learning_rate": 9.68718745577103e-06, "loss": 0.4192, "num_tokens": 7804641050.0, "step": 10202 }, { "epoch": 3.7375652651827425, "grad_norm": 0.14814284284441281, "learning_rate": 9.684071154245355e-06, "loss": 0.4115, "num_tokens": 7805416591.0, "step": 10203 }, { "epoch": 3.7379316662086652, "grad_norm": 0.14552438418813585, "learning_rate": 9.68095554665505e-06, "loss": 0.4255, "num_tokens": 7806050534.0, "step": 10204 }, { "epoch": 3.738298067234588, "grad_norm": 0.14688486046061205, "learning_rate": 9.67784063317566e-06, "loss": 0.4124, "num_tokens": 7806755852.0, "step": 10205 }, { "epoch": 3.7386644682605112, "grad_norm": 0.14058740127612923, "learning_rate": 9.674726413982696e-06, "loss": 0.3924, "num_tokens": 7807529083.0, "step": 10206 }, { "epoch": 3.739030869286434, "grad_norm": 0.1333260314101093, "learning_rate": 9.671612889251624e-06, "loss": 0.3703, "num_tokens": 7808274107.0, "step": 10207 }, { "epoch": 3.739397270312357, "grad_norm": 0.1380369134474157, "learning_rate": 9.668500059157878e-06, "loss": 0.4025, "num_tokens": 7809127519.0, "step": 10208 }, { "epoch": 3.7397636713382796, "grad_norm": 0.13906172166050884, "learning_rate": 9.665387923876845e-06, "loss": 0.4443, "num_tokens": 7809893212.0, "step": 10209 }, { "epoch": 3.7401300723642024, "grad_norm": 0.1337905802064536, "learning_rate": 9.662276483583873e-06, "loss": 0.4109, "num_tokens": 7810664793.0, "step": 10210 }, { "epoch": 3.7404964733901256, "grad_norm": 0.13521497265898844, "learning_rate": 9.65916573845429e-06, "loss": 0.4241, "num_tokens": 7811375310.0, "step": 10211 }, { "epoch": 3.7408628744160484, "grad_norm": 0.1492203307779673, "learning_rate": 9.656055688663357e-06, "loss": 0.4195, "num_tokens": 7812136433.0, "step": 10212 }, { "epoch": 3.741229275441971, "grad_norm": 0.14891546521645097, "learning_rate": 9.652946334386306e-06, "loss": 0.4323, "num_tokens": 7812789792.0, "step": 10213 }, { "epoch": 3.7415956764678944, "grad_norm": 0.15498699416297365, "learning_rate": 9.649837675798342e-06, "loss": 0.4045, "num_tokens": 7813573390.0, "step": 10214 }, { "epoch": 3.7419620774938167, "grad_norm": 0.13610119101328846, "learning_rate": 9.64672971307462e-06, "loss": 0.3925, "num_tokens": 7814365474.0, "step": 10215 }, { "epoch": 3.74232847851974, "grad_norm": 0.13669084033381954, "learning_rate": 9.643622446390243e-06, "loss": 0.4077, "num_tokens": 7815187564.0, "step": 10216 }, { "epoch": 3.7426948795456627, "grad_norm": 0.13558667048879602, "learning_rate": 9.640515875920303e-06, "loss": 0.4001, "num_tokens": 7815984542.0, "step": 10217 }, { "epoch": 3.7430612805715855, "grad_norm": 0.12523734160560104, "learning_rate": 9.637410001839836e-06, "loss": 0.4037, "num_tokens": 7816754833.0, "step": 10218 }, { "epoch": 3.7434276815975087, "grad_norm": 0.14144821073825414, "learning_rate": 9.63430482432384e-06, "loss": 0.3815, "num_tokens": 7817456638.0, "step": 10219 }, { "epoch": 3.7437940826234315, "grad_norm": 0.14225955529656914, "learning_rate": 9.631200343547268e-06, "loss": 0.4221, "num_tokens": 7818303452.0, "step": 10220 }, { "epoch": 3.7441604836493543, "grad_norm": 0.13659395628637347, "learning_rate": 9.62809655968505e-06, "loss": 0.4277, "num_tokens": 7819142357.0, "step": 10221 }, { "epoch": 3.744526884675277, "grad_norm": 0.14188666698587746, "learning_rate": 9.624993472912063e-06, "loss": 0.4128, "num_tokens": 7819863706.0, "step": 10222 }, { "epoch": 3.7448932857012, "grad_norm": 0.14480676198238868, "learning_rate": 9.62189108340315e-06, "loss": 0.4125, "num_tokens": 7820657589.0, "step": 10223 }, { "epoch": 3.745259686727123, "grad_norm": 0.13319929362627983, "learning_rate": 9.618789391333111e-06, "loss": 0.3978, "num_tokens": 7821437860.0, "step": 10224 }, { "epoch": 3.745626087753046, "grad_norm": 0.13237807337170385, "learning_rate": 9.615688396876714e-06, "loss": 0.3709, "num_tokens": 7822156383.0, "step": 10225 }, { "epoch": 3.7459924887789686, "grad_norm": 0.14425390000273675, "learning_rate": 9.612588100208679e-06, "loss": 0.4197, "num_tokens": 7823034397.0, "step": 10226 }, { "epoch": 3.7463588898048914, "grad_norm": 0.12688250033229498, "learning_rate": 9.609488501503689e-06, "loss": 0.4096, "num_tokens": 7823709683.0, "step": 10227 }, { "epoch": 3.746725290830814, "grad_norm": 0.14708528549010785, "learning_rate": 9.606389600936403e-06, "loss": 0.3677, "num_tokens": 7824587349.0, "step": 10228 }, { "epoch": 3.7470916918567374, "grad_norm": 0.129576780219711, "learning_rate": 9.603291398681405e-06, "loss": 0.416, "num_tokens": 7825402743.0, "step": 10229 }, { "epoch": 3.74745809288266, "grad_norm": 0.14022350074760379, "learning_rate": 9.60019389491328e-06, "loss": 0.4122, "num_tokens": 7826165168.0, "step": 10230 }, { "epoch": 3.747824493908583, "grad_norm": 0.14162200272140105, "learning_rate": 9.59709708980655e-06, "loss": 0.3872, "num_tokens": 7826850265.0, "step": 10231 }, { "epoch": 3.7481908949345057, "grad_norm": 0.14370271826858738, "learning_rate": 9.594000983535703e-06, "loss": 0.3954, "num_tokens": 7827611566.0, "step": 10232 }, { "epoch": 3.7485572959604285, "grad_norm": 0.14165524891186965, "learning_rate": 9.590905576275186e-06, "loss": 0.3948, "num_tokens": 7828300224.0, "step": 10233 }, { "epoch": 3.7489236969863518, "grad_norm": 0.15005233732362966, "learning_rate": 9.587810868199407e-06, "loss": 0.4125, "num_tokens": 7829141670.0, "step": 10234 }, { "epoch": 3.7492900980122745, "grad_norm": 0.12998555627017572, "learning_rate": 9.584716859482743e-06, "loss": 0.386, "num_tokens": 7829919703.0, "step": 10235 }, { "epoch": 3.7496564990381973, "grad_norm": 0.13500892857576344, "learning_rate": 9.581623550299518e-06, "loss": 0.3802, "num_tokens": 7830756535.0, "step": 10236 }, { "epoch": 3.75002290006412, "grad_norm": 0.1283044656365714, "learning_rate": 9.578530940824016e-06, "loss": 0.4108, "num_tokens": 7831541664.0, "step": 10237 }, { "epoch": 3.750389301090043, "grad_norm": 0.1370411780106983, "learning_rate": 9.575439031230511e-06, "loss": 0.4168, "num_tokens": 7832258628.0, "step": 10238 }, { "epoch": 3.750755702115966, "grad_norm": 0.1401426420502573, "learning_rate": 9.5723478216932e-06, "loss": 0.429, "num_tokens": 7832995567.0, "step": 10239 }, { "epoch": 3.751122103141889, "grad_norm": 0.1517851609278425, "learning_rate": 9.569257312386246e-06, "loss": 0.3971, "num_tokens": 7833661408.0, "step": 10240 }, { "epoch": 3.7514885041678117, "grad_norm": 0.14754467878007535, "learning_rate": 9.566167503483807e-06, "loss": 0.4042, "num_tokens": 7834441691.0, "step": 10241 }, { "epoch": 3.7518549051937344, "grad_norm": 0.1387244292523524, "learning_rate": 9.56307839515996e-06, "loss": 0.4273, "num_tokens": 7835237628.0, "step": 10242 }, { "epoch": 3.752221306219657, "grad_norm": 0.13141991382658638, "learning_rate": 9.559989987588758e-06, "loss": 0.3892, "num_tokens": 7835924589.0, "step": 10243 }, { "epoch": 3.7525877072455804, "grad_norm": 0.1419008138594839, "learning_rate": 9.556902280944222e-06, "loss": 0.4009, "num_tokens": 7836680794.0, "step": 10244 }, { "epoch": 3.752954108271503, "grad_norm": 0.12907063753431755, "learning_rate": 9.553815275400336e-06, "loss": 0.3937, "num_tokens": 7837546860.0, "step": 10245 }, { "epoch": 3.753320509297426, "grad_norm": 0.13654020584864326, "learning_rate": 9.550728971131014e-06, "loss": 0.4128, "num_tokens": 7838358782.0, "step": 10246 }, { "epoch": 3.753686910323349, "grad_norm": 0.12970311033240303, "learning_rate": 9.54764336831017e-06, "loss": 0.424, "num_tokens": 7839120103.0, "step": 10247 }, { "epoch": 3.7540533113492716, "grad_norm": 0.14127964407233173, "learning_rate": 9.544558467111657e-06, "loss": 0.4211, "num_tokens": 7839972501.0, "step": 10248 }, { "epoch": 3.754419712375195, "grad_norm": 0.13625534755844773, "learning_rate": 9.54147426770929e-06, "loss": 0.3976, "num_tokens": 7840655536.0, "step": 10249 }, { "epoch": 3.7547861134011176, "grad_norm": 0.14728533912516098, "learning_rate": 9.538390770276847e-06, "loss": 0.4325, "num_tokens": 7841427693.0, "step": 10250 }, { "epoch": 3.7551525144270403, "grad_norm": 0.13072850624075397, "learning_rate": 9.53530797498807e-06, "loss": 0.3716, "num_tokens": 7842241492.0, "step": 10251 }, { "epoch": 3.755518915452963, "grad_norm": 0.1334847016789661, "learning_rate": 9.532225882016653e-06, "loss": 0.3954, "num_tokens": 7842978857.0, "step": 10252 }, { "epoch": 3.755885316478886, "grad_norm": 0.15571033129430734, "learning_rate": 9.529144491536255e-06, "loss": 0.4071, "num_tokens": 7843761336.0, "step": 10253 }, { "epoch": 3.756251717504809, "grad_norm": 0.12345804343537148, "learning_rate": 9.526063803720494e-06, "loss": 0.4146, "num_tokens": 7844575803.0, "step": 10254 }, { "epoch": 3.756618118530732, "grad_norm": 0.14307928475890538, "learning_rate": 9.522983818742965e-06, "loss": 0.4166, "num_tokens": 7845362209.0, "step": 10255 }, { "epoch": 3.7569845195566547, "grad_norm": 0.14005985343701627, "learning_rate": 9.519904536777184e-06, "loss": 0.3879, "num_tokens": 7846162033.0, "step": 10256 }, { "epoch": 3.7573509205825775, "grad_norm": 0.131049118385653, "learning_rate": 9.51682595799667e-06, "loss": 0.4256, "num_tokens": 7846893890.0, "step": 10257 }, { "epoch": 3.7577173216085002, "grad_norm": 0.1456436021999055, "learning_rate": 9.513748082574885e-06, "loss": 0.4193, "num_tokens": 7847686974.0, "step": 10258 }, { "epoch": 3.7580837226344235, "grad_norm": 0.1345295423610916, "learning_rate": 9.510670910685232e-06, "loss": 0.3877, "num_tokens": 7848391972.0, "step": 10259 }, { "epoch": 3.7584501236603463, "grad_norm": 0.13304391842482385, "learning_rate": 9.50759444250111e-06, "loss": 0.4044, "num_tokens": 7849179416.0, "step": 10260 }, { "epoch": 3.758816524686269, "grad_norm": 0.14102323609222994, "learning_rate": 9.504518678195858e-06, "loss": 0.4147, "num_tokens": 7849942518.0, "step": 10261 }, { "epoch": 3.7591829257121923, "grad_norm": 0.13883044789825758, "learning_rate": 9.501443617942774e-06, "loss": 0.4156, "num_tokens": 7850670243.0, "step": 10262 }, { "epoch": 3.7595493267381146, "grad_norm": 0.13946043732190608, "learning_rate": 9.498369261915126e-06, "loss": 0.4249, "num_tokens": 7851432073.0, "step": 10263 }, { "epoch": 3.759915727764038, "grad_norm": 0.13550939253671085, "learning_rate": 9.495295610286132e-06, "loss": 0.4026, "num_tokens": 7852270445.0, "step": 10264 }, { "epoch": 3.7602821287899606, "grad_norm": 0.14419719676884263, "learning_rate": 9.49222266322898e-06, "loss": 0.3994, "num_tokens": 7852962686.0, "step": 10265 }, { "epoch": 3.7606485298158834, "grad_norm": 0.14561724488453015, "learning_rate": 9.489150420916811e-06, "loss": 0.4332, "num_tokens": 7853802427.0, "step": 10266 }, { "epoch": 3.7610149308418066, "grad_norm": 0.13012003247536, "learning_rate": 9.486078883522725e-06, "loss": 0.4211, "num_tokens": 7854627984.0, "step": 10267 }, { "epoch": 3.7613813318677294, "grad_norm": 0.14000916217965342, "learning_rate": 9.483008051219804e-06, "loss": 0.4074, "num_tokens": 7855323819.0, "step": 10268 }, { "epoch": 3.761747732893652, "grad_norm": 0.1463115674609788, "learning_rate": 9.479937924181052e-06, "loss": 0.3949, "num_tokens": 7856158714.0, "step": 10269 }, { "epoch": 3.762114133919575, "grad_norm": 0.12664456823734271, "learning_rate": 9.476868502579455e-06, "loss": 0.3918, "num_tokens": 7856770435.0, "step": 10270 }, { "epoch": 3.7624805349454977, "grad_norm": 0.15240243695983513, "learning_rate": 9.473799786587974e-06, "loss": 0.4198, "num_tokens": 7857556889.0, "step": 10271 }, { "epoch": 3.762846935971421, "grad_norm": 0.12931858416377176, "learning_rate": 9.470731776379512e-06, "loss": 0.3756, "num_tokens": 7858364313.0, "step": 10272 }, { "epoch": 3.7632133369973437, "grad_norm": 0.13701202663788817, "learning_rate": 9.467664472126913e-06, "loss": 0.458, "num_tokens": 7859103391.0, "step": 10273 }, { "epoch": 3.7635797380232665, "grad_norm": 0.1365705489844956, "learning_rate": 9.464597874003026e-06, "loss": 0.4023, "num_tokens": 7859955238.0, "step": 10274 }, { "epoch": 3.7639461390491893, "grad_norm": 0.135067939298895, "learning_rate": 9.461531982180627e-06, "loss": 0.4125, "num_tokens": 7860754798.0, "step": 10275 }, { "epoch": 3.764312540075112, "grad_norm": 0.13213093106527898, "learning_rate": 9.458466796832467e-06, "loss": 0.4203, "num_tokens": 7861593074.0, "step": 10276 }, { "epoch": 3.7646789411010353, "grad_norm": 0.12954646317278132, "learning_rate": 9.455402318131246e-06, "loss": 0.4127, "num_tokens": 7862331034.0, "step": 10277 }, { "epoch": 3.765045342126958, "grad_norm": 0.13733474021263756, "learning_rate": 9.452338546249638e-06, "loss": 0.4397, "num_tokens": 7863074854.0, "step": 10278 }, { "epoch": 3.765411743152881, "grad_norm": 0.15277212626255116, "learning_rate": 9.449275481360263e-06, "loss": 0.3998, "num_tokens": 7863830932.0, "step": 10279 }, { "epoch": 3.7657781441788036, "grad_norm": 0.1299160911396624, "learning_rate": 9.446213123635714e-06, "loss": 0.4244, "num_tokens": 7864556176.0, "step": 10280 }, { "epoch": 3.7661445452047264, "grad_norm": 0.13992834291905304, "learning_rate": 9.443151473248535e-06, "loss": 0.3852, "num_tokens": 7865347883.0, "step": 10281 }, { "epoch": 3.7665109462306496, "grad_norm": 0.15482553813474328, "learning_rate": 9.440090530371232e-06, "loss": 0.4181, "num_tokens": 7866030757.0, "step": 10282 }, { "epoch": 3.7668773472565724, "grad_norm": 0.13824741657625, "learning_rate": 9.437030295176274e-06, "loss": 0.4237, "num_tokens": 7866884712.0, "step": 10283 }, { "epoch": 3.767243748282495, "grad_norm": 0.13467334372557346, "learning_rate": 9.433970767836084e-06, "loss": 0.3919, "num_tokens": 7867658692.0, "step": 10284 }, { "epoch": 3.767610149308418, "grad_norm": 0.14209550451490638, "learning_rate": 9.430911948523066e-06, "loss": 0.3946, "num_tokens": 7868454289.0, "step": 10285 }, { "epoch": 3.7679765503343408, "grad_norm": 0.13832899637016, "learning_rate": 9.427853837409543e-06, "loss": 0.3821, "num_tokens": 7869122439.0, "step": 10286 }, { "epoch": 3.768342951360264, "grad_norm": 0.1356937620438294, "learning_rate": 9.424796434667843e-06, "loss": 0.4616, "num_tokens": 7869816503.0, "step": 10287 }, { "epoch": 3.7687093523861868, "grad_norm": 0.14825221174863437, "learning_rate": 9.421739740470224e-06, "loss": 0.3632, "num_tokens": 7870588108.0, "step": 10288 }, { "epoch": 3.7690757534121095, "grad_norm": 0.138265641569056, "learning_rate": 9.41868375498892e-06, "loss": 0.4269, "num_tokens": 7871458383.0, "step": 10289 }, { "epoch": 3.7694421544380323, "grad_norm": 0.13526623041400637, "learning_rate": 9.415628478396116e-06, "loss": 0.4033, "num_tokens": 7872200141.0, "step": 10290 }, { "epoch": 3.769808555463955, "grad_norm": 0.13198357139754982, "learning_rate": 9.412573910863959e-06, "loss": 0.4479, "num_tokens": 7872955507.0, "step": 10291 }, { "epoch": 3.7701749564898783, "grad_norm": 0.14218205981550405, "learning_rate": 9.409520052564559e-06, "loss": 0.4028, "num_tokens": 7873815002.0, "step": 10292 }, { "epoch": 3.770541357515801, "grad_norm": 0.12173551100869053, "learning_rate": 9.406466903669984e-06, "loss": 0.3731, "num_tokens": 7874582899.0, "step": 10293 }, { "epoch": 3.770907758541724, "grad_norm": 0.13363174905127817, "learning_rate": 9.403414464352254e-06, "loss": 0.3701, "num_tokens": 7875401001.0, "step": 10294 }, { "epoch": 3.7712741595676467, "grad_norm": 0.13227033476714475, "learning_rate": 9.40036273478338e-06, "loss": 0.3869, "num_tokens": 7876262473.0, "step": 10295 }, { "epoch": 3.7716405605935694, "grad_norm": 0.1362602801730174, "learning_rate": 9.397311715135286e-06, "loss": 0.4335, "num_tokens": 7877067439.0, "step": 10296 }, { "epoch": 3.7720069616194927, "grad_norm": 0.13420048945839247, "learning_rate": 9.394261405579886e-06, "loss": 0.4334, "num_tokens": 7877785720.0, "step": 10297 }, { "epoch": 3.7723733626454155, "grad_norm": 0.14415332841929412, "learning_rate": 9.391211806289064e-06, "loss": 0.3928, "num_tokens": 7878563820.0, "step": 10298 }, { "epoch": 3.7727397636713382, "grad_norm": 0.13717776955698305, "learning_rate": 9.38816291743463e-06, "loss": 0.3846, "num_tokens": 7879381361.0, "step": 10299 }, { "epoch": 3.773106164697261, "grad_norm": 0.1300081643866248, "learning_rate": 9.385114739188375e-06, "loss": 0.3933, "num_tokens": 7880198029.0, "step": 10300 }, { "epoch": 3.773472565723184, "grad_norm": 0.1308952572455246, "learning_rate": 9.382067271722053e-06, "loss": 0.4186, "num_tokens": 7880906722.0, "step": 10301 }, { "epoch": 3.773838966749107, "grad_norm": 0.14463139002123873, "learning_rate": 9.379020515207378e-06, "loss": 0.4288, "num_tokens": 7881645112.0, "step": 10302 }, { "epoch": 3.77420536777503, "grad_norm": 0.14643463075116633, "learning_rate": 9.375974469815998e-06, "loss": 0.397, "num_tokens": 7882363192.0, "step": 10303 }, { "epoch": 3.7745717688009526, "grad_norm": 0.13833923757638, "learning_rate": 9.372929135719558e-06, "loss": 0.3832, "num_tokens": 7883141967.0, "step": 10304 }, { "epoch": 3.7749381698268754, "grad_norm": 0.13112541151425103, "learning_rate": 9.369884513089641e-06, "loss": 0.4181, "num_tokens": 7883958600.0, "step": 10305 }, { "epoch": 3.775304570852798, "grad_norm": 0.13655542683841526, "learning_rate": 9.366840602097796e-06, "loss": 0.4186, "num_tokens": 7884696227.0, "step": 10306 }, { "epoch": 3.7756709718787214, "grad_norm": 0.13971949307620052, "learning_rate": 9.363797402915528e-06, "loss": 0.4099, "num_tokens": 7885549915.0, "step": 10307 }, { "epoch": 3.776037372904644, "grad_norm": 0.12997295267761066, "learning_rate": 9.360754915714308e-06, "loss": 0.3937, "num_tokens": 7886377611.0, "step": 10308 }, { "epoch": 3.776403773930567, "grad_norm": 0.13812062970613295, "learning_rate": 9.357713140665561e-06, "loss": 0.4062, "num_tokens": 7887078374.0, "step": 10309 }, { "epoch": 3.77677017495649, "grad_norm": 0.13923428385825676, "learning_rate": 9.354672077940676e-06, "loss": 0.3717, "num_tokens": 7887795590.0, "step": 10310 }, { "epoch": 3.7771365759824125, "grad_norm": 0.14942188252792163, "learning_rate": 9.351631727710994e-06, "loss": 0.3919, "num_tokens": 7888535995.0, "step": 10311 }, { "epoch": 3.7775029770083357, "grad_norm": 0.1386575140461616, "learning_rate": 9.34859209014784e-06, "loss": 0.4079, "num_tokens": 7889313957.0, "step": 10312 }, { "epoch": 3.7778693780342585, "grad_norm": 0.14163730358097837, "learning_rate": 9.345553165422455e-06, "loss": 0.4078, "num_tokens": 7890074753.0, "step": 10313 }, { "epoch": 3.7782357790601813, "grad_norm": 0.13342116626528802, "learning_rate": 9.342514953706087e-06, "loss": 0.3902, "num_tokens": 7890841787.0, "step": 10314 }, { "epoch": 3.7786021800861045, "grad_norm": 0.13896320044674848, "learning_rate": 9.33947745516992e-06, "loss": 0.4229, "num_tokens": 7891596672.0, "step": 10315 }, { "epoch": 3.7789685811120273, "grad_norm": 0.14503001472105692, "learning_rate": 9.336440669985086e-06, "loss": 0.4182, "num_tokens": 7892271033.0, "step": 10316 }, { "epoch": 3.77933498213795, "grad_norm": 0.15185153907256002, "learning_rate": 9.333404598322704e-06, "loss": 0.4122, "num_tokens": 7893142226.0, "step": 10317 }, { "epoch": 3.779701383163873, "grad_norm": 0.13938084067441667, "learning_rate": 9.330369240353839e-06, "loss": 0.421, "num_tokens": 7893917314.0, "step": 10318 }, { "epoch": 3.7800677841897956, "grad_norm": 0.1340513751987269, "learning_rate": 9.327334596249515e-06, "loss": 0.3939, "num_tokens": 7894747573.0, "step": 10319 }, { "epoch": 3.780434185215719, "grad_norm": 0.1268501146796633, "learning_rate": 9.324300666180717e-06, "loss": 0.3765, "num_tokens": 7895598904.0, "step": 10320 }, { "epoch": 3.7808005862416416, "grad_norm": 0.14917040044257956, "learning_rate": 9.32126745031839e-06, "loss": 0.4055, "num_tokens": 7896390810.0, "step": 10321 }, { "epoch": 3.7811669872675644, "grad_norm": 0.12513063620070491, "learning_rate": 9.318234948833439e-06, "loss": 0.4128, "num_tokens": 7897116782.0, "step": 10322 }, { "epoch": 3.781533388293487, "grad_norm": 0.14053895105050862, "learning_rate": 9.315203161896733e-06, "loss": 0.383, "num_tokens": 7897926373.0, "step": 10323 }, { "epoch": 3.78189978931941, "grad_norm": 0.13144322778027087, "learning_rate": 9.312172089679085e-06, "loss": 0.42, "num_tokens": 7898657830.0, "step": 10324 }, { "epoch": 3.782266190345333, "grad_norm": 0.14359885587225443, "learning_rate": 9.3091417323513e-06, "loss": 0.405, "num_tokens": 7899393340.0, "step": 10325 }, { "epoch": 3.782632591371256, "grad_norm": 0.15288590473833732, "learning_rate": 9.306112090084105e-06, "loss": 0.4071, "num_tokens": 7900101683.0, "step": 10326 }, { "epoch": 3.7829989923971787, "grad_norm": 0.13864140109839893, "learning_rate": 9.303083163048204e-06, "loss": 0.4019, "num_tokens": 7900889738.0, "step": 10327 }, { "epoch": 3.7833653934231015, "grad_norm": 0.13857202404119484, "learning_rate": 9.300054951414271e-06, "loss": 0.4535, "num_tokens": 7901748407.0, "step": 10328 }, { "epoch": 3.7837317944490243, "grad_norm": 0.13750625345846937, "learning_rate": 9.297027455352928e-06, "loss": 0.4125, "num_tokens": 7902504875.0, "step": 10329 }, { "epoch": 3.7840981954749475, "grad_norm": 0.1554563606434728, "learning_rate": 9.294000675034743e-06, "loss": 0.4427, "num_tokens": 7903289927.0, "step": 10330 }, { "epoch": 3.7844645965008703, "grad_norm": 0.14205130533958626, "learning_rate": 9.290974610630275e-06, "loss": 0.4127, "num_tokens": 7904034877.0, "step": 10331 }, { "epoch": 3.784830997526793, "grad_norm": 0.134279079197211, "learning_rate": 9.28794926231002e-06, "loss": 0.4111, "num_tokens": 7904805874.0, "step": 10332 }, { "epoch": 3.785197398552716, "grad_norm": 0.13782051380076998, "learning_rate": 9.28492463024444e-06, "loss": 0.3982, "num_tokens": 7905562349.0, "step": 10333 }, { "epoch": 3.7855637995786386, "grad_norm": 0.14785292888050142, "learning_rate": 9.281900714603956e-06, "loss": 0.3979, "num_tokens": 7906292007.0, "step": 10334 }, { "epoch": 3.785930200604562, "grad_norm": 0.1419116865893777, "learning_rate": 9.278877515558949e-06, "loss": 0.3709, "num_tokens": 7907107683.0, "step": 10335 }, { "epoch": 3.7862966016304846, "grad_norm": 0.12462869232812604, "learning_rate": 9.275855033279763e-06, "loss": 0.3841, "num_tokens": 7907875436.0, "step": 10336 }, { "epoch": 3.7866630026564074, "grad_norm": 0.13634004566390004, "learning_rate": 9.272833267936693e-06, "loss": 0.3991, "num_tokens": 7908690819.0, "step": 10337 }, { "epoch": 3.78702940368233, "grad_norm": 0.13239151772494034, "learning_rate": 9.269812219700004e-06, "loss": 0.4117, "num_tokens": 7909562729.0, "step": 10338 }, { "epoch": 3.787395804708253, "grad_norm": 0.13860913000794736, "learning_rate": 9.266791888739914e-06, "loss": 0.4114, "num_tokens": 7910308700.0, "step": 10339 }, { "epoch": 3.787762205734176, "grad_norm": 0.14573156309298232, "learning_rate": 9.263772275226593e-06, "loss": 0.3865, "num_tokens": 7911127021.0, "step": 10340 }, { "epoch": 3.788128606760099, "grad_norm": 0.1325343646118626, "learning_rate": 9.260753379330196e-06, "loss": 0.4136, "num_tokens": 7911784753.0, "step": 10341 }, { "epoch": 3.7884950077860218, "grad_norm": 0.1403603642269907, "learning_rate": 9.257735201220819e-06, "loss": 0.4098, "num_tokens": 7912548532.0, "step": 10342 }, { "epoch": 3.7888614088119446, "grad_norm": 0.13936394202274416, "learning_rate": 9.254717741068503e-06, "loss": 0.4026, "num_tokens": 7913366777.0, "step": 10343 }, { "epoch": 3.7892278098378673, "grad_norm": 0.13135838777331013, "learning_rate": 9.251700999043284e-06, "loss": 0.3955, "num_tokens": 7914169456.0, "step": 10344 }, { "epoch": 3.7895942108637906, "grad_norm": 0.14306029861500918, "learning_rate": 9.24868497531513e-06, "loss": 0.4467, "num_tokens": 7914961868.0, "step": 10345 }, { "epoch": 3.7899606118897133, "grad_norm": 0.14330115836424107, "learning_rate": 9.245669670053979e-06, "loss": 0.4144, "num_tokens": 7915599251.0, "step": 10346 }, { "epoch": 3.790327012915636, "grad_norm": 0.14933362442130108, "learning_rate": 9.242655083429729e-06, "loss": 0.3868, "num_tokens": 7916274849.0, "step": 10347 }, { "epoch": 3.790693413941559, "grad_norm": 0.1410626126046578, "learning_rate": 9.239641215612234e-06, "loss": 0.4096, "num_tokens": 7917045390.0, "step": 10348 }, { "epoch": 3.7910598149674817, "grad_norm": 0.13328712608067134, "learning_rate": 9.236628066771308e-06, "loss": 0.3825, "num_tokens": 7917936728.0, "step": 10349 }, { "epoch": 3.791426215993405, "grad_norm": 0.13276221533315102, "learning_rate": 9.233615637076726e-06, "loss": 0.4115, "num_tokens": 7918724011.0, "step": 10350 }, { "epoch": 3.7917926170193277, "grad_norm": 0.14375706284153592, "learning_rate": 9.230603926698216e-06, "loss": 0.4242, "num_tokens": 7919534691.0, "step": 10351 }, { "epoch": 3.7921590180452505, "grad_norm": 0.13020235232743232, "learning_rate": 9.227592935805491e-06, "loss": 0.3817, "num_tokens": 7920220213.0, "step": 10352 }, { "epoch": 3.7925254190711732, "grad_norm": 0.13991051963328002, "learning_rate": 9.224582664568183e-06, "loss": 0.4016, "num_tokens": 7921039102.0, "step": 10353 }, { "epoch": 3.792891820097096, "grad_norm": 0.13895511849070585, "learning_rate": 9.22157311315591e-06, "loss": 0.4145, "num_tokens": 7921893771.0, "step": 10354 }, { "epoch": 3.7932582211230192, "grad_norm": 0.1295408008094258, "learning_rate": 9.218564281738252e-06, "loss": 0.4075, "num_tokens": 7922699072.0, "step": 10355 }, { "epoch": 3.793624622148942, "grad_norm": 0.13742710930627108, "learning_rate": 9.215556170484732e-06, "loss": 0.4293, "num_tokens": 7923491987.0, "step": 10356 }, { "epoch": 3.793991023174865, "grad_norm": 0.13268995081046744, "learning_rate": 9.212548779564837e-06, "loss": 0.4366, "num_tokens": 7924380169.0, "step": 10357 }, { "epoch": 3.794357424200788, "grad_norm": 0.13044543773943013, "learning_rate": 9.209542109148026e-06, "loss": 0.3925, "num_tokens": 7925139120.0, "step": 10358 }, { "epoch": 3.7947238252267104, "grad_norm": 0.129217095251567, "learning_rate": 9.206536159403705e-06, "loss": 0.3954, "num_tokens": 7925936530.0, "step": 10359 }, { "epoch": 3.7950902262526336, "grad_norm": 0.13709029500130582, "learning_rate": 9.203530930501246e-06, "loss": 0.4214, "num_tokens": 7926791147.0, "step": 10360 }, { "epoch": 3.7954566272785564, "grad_norm": 0.12886572528793627, "learning_rate": 9.200526422609973e-06, "loss": 0.4149, "num_tokens": 7927550930.0, "step": 10361 }, { "epoch": 3.795823028304479, "grad_norm": 0.13950653004968483, "learning_rate": 9.197522635899175e-06, "loss": 0.4212, "num_tokens": 7928267165.0, "step": 10362 }, { "epoch": 3.7961894293304024, "grad_norm": 0.14595619766835274, "learning_rate": 9.194519570538098e-06, "loss": 0.4288, "num_tokens": 7929050594.0, "step": 10363 }, { "epoch": 3.796555830356325, "grad_norm": 0.12644022545100422, "learning_rate": 9.19151722669595e-06, "loss": 0.3969, "num_tokens": 7929763816.0, "step": 10364 }, { "epoch": 3.796922231382248, "grad_norm": 0.14393435854553946, "learning_rate": 9.188515604541895e-06, "loss": 0.4043, "num_tokens": 7930584506.0, "step": 10365 }, { "epoch": 3.7972886324081707, "grad_norm": 0.12564393962278142, "learning_rate": 9.185514704245061e-06, "loss": 0.4105, "num_tokens": 7931483297.0, "step": 10366 }, { "epoch": 3.7976550334340935, "grad_norm": 0.13199220404281034, "learning_rate": 9.18251452597453e-06, "loss": 0.4062, "num_tokens": 7932187801.0, "step": 10367 }, { "epoch": 3.7980214344600167, "grad_norm": 0.1412712827924447, "learning_rate": 9.17951506989934e-06, "loss": 0.4049, "num_tokens": 7932911298.0, "step": 10368 }, { "epoch": 3.7983878354859395, "grad_norm": 0.1350362288662409, "learning_rate": 9.17651633618851e-06, "loss": 0.4368, "num_tokens": 7933807827.0, "step": 10369 }, { "epoch": 3.7987542365118623, "grad_norm": 0.1428258049079061, "learning_rate": 9.173518325010982e-06, "loss": 0.428, "num_tokens": 7934600399.0, "step": 10370 }, { "epoch": 3.799120637537785, "grad_norm": 0.13203295590850508, "learning_rate": 9.170521036535694e-06, "loss": 0.4259, "num_tokens": 7935427959.0, "step": 10371 }, { "epoch": 3.799487038563708, "grad_norm": 0.13796674749896004, "learning_rate": 9.167524470931525e-06, "loss": 0.3928, "num_tokens": 7936265605.0, "step": 10372 }, { "epoch": 3.799853439589631, "grad_norm": 0.1284466182098457, "learning_rate": 9.1645286283673e-06, "loss": 0.4126, "num_tokens": 7937067339.0, "step": 10373 }, { "epoch": 3.800219840615554, "grad_norm": 0.13479807914131442, "learning_rate": 9.161533509011835e-06, "loss": 0.3803, "num_tokens": 7937916959.0, "step": 10374 }, { "epoch": 3.8005862416414766, "grad_norm": 0.1379129895483214, "learning_rate": 9.158539113033883e-06, "loss": 0.4002, "num_tokens": 7938674803.0, "step": 10375 }, { "epoch": 3.8009526426673994, "grad_norm": 0.13507385216452894, "learning_rate": 9.155545440602162e-06, "loss": 0.4245, "num_tokens": 7939558293.0, "step": 10376 }, { "epoch": 3.801319043693322, "grad_norm": 0.13171967484380434, "learning_rate": 9.152552491885348e-06, "loss": 0.4116, "num_tokens": 7940161103.0, "step": 10377 }, { "epoch": 3.8016854447192454, "grad_norm": 0.15083276680930016, "learning_rate": 9.149560267052079e-06, "loss": 0.4061, "num_tokens": 7940959352.0, "step": 10378 }, { "epoch": 3.802051845745168, "grad_norm": 0.13384968146333576, "learning_rate": 9.146568766270948e-06, "loss": 0.3835, "num_tokens": 7941747884.0, "step": 10379 }, { "epoch": 3.802418246771091, "grad_norm": 0.13305746508047434, "learning_rate": 9.143577989710511e-06, "loss": 0.4337, "num_tokens": 7942476857.0, "step": 10380 }, { "epoch": 3.8027846477970138, "grad_norm": 0.13686514012351034, "learning_rate": 9.14058793753928e-06, "loss": 0.3959, "num_tokens": 7943220115.0, "step": 10381 }, { "epoch": 3.8031510488229365, "grad_norm": 0.14582610588151348, "learning_rate": 9.137598609925737e-06, "loss": 0.3862, "num_tokens": 7943912955.0, "step": 10382 }, { "epoch": 3.8035174498488598, "grad_norm": 0.1388524362686723, "learning_rate": 9.134610007038306e-06, "loss": 0.416, "num_tokens": 7944668354.0, "step": 10383 }, { "epoch": 3.8038838508747825, "grad_norm": 0.140568382097705, "learning_rate": 9.131622129045374e-06, "loss": 0.4391, "num_tokens": 7945495655.0, "step": 10384 }, { "epoch": 3.8042502519007053, "grad_norm": 0.14932740174580955, "learning_rate": 9.128634976115302e-06, "loss": 0.3971, "num_tokens": 7946137115.0, "step": 10385 }, { "epoch": 3.804616652926628, "grad_norm": 0.14551665739829223, "learning_rate": 9.125648548416402e-06, "loss": 0.3958, "num_tokens": 7946881953.0, "step": 10386 }, { "epoch": 3.804983053952551, "grad_norm": 0.1392528119221447, "learning_rate": 9.122662846116926e-06, "loss": 0.4392, "num_tokens": 7947532256.0, "step": 10387 }, { "epoch": 3.805349454978474, "grad_norm": 0.15351998468042216, "learning_rate": 9.119677869385116e-06, "loss": 0.3781, "num_tokens": 7948185656.0, "step": 10388 }, { "epoch": 3.805715856004397, "grad_norm": 0.14786350691669767, "learning_rate": 9.116693618389156e-06, "loss": 0.3955, "num_tokens": 7949032877.0, "step": 10389 }, { "epoch": 3.8060822570303197, "grad_norm": 0.13018084377884462, "learning_rate": 9.113710093297194e-06, "loss": 0.4049, "num_tokens": 7949870290.0, "step": 10390 }, { "epoch": 3.8064486580562424, "grad_norm": 0.1349617301495009, "learning_rate": 9.110727294277331e-06, "loss": 0.3875, "num_tokens": 7950585776.0, "step": 10391 }, { "epoch": 3.806815059082165, "grad_norm": 0.1286539885744609, "learning_rate": 9.107745221497636e-06, "loss": 0.4138, "num_tokens": 7951395747.0, "step": 10392 }, { "epoch": 3.8071814601080884, "grad_norm": 0.1399864446063129, "learning_rate": 9.104763875126128e-06, "loss": 0.4017, "num_tokens": 7952145061.0, "step": 10393 }, { "epoch": 3.8075478611340112, "grad_norm": 0.13341991742320228, "learning_rate": 9.101783255330794e-06, "loss": 0.4134, "num_tokens": 7953027555.0, "step": 10394 }, { "epoch": 3.807914262159934, "grad_norm": 0.13482495771961878, "learning_rate": 9.098803362279571e-06, "loss": 0.4085, "num_tokens": 7953722721.0, "step": 10395 }, { "epoch": 3.808280663185857, "grad_norm": 0.14836168773863728, "learning_rate": 9.095824196140363e-06, "loss": 0.3944, "num_tokens": 7954478654.0, "step": 10396 }, { "epoch": 3.8086470642117796, "grad_norm": 0.13983826999570942, "learning_rate": 9.092845757081023e-06, "loss": 0.4124, "num_tokens": 7955119534.0, "step": 10397 }, { "epoch": 3.809013465237703, "grad_norm": 0.14620436152471342, "learning_rate": 9.089868045269383e-06, "loss": 0.3984, "num_tokens": 7955724384.0, "step": 10398 }, { "epoch": 3.8093798662636256, "grad_norm": 0.14604878297966062, "learning_rate": 9.086891060873217e-06, "loss": 0.3989, "num_tokens": 7956479089.0, "step": 10399 }, { "epoch": 3.8097462672895483, "grad_norm": 0.147493437592484, "learning_rate": 9.083914804060248e-06, "loss": 0.3933, "num_tokens": 7957230951.0, "step": 10400 }, { "epoch": 3.810112668315471, "grad_norm": 0.13115942590996607, "learning_rate": 9.080939274998187e-06, "loss": 0.426, "num_tokens": 7958053731.0, "step": 10401 }, { "epoch": 3.810479069341394, "grad_norm": 0.14321690114382868, "learning_rate": 9.077964473854686e-06, "loss": 0.4126, "num_tokens": 7958810070.0, "step": 10402 }, { "epoch": 3.810845470367317, "grad_norm": 0.1418189689763089, "learning_rate": 9.074990400797357e-06, "loss": 0.4205, "num_tokens": 7959552892.0, "step": 10403 }, { "epoch": 3.81121187139324, "grad_norm": 0.13892387234012044, "learning_rate": 9.07201705599377e-06, "loss": 0.4158, "num_tokens": 7960204012.0, "step": 10404 }, { "epoch": 3.8115782724191627, "grad_norm": 0.14671129600760052, "learning_rate": 9.069044439611461e-06, "loss": 0.3922, "num_tokens": 7961061519.0, "step": 10405 }, { "epoch": 3.811944673445086, "grad_norm": 0.1319104368844703, "learning_rate": 9.066072551817917e-06, "loss": 0.4048, "num_tokens": 7961820032.0, "step": 10406 }, { "epoch": 3.8123110744710083, "grad_norm": 0.14417801114231835, "learning_rate": 9.063101392780593e-06, "loss": 0.4194, "num_tokens": 7962564838.0, "step": 10407 }, { "epoch": 3.8126774754969315, "grad_norm": 0.130448679195409, "learning_rate": 9.060130962666885e-06, "loss": 0.3968, "num_tokens": 7963295253.0, "step": 10408 }, { "epoch": 3.8130438765228543, "grad_norm": 0.13584427118103304, "learning_rate": 9.057161261644183e-06, "loss": 0.4121, "num_tokens": 7964065545.0, "step": 10409 }, { "epoch": 3.813410277548777, "grad_norm": 0.1404196442336681, "learning_rate": 9.054192289879796e-06, "loss": 0.4448, "num_tokens": 7964751314.0, "step": 10410 }, { "epoch": 3.8137766785747003, "grad_norm": 0.1425746681289402, "learning_rate": 9.051224047541005e-06, "loss": 0.4216, "num_tokens": 7965507239.0, "step": 10411 }, { "epoch": 3.814143079600623, "grad_norm": 0.14210373785101346, "learning_rate": 9.048256534795077e-06, "loss": 0.3729, "num_tokens": 7966327515.0, "step": 10412 }, { "epoch": 3.814509480626546, "grad_norm": 0.13807558549179896, "learning_rate": 9.045289751809195e-06, "loss": 0.409, "num_tokens": 7967074149.0, "step": 10413 }, { "epoch": 3.8148758816524686, "grad_norm": 0.13847230523042697, "learning_rate": 9.042323698750519e-06, "loss": 0.3987, "num_tokens": 7967892694.0, "step": 10414 }, { "epoch": 3.8152422826783914, "grad_norm": 0.1297568900021438, "learning_rate": 9.039358375786186e-06, "loss": 0.41, "num_tokens": 7968692940.0, "step": 10415 }, { "epoch": 3.8156086837043146, "grad_norm": 0.13973018492320255, "learning_rate": 9.036393783083267e-06, "loss": 0.4004, "num_tokens": 7969357556.0, "step": 10416 }, { "epoch": 3.8159750847302374, "grad_norm": 0.14661554242039054, "learning_rate": 9.0334299208088e-06, "loss": 0.3798, "num_tokens": 7970165602.0, "step": 10417 }, { "epoch": 3.81634148575616, "grad_norm": 0.1401405806834199, "learning_rate": 9.030466789129787e-06, "loss": 0.374, "num_tokens": 7970916074.0, "step": 10418 }, { "epoch": 3.816707886782083, "grad_norm": 0.13440972163517542, "learning_rate": 9.027504388213176e-06, "loss": 0.3847, "num_tokens": 7971685134.0, "step": 10419 }, { "epoch": 3.8170742878080057, "grad_norm": 0.1381986117266265, "learning_rate": 9.02454271822589e-06, "loss": 0.4284, "num_tokens": 7972444677.0, "step": 10420 }, { "epoch": 3.817440688833929, "grad_norm": 0.1448229339203005, "learning_rate": 9.021581779334801e-06, "loss": 0.4432, "num_tokens": 7973094256.0, "step": 10421 }, { "epoch": 3.8178070898598517, "grad_norm": 0.14482407116503423, "learning_rate": 9.01862157170674e-06, "loss": 0.3874, "num_tokens": 7973836531.0, "step": 10422 }, { "epoch": 3.8181734908857745, "grad_norm": 0.129581392792433, "learning_rate": 9.015662095508501e-06, "loss": 0.3851, "num_tokens": 7974687056.0, "step": 10423 }, { "epoch": 3.8185398919116973, "grad_norm": 0.1311122215386341, "learning_rate": 9.01270335090683e-06, "loss": 0.3861, "num_tokens": 7975487573.0, "step": 10424 }, { "epoch": 3.81890629293762, "grad_norm": 0.12624972782301494, "learning_rate": 9.009745338068435e-06, "loss": 0.3868, "num_tokens": 7976273052.0, "step": 10425 }, { "epoch": 3.8192726939635433, "grad_norm": 0.1333379292061015, "learning_rate": 9.006788057159998e-06, "loss": 0.4165, "num_tokens": 7976868247.0, "step": 10426 }, { "epoch": 3.819639094989466, "grad_norm": 0.16677191264213367, "learning_rate": 9.003831508348126e-06, "loss": 0.4549, "num_tokens": 7977685103.0, "step": 10427 }, { "epoch": 3.820005496015389, "grad_norm": 0.14217319288441016, "learning_rate": 9.000875691799417e-06, "loss": 0.4299, "num_tokens": 7978433654.0, "step": 10428 }, { "epoch": 3.8203718970413116, "grad_norm": 0.15011401770660773, "learning_rate": 8.997920607680417e-06, "loss": 0.3948, "num_tokens": 7979137839.0, "step": 10429 }, { "epoch": 3.8207382980672344, "grad_norm": 0.14892617920651413, "learning_rate": 8.994966256157615e-06, "loss": 0.3961, "num_tokens": 7979813405.0, "step": 10430 }, { "epoch": 3.8211046990931576, "grad_norm": 0.14396629607176395, "learning_rate": 8.992012637397488e-06, "loss": 0.4016, "num_tokens": 7980745162.0, "step": 10431 }, { "epoch": 3.8214711001190804, "grad_norm": 0.13041773012555852, "learning_rate": 8.989059751566445e-06, "loss": 0.4242, "num_tokens": 7981458092.0, "step": 10432 }, { "epoch": 3.821837501145003, "grad_norm": 0.15204591481504454, "learning_rate": 8.986107598830875e-06, "loss": 0.4052, "num_tokens": 7982134284.0, "step": 10433 }, { "epoch": 3.822203902170926, "grad_norm": 0.14826172033470797, "learning_rate": 8.983156179357106e-06, "loss": 0.4042, "num_tokens": 7982845566.0, "step": 10434 }, { "epoch": 3.8225703031968488, "grad_norm": 0.13982235323280676, "learning_rate": 8.980205493311436e-06, "loss": 0.4468, "num_tokens": 7983598993.0, "step": 10435 }, { "epoch": 3.822936704222772, "grad_norm": 0.14511859009451553, "learning_rate": 8.977255540860132e-06, "loss": 0.412, "num_tokens": 7984353538.0, "step": 10436 }, { "epoch": 3.8233031052486948, "grad_norm": 0.13799944271619694, "learning_rate": 8.974306322169393e-06, "loss": 0.4026, "num_tokens": 7985087293.0, "step": 10437 }, { "epoch": 3.8236695062746175, "grad_norm": 0.15053275733258806, "learning_rate": 8.971357837405392e-06, "loss": 0.388, "num_tokens": 7985743933.0, "step": 10438 }, { "epoch": 3.8240359073005403, "grad_norm": 0.13978432427027113, "learning_rate": 8.968410086734276e-06, "loss": 0.4236, "num_tokens": 7986562480.0, "step": 10439 }, { "epoch": 3.824402308326463, "grad_norm": 0.14018645470587737, "learning_rate": 8.96546307032212e-06, "loss": 0.4133, "num_tokens": 7987438493.0, "step": 10440 }, { "epoch": 3.8247687093523863, "grad_norm": 0.12567406767390135, "learning_rate": 8.962516788334966e-06, "loss": 0.4005, "num_tokens": 7988150871.0, "step": 10441 }, { "epoch": 3.825135110378309, "grad_norm": 0.15218741523708673, "learning_rate": 8.95957124093884e-06, "loss": 0.4104, "num_tokens": 7988852528.0, "step": 10442 }, { "epoch": 3.825501511404232, "grad_norm": 0.14166934952579646, "learning_rate": 8.956626428299695e-06, "loss": 0.3915, "num_tokens": 7989557309.0, "step": 10443 }, { "epoch": 3.8258679124301547, "grad_norm": 0.14685898403183412, "learning_rate": 8.953682350583462e-06, "loss": 0.444, "num_tokens": 7990357336.0, "step": 10444 }, { "epoch": 3.8262343134560775, "grad_norm": 0.13181769623816372, "learning_rate": 8.950739007956017e-06, "loss": 0.4225, "num_tokens": 7991203051.0, "step": 10445 }, { "epoch": 3.8266007144820007, "grad_norm": 0.13445605411516182, "learning_rate": 8.947796400583206e-06, "loss": 0.4311, "num_tokens": 7991905328.0, "step": 10446 }, { "epoch": 3.8269671155079235, "grad_norm": 0.146913953489459, "learning_rate": 8.944854528630827e-06, "loss": 0.4004, "num_tokens": 7992564154.0, "step": 10447 }, { "epoch": 3.8273335165338462, "grad_norm": 0.14721916632217047, "learning_rate": 8.941913392264638e-06, "loss": 0.4543, "num_tokens": 7993312145.0, "step": 10448 }, { "epoch": 3.827699917559769, "grad_norm": 0.1439366003158413, "learning_rate": 8.938972991650357e-06, "loss": 0.4254, "num_tokens": 7994063226.0, "step": 10449 }, { "epoch": 3.828066318585692, "grad_norm": 0.1397241736221631, "learning_rate": 8.93603332695366e-06, "loss": 0.4032, "num_tokens": 7994746070.0, "step": 10450 }, { "epoch": 3.828432719611615, "grad_norm": 0.14711944680916722, "learning_rate": 8.933094398340179e-06, "loss": 0.4367, "num_tokens": 7995553541.0, "step": 10451 }, { "epoch": 3.828799120637538, "grad_norm": 0.14070839431049031, "learning_rate": 8.930156205975508e-06, "loss": 0.3909, "num_tokens": 7996302921.0, "step": 10452 }, { "epoch": 3.8291655216634606, "grad_norm": 0.1376634395630006, "learning_rate": 8.927218750025199e-06, "loss": 0.3626, "num_tokens": 7997044112.0, "step": 10453 }, { "epoch": 3.829531922689384, "grad_norm": 0.1360484412845292, "learning_rate": 8.924282030654755e-06, "loss": 0.4155, "num_tokens": 7997839989.0, "step": 10454 }, { "epoch": 3.829898323715306, "grad_norm": 0.13871681538870106, "learning_rate": 8.921346048029656e-06, "loss": 0.3756, "num_tokens": 7998665683.0, "step": 10455 }, { "epoch": 3.8302647247412294, "grad_norm": 0.14528866351603734, "learning_rate": 8.918410802315325e-06, "loss": 0.4272, "num_tokens": 7999253746.0, "step": 10456 }, { "epoch": 3.830631125767152, "grad_norm": 0.15756926815533234, "learning_rate": 8.915476293677135e-06, "loss": 0.3887, "num_tokens": 7999978475.0, "step": 10457 }, { "epoch": 3.830997526793075, "grad_norm": 0.1465412200180587, "learning_rate": 8.912542522280442e-06, "loss": 0.4142, "num_tokens": 8000721396.0, "step": 10458 }, { "epoch": 3.831363927818998, "grad_norm": 0.14139192788742738, "learning_rate": 8.909609488290547e-06, "loss": 0.4239, "num_tokens": 8001452684.0, "step": 10459 }, { "epoch": 3.831730328844921, "grad_norm": 0.13854931262829126, "learning_rate": 8.906677191872708e-06, "loss": 0.3947, "num_tokens": 8002219263.0, "step": 10460 }, { "epoch": 3.8320967298708437, "grad_norm": 0.13836599492978002, "learning_rate": 8.903745633192145e-06, "loss": 0.38, "num_tokens": 8002978790.0, "step": 10461 }, { "epoch": 3.8324631308967665, "grad_norm": 0.13941409693957002, "learning_rate": 8.900814812414034e-06, "loss": 0.4073, "num_tokens": 8003851075.0, "step": 10462 }, { "epoch": 3.8328295319226893, "grad_norm": 0.14034789812432966, "learning_rate": 8.89788472970351e-06, "loss": 0.4094, "num_tokens": 8004774305.0, "step": 10463 }, { "epoch": 3.8331959329486125, "grad_norm": 0.14548933126450173, "learning_rate": 8.89495538522567e-06, "loss": 0.4127, "num_tokens": 8005597881.0, "step": 10464 }, { "epoch": 3.8335623339745353, "grad_norm": 0.1473593855666009, "learning_rate": 8.892026779145559e-06, "loss": 0.3816, "num_tokens": 8006331495.0, "step": 10465 }, { "epoch": 3.833928735000458, "grad_norm": 0.13348992028385687, "learning_rate": 8.889098911628205e-06, "loss": 0.4143, "num_tokens": 8007162788.0, "step": 10466 }, { "epoch": 3.834295136026381, "grad_norm": 0.13874919160833368, "learning_rate": 8.886171782838562e-06, "loss": 0.4546, "num_tokens": 8007895821.0, "step": 10467 }, { "epoch": 3.8346615370523036, "grad_norm": 0.15114087047991848, "learning_rate": 8.883245392941557e-06, "loss": 0.4048, "num_tokens": 8008501126.0, "step": 10468 }, { "epoch": 3.835027938078227, "grad_norm": 0.17080375924081315, "learning_rate": 8.88031974210209e-06, "loss": 0.4564, "num_tokens": 8009190911.0, "step": 10469 }, { "epoch": 3.8353943391041496, "grad_norm": 0.1441277759859528, "learning_rate": 8.87739483048499e-06, "loss": 0.4018, "num_tokens": 8009896613.0, "step": 10470 }, { "epoch": 3.8357607401300724, "grad_norm": 0.14303181592948666, "learning_rate": 8.874470658255066e-06, "loss": 0.4209, "num_tokens": 8010769719.0, "step": 10471 }, { "epoch": 3.836127141155995, "grad_norm": 0.13449787654592632, "learning_rate": 8.871547225577081e-06, "loss": 0.4267, "num_tokens": 8011436771.0, "step": 10472 }, { "epoch": 3.836493542181918, "grad_norm": 0.15436781778774467, "learning_rate": 8.868624532615755e-06, "loss": 0.388, "num_tokens": 8012224244.0, "step": 10473 }, { "epoch": 3.836859943207841, "grad_norm": 0.13799995464665754, "learning_rate": 8.865702579535763e-06, "loss": 0.4205, "num_tokens": 8012998203.0, "step": 10474 }, { "epoch": 3.837226344233764, "grad_norm": 0.13014796834623477, "learning_rate": 8.862781366501741e-06, "loss": 0.416, "num_tokens": 8013773945.0, "step": 10475 }, { "epoch": 3.8375927452596867, "grad_norm": 0.14335660144784187, "learning_rate": 8.859860893678286e-06, "loss": 0.4137, "num_tokens": 8014589333.0, "step": 10476 }, { "epoch": 3.8379591462856095, "grad_norm": 0.1260862239483332, "learning_rate": 8.856941161229949e-06, "loss": 0.4187, "num_tokens": 8015277376.0, "step": 10477 }, { "epoch": 3.8383255473115323, "grad_norm": 0.1425452007937687, "learning_rate": 8.854022169321241e-06, "loss": 0.4281, "num_tokens": 8016083993.0, "step": 10478 }, { "epoch": 3.8386919483374555, "grad_norm": 0.14552732307365251, "learning_rate": 8.85110391811663e-06, "loss": 0.434, "num_tokens": 8016740289.0, "step": 10479 }, { "epoch": 3.8390583493633783, "grad_norm": 0.1543307717341004, "learning_rate": 8.848186407780546e-06, "loss": 0.3993, "num_tokens": 8017423300.0, "step": 10480 }, { "epoch": 3.839424750389301, "grad_norm": 0.14855471784976726, "learning_rate": 8.845269638477369e-06, "loss": 0.4296, "num_tokens": 8018161978.0, "step": 10481 }, { "epoch": 3.839791151415224, "grad_norm": 0.1515927707942689, "learning_rate": 8.84235361037145e-06, "loss": 0.3855, "num_tokens": 8018869880.0, "step": 10482 }, { "epoch": 3.8401575524411466, "grad_norm": 0.1379796393779652, "learning_rate": 8.839438323627099e-06, "loss": 0.3983, "num_tokens": 8019654748.0, "step": 10483 }, { "epoch": 3.84052395346707, "grad_norm": 0.13057694496696473, "learning_rate": 8.836523778408552e-06, "loss": 0.4084, "num_tokens": 8020448461.0, "step": 10484 }, { "epoch": 3.8408903544929927, "grad_norm": 0.1329940242888439, "learning_rate": 8.833609974880049e-06, "loss": 0.3759, "num_tokens": 8021230456.0, "step": 10485 }, { "epoch": 3.8412567555189154, "grad_norm": 0.12884943767290674, "learning_rate": 8.830696913205766e-06, "loss": 0.39, "num_tokens": 8022002643.0, "step": 10486 }, { "epoch": 3.841623156544838, "grad_norm": 0.13284717621283476, "learning_rate": 8.82778459354982e-06, "loss": 0.3977, "num_tokens": 8022812546.0, "step": 10487 }, { "epoch": 3.841989557570761, "grad_norm": 0.13593869424117183, "learning_rate": 8.824873016076321e-06, "loss": 0.3987, "num_tokens": 8023653535.0, "step": 10488 }, { "epoch": 3.842355958596684, "grad_norm": 0.12737441809734917, "learning_rate": 8.82196218094932e-06, "loss": 0.4114, "num_tokens": 8024420069.0, "step": 10489 }, { "epoch": 3.842722359622607, "grad_norm": 0.1339380792478939, "learning_rate": 8.81905208833282e-06, "loss": 0.3978, "num_tokens": 8025255253.0, "step": 10490 }, { "epoch": 3.84308876064853, "grad_norm": 0.12845080898978703, "learning_rate": 8.816142738390792e-06, "loss": 0.4067, "num_tokens": 8025979888.0, "step": 10491 }, { "epoch": 3.8434551616744526, "grad_norm": 0.13589240525446433, "learning_rate": 8.813234131287154e-06, "loss": 0.4054, "num_tokens": 8026741947.0, "step": 10492 }, { "epoch": 3.8438215627003753, "grad_norm": 0.13799993799344187, "learning_rate": 8.810326267185812e-06, "loss": 0.4145, "num_tokens": 8027542766.0, "step": 10493 }, { "epoch": 3.8441879637262986, "grad_norm": 0.13347493314065859, "learning_rate": 8.807419146250587e-06, "loss": 0.3871, "num_tokens": 8028370043.0, "step": 10494 }, { "epoch": 3.8445543647522213, "grad_norm": 0.1328189228550523, "learning_rate": 8.80451276864528e-06, "loss": 0.4046, "num_tokens": 8029159288.0, "step": 10495 }, { "epoch": 3.844920765778144, "grad_norm": 0.13591557829410691, "learning_rate": 8.801607134533668e-06, "loss": 0.3862, "num_tokens": 8029910481.0, "step": 10496 }, { "epoch": 3.845287166804067, "grad_norm": 0.15081909905422564, "learning_rate": 8.798702244079449e-06, "loss": 0.4063, "num_tokens": 8030635971.0, "step": 10497 }, { "epoch": 3.8456535678299897, "grad_norm": 0.14154758246211085, "learning_rate": 8.7957980974463e-06, "loss": 0.4082, "num_tokens": 8031393966.0, "step": 10498 }, { "epoch": 3.846019968855913, "grad_norm": 0.14236872723819735, "learning_rate": 8.792894694797864e-06, "loss": 0.3848, "num_tokens": 8032091736.0, "step": 10499 }, { "epoch": 3.8463863698818357, "grad_norm": 0.13818185191333845, "learning_rate": 8.789992036297725e-06, "loss": 0.3828, "num_tokens": 8032869024.0, "step": 10500 }, { "epoch": 3.8467527709077585, "grad_norm": 0.13341080034669492, "learning_rate": 8.787090122109432e-06, "loss": 0.3761, "num_tokens": 8033662041.0, "step": 10501 }, { "epoch": 3.8471191719336817, "grad_norm": 0.13137773718586962, "learning_rate": 8.784188952396492e-06, "loss": 0.4017, "num_tokens": 8034399196.0, "step": 10502 }, { "epoch": 3.847485572959604, "grad_norm": 0.15797417385185702, "learning_rate": 8.781288527322373e-06, "loss": 0.4208, "num_tokens": 8035153886.0, "step": 10503 }, { "epoch": 3.8478519739855273, "grad_norm": 0.13857131461615566, "learning_rate": 8.778388847050497e-06, "loss": 0.383, "num_tokens": 8035902413.0, "step": 10504 }, { "epoch": 3.84821837501145, "grad_norm": 0.12992248703554596, "learning_rate": 8.775489911744243e-06, "loss": 0.3828, "num_tokens": 8036626103.0, "step": 10505 }, { "epoch": 3.848584776037373, "grad_norm": 0.1426892001375045, "learning_rate": 8.77259172156695e-06, "loss": 0.4064, "num_tokens": 8037481102.0, "step": 10506 }, { "epoch": 3.848951177063296, "grad_norm": 0.12654042076479627, "learning_rate": 8.769694276681917e-06, "loss": 0.4212, "num_tokens": 8038271053.0, "step": 10507 }, { "epoch": 3.8493175780892184, "grad_norm": 0.14170034322098382, "learning_rate": 8.7667975772524e-06, "loss": 0.4559, "num_tokens": 8039008452.0, "step": 10508 }, { "epoch": 3.8496839791151416, "grad_norm": 0.13885059637592836, "learning_rate": 8.763901623441611e-06, "loss": 0.4148, "num_tokens": 8039871481.0, "step": 10509 }, { "epoch": 3.8500503801410644, "grad_norm": 0.13182588173740314, "learning_rate": 8.76100641541272e-06, "loss": 0.4155, "num_tokens": 8040685580.0, "step": 10510 }, { "epoch": 3.850416781166987, "grad_norm": 0.13487293630426225, "learning_rate": 8.758111953328853e-06, "loss": 0.3874, "num_tokens": 8041445946.0, "step": 10511 }, { "epoch": 3.8507831821929104, "grad_norm": 0.13617318506697632, "learning_rate": 8.755218237353107e-06, "loss": 0.4267, "num_tokens": 8042243065.0, "step": 10512 }, { "epoch": 3.851149583218833, "grad_norm": 0.14064202599817355, "learning_rate": 8.752325267648528e-06, "loss": 0.4262, "num_tokens": 8042992404.0, "step": 10513 }, { "epoch": 3.851515984244756, "grad_norm": 0.14285457898702225, "learning_rate": 8.749433044378099e-06, "loss": 0.3897, "num_tokens": 8043746601.0, "step": 10514 }, { "epoch": 3.8518823852706787, "grad_norm": 0.13555176419920512, "learning_rate": 8.746541567704802e-06, "loss": 0.4089, "num_tokens": 8044475445.0, "step": 10515 }, { "epoch": 3.8522487862966015, "grad_norm": 0.1396267877669611, "learning_rate": 8.743650837791549e-06, "loss": 0.4055, "num_tokens": 8045240720.0, "step": 10516 }, { "epoch": 3.8526151873225247, "grad_norm": 0.15105799257559657, "learning_rate": 8.740760854801215e-06, "loss": 0.4218, "num_tokens": 8045907315.0, "step": 10517 }, { "epoch": 3.8529815883484475, "grad_norm": 0.1371371419807362, "learning_rate": 8.737871618896638e-06, "loss": 0.4066, "num_tokens": 8046646216.0, "step": 10518 }, { "epoch": 3.8533479893743703, "grad_norm": 0.13260120211901655, "learning_rate": 8.734983130240608e-06, "loss": 0.3787, "num_tokens": 8047464103.0, "step": 10519 }, { "epoch": 3.853714390400293, "grad_norm": 0.14528578058301753, "learning_rate": 8.732095388995877e-06, "loss": 0.4377, "num_tokens": 8048370170.0, "step": 10520 }, { "epoch": 3.854080791426216, "grad_norm": 0.1382213066157935, "learning_rate": 8.72920839532515e-06, "loss": 0.4064, "num_tokens": 8049170302.0, "step": 10521 }, { "epoch": 3.854447192452139, "grad_norm": 0.14256265865020015, "learning_rate": 8.726322149391092e-06, "loss": 0.4046, "num_tokens": 8049867648.0, "step": 10522 }, { "epoch": 3.854813593478062, "grad_norm": 0.1379884806603255, "learning_rate": 8.723436651356344e-06, "loss": 0.4253, "num_tokens": 8050628807.0, "step": 10523 }, { "epoch": 3.8551799945039846, "grad_norm": 0.13648681485712855, "learning_rate": 8.720551901383472e-06, "loss": 0.4075, "num_tokens": 8051444120.0, "step": 10524 }, { "epoch": 3.8555463955299074, "grad_norm": 0.13770155256199074, "learning_rate": 8.717667899635012e-06, "loss": 0.385, "num_tokens": 8052132335.0, "step": 10525 }, { "epoch": 3.85591279655583, "grad_norm": 0.14035911105231705, "learning_rate": 8.71478464627348e-06, "loss": 0.3756, "num_tokens": 8052817832.0, "step": 10526 }, { "epoch": 3.8562791975817534, "grad_norm": 0.14600082199803713, "learning_rate": 8.711902141461315e-06, "loss": 0.4122, "num_tokens": 8053589125.0, "step": 10527 }, { "epoch": 3.856645598607676, "grad_norm": 0.13340611448176998, "learning_rate": 8.709020385360931e-06, "loss": 0.4329, "num_tokens": 8054273786.0, "step": 10528 }, { "epoch": 3.857011999633599, "grad_norm": 0.15161662225050154, "learning_rate": 8.70613937813471e-06, "loss": 0.4412, "num_tokens": 8055015050.0, "step": 10529 }, { "epoch": 3.8573784006595218, "grad_norm": 0.14563286036200612, "learning_rate": 8.703259119944973e-06, "loss": 0.3995, "num_tokens": 8055813508.0, "step": 10530 }, { "epoch": 3.8577448016854445, "grad_norm": 0.1363902980210162, "learning_rate": 8.70037961095401e-06, "loss": 0.3977, "num_tokens": 8056532766.0, "step": 10531 }, { "epoch": 3.8581112027113678, "grad_norm": 0.14752482133580005, "learning_rate": 8.69750085132407e-06, "loss": 0.3895, "num_tokens": 8057225020.0, "step": 10532 }, { "epoch": 3.8584776037372905, "grad_norm": 0.13377873771370952, "learning_rate": 8.694622841217344e-06, "loss": 0.4204, "num_tokens": 8057958325.0, "step": 10533 }, { "epoch": 3.8588440047632133, "grad_norm": 0.1442658818094071, "learning_rate": 8.691745580796e-06, "loss": 0.4007, "num_tokens": 8058750857.0, "step": 10534 }, { "epoch": 3.859210405789136, "grad_norm": 0.1389709256408429, "learning_rate": 8.68886907022216e-06, "loss": 0.4429, "num_tokens": 8059505280.0, "step": 10535 }, { "epoch": 3.859576806815059, "grad_norm": 0.13343917498087565, "learning_rate": 8.685993309657887e-06, "loss": 0.4069, "num_tokens": 8060311637.0, "step": 10536 }, { "epoch": 3.859943207840982, "grad_norm": 0.13223631917768167, "learning_rate": 8.683118299265225e-06, "loss": 0.4108, "num_tokens": 8061229108.0, "step": 10537 }, { "epoch": 3.860309608866905, "grad_norm": 0.1294260942120956, "learning_rate": 8.680244039206154e-06, "loss": 0.4224, "num_tokens": 8062039068.0, "step": 10538 }, { "epoch": 3.8606760098928277, "grad_norm": 0.13320352316964224, "learning_rate": 8.677370529642637e-06, "loss": 0.4101, "num_tokens": 8062839419.0, "step": 10539 }, { "epoch": 3.8610424109187504, "grad_norm": 0.13054886074549416, "learning_rate": 8.674497770736578e-06, "loss": 0.4088, "num_tokens": 8063593964.0, "step": 10540 }, { "epoch": 3.8614088119446732, "grad_norm": 0.13569848277715385, "learning_rate": 8.67162576264983e-06, "loss": 0.3936, "num_tokens": 8064434835.0, "step": 10541 }, { "epoch": 3.8617752129705964, "grad_norm": 0.12997364293388305, "learning_rate": 8.668754505544225e-06, "loss": 0.406, "num_tokens": 8065234233.0, "step": 10542 }, { "epoch": 3.8621416139965192, "grad_norm": 0.1307343772056874, "learning_rate": 8.665883999581546e-06, "loss": 0.4448, "num_tokens": 8065998054.0, "step": 10543 }, { "epoch": 3.862508015022442, "grad_norm": 0.14070582541540264, "learning_rate": 8.663014244923514e-06, "loss": 0.4253, "num_tokens": 8066710668.0, "step": 10544 }, { "epoch": 3.862874416048365, "grad_norm": 0.1396701886749864, "learning_rate": 8.66014524173184e-06, "loss": 0.442, "num_tokens": 8067550742.0, "step": 10545 }, { "epoch": 3.8632408170742876, "grad_norm": 0.13904315286047067, "learning_rate": 8.657276990168171e-06, "loss": 0.4047, "num_tokens": 8068386820.0, "step": 10546 }, { "epoch": 3.863607218100211, "grad_norm": 0.1315367120220178, "learning_rate": 8.654409490394115e-06, "loss": 0.4016, "num_tokens": 8069131551.0, "step": 10547 }, { "epoch": 3.8639736191261336, "grad_norm": 0.13955181467827177, "learning_rate": 8.651542742571244e-06, "loss": 0.4205, "num_tokens": 8069962013.0, "step": 10548 }, { "epoch": 3.8643400201520564, "grad_norm": 0.13506282917475876, "learning_rate": 8.648676746861078e-06, "loss": 0.3755, "num_tokens": 8070746242.0, "step": 10549 }, { "epoch": 3.864706421177979, "grad_norm": 0.14126351568968637, "learning_rate": 8.645811503425112e-06, "loss": 0.4156, "num_tokens": 8071394434.0, "step": 10550 }, { "epoch": 3.865072822203902, "grad_norm": 0.1505830013482243, "learning_rate": 8.642947012424777e-06, "loss": 0.4058, "num_tokens": 8072121304.0, "step": 10551 }, { "epoch": 3.865439223229825, "grad_norm": 0.13687702835715196, "learning_rate": 8.640083274021464e-06, "loss": 0.3923, "num_tokens": 8072880992.0, "step": 10552 }, { "epoch": 3.865805624255748, "grad_norm": 0.13533689956093176, "learning_rate": 8.637220288376554e-06, "loss": 0.4238, "num_tokens": 8073804114.0, "step": 10553 }, { "epoch": 3.8661720252816707, "grad_norm": 0.1287416773280191, "learning_rate": 8.634358055651335e-06, "loss": 0.3917, "num_tokens": 8074573521.0, "step": 10554 }, { "epoch": 3.866538426307594, "grad_norm": 0.13045745748714052, "learning_rate": 8.631496576007084e-06, "loss": 0.4068, "num_tokens": 8075287312.0, "step": 10555 }, { "epoch": 3.8669048273335163, "grad_norm": 0.1447195260839256, "learning_rate": 8.62863584960504e-06, "loss": 0.434, "num_tokens": 8075974791.0, "step": 10556 }, { "epoch": 3.8672712283594395, "grad_norm": 0.1427000388790841, "learning_rate": 8.625775876606384e-06, "loss": 0.4159, "num_tokens": 8076622274.0, "step": 10557 }, { "epoch": 3.8676376293853623, "grad_norm": 0.1521556097944941, "learning_rate": 8.622916657172258e-06, "loss": 0.4073, "num_tokens": 8077282054.0, "step": 10558 }, { "epoch": 3.868004030411285, "grad_norm": 0.14818354157596428, "learning_rate": 8.620058191463765e-06, "loss": 0.415, "num_tokens": 8078093913.0, "step": 10559 }, { "epoch": 3.8683704314372083, "grad_norm": 0.13020795963173304, "learning_rate": 8.617200479641962e-06, "loss": 0.3949, "num_tokens": 8078778933.0, "step": 10560 }, { "epoch": 3.868736832463131, "grad_norm": 0.14925710509590806, "learning_rate": 8.614343521867869e-06, "loss": 0.3803, "num_tokens": 8079618119.0, "step": 10561 }, { "epoch": 3.869103233489054, "grad_norm": 0.11751418253294786, "learning_rate": 8.611487318302459e-06, "loss": 0.4003, "num_tokens": 8080388995.0, "step": 10562 }, { "epoch": 3.8694696345149766, "grad_norm": 0.13541448127211553, "learning_rate": 8.60863186910666e-06, "loss": 0.4013, "num_tokens": 8081132713.0, "step": 10563 }, { "epoch": 3.8698360355408994, "grad_norm": 0.14464316402992533, "learning_rate": 8.605777174441364e-06, "loss": 0.4113, "num_tokens": 8081938970.0, "step": 10564 }, { "epoch": 3.8702024365668226, "grad_norm": 0.1381292867007173, "learning_rate": 8.602923234467412e-06, "loss": 0.4127, "num_tokens": 8082688594.0, "step": 10565 }, { "epoch": 3.8705688375927454, "grad_norm": 0.13735076143984143, "learning_rate": 8.600070049345618e-06, "loss": 0.4054, "num_tokens": 8083376202.0, "step": 10566 }, { "epoch": 3.870935238618668, "grad_norm": 0.1597640077089643, "learning_rate": 8.597217619236742e-06, "loss": 0.4608, "num_tokens": 8084067130.0, "step": 10567 }, { "epoch": 3.871301639644591, "grad_norm": 0.1437434304944835, "learning_rate": 8.59436594430149e-06, "loss": 0.4373, "num_tokens": 8084796763.0, "step": 10568 }, { "epoch": 3.8716680406705137, "grad_norm": 0.15342922044626214, "learning_rate": 8.591515024700551e-06, "loss": 0.419, "num_tokens": 8085503778.0, "step": 10569 }, { "epoch": 3.872034441696437, "grad_norm": 0.14023484707764058, "learning_rate": 8.588664860594563e-06, "loss": 0.4266, "num_tokens": 8086312836.0, "step": 10570 }, { "epoch": 3.8724008427223597, "grad_norm": 0.12707333980192706, "learning_rate": 8.585815452144098e-06, "loss": 0.3865, "num_tokens": 8087154608.0, "step": 10571 }, { "epoch": 3.8727672437482825, "grad_norm": 0.13151752187594307, "learning_rate": 8.58296679950972e-06, "loss": 0.3967, "num_tokens": 8087832578.0, "step": 10572 }, { "epoch": 3.8731336447742053, "grad_norm": 0.15214065936198382, "learning_rate": 8.580118902851933e-06, "loss": 0.3957, "num_tokens": 8088503849.0, "step": 10573 }, { "epoch": 3.873500045800128, "grad_norm": 0.13581929896871384, "learning_rate": 8.577271762331197e-06, "loss": 0.3855, "num_tokens": 8089262099.0, "step": 10574 }, { "epoch": 3.8738664468260513, "grad_norm": 0.1374907751392074, "learning_rate": 8.574425378107934e-06, "loss": 0.3872, "num_tokens": 8090013130.0, "step": 10575 }, { "epoch": 3.874232847851974, "grad_norm": 0.13931231644397793, "learning_rate": 8.571579750342524e-06, "loss": 0.4382, "num_tokens": 8090810223.0, "step": 10576 }, { "epoch": 3.874599248877897, "grad_norm": 0.13876099543237558, "learning_rate": 8.5687348791953e-06, "loss": 0.4165, "num_tokens": 8091482724.0, "step": 10577 }, { "epoch": 3.8749656499038196, "grad_norm": 0.14427011012192983, "learning_rate": 8.565890764826555e-06, "loss": 0.4126, "num_tokens": 8092217069.0, "step": 10578 }, { "epoch": 3.8753320509297424, "grad_norm": 0.13516146942990054, "learning_rate": 8.563047407396536e-06, "loss": 0.397, "num_tokens": 8093029215.0, "step": 10579 }, { "epoch": 3.8756984519556656, "grad_norm": 0.1336041087450856, "learning_rate": 8.560204807065465e-06, "loss": 0.3862, "num_tokens": 8093928785.0, "step": 10580 }, { "epoch": 3.8760648529815884, "grad_norm": 0.12931779464106508, "learning_rate": 8.557362963993493e-06, "loss": 0.4085, "num_tokens": 8094718129.0, "step": 10581 }, { "epoch": 3.876431254007511, "grad_norm": 0.1286113632513982, "learning_rate": 8.55452187834074e-06, "loss": 0.4451, "num_tokens": 8095463811.0, "step": 10582 }, { "epoch": 3.876797655033434, "grad_norm": 0.15352188926163832, "learning_rate": 8.551681550267302e-06, "loss": 0.4258, "num_tokens": 8096210106.0, "step": 10583 }, { "epoch": 3.8771640560593568, "grad_norm": 0.1355090093833233, "learning_rate": 8.548841979933197e-06, "loss": 0.4235, "num_tokens": 8097003892.0, "step": 10584 }, { "epoch": 3.87753045708528, "grad_norm": 0.14446478734872262, "learning_rate": 8.546003167498432e-06, "loss": 0.424, "num_tokens": 8097777017.0, "step": 10585 }, { "epoch": 3.8778968581112028, "grad_norm": 0.13344715448261238, "learning_rate": 8.543165113122957e-06, "loss": 0.4011, "num_tokens": 8098540072.0, "step": 10586 }, { "epoch": 3.8782632591371256, "grad_norm": 0.141191178330128, "learning_rate": 8.540327816966678e-06, "loss": 0.4535, "num_tokens": 8099300078.0, "step": 10587 }, { "epoch": 3.8786296601630483, "grad_norm": 0.14401752475815163, "learning_rate": 8.53749127918946e-06, "loss": 0.3982, "num_tokens": 8099967692.0, "step": 10588 }, { "epoch": 3.878996061188971, "grad_norm": 0.14456091864528584, "learning_rate": 8.53465549995113e-06, "loss": 0.4027, "num_tokens": 8100692258.0, "step": 10589 }, { "epoch": 3.8793624622148943, "grad_norm": 0.14673783643116078, "learning_rate": 8.531820479411468e-06, "loss": 0.4352, "num_tokens": 8101438348.0, "step": 10590 }, { "epoch": 3.879728863240817, "grad_norm": 0.13494515384317732, "learning_rate": 8.52898621773021e-06, "loss": 0.4039, "num_tokens": 8102279155.0, "step": 10591 }, { "epoch": 3.88009526426674, "grad_norm": 0.130330941448877, "learning_rate": 8.52615271506705e-06, "loss": 0.4235, "num_tokens": 8103070756.0, "step": 10592 }, { "epoch": 3.8804616652926627, "grad_norm": 0.13650126084696101, "learning_rate": 8.523319971581646e-06, "loss": 0.4222, "num_tokens": 8103807010.0, "step": 10593 }, { "epoch": 3.8808280663185855, "grad_norm": 0.13093634749203636, "learning_rate": 8.520487987433604e-06, "loss": 0.3948, "num_tokens": 8104557949.0, "step": 10594 }, { "epoch": 3.8811944673445087, "grad_norm": 0.1323110107788572, "learning_rate": 8.517656762782484e-06, "loss": 0.3748, "num_tokens": 8105375726.0, "step": 10595 }, { "epoch": 3.8815608683704315, "grad_norm": 0.13493376776308347, "learning_rate": 8.514826297787824e-06, "loss": 0.3742, "num_tokens": 8106068543.0, "step": 10596 }, { "epoch": 3.8819272693963542, "grad_norm": 0.13424904983570443, "learning_rate": 8.5119965926091e-06, "loss": 0.394, "num_tokens": 8106920457.0, "step": 10597 }, { "epoch": 3.882293670422277, "grad_norm": 0.12549989834740485, "learning_rate": 8.509167647405741e-06, "loss": 0.3881, "num_tokens": 8107740273.0, "step": 10598 }, { "epoch": 3.8826600714482, "grad_norm": 0.13276095945928487, "learning_rate": 8.506339462337156e-06, "loss": 0.416, "num_tokens": 8108510381.0, "step": 10599 }, { "epoch": 3.883026472474123, "grad_norm": 0.12949092160117023, "learning_rate": 8.503512037562696e-06, "loss": 0.4011, "num_tokens": 8109147572.0, "step": 10600 }, { "epoch": 3.883392873500046, "grad_norm": 0.15336183089135985, "learning_rate": 8.50068537324166e-06, "loss": 0.4005, "num_tokens": 8109926951.0, "step": 10601 }, { "epoch": 3.8837592745259686, "grad_norm": 0.15763796322693543, "learning_rate": 8.497859469533322e-06, "loss": 0.3877, "num_tokens": 8110668311.0, "step": 10602 }, { "epoch": 3.884125675551892, "grad_norm": 0.13161101248804583, "learning_rate": 8.495034326596911e-06, "loss": 0.3812, "num_tokens": 8111654733.0, "step": 10603 }, { "epoch": 3.884492076577814, "grad_norm": 0.12829296728878845, "learning_rate": 8.492209944591603e-06, "loss": 0.4167, "num_tokens": 8112392327.0, "step": 10604 }, { "epoch": 3.8848584776037374, "grad_norm": 0.14686625730974828, "learning_rate": 8.489386323676536e-06, "loss": 0.3981, "num_tokens": 8113226820.0, "step": 10605 }, { "epoch": 3.88522487862966, "grad_norm": 0.12635949977213035, "learning_rate": 8.486563464010801e-06, "loss": 0.4074, "num_tokens": 8113956608.0, "step": 10606 }, { "epoch": 3.885591279655583, "grad_norm": 0.14205562362450164, "learning_rate": 8.483741365753469e-06, "loss": 0.4261, "num_tokens": 8114702795.0, "step": 10607 }, { "epoch": 3.885957680681506, "grad_norm": 0.1603312106531521, "learning_rate": 8.480920029063534e-06, "loss": 0.4152, "num_tokens": 8115503736.0, "step": 10608 }, { "epoch": 3.886324081707429, "grad_norm": 0.12916234879769758, "learning_rate": 8.478099454099958e-06, "loss": 0.4016, "num_tokens": 8116296121.0, "step": 10609 }, { "epoch": 3.8866904827333517, "grad_norm": 0.12989198868594404, "learning_rate": 8.475279641021686e-06, "loss": 0.4198, "num_tokens": 8117031753.0, "step": 10610 }, { "epoch": 3.8870568837592745, "grad_norm": 0.13728513860981142, "learning_rate": 8.472460589987578e-06, "loss": 0.4007, "num_tokens": 8117834598.0, "step": 10611 }, { "epoch": 3.8874232847851973, "grad_norm": 0.13969671034330478, "learning_rate": 8.46964230115648e-06, "loss": 0.3942, "num_tokens": 8118661325.0, "step": 10612 }, { "epoch": 3.8877896858111205, "grad_norm": 0.13396065098905602, "learning_rate": 8.466824774687191e-06, "loss": 0.3999, "num_tokens": 8119417287.0, "step": 10613 }, { "epoch": 3.8881560868370433, "grad_norm": 0.12893811668906327, "learning_rate": 8.464008010738461e-06, "loss": 0.3677, "num_tokens": 8120208620.0, "step": 10614 }, { "epoch": 3.888522487862966, "grad_norm": 0.13576581322476966, "learning_rate": 8.461192009468998e-06, "loss": 0.4415, "num_tokens": 8120877474.0, "step": 10615 }, { "epoch": 3.888888888888889, "grad_norm": 0.1471369323540509, "learning_rate": 8.45837677103747e-06, "loss": 0.4172, "num_tokens": 8121567612.0, "step": 10616 }, { "epoch": 3.8892552899148116, "grad_norm": 0.1474847846498261, "learning_rate": 8.455562295602495e-06, "loss": 0.3911, "num_tokens": 8122345317.0, "step": 10617 }, { "epoch": 3.889621690940735, "grad_norm": 0.13149280566171506, "learning_rate": 8.452748583322662e-06, "loss": 0.4034, "num_tokens": 8123133749.0, "step": 10618 }, { "epoch": 3.8899880919666576, "grad_norm": 0.14169919401820139, "learning_rate": 8.4499356343565e-06, "loss": 0.4221, "num_tokens": 8123946002.0, "step": 10619 }, { "epoch": 3.8903544929925804, "grad_norm": 0.13196972354136918, "learning_rate": 8.44712344886251e-06, "loss": 0.3975, "num_tokens": 8124701941.0, "step": 10620 }, { "epoch": 3.890720894018503, "grad_norm": 0.13704633444952619, "learning_rate": 8.444312026999139e-06, "loss": 0.3799, "num_tokens": 8125429668.0, "step": 10621 }, { "epoch": 3.891087295044426, "grad_norm": 0.1362074369387503, "learning_rate": 8.441501368924792e-06, "loss": 0.3927, "num_tokens": 8126260826.0, "step": 10622 }, { "epoch": 3.891453696070349, "grad_norm": 0.12319277788384767, "learning_rate": 8.438691474797842e-06, "loss": 0.3811, "num_tokens": 8126976052.0, "step": 10623 }, { "epoch": 3.891820097096272, "grad_norm": 0.14984100143162413, "learning_rate": 8.435882344776615e-06, "loss": 0.4305, "num_tokens": 8127632892.0, "step": 10624 }, { "epoch": 3.8921864981221947, "grad_norm": 0.13570374958751788, "learning_rate": 8.433073979019375e-06, "loss": 0.413, "num_tokens": 8128388354.0, "step": 10625 }, { "epoch": 3.8925528991481175, "grad_norm": 0.13285209897470263, "learning_rate": 8.430266377684368e-06, "loss": 0.3935, "num_tokens": 8129235721.0, "step": 10626 }, { "epoch": 3.8929193001740403, "grad_norm": 0.14502226629894815, "learning_rate": 8.427459540929792e-06, "loss": 0.4081, "num_tokens": 8130118847.0, "step": 10627 }, { "epoch": 3.8932857011999635, "grad_norm": 0.12394960145647438, "learning_rate": 8.42465346891378e-06, "loss": 0.4095, "num_tokens": 8130849885.0, "step": 10628 }, { "epoch": 3.8936521022258863, "grad_norm": 0.13907693665614876, "learning_rate": 8.421848161794453e-06, "loss": 0.4123, "num_tokens": 8131737417.0, "step": 10629 }, { "epoch": 3.894018503251809, "grad_norm": 0.13576309048639862, "learning_rate": 8.419043619729871e-06, "loss": 0.4373, "num_tokens": 8132413792.0, "step": 10630 }, { "epoch": 3.894384904277732, "grad_norm": 0.13771112706386682, "learning_rate": 8.416239842878055e-06, "loss": 0.4122, "num_tokens": 8133180232.0, "step": 10631 }, { "epoch": 3.8947513053036547, "grad_norm": 0.13929648538326442, "learning_rate": 8.413436831396982e-06, "loss": 0.393, "num_tokens": 8133941811.0, "step": 10632 }, { "epoch": 3.895117706329578, "grad_norm": 0.1355500600405032, "learning_rate": 8.410634585444587e-06, "loss": 0.3824, "num_tokens": 8134785396.0, "step": 10633 }, { "epoch": 3.8954841073555007, "grad_norm": 0.12721229086374478, "learning_rate": 8.407833105178759e-06, "loss": 0.4133, "num_tokens": 8135540549.0, "step": 10634 }, { "epoch": 3.8958505083814234, "grad_norm": 0.1379440387568484, "learning_rate": 8.405032390757348e-06, "loss": 0.4335, "num_tokens": 8136311780.0, "step": 10635 }, { "epoch": 3.896216909407346, "grad_norm": 0.13283254525696456, "learning_rate": 8.402232442338157e-06, "loss": 0.3978, "num_tokens": 8137121313.0, "step": 10636 }, { "epoch": 3.896583310433269, "grad_norm": 0.13655091319511733, "learning_rate": 8.399433260078956e-06, "loss": 0.4351, "num_tokens": 8137868169.0, "step": 10637 }, { "epoch": 3.8969497114591922, "grad_norm": 0.14892079776809553, "learning_rate": 8.396634844137453e-06, "loss": 0.3807, "num_tokens": 8138675086.0, "step": 10638 }, { "epoch": 3.897316112485115, "grad_norm": 0.1193591587691645, "learning_rate": 8.393837194671321e-06, "loss": 0.4017, "num_tokens": 8139464860.0, "step": 10639 }, { "epoch": 3.897682513511038, "grad_norm": 0.14090178586524318, "learning_rate": 8.391040311838208e-06, "loss": 0.413, "num_tokens": 8140353313.0, "step": 10640 }, { "epoch": 3.8980489145369606, "grad_norm": 0.12822761925911105, "learning_rate": 8.388244195795692e-06, "loss": 0.4193, "num_tokens": 8140972686.0, "step": 10641 }, { "epoch": 3.8984153155628833, "grad_norm": 0.1584763298807798, "learning_rate": 8.38544884670132e-06, "loss": 0.4241, "num_tokens": 8141674764.0, "step": 10642 }, { "epoch": 3.8987817165888066, "grad_norm": 0.13463355265228155, "learning_rate": 8.382654264712595e-06, "loss": 0.4047, "num_tokens": 8142376512.0, "step": 10643 }, { "epoch": 3.8991481176147293, "grad_norm": 0.15029211629413552, "learning_rate": 8.379860449986978e-06, "loss": 0.3997, "num_tokens": 8143077003.0, "step": 10644 }, { "epoch": 3.899514518640652, "grad_norm": 0.15066903070568585, "learning_rate": 8.377067402681885e-06, "loss": 0.435, "num_tokens": 8143776950.0, "step": 10645 }, { "epoch": 3.899880919666575, "grad_norm": 0.14121497372566244, "learning_rate": 8.374275122954686e-06, "loss": 0.4196, "num_tokens": 8144608734.0, "step": 10646 }, { "epoch": 3.9002473206924977, "grad_norm": 0.1328708304990279, "learning_rate": 8.371483610962715e-06, "loss": 0.4414, "num_tokens": 8145315184.0, "step": 10647 }, { "epoch": 3.900613721718421, "grad_norm": 0.15092926001089277, "learning_rate": 8.368692866863257e-06, "loss": 0.4256, "num_tokens": 8146089877.0, "step": 10648 }, { "epoch": 3.9009801227443437, "grad_norm": 0.14087148629049548, "learning_rate": 8.365902890813554e-06, "loss": 0.3975, "num_tokens": 8146828885.0, "step": 10649 }, { "epoch": 3.9013465237702665, "grad_norm": 0.1405817560297883, "learning_rate": 8.363113682970804e-06, "loss": 0.4378, "num_tokens": 8147652716.0, "step": 10650 }, { "epoch": 3.9017129247961897, "grad_norm": 0.1367185462884167, "learning_rate": 8.360325243492167e-06, "loss": 0.4224, "num_tokens": 8148465167.0, "step": 10651 }, { "epoch": 3.902079325822112, "grad_norm": 0.13699250150021947, "learning_rate": 8.357537572534753e-06, "loss": 0.3918, "num_tokens": 8149260647.0, "step": 10652 }, { "epoch": 3.9024457268480353, "grad_norm": 0.12838145772637208, "learning_rate": 8.354750670255638e-06, "loss": 0.4031, "num_tokens": 8150148201.0, "step": 10653 }, { "epoch": 3.902812127873958, "grad_norm": 0.12436580398650168, "learning_rate": 8.35196453681185e-06, "loss": 0.3927, "num_tokens": 8150936717.0, "step": 10654 }, { "epoch": 3.903178528899881, "grad_norm": 0.14211297462755526, "learning_rate": 8.349179172360358e-06, "loss": 0.4129, "num_tokens": 8151642516.0, "step": 10655 }, { "epoch": 3.903544929925804, "grad_norm": 0.14120067193982463, "learning_rate": 8.346394577058116e-06, "loss": 0.4068, "num_tokens": 8152361172.0, "step": 10656 }, { "epoch": 3.903911330951727, "grad_norm": 0.13980311112318178, "learning_rate": 8.343610751062022e-06, "loss": 0.37, "num_tokens": 8153077234.0, "step": 10657 }, { "epoch": 3.9042777319776496, "grad_norm": 0.14263041180302413, "learning_rate": 8.340827694528914e-06, "loss": 0.4132, "num_tokens": 8153844071.0, "step": 10658 }, { "epoch": 3.9046441330035724, "grad_norm": 0.13316143332487168, "learning_rate": 8.338045407615616e-06, "loss": 0.3831, "num_tokens": 8154536914.0, "step": 10659 }, { "epoch": 3.905010534029495, "grad_norm": 0.1455404727653587, "learning_rate": 8.335263890478892e-06, "loss": 0.4373, "num_tokens": 8155369865.0, "step": 10660 }, { "epoch": 3.9053769350554184, "grad_norm": 0.14751343713511822, "learning_rate": 8.332483143275463e-06, "loss": 0.4023, "num_tokens": 8156095889.0, "step": 10661 }, { "epoch": 3.905743336081341, "grad_norm": 0.1341480785016855, "learning_rate": 8.329703166162008e-06, "loss": 0.4091, "num_tokens": 8156834886.0, "step": 10662 }, { "epoch": 3.906109737107264, "grad_norm": 0.14604788926867876, "learning_rate": 8.326923959295163e-06, "loss": 0.4288, "num_tokens": 8157535396.0, "step": 10663 }, { "epoch": 3.9064761381331867, "grad_norm": 0.1533788254164731, "learning_rate": 8.324145522831534e-06, "loss": 0.3789, "num_tokens": 8158257272.0, "step": 10664 }, { "epoch": 3.9068425391591095, "grad_norm": 0.13065617973751908, "learning_rate": 8.321367856927653e-06, "loss": 0.4312, "num_tokens": 8159002031.0, "step": 10665 }, { "epoch": 3.9072089401850327, "grad_norm": 0.14446440589935883, "learning_rate": 8.318590961740032e-06, "loss": 0.4269, "num_tokens": 8159787685.0, "step": 10666 }, { "epoch": 3.9075753412109555, "grad_norm": 0.14118561069236604, "learning_rate": 8.315814837425143e-06, "loss": 0.4141, "num_tokens": 8160407241.0, "step": 10667 }, { "epoch": 3.9079417422368783, "grad_norm": 0.15888258804347105, "learning_rate": 8.313039484139387e-06, "loss": 0.4004, "num_tokens": 8161104559.0, "step": 10668 }, { "epoch": 3.908308143262801, "grad_norm": 0.15351544950546175, "learning_rate": 8.310264902039156e-06, "loss": 0.4138, "num_tokens": 8161749701.0, "step": 10669 }, { "epoch": 3.908674544288724, "grad_norm": 0.14361355368653775, "learning_rate": 8.30749109128078e-06, "loss": 0.3972, "num_tokens": 8162524339.0, "step": 10670 }, { "epoch": 3.909040945314647, "grad_norm": 0.14034470623771866, "learning_rate": 8.304718052020543e-06, "loss": 0.3612, "num_tokens": 8163160306.0, "step": 10671 }, { "epoch": 3.90940734634057, "grad_norm": 0.13232061665438966, "learning_rate": 8.301945784414694e-06, "loss": 0.3858, "num_tokens": 8163857963.0, "step": 10672 }, { "epoch": 3.9097737473664926, "grad_norm": 0.14389362978537126, "learning_rate": 8.299174288619433e-06, "loss": 0.4231, "num_tokens": 8164603835.0, "step": 10673 }, { "epoch": 3.9101401483924154, "grad_norm": 0.14130091484611834, "learning_rate": 8.296403564790923e-06, "loss": 0.3898, "num_tokens": 8165298097.0, "step": 10674 }, { "epoch": 3.910506549418338, "grad_norm": 0.12778874758823686, "learning_rate": 8.293633613085275e-06, "loss": 0.4163, "num_tokens": 8166090683.0, "step": 10675 }, { "epoch": 3.9108729504442614, "grad_norm": 0.14703670683925585, "learning_rate": 8.290864433658562e-06, "loss": 0.3926, "num_tokens": 8166924916.0, "step": 10676 }, { "epoch": 3.911239351470184, "grad_norm": 0.13088668034887616, "learning_rate": 8.288096026666812e-06, "loss": 0.3856, "num_tokens": 8167661473.0, "step": 10677 }, { "epoch": 3.911605752496107, "grad_norm": 0.14196450900811092, "learning_rate": 8.285328392266008e-06, "loss": 0.4164, "num_tokens": 8168403346.0, "step": 10678 }, { "epoch": 3.9119721535220298, "grad_norm": 0.1425176599742412, "learning_rate": 8.282561530612088e-06, "loss": 0.3949, "num_tokens": 8169149455.0, "step": 10679 }, { "epoch": 3.9123385545479525, "grad_norm": 0.13814557030546798, "learning_rate": 8.279795441860959e-06, "loss": 0.4051, "num_tokens": 8169926211.0, "step": 10680 }, { "epoch": 3.9127049555738758, "grad_norm": 0.1341840934537886, "learning_rate": 8.277030126168477e-06, "loss": 0.403, "num_tokens": 8170714597.0, "step": 10681 }, { "epoch": 3.9130713565997985, "grad_norm": 0.13535243189316604, "learning_rate": 8.274265583690435e-06, "loss": 0.4101, "num_tokens": 8171579237.0, "step": 10682 }, { "epoch": 3.9134377576257213, "grad_norm": 0.13487984278913814, "learning_rate": 8.271501814582615e-06, "loss": 0.4077, "num_tokens": 8172358711.0, "step": 10683 }, { "epoch": 3.913804158651644, "grad_norm": 0.14526194976331805, "learning_rate": 8.268738819000742e-06, "loss": 0.4358, "num_tokens": 8173045236.0, "step": 10684 }, { "epoch": 3.914170559677567, "grad_norm": 0.14352775541759294, "learning_rate": 8.265976597100478e-06, "loss": 0.3935, "num_tokens": 8173774816.0, "step": 10685 }, { "epoch": 3.91453696070349, "grad_norm": 0.14066080264898226, "learning_rate": 8.263215149037477e-06, "loss": 0.4001, "num_tokens": 8174524478.0, "step": 10686 }, { "epoch": 3.914903361729413, "grad_norm": 0.14657889778946553, "learning_rate": 8.260454474967326e-06, "loss": 0.41, "num_tokens": 8175241462.0, "step": 10687 }, { "epoch": 3.9152697627553357, "grad_norm": 0.14131684606702732, "learning_rate": 8.257694575045571e-06, "loss": 0.4147, "num_tokens": 8176055203.0, "step": 10688 }, { "epoch": 3.9156361637812584, "grad_norm": 0.13471154503755187, "learning_rate": 8.254935449427718e-06, "loss": 0.3963, "num_tokens": 8176809342.0, "step": 10689 }, { "epoch": 3.9160025648071812, "grad_norm": 0.14461475670224244, "learning_rate": 8.252177098269235e-06, "loss": 0.4021, "num_tokens": 8177656014.0, "step": 10690 }, { "epoch": 3.9163689658331045, "grad_norm": 0.13274009268764247, "learning_rate": 8.249419521725532e-06, "loss": 0.4295, "num_tokens": 8178420158.0, "step": 10691 }, { "epoch": 3.9167353668590272, "grad_norm": 0.13990190238916284, "learning_rate": 8.246662719951987e-06, "loss": 0.3851, "num_tokens": 8179229700.0, "step": 10692 }, { "epoch": 3.91710176788495, "grad_norm": 0.1348158538499994, "learning_rate": 8.243906693103925e-06, "loss": 0.3726, "num_tokens": 8179992421.0, "step": 10693 }, { "epoch": 3.917468168910873, "grad_norm": 0.13279876504426485, "learning_rate": 8.24115144133665e-06, "loss": 0.3829, "num_tokens": 8180858906.0, "step": 10694 }, { "epoch": 3.9178345699367956, "grad_norm": 0.1257175739759447, "learning_rate": 8.238396964805389e-06, "loss": 0.3973, "num_tokens": 8181618128.0, "step": 10695 }, { "epoch": 3.918200970962719, "grad_norm": 0.14284574963161004, "learning_rate": 8.235643263665338e-06, "loss": 0.3864, "num_tokens": 8182260285.0, "step": 10696 }, { "epoch": 3.9185673719886416, "grad_norm": 0.14045496873027513, "learning_rate": 8.232890338071669e-06, "loss": 0.361, "num_tokens": 8182983389.0, "step": 10697 }, { "epoch": 3.9189337730145644, "grad_norm": 0.14138452313114358, "learning_rate": 8.230138188179486e-06, "loss": 0.437, "num_tokens": 8183748369.0, "step": 10698 }, { "epoch": 3.9193001740404876, "grad_norm": 0.13116046197450634, "learning_rate": 8.22738681414386e-06, "loss": 0.407, "num_tokens": 8184619820.0, "step": 10699 }, { "epoch": 3.91966657506641, "grad_norm": 0.1332990354700598, "learning_rate": 8.224636216119812e-06, "loss": 0.3875, "num_tokens": 8185464707.0, "step": 10700 }, { "epoch": 3.920032976092333, "grad_norm": 0.13113751302931928, "learning_rate": 8.221886394262327e-06, "loss": 0.4149, "num_tokens": 8186303532.0, "step": 10701 }, { "epoch": 3.920399377118256, "grad_norm": 0.12594400443050224, "learning_rate": 8.219137348726341e-06, "loss": 0.3923, "num_tokens": 8186953246.0, "step": 10702 }, { "epoch": 3.9207657781441787, "grad_norm": 0.14449835717615536, "learning_rate": 8.216389079666747e-06, "loss": 0.4257, "num_tokens": 8187780690.0, "step": 10703 }, { "epoch": 3.921132179170102, "grad_norm": 0.1385458863324207, "learning_rate": 8.213641587238397e-06, "loss": 0.4464, "num_tokens": 8188523725.0, "step": 10704 }, { "epoch": 3.9214985801960247, "grad_norm": 0.15187097321984355, "learning_rate": 8.210894871596093e-06, "loss": 0.4145, "num_tokens": 8189236733.0, "step": 10705 }, { "epoch": 3.9218649812219475, "grad_norm": 0.1364639055268251, "learning_rate": 8.208148932894597e-06, "loss": 0.3931, "num_tokens": 8189974372.0, "step": 10706 }, { "epoch": 3.9222313822478703, "grad_norm": 0.15338025753037599, "learning_rate": 8.20540377128864e-06, "loss": 0.4069, "num_tokens": 8190689951.0, "step": 10707 }, { "epoch": 3.922597783273793, "grad_norm": 0.14523045792421643, "learning_rate": 8.202659386932887e-06, "loss": 0.4151, "num_tokens": 8191414083.0, "step": 10708 }, { "epoch": 3.9229641842997163, "grad_norm": 0.15011703121668935, "learning_rate": 8.19991577998196e-06, "loss": 0.4111, "num_tokens": 8192073031.0, "step": 10709 }, { "epoch": 3.923330585325639, "grad_norm": 0.150631409436348, "learning_rate": 8.197172950590464e-06, "loss": 0.3847, "num_tokens": 8192771554.0, "step": 10710 }, { "epoch": 3.923696986351562, "grad_norm": 0.14397454107105193, "learning_rate": 8.194430898912937e-06, "loss": 0.4043, "num_tokens": 8193541167.0, "step": 10711 }, { "epoch": 3.9240633873774846, "grad_norm": 0.13706978302741696, "learning_rate": 8.191689625103869e-06, "loss": 0.4164, "num_tokens": 8194335364.0, "step": 10712 }, { "epoch": 3.9244297884034074, "grad_norm": 0.13942020274935643, "learning_rate": 8.188949129317727e-06, "loss": 0.4046, "num_tokens": 8195111441.0, "step": 10713 }, { "epoch": 3.9247961894293306, "grad_norm": 0.1443648078649111, "learning_rate": 8.186209411708925e-06, "loss": 0.408, "num_tokens": 8195869894.0, "step": 10714 }, { "epoch": 3.9251625904552534, "grad_norm": 0.13505796367397038, "learning_rate": 8.183470472431813e-06, "loss": 0.4505, "num_tokens": 8196594559.0, "step": 10715 }, { "epoch": 3.925528991481176, "grad_norm": 0.142885966654538, "learning_rate": 8.180732311640735e-06, "loss": 0.4173, "num_tokens": 8197300749.0, "step": 10716 }, { "epoch": 3.925895392507099, "grad_norm": 0.1489550864485307, "learning_rate": 8.177994929489964e-06, "loss": 0.3986, "num_tokens": 8198070176.0, "step": 10717 }, { "epoch": 3.9262617935330217, "grad_norm": 0.12813819438683013, "learning_rate": 8.175258326133736e-06, "loss": 0.3946, "num_tokens": 8198743144.0, "step": 10718 }, { "epoch": 3.926628194558945, "grad_norm": 0.1458260225916602, "learning_rate": 8.172522501726243e-06, "loss": 0.4189, "num_tokens": 8199607608.0, "step": 10719 }, { "epoch": 3.9269945955848677, "grad_norm": 0.13130127943351397, "learning_rate": 8.169787456421629e-06, "loss": 0.4079, "num_tokens": 8200359024.0, "step": 10720 }, { "epoch": 3.9273609966107905, "grad_norm": 0.14505423959228572, "learning_rate": 8.167053190374017e-06, "loss": 0.4018, "num_tokens": 8201238993.0, "step": 10721 }, { "epoch": 3.9277273976367133, "grad_norm": 0.1358416524349943, "learning_rate": 8.164319703737449e-06, "loss": 0.3855, "num_tokens": 8201957178.0, "step": 10722 }, { "epoch": 3.928093798662636, "grad_norm": 0.13682885024309074, "learning_rate": 8.161586996665944e-06, "loss": 0.3918, "num_tokens": 8202780841.0, "step": 10723 }, { "epoch": 3.9284601996885593, "grad_norm": 0.1373002177985167, "learning_rate": 8.158855069313491e-06, "loss": 0.4051, "num_tokens": 8203596569.0, "step": 10724 }, { "epoch": 3.928826600714482, "grad_norm": 0.14383795731614266, "learning_rate": 8.156123921833998e-06, "loss": 0.417, "num_tokens": 8204329130.0, "step": 10725 }, { "epoch": 3.929193001740405, "grad_norm": 0.13953187995249972, "learning_rate": 8.153393554381366e-06, "loss": 0.3972, "num_tokens": 8205140218.0, "step": 10726 }, { "epoch": 3.9295594027663276, "grad_norm": 0.12450932225433833, "learning_rate": 8.150663967109429e-06, "loss": 0.3818, "num_tokens": 8206027881.0, "step": 10727 }, { "epoch": 3.9299258037922504, "grad_norm": 0.133550585842262, "learning_rate": 8.147935160171988e-06, "loss": 0.3953, "num_tokens": 8206868826.0, "step": 10728 }, { "epoch": 3.9302922048181737, "grad_norm": 0.13264299932898593, "learning_rate": 8.145207133722793e-06, "loss": 0.4099, "num_tokens": 8207645702.0, "step": 10729 }, { "epoch": 3.9306586058440964, "grad_norm": 0.13633871760676974, "learning_rate": 8.142479887915554e-06, "loss": 0.4046, "num_tokens": 8208435005.0, "step": 10730 }, { "epoch": 3.931025006870019, "grad_norm": 0.13921916033790713, "learning_rate": 8.13975342290394e-06, "loss": 0.4015, "num_tokens": 8209224705.0, "step": 10731 }, { "epoch": 3.931391407895942, "grad_norm": 0.1303404811382353, "learning_rate": 8.137027738841567e-06, "loss": 0.4269, "num_tokens": 8210028736.0, "step": 10732 }, { "epoch": 3.9317578089218648, "grad_norm": 0.14681952877889443, "learning_rate": 8.134302835882016e-06, "loss": 0.4147, "num_tokens": 8210818098.0, "step": 10733 }, { "epoch": 3.932124209947788, "grad_norm": 0.13638153009545148, "learning_rate": 8.131578714178821e-06, "loss": 0.3985, "num_tokens": 8211688507.0, "step": 10734 }, { "epoch": 3.9324906109737108, "grad_norm": 0.12851052929279516, "learning_rate": 8.128855373885467e-06, "loss": 0.4293, "num_tokens": 8212400283.0, "step": 10735 }, { "epoch": 3.9328570119996336, "grad_norm": 0.13775790873266586, "learning_rate": 8.126132815155398e-06, "loss": 0.4092, "num_tokens": 8213118633.0, "step": 10736 }, { "epoch": 3.9332234130255563, "grad_norm": 0.14479529028273802, "learning_rate": 8.123411038142023e-06, "loss": 0.4335, "num_tokens": 8213892723.0, "step": 10737 }, { "epoch": 3.933589814051479, "grad_norm": 0.13920423617788016, "learning_rate": 8.120690042998705e-06, "loss": 0.4384, "num_tokens": 8214715826.0, "step": 10738 }, { "epoch": 3.9339562150774023, "grad_norm": 0.13950898039370915, "learning_rate": 8.117969829878735e-06, "loss": 0.4067, "num_tokens": 8215418901.0, "step": 10739 }, { "epoch": 3.934322616103325, "grad_norm": 0.13542549442330495, "learning_rate": 8.115250398935401e-06, "loss": 0.4209, "num_tokens": 8216261007.0, "step": 10740 }, { "epoch": 3.934689017129248, "grad_norm": 0.1236692650995504, "learning_rate": 8.112531750321928e-06, "loss": 0.4189, "num_tokens": 8217060996.0, "step": 10741 }, { "epoch": 3.9350554181551707, "grad_norm": 0.15533475768972937, "learning_rate": 8.109813884191482e-06, "loss": 0.4, "num_tokens": 8217789180.0, "step": 10742 }, { "epoch": 3.9354218191810935, "grad_norm": 0.13652760262135466, "learning_rate": 8.107096800697212e-06, "loss": 0.3702, "num_tokens": 8218532358.0, "step": 10743 }, { "epoch": 3.9357882202070167, "grad_norm": 0.14390307619309245, "learning_rate": 8.104380499992209e-06, "loss": 0.4222, "num_tokens": 8219323991.0, "step": 10744 }, { "epoch": 3.9361546212329395, "grad_norm": 0.12654744987521177, "learning_rate": 8.10166498222952e-06, "loss": 0.4053, "num_tokens": 8220110878.0, "step": 10745 }, { "epoch": 3.9365210222588622, "grad_norm": 0.1383096566569491, "learning_rate": 8.098950247562152e-06, "loss": 0.3906, "num_tokens": 8220889194.0, "step": 10746 }, { "epoch": 3.9368874232847855, "grad_norm": 0.14144581443603907, "learning_rate": 8.096236296143063e-06, "loss": 0.3813, "num_tokens": 8221641572.0, "step": 10747 }, { "epoch": 3.937253824310708, "grad_norm": 0.13993812426822208, "learning_rate": 8.093523128125171e-06, "loss": 0.4016, "num_tokens": 8222361744.0, "step": 10748 }, { "epoch": 3.937620225336631, "grad_norm": 0.14535194241472094, "learning_rate": 8.090810743661346e-06, "loss": 0.4249, "num_tokens": 8223251323.0, "step": 10749 }, { "epoch": 3.937986626362554, "grad_norm": 0.13457399680127322, "learning_rate": 8.088099142904413e-06, "loss": 0.408, "num_tokens": 8223990949.0, "step": 10750 }, { "epoch": 3.9383530273884766, "grad_norm": 0.14085869907544943, "learning_rate": 8.08538832600717e-06, "loss": 0.3826, "num_tokens": 8224846129.0, "step": 10751 }, { "epoch": 3.9387194284144, "grad_norm": 0.14046468867102413, "learning_rate": 8.082678293122344e-06, "loss": 0.429, "num_tokens": 8225502700.0, "step": 10752 }, { "epoch": 3.9390858294403226, "grad_norm": 0.14866178004318717, "learning_rate": 8.079969044402625e-06, "loss": 0.3945, "num_tokens": 8226180546.0, "step": 10753 }, { "epoch": 3.9394522304662454, "grad_norm": 0.14060948216194016, "learning_rate": 8.077260580000681e-06, "loss": 0.3998, "num_tokens": 8226914078.0, "step": 10754 }, { "epoch": 3.939818631492168, "grad_norm": 0.14515277742064833, "learning_rate": 8.07455290006911e-06, "loss": 0.4268, "num_tokens": 8227619263.0, "step": 10755 }, { "epoch": 3.940185032518091, "grad_norm": 0.14982758825294992, "learning_rate": 8.071846004760478e-06, "loss": 0.4186, "num_tokens": 8228343024.0, "step": 10756 }, { "epoch": 3.940551433544014, "grad_norm": 0.13738508531232088, "learning_rate": 8.069139894227301e-06, "loss": 0.3992, "num_tokens": 8229210757.0, "step": 10757 }, { "epoch": 3.940917834569937, "grad_norm": 0.13463324257573395, "learning_rate": 8.066434568622055e-06, "loss": 0.3967, "num_tokens": 8230064792.0, "step": 10758 }, { "epoch": 3.9412842355958597, "grad_norm": 0.1269340994274002, "learning_rate": 8.06373002809717e-06, "loss": 0.4009, "num_tokens": 8230713460.0, "step": 10759 }, { "epoch": 3.9416506366217825, "grad_norm": 0.15928581809015407, "learning_rate": 8.06102627280503e-06, "loss": 0.3983, "num_tokens": 8231408862.0, "step": 10760 }, { "epoch": 3.9420170376477053, "grad_norm": 0.13994474547066788, "learning_rate": 8.05832330289798e-06, "loss": 0.4136, "num_tokens": 8232134259.0, "step": 10761 }, { "epoch": 3.9423834386736285, "grad_norm": 0.1434721292920262, "learning_rate": 8.055621118528316e-06, "loss": 0.3939, "num_tokens": 8232983154.0, "step": 10762 }, { "epoch": 3.9427498396995513, "grad_norm": 0.14142261154962965, "learning_rate": 8.052919719848288e-06, "loss": 0.417, "num_tokens": 8233680201.0, "step": 10763 }, { "epoch": 3.943116240725474, "grad_norm": 0.13385659072866238, "learning_rate": 8.050219107010117e-06, "loss": 0.3973, "num_tokens": 8234333674.0, "step": 10764 }, { "epoch": 3.943482641751397, "grad_norm": 0.1484070276279438, "learning_rate": 8.047519280165957e-06, "loss": 0.3987, "num_tokens": 8235152943.0, "step": 10765 }, { "epoch": 3.9438490427773196, "grad_norm": 0.13548588435850023, "learning_rate": 8.044820239467925e-06, "loss": 0.4054, "num_tokens": 8235864032.0, "step": 10766 }, { "epoch": 3.944215443803243, "grad_norm": 0.14454365672965955, "learning_rate": 8.042121985068107e-06, "loss": 0.3916, "num_tokens": 8236645576.0, "step": 10767 }, { "epoch": 3.9445818448291656, "grad_norm": 0.13290786505816612, "learning_rate": 8.03942451711854e-06, "loss": 0.4112, "num_tokens": 8237308342.0, "step": 10768 }, { "epoch": 3.9449482458550884, "grad_norm": 0.14576852407563348, "learning_rate": 8.03672783577119e-06, "loss": 0.4062, "num_tokens": 8238045098.0, "step": 10769 }, { "epoch": 3.945314646881011, "grad_norm": 0.1363612372196875, "learning_rate": 8.034031941178018e-06, "loss": 0.3956, "num_tokens": 8238757648.0, "step": 10770 }, { "epoch": 3.945681047906934, "grad_norm": 0.13638772935469795, "learning_rate": 8.031336833490917e-06, "loss": 0.4069, "num_tokens": 8239530418.0, "step": 10771 }, { "epoch": 3.946047448932857, "grad_norm": 0.13761674161346882, "learning_rate": 8.028642512861746e-06, "loss": 0.4176, "num_tokens": 8240213694.0, "step": 10772 }, { "epoch": 3.94641384995878, "grad_norm": 0.15657297221173155, "learning_rate": 8.02594897944231e-06, "loss": 0.4046, "num_tokens": 8240971764.0, "step": 10773 }, { "epoch": 3.9467802509847028, "grad_norm": 0.13462596419873485, "learning_rate": 8.023256233384378e-06, "loss": 0.3966, "num_tokens": 8241803056.0, "step": 10774 }, { "epoch": 3.9471466520106255, "grad_norm": 0.13428975189795492, "learning_rate": 8.020564274839668e-06, "loss": 0.3826, "num_tokens": 8242587867.0, "step": 10775 }, { "epoch": 3.9475130530365483, "grad_norm": 0.13573094951155387, "learning_rate": 8.017873103959861e-06, "loss": 0.409, "num_tokens": 8243381053.0, "step": 10776 }, { "epoch": 3.9478794540624715, "grad_norm": 0.13920361048757537, "learning_rate": 8.015182720896583e-06, "loss": 0.3658, "num_tokens": 8244067129.0, "step": 10777 }, { "epoch": 3.9482458550883943, "grad_norm": 0.13777116423955987, "learning_rate": 8.012493125801441e-06, "loss": 0.4265, "num_tokens": 8244869139.0, "step": 10778 }, { "epoch": 3.948612256114317, "grad_norm": 0.1365802335786801, "learning_rate": 8.009804318825958e-06, "loss": 0.4091, "num_tokens": 8245650124.0, "step": 10779 }, { "epoch": 3.94897865714024, "grad_norm": 0.1481663564014644, "learning_rate": 8.007116300121636e-06, "loss": 0.4248, "num_tokens": 8246364016.0, "step": 10780 }, { "epoch": 3.9493450581661627, "grad_norm": 0.13502655126490318, "learning_rate": 8.004429069839945e-06, "loss": 0.3829, "num_tokens": 8247059397.0, "step": 10781 }, { "epoch": 3.949711459192086, "grad_norm": 0.14718058651098187, "learning_rate": 8.001742628132278e-06, "loss": 0.3891, "num_tokens": 8247843099.0, "step": 10782 }, { "epoch": 3.9500778602180087, "grad_norm": 0.1445171133614666, "learning_rate": 7.999056975150015e-06, "loss": 0.4168, "num_tokens": 8248605201.0, "step": 10783 }, { "epoch": 3.9504442612439314, "grad_norm": 0.14900864372507555, "learning_rate": 7.99637211104447e-06, "loss": 0.3819, "num_tokens": 8249333039.0, "step": 10784 }, { "epoch": 3.9508106622698542, "grad_norm": 0.14091260993210844, "learning_rate": 7.993688035966925e-06, "loss": 0.412, "num_tokens": 8250140493.0, "step": 10785 }, { "epoch": 3.951177063295777, "grad_norm": 0.12911578889756814, "learning_rate": 7.99100475006861e-06, "loss": 0.3843, "num_tokens": 8250867890.0, "step": 10786 }, { "epoch": 3.9515434643217002, "grad_norm": 0.13940466686026745, "learning_rate": 7.988322253500715e-06, "loss": 0.3964, "num_tokens": 8251763307.0, "step": 10787 }, { "epoch": 3.951909865347623, "grad_norm": 0.13567035813421943, "learning_rate": 7.985640546414384e-06, "loss": 0.4162, "num_tokens": 8252577241.0, "step": 10788 }, { "epoch": 3.952276266373546, "grad_norm": 0.13987932080772914, "learning_rate": 7.982959628960716e-06, "loss": 0.3776, "num_tokens": 8253378752.0, "step": 10789 }, { "epoch": 3.9526426673994686, "grad_norm": 0.13439007005194437, "learning_rate": 7.980279501290759e-06, "loss": 0.4079, "num_tokens": 8254158747.0, "step": 10790 }, { "epoch": 3.9530090684253913, "grad_norm": 0.1400953330402334, "learning_rate": 7.977600163555543e-06, "loss": 0.3965, "num_tokens": 8254936558.0, "step": 10791 }, { "epoch": 3.9533754694513146, "grad_norm": 0.1365060685221104, "learning_rate": 7.974921615906014e-06, "loss": 0.3794, "num_tokens": 8255675281.0, "step": 10792 }, { "epoch": 3.9537418704772374, "grad_norm": 0.13440318670624002, "learning_rate": 7.9722438584931e-06, "loss": 0.4176, "num_tokens": 8256459248.0, "step": 10793 }, { "epoch": 3.95410827150316, "grad_norm": 0.14328853608443023, "learning_rate": 7.969566891467683e-06, "loss": 0.416, "num_tokens": 8257273095.0, "step": 10794 }, { "epoch": 3.9544746725290834, "grad_norm": 0.1416155122076937, "learning_rate": 7.966890714980596e-06, "loss": 0.4167, "num_tokens": 8258117513.0, "step": 10795 }, { "epoch": 3.9548410735550057, "grad_norm": 0.13229764147317583, "learning_rate": 7.964215329182614e-06, "loss": 0.4137, "num_tokens": 8258836159.0, "step": 10796 }, { "epoch": 3.955207474580929, "grad_norm": 0.13789974025555018, "learning_rate": 7.961540734224494e-06, "loss": 0.4235, "num_tokens": 8259524942.0, "step": 10797 }, { "epoch": 3.9555738756068517, "grad_norm": 0.14342364937077975, "learning_rate": 7.958866930256932e-06, "loss": 0.4336, "num_tokens": 8260323485.0, "step": 10798 }, { "epoch": 3.9559402766327745, "grad_norm": 0.14089932633823027, "learning_rate": 7.956193917430573e-06, "loss": 0.4367, "num_tokens": 8261140527.0, "step": 10799 }, { "epoch": 3.9563066776586977, "grad_norm": 0.14101662240325868, "learning_rate": 7.95352169589604e-06, "loss": 0.3989, "num_tokens": 8261868495.0, "step": 10800 }, { "epoch": 3.9566730786846205, "grad_norm": 0.14638589927120937, "learning_rate": 7.95085026580389e-06, "loss": 0.4044, "num_tokens": 8262650067.0, "step": 10801 }, { "epoch": 3.9570394797105433, "grad_norm": 0.1319988838050675, "learning_rate": 7.948179627304644e-06, "loss": 0.4123, "num_tokens": 8263423599.0, "step": 10802 }, { "epoch": 3.957405880736466, "grad_norm": 0.13419020210888277, "learning_rate": 7.945509780548782e-06, "loss": 0.3978, "num_tokens": 8264151470.0, "step": 10803 }, { "epoch": 3.957772281762389, "grad_norm": 0.1401726799135241, "learning_rate": 7.942840725686733e-06, "loss": 0.41, "num_tokens": 8264978962.0, "step": 10804 }, { "epoch": 3.958138682788312, "grad_norm": 0.1394702620058249, "learning_rate": 7.940172462868881e-06, "loss": 0.445, "num_tokens": 8265743956.0, "step": 10805 }, { "epoch": 3.958505083814235, "grad_norm": 0.1388722284362987, "learning_rate": 7.937504992245573e-06, "loss": 0.3869, "num_tokens": 8266576172.0, "step": 10806 }, { "epoch": 3.9588714848401576, "grad_norm": 0.13083046231246415, "learning_rate": 7.934838313967097e-06, "loss": 0.3835, "num_tokens": 8267337057.0, "step": 10807 }, { "epoch": 3.9592378858660804, "grad_norm": 0.13452192955305992, "learning_rate": 7.932172428183722e-06, "loss": 0.3993, "num_tokens": 8268029732.0, "step": 10808 }, { "epoch": 3.959604286892003, "grad_norm": 0.1428172060694052, "learning_rate": 7.929507335045637e-06, "loss": 0.4038, "num_tokens": 8268829167.0, "step": 10809 }, { "epoch": 3.9599706879179264, "grad_norm": 0.13111772371644648, "learning_rate": 7.92684303470302e-06, "loss": 0.3849, "num_tokens": 8269566485.0, "step": 10810 }, { "epoch": 3.960337088943849, "grad_norm": 0.1381303715806475, "learning_rate": 7.924179527305986e-06, "loss": 0.3795, "num_tokens": 8270247385.0, "step": 10811 }, { "epoch": 3.960703489969772, "grad_norm": 0.15044993536768664, "learning_rate": 7.921516813004609e-06, "loss": 0.4258, "num_tokens": 8271002242.0, "step": 10812 }, { "epoch": 3.9610698909956947, "grad_norm": 0.1480146856557671, "learning_rate": 7.918854891948915e-06, "loss": 0.4051, "num_tokens": 8271755035.0, "step": 10813 }, { "epoch": 3.9614362920216175, "grad_norm": 0.14333216799539358, "learning_rate": 7.916193764288892e-06, "loss": 0.4024, "num_tokens": 8272605671.0, "step": 10814 }, { "epoch": 3.9618026930475407, "grad_norm": 0.1316457668848568, "learning_rate": 7.913533430174481e-06, "loss": 0.4038, "num_tokens": 8273320604.0, "step": 10815 }, { "epoch": 3.9621690940734635, "grad_norm": 0.14726469644777307, "learning_rate": 7.910873889755575e-06, "loss": 0.4185, "num_tokens": 8274132928.0, "step": 10816 }, { "epoch": 3.9625354950993863, "grad_norm": 0.12583178112120824, "learning_rate": 7.90821514318202e-06, "loss": 0.4187, "num_tokens": 8274981661.0, "step": 10817 }, { "epoch": 3.962901896125309, "grad_norm": 0.1414334120003247, "learning_rate": 7.905557190603635e-06, "loss": 0.4469, "num_tokens": 8275724141.0, "step": 10818 }, { "epoch": 3.963268297151232, "grad_norm": 0.14173948180916887, "learning_rate": 7.902900032170166e-06, "loss": 0.4108, "num_tokens": 8276479387.0, "step": 10819 }, { "epoch": 3.963634698177155, "grad_norm": 0.13523506417645204, "learning_rate": 7.900243668031335e-06, "loss": 0.4008, "num_tokens": 8277167314.0, "step": 10820 }, { "epoch": 3.964001099203078, "grad_norm": 0.1417601447342302, "learning_rate": 7.897588098336823e-06, "loss": 0.3927, "num_tokens": 8277872044.0, "step": 10821 }, { "epoch": 3.9643675002290006, "grad_norm": 0.14916977232448994, "learning_rate": 7.894933323236248e-06, "loss": 0.4318, "num_tokens": 8278649015.0, "step": 10822 }, { "epoch": 3.9647339012549234, "grad_norm": 0.13330888998353213, "learning_rate": 7.892279342879186e-06, "loss": 0.4008, "num_tokens": 8279474428.0, "step": 10823 }, { "epoch": 3.965100302280846, "grad_norm": 0.1397512165890915, "learning_rate": 7.889626157415184e-06, "loss": 0.4235, "num_tokens": 8280237851.0, "step": 10824 }, { "epoch": 3.9654667033067694, "grad_norm": 0.1295239725312058, "learning_rate": 7.886973766993738e-06, "loss": 0.4256, "num_tokens": 8281058388.0, "step": 10825 }, { "epoch": 3.965833104332692, "grad_norm": 0.13518649242327288, "learning_rate": 7.88432217176428e-06, "loss": 0.4045, "num_tokens": 8281703100.0, "step": 10826 }, { "epoch": 3.966199505358615, "grad_norm": 0.15043973192099888, "learning_rate": 7.88167137187623e-06, "loss": 0.4375, "num_tokens": 8282396940.0, "step": 10827 }, { "epoch": 3.9665659063845378, "grad_norm": 0.1457971524816641, "learning_rate": 7.879021367478937e-06, "loss": 0.3738, "num_tokens": 8283164721.0, "step": 10828 }, { "epoch": 3.9669323074104605, "grad_norm": 0.1401240410118485, "learning_rate": 7.876372158721714e-06, "loss": 0.4123, "num_tokens": 8283819219.0, "step": 10829 }, { "epoch": 3.9672987084363838, "grad_norm": 0.1389978583200417, "learning_rate": 7.87372374575383e-06, "loss": 0.3727, "num_tokens": 8284575687.0, "step": 10830 }, { "epoch": 3.9676651094623065, "grad_norm": 0.14031761716220933, "learning_rate": 7.871076128724513e-06, "loss": 0.4102, "num_tokens": 8285357836.0, "step": 10831 }, { "epoch": 3.9680315104882293, "grad_norm": 0.14292552181887067, "learning_rate": 7.868429307782938e-06, "loss": 0.4155, "num_tokens": 8286135769.0, "step": 10832 }, { "epoch": 3.968397911514152, "grad_norm": 0.1404211598798834, "learning_rate": 7.86578328307824e-06, "loss": 0.4003, "num_tokens": 8286898869.0, "step": 10833 }, { "epoch": 3.968764312540075, "grad_norm": 0.12979559459143636, "learning_rate": 7.863138054759501e-06, "loss": 0.432, "num_tokens": 8287750246.0, "step": 10834 }, { "epoch": 3.969130713565998, "grad_norm": 0.12988619833169399, "learning_rate": 7.860493622975783e-06, "loss": 0.4277, "num_tokens": 8288603030.0, "step": 10835 }, { "epoch": 3.969497114591921, "grad_norm": 0.12989299381905783, "learning_rate": 7.85784998787607e-06, "loss": 0.3586, "num_tokens": 8289334660.0, "step": 10836 }, { "epoch": 3.9698635156178437, "grad_norm": 0.13727693061975366, "learning_rate": 7.855207149609313e-06, "loss": 0.4044, "num_tokens": 8290108167.0, "step": 10837 }, { "epoch": 3.9702299166437665, "grad_norm": 0.13747376860289648, "learning_rate": 7.85256510832444e-06, "loss": 0.3913, "num_tokens": 8290855113.0, "step": 10838 }, { "epoch": 3.9705963176696892, "grad_norm": 0.13609642371113326, "learning_rate": 7.849923864170296e-06, "loss": 0.3961, "num_tokens": 8291622520.0, "step": 10839 }, { "epoch": 3.9709627186956125, "grad_norm": 0.1380622956437736, "learning_rate": 7.847283417295715e-06, "loss": 0.4179, "num_tokens": 8292300828.0, "step": 10840 }, { "epoch": 3.9713291197215352, "grad_norm": 0.13832158465889022, "learning_rate": 7.844643767849465e-06, "loss": 0.4342, "num_tokens": 8292939551.0, "step": 10841 }, { "epoch": 3.971695520747458, "grad_norm": 0.1549860178356476, "learning_rate": 7.842004915980275e-06, "loss": 0.4178, "num_tokens": 8293719762.0, "step": 10842 }, { "epoch": 3.972061921773381, "grad_norm": 0.14210440776778344, "learning_rate": 7.839366861836835e-06, "loss": 0.43, "num_tokens": 8294581099.0, "step": 10843 }, { "epoch": 3.9724283227993036, "grad_norm": 0.13134011739370943, "learning_rate": 7.83672960556778e-06, "loss": 0.4525, "num_tokens": 8295446803.0, "step": 10844 }, { "epoch": 3.972794723825227, "grad_norm": 0.14334722363369767, "learning_rate": 7.834093147321709e-06, "loss": 0.4253, "num_tokens": 8296182325.0, "step": 10845 }, { "epoch": 3.9731611248511496, "grad_norm": 0.13608778575711442, "learning_rate": 7.831457487247167e-06, "loss": 0.3977, "num_tokens": 8296977223.0, "step": 10846 }, { "epoch": 3.9735275258770724, "grad_norm": 0.13000537440296447, "learning_rate": 7.828822625492658e-06, "loss": 0.4182, "num_tokens": 8297801912.0, "step": 10847 }, { "epoch": 3.9738939269029956, "grad_norm": 0.14936802736875135, "learning_rate": 7.826188562206655e-06, "loss": 0.4172, "num_tokens": 8298471459.0, "step": 10848 }, { "epoch": 3.974260327928918, "grad_norm": 0.14894650923625238, "learning_rate": 7.823555297537561e-06, "loss": 0.3926, "num_tokens": 8299236059.0, "step": 10849 }, { "epoch": 3.974626728954841, "grad_norm": 0.13430988550981224, "learning_rate": 7.820922831633741e-06, "loss": 0.4182, "num_tokens": 8299999260.0, "step": 10850 }, { "epoch": 3.974993129980764, "grad_norm": 0.1582362411685176, "learning_rate": 7.818291164643537e-06, "loss": 0.3723, "num_tokens": 8300727816.0, "step": 10851 }, { "epoch": 3.9753595310066867, "grad_norm": 0.13220479569512203, "learning_rate": 7.815660296715222e-06, "loss": 0.3943, "num_tokens": 8301566730.0, "step": 10852 }, { "epoch": 3.97572593203261, "grad_norm": 0.1301679889034517, "learning_rate": 7.813030227997022e-06, "loss": 0.4239, "num_tokens": 8302299178.0, "step": 10853 }, { "epoch": 3.9760923330585327, "grad_norm": 0.13972095434483947, "learning_rate": 7.810400958637138e-06, "loss": 0.3977, "num_tokens": 8303106314.0, "step": 10854 }, { "epoch": 3.9764587340844555, "grad_norm": 0.13035874509276474, "learning_rate": 7.807772488783718e-06, "loss": 0.4012, "num_tokens": 8303835402.0, "step": 10855 }, { "epoch": 3.9768251351103783, "grad_norm": 0.14818989959284232, "learning_rate": 7.805144818584842e-06, "loss": 0.425, "num_tokens": 8304616785.0, "step": 10856 }, { "epoch": 3.977191536136301, "grad_norm": 0.1424932728849359, "learning_rate": 7.802517948188587e-06, "loss": 0.4066, "num_tokens": 8305321465.0, "step": 10857 }, { "epoch": 3.9775579371622243, "grad_norm": 0.13820751613432478, "learning_rate": 7.799891877742952e-06, "loss": 0.405, "num_tokens": 8306047666.0, "step": 10858 }, { "epoch": 3.977924338188147, "grad_norm": 0.13761939659444955, "learning_rate": 7.797266607395903e-06, "loss": 0.3891, "num_tokens": 8306910397.0, "step": 10859 }, { "epoch": 3.97829073921407, "grad_norm": 0.12919471943858657, "learning_rate": 7.79464213729536e-06, "loss": 0.4292, "num_tokens": 8307705186.0, "step": 10860 }, { "epoch": 3.9786571402399926, "grad_norm": 0.1414527025577568, "learning_rate": 7.7920184675892e-06, "loss": 0.3891, "num_tokens": 8308484500.0, "step": 10861 }, { "epoch": 3.9790235412659154, "grad_norm": 0.1307907480489916, "learning_rate": 7.789395598425248e-06, "loss": 0.3963, "num_tokens": 8309343599.0, "step": 10862 }, { "epoch": 3.9793899422918386, "grad_norm": 0.13393470293228724, "learning_rate": 7.786773529951294e-06, "loss": 0.4247, "num_tokens": 8310087533.0, "step": 10863 }, { "epoch": 3.9797563433177614, "grad_norm": 0.14047524973476463, "learning_rate": 7.784152262315066e-06, "loss": 0.3663, "num_tokens": 8310906033.0, "step": 10864 }, { "epoch": 3.980122744343684, "grad_norm": 0.13299707666654387, "learning_rate": 7.781531795664278e-06, "loss": 0.4152, "num_tokens": 8311716884.0, "step": 10865 }, { "epoch": 3.980489145369607, "grad_norm": 0.1389674726608657, "learning_rate": 7.778912130146557e-06, "loss": 0.4053, "num_tokens": 8312486323.0, "step": 10866 }, { "epoch": 3.9808555463955297, "grad_norm": 0.1338499519157245, "learning_rate": 7.77629326590952e-06, "loss": 0.4049, "num_tokens": 8313277792.0, "step": 10867 }, { "epoch": 3.981221947421453, "grad_norm": 0.1340294644821401, "learning_rate": 7.773675203100726e-06, "loss": 0.4007, "num_tokens": 8314051638.0, "step": 10868 }, { "epoch": 3.9815883484473757, "grad_norm": 0.14402842110760802, "learning_rate": 7.771057941867684e-06, "loss": 0.4539, "num_tokens": 8314733742.0, "step": 10869 }, { "epoch": 3.9819547494732985, "grad_norm": 0.149554308641767, "learning_rate": 7.76844148235786e-06, "loss": 0.3996, "num_tokens": 8315453329.0, "step": 10870 }, { "epoch": 3.9823211504992213, "grad_norm": 0.15324404525299712, "learning_rate": 7.765825824718685e-06, "loss": 0.44, "num_tokens": 8316175237.0, "step": 10871 }, { "epoch": 3.982687551525144, "grad_norm": 0.1365458153192249, "learning_rate": 7.763210969097532e-06, "loss": 0.3829, "num_tokens": 8316904252.0, "step": 10872 }, { "epoch": 3.9830539525510673, "grad_norm": 0.1376989421201293, "learning_rate": 7.760596915641734e-06, "loss": 0.3792, "num_tokens": 8317739843.0, "step": 10873 }, { "epoch": 3.98342035357699, "grad_norm": 0.12929708614743315, "learning_rate": 7.75798366449858e-06, "loss": 0.3999, "num_tokens": 8318469708.0, "step": 10874 }, { "epoch": 3.983786754602913, "grad_norm": 0.14137536723136504, "learning_rate": 7.75537121581531e-06, "loss": 0.3988, "num_tokens": 8319179888.0, "step": 10875 }, { "epoch": 3.9841531556288357, "grad_norm": 0.14611746560106811, "learning_rate": 7.752759569739125e-06, "loss": 0.4253, "num_tokens": 8319957353.0, "step": 10876 }, { "epoch": 3.9845195566547584, "grad_norm": 0.13500868740091998, "learning_rate": 7.75014872641717e-06, "loss": 0.4141, "num_tokens": 8320734669.0, "step": 10877 }, { "epoch": 3.9848859576806817, "grad_norm": 0.1305468297739915, "learning_rate": 7.747538685996565e-06, "loss": 0.3993, "num_tokens": 8321598975.0, "step": 10878 }, { "epoch": 3.9852523587066044, "grad_norm": 0.12885494217009438, "learning_rate": 7.74492944862436e-06, "loss": 0.3947, "num_tokens": 8322353391.0, "step": 10879 }, { "epoch": 3.985618759732527, "grad_norm": 0.14496965793385289, "learning_rate": 7.74232101444757e-06, "loss": 0.3995, "num_tokens": 8323047622.0, "step": 10880 }, { "epoch": 3.98598516075845, "grad_norm": 0.14899941441840464, "learning_rate": 7.739713383613174e-06, "loss": 0.3877, "num_tokens": 8323758491.0, "step": 10881 }, { "epoch": 3.9863515617843728, "grad_norm": 0.1312066263399599, "learning_rate": 7.737106556268099e-06, "loss": 0.3957, "num_tokens": 8324600380.0, "step": 10882 }, { "epoch": 3.986717962810296, "grad_norm": 0.11937758639108283, "learning_rate": 7.734500532559213e-06, "loss": 0.4052, "num_tokens": 8325380352.0, "step": 10883 }, { "epoch": 3.987084363836219, "grad_norm": 0.14503281642821433, "learning_rate": 7.731895312633362e-06, "loss": 0.4332, "num_tokens": 8326066891.0, "step": 10884 }, { "epoch": 3.9874507648621416, "grad_norm": 0.15606583210059313, "learning_rate": 7.729290896637333e-06, "loss": 0.3948, "num_tokens": 8326878191.0, "step": 10885 }, { "epoch": 3.9878171658880643, "grad_norm": 0.13486892542391415, "learning_rate": 7.726687284717873e-06, "loss": 0.406, "num_tokens": 8327592795.0, "step": 10886 }, { "epoch": 3.988183566913987, "grad_norm": 0.14600481068504692, "learning_rate": 7.724084477021679e-06, "loss": 0.4236, "num_tokens": 8328359071.0, "step": 10887 }, { "epoch": 3.9885499679399103, "grad_norm": 0.13900946710720244, "learning_rate": 7.721482473695403e-06, "loss": 0.4139, "num_tokens": 8329176472.0, "step": 10888 }, { "epoch": 3.988916368965833, "grad_norm": 0.13554886450215378, "learning_rate": 7.718881274885657e-06, "loss": 0.3913, "num_tokens": 8330105501.0, "step": 10889 }, { "epoch": 3.989282769991756, "grad_norm": 0.1258443076222599, "learning_rate": 7.716280880739002e-06, "loss": 0.3948, "num_tokens": 8330843479.0, "step": 10890 }, { "epoch": 3.9896491710176787, "grad_norm": 0.13985559041746762, "learning_rate": 7.713681291401951e-06, "loss": 0.4213, "num_tokens": 8331553294.0, "step": 10891 }, { "epoch": 3.9900155720436015, "grad_norm": 0.1443373549018809, "learning_rate": 7.711082507020995e-06, "loss": 0.4496, "num_tokens": 8332255325.0, "step": 10892 }, { "epoch": 3.9903819730695247, "grad_norm": 0.15457649696442616, "learning_rate": 7.708484527742543e-06, "loss": 0.4433, "num_tokens": 8332981354.0, "step": 10893 }, { "epoch": 3.9907483740954475, "grad_norm": 0.13925023589347826, "learning_rate": 7.705887353712978e-06, "loss": 0.4001, "num_tokens": 8333711073.0, "step": 10894 }, { "epoch": 3.9911147751213703, "grad_norm": 0.14194218564040767, "learning_rate": 7.70329098507865e-06, "loss": 0.3993, "num_tokens": 8334421580.0, "step": 10895 }, { "epoch": 3.9914811761472935, "grad_norm": 0.13805614882117884, "learning_rate": 7.700695421985835e-06, "loss": 0.4064, "num_tokens": 8335108985.0, "step": 10896 }, { "epoch": 3.991847577173216, "grad_norm": 0.13451466281220412, "learning_rate": 7.698100664580788e-06, "loss": 0.4204, "num_tokens": 8335897235.0, "step": 10897 }, { "epoch": 3.992213978199139, "grad_norm": 0.14720769308235016, "learning_rate": 7.695506713009707e-06, "loss": 0.425, "num_tokens": 8336685124.0, "step": 10898 }, { "epoch": 3.992580379225062, "grad_norm": 0.13202061202712054, "learning_rate": 7.692913567418748e-06, "loss": 0.3963, "num_tokens": 8337493858.0, "step": 10899 }, { "epoch": 3.9929467802509846, "grad_norm": 0.13612578526065783, "learning_rate": 7.690321227954019e-06, "loss": 0.4214, "num_tokens": 8338293663.0, "step": 10900 }, { "epoch": 3.993313181276908, "grad_norm": 0.1345569719166432, "learning_rate": 7.687729694761586e-06, "loss": 0.3865, "num_tokens": 8339110711.0, "step": 10901 }, { "epoch": 3.9936795823028306, "grad_norm": 0.12820284305134758, "learning_rate": 7.685138967987464e-06, "loss": 0.4209, "num_tokens": 8339846361.0, "step": 10902 }, { "epoch": 3.9940459833287534, "grad_norm": 0.14440966239067538, "learning_rate": 7.682549047777629e-06, "loss": 0.4175, "num_tokens": 8340679086.0, "step": 10903 }, { "epoch": 3.994412384354676, "grad_norm": 0.12834782446001966, "learning_rate": 7.679959934278007e-06, "loss": 0.3819, "num_tokens": 8341557015.0, "step": 10904 }, { "epoch": 3.994778785380599, "grad_norm": 0.13081689322195134, "learning_rate": 7.677371627634491e-06, "loss": 0.4173, "num_tokens": 8342296158.0, "step": 10905 }, { "epoch": 3.995145186406522, "grad_norm": 0.13452123750992875, "learning_rate": 7.674784127992903e-06, "loss": 0.3901, "num_tokens": 8343067580.0, "step": 10906 }, { "epoch": 3.995511587432445, "grad_norm": 0.14086846990319846, "learning_rate": 7.672197435499036e-06, "loss": 0.3803, "num_tokens": 8343856584.0, "step": 10907 }, { "epoch": 3.9958779884583677, "grad_norm": 0.13738626133182097, "learning_rate": 7.669611550298643e-06, "loss": 0.4128, "num_tokens": 8344624421.0, "step": 10908 }, { "epoch": 3.9962443894842905, "grad_norm": 0.14549582245428364, "learning_rate": 7.66702647253743e-06, "loss": 0.4016, "num_tokens": 8345327033.0, "step": 10909 }, { "epoch": 3.9966107905102133, "grad_norm": 0.1369008605743855, "learning_rate": 7.664442202361034e-06, "loss": 0.4361, "num_tokens": 8346170631.0, "step": 10910 }, { "epoch": 3.9969771915361365, "grad_norm": 0.13908700562245732, "learning_rate": 7.66185873991508e-06, "loss": 0.3772, "num_tokens": 8346865844.0, "step": 10911 }, { "epoch": 3.9973435925620593, "grad_norm": 0.1498835843703001, "learning_rate": 7.659276085345124e-06, "loss": 0.3913, "num_tokens": 8347604893.0, "step": 10912 }, { "epoch": 3.997709993587982, "grad_norm": 0.14884739183018153, "learning_rate": 7.65669423879669e-06, "loss": 0.3857, "num_tokens": 8348257163.0, "step": 10913 }, { "epoch": 3.998076394613905, "grad_norm": 0.1443299220283014, "learning_rate": 7.654113200415247e-06, "loss": 0.402, "num_tokens": 8349041944.0, "step": 10914 }, { "epoch": 3.9984427956398276, "grad_norm": 0.13126173756154344, "learning_rate": 7.651532970346223e-06, "loss": 0.421, "num_tokens": 8349888829.0, "step": 10915 }, { "epoch": 3.998809196665751, "grad_norm": 0.1363481362386339, "learning_rate": 7.648953548735001e-06, "loss": 0.4028, "num_tokens": 8350683403.0, "step": 10916 }, { "epoch": 3.9991755976916736, "grad_norm": 0.13651263646452821, "learning_rate": 7.646374935726916e-06, "loss": 0.419, "num_tokens": 8351372263.0, "step": 10917 }, { "epoch": 3.9995419987175964, "grad_norm": 0.14001673272297524, "learning_rate": 7.64379713146726e-06, "loss": 0.3991, "num_tokens": 8352341766.0, "step": 10918 }, { "epoch": 3.999908399743519, "grad_norm": 0.14783914973100773, "learning_rate": 7.64122013610128e-06, "loss": 0.3907, "num_tokens": 8353094630.0, "step": 10919 }, { "epoch": 4.0, "grad_norm": 0.3250085953682915, "learning_rate": 7.638643949774169e-06, "loss": 0.4702, "num_tokens": 8353286673.0, "step": 10920 }, { "epoch": 4.000366401025923, "grad_norm": 0.1912062054070339, "learning_rate": 7.636068572631081e-06, "loss": 0.3903, "num_tokens": 8354099359.0, "step": 10921 }, { "epoch": 4.000732802051846, "grad_norm": 0.16766476844963807, "learning_rate": 7.633494004817142e-06, "loss": 0.3687, "num_tokens": 8354956098.0, "step": 10922 }, { "epoch": 4.001099203077769, "grad_norm": 0.1724572908257053, "learning_rate": 7.630920246477389e-06, "loss": 0.3863, "num_tokens": 8355672220.0, "step": 10923 }, { "epoch": 4.001465604103691, "grad_norm": 0.1704390991761643, "learning_rate": 7.628347297756857e-06, "loss": 0.3656, "num_tokens": 8356499454.0, "step": 10924 }, { "epoch": 4.001832005129614, "grad_norm": 0.18306460498939203, "learning_rate": 7.625775158800513e-06, "loss": 0.3634, "num_tokens": 8357251802.0, "step": 10925 }, { "epoch": 4.002198406155538, "grad_norm": 0.1545122761681756, "learning_rate": 7.623203829753283e-06, "loss": 0.3586, "num_tokens": 8358078917.0, "step": 10926 }, { "epoch": 4.00256480718146, "grad_norm": 0.14225660168295193, "learning_rate": 7.620633310760046e-06, "loss": 0.3769, "num_tokens": 8358843729.0, "step": 10927 }, { "epoch": 4.002931208207383, "grad_norm": 0.14797882374051483, "learning_rate": 7.6180636019656366e-06, "loss": 0.3603, "num_tokens": 8359693171.0, "step": 10928 }, { "epoch": 4.0032976092333055, "grad_norm": 0.17555071464905506, "learning_rate": 7.615494703514845e-06, "loss": 0.3727, "num_tokens": 8360409386.0, "step": 10929 }, { "epoch": 4.003664010259229, "grad_norm": 0.1762858243834124, "learning_rate": 7.612926615552416e-06, "loss": 0.369, "num_tokens": 8361112248.0, "step": 10930 }, { "epoch": 4.004030411285152, "grad_norm": 0.16695034553599272, "learning_rate": 7.610359338223041e-06, "loss": 0.3477, "num_tokens": 8361847404.0, "step": 10931 }, { "epoch": 4.004396812311074, "grad_norm": 0.16971276571663327, "learning_rate": 7.607792871671387e-06, "loss": 0.3416, "num_tokens": 8362555654.0, "step": 10932 }, { "epoch": 4.0047632133369975, "grad_norm": 0.15879943957935863, "learning_rate": 7.605227216042042e-06, "loss": 0.358, "num_tokens": 8363457406.0, "step": 10933 }, { "epoch": 4.00512961436292, "grad_norm": 0.1540885208718771, "learning_rate": 7.602662371479572e-06, "loss": 0.3316, "num_tokens": 8364239417.0, "step": 10934 }, { "epoch": 4.005496015388843, "grad_norm": 0.16024941537341716, "learning_rate": 7.600098338128504e-06, "loss": 0.3652, "num_tokens": 8365043631.0, "step": 10935 }, { "epoch": 4.005862416414766, "grad_norm": 0.14860289969260565, "learning_rate": 7.597535116133295e-06, "loss": 0.385, "num_tokens": 8365841003.0, "step": 10936 }, { "epoch": 4.006228817440689, "grad_norm": 0.1451900029543939, "learning_rate": 7.594972705638366e-06, "loss": 0.3452, "num_tokens": 8366673180.0, "step": 10937 }, { "epoch": 4.006595218466612, "grad_norm": 0.15430595109374656, "learning_rate": 7.5924111067881055e-06, "loss": 0.3544, "num_tokens": 8367400989.0, "step": 10938 }, { "epoch": 4.006961619492534, "grad_norm": 0.14299315827082543, "learning_rate": 7.589850319726845e-06, "loss": 0.3783, "num_tokens": 8368193105.0, "step": 10939 }, { "epoch": 4.007328020518457, "grad_norm": 0.14865139587830914, "learning_rate": 7.587290344598856e-06, "loss": 0.3365, "num_tokens": 8368988430.0, "step": 10940 }, { "epoch": 4.007694421544381, "grad_norm": 0.14441831094571214, "learning_rate": 7.584731181548397e-06, "loss": 0.3756, "num_tokens": 8369859347.0, "step": 10941 }, { "epoch": 4.008060822570303, "grad_norm": 0.14395051166376913, "learning_rate": 7.582172830719654e-06, "loss": 0.3647, "num_tokens": 8370666806.0, "step": 10942 }, { "epoch": 4.008427223596226, "grad_norm": 0.1560348274437277, "learning_rate": 7.5796152922567766e-06, "loss": 0.3664, "num_tokens": 8371492609.0, "step": 10943 }, { "epoch": 4.0087936246221485, "grad_norm": 0.15734054328222635, "learning_rate": 7.577058566303872e-06, "loss": 0.3634, "num_tokens": 8372209119.0, "step": 10944 }, { "epoch": 4.009160025648072, "grad_norm": 0.15104514493386645, "learning_rate": 7.574502653004993e-06, "loss": 0.3789, "num_tokens": 8372974167.0, "step": 10945 }, { "epoch": 4.009526426673995, "grad_norm": 0.15157718034087964, "learning_rate": 7.571947552504154e-06, "loss": 0.3267, "num_tokens": 8373636345.0, "step": 10946 }, { "epoch": 4.009892827699917, "grad_norm": 0.13607405832376065, "learning_rate": 7.569393264945319e-06, "loss": 0.3528, "num_tokens": 8374456420.0, "step": 10947 }, { "epoch": 4.0102592287258405, "grad_norm": 0.1585704831807231, "learning_rate": 7.566839790472407e-06, "loss": 0.3788, "num_tokens": 8375169317.0, "step": 10948 }, { "epoch": 4.010625629751764, "grad_norm": 0.149953846277909, "learning_rate": 7.564287129229303e-06, "loss": 0.3628, "num_tokens": 8375882615.0, "step": 10949 }, { "epoch": 4.010992030777686, "grad_norm": 0.13927433841443912, "learning_rate": 7.5617352813598166e-06, "loss": 0.3577, "num_tokens": 8376712014.0, "step": 10950 }, { "epoch": 4.011358431803609, "grad_norm": 0.13483295540462734, "learning_rate": 7.559184247007748e-06, "loss": 0.3538, "num_tokens": 8377594725.0, "step": 10951 }, { "epoch": 4.011724832829532, "grad_norm": 0.1507552592287433, "learning_rate": 7.556634026316831e-06, "loss": 0.3774, "num_tokens": 8378385334.0, "step": 10952 }, { "epoch": 4.012091233855455, "grad_norm": 0.14794991914737532, "learning_rate": 7.554084619430744e-06, "loss": 0.3692, "num_tokens": 8379215151.0, "step": 10953 }, { "epoch": 4.012457634881378, "grad_norm": 0.14852148823934772, "learning_rate": 7.551536026493145e-06, "loss": 0.3757, "num_tokens": 8379943092.0, "step": 10954 }, { "epoch": 4.0128240359073, "grad_norm": 0.1373340021909784, "learning_rate": 7.548988247647631e-06, "loss": 0.3991, "num_tokens": 8380821066.0, "step": 10955 }, { "epoch": 4.013190436933224, "grad_norm": 0.13619973963094686, "learning_rate": 7.546441283037751e-06, "loss": 0.3518, "num_tokens": 8381672442.0, "step": 10956 }, { "epoch": 4.013556837959146, "grad_norm": 0.14813238797336625, "learning_rate": 7.543895132807018e-06, "loss": 0.362, "num_tokens": 8382458898.0, "step": 10957 }, { "epoch": 4.013923238985069, "grad_norm": 0.15956603208388312, "learning_rate": 7.541349797098889e-06, "loss": 0.3533, "num_tokens": 8383107713.0, "step": 10958 }, { "epoch": 4.014289640010992, "grad_norm": 0.14117383676489728, "learning_rate": 7.538805276056782e-06, "loss": 0.3543, "num_tokens": 8383936218.0, "step": 10959 }, { "epoch": 4.014656041036915, "grad_norm": 0.15038823729568376, "learning_rate": 7.536261569824068e-06, "loss": 0.3603, "num_tokens": 8384740440.0, "step": 10960 }, { "epoch": 4.015022442062838, "grad_norm": 0.15074392548025245, "learning_rate": 7.533718678544064e-06, "loss": 0.3526, "num_tokens": 8385483334.0, "step": 10961 }, { "epoch": 4.01538884308876, "grad_norm": 0.1418075992452587, "learning_rate": 7.531176602360064e-06, "loss": 0.3448, "num_tokens": 8386344482.0, "step": 10962 }, { "epoch": 4.0157552441146835, "grad_norm": 0.15912732704321925, "learning_rate": 7.5286353414152846e-06, "loss": 0.347, "num_tokens": 8387013059.0, "step": 10963 }, { "epoch": 4.016121645140607, "grad_norm": 0.15524899952628857, "learning_rate": 7.5260948958529135e-06, "loss": 0.3471, "num_tokens": 8387683022.0, "step": 10964 }, { "epoch": 4.016488046166529, "grad_norm": 0.14248502020343998, "learning_rate": 7.523555265816096e-06, "loss": 0.347, "num_tokens": 8388450412.0, "step": 10965 }, { "epoch": 4.016854447192452, "grad_norm": 0.1597817883772467, "learning_rate": 7.521016451447932e-06, "loss": 0.3869, "num_tokens": 8389188757.0, "step": 10966 }, { "epoch": 4.017220848218375, "grad_norm": 0.14893890766322407, "learning_rate": 7.518478452891454e-06, "loss": 0.3762, "num_tokens": 8389997042.0, "step": 10967 }, { "epoch": 4.017587249244298, "grad_norm": 0.15044387866651018, "learning_rate": 7.515941270289677e-06, "loss": 0.3462, "num_tokens": 8390732328.0, "step": 10968 }, { "epoch": 4.017953650270221, "grad_norm": 0.15039562522733577, "learning_rate": 7.513404903785554e-06, "loss": 0.3589, "num_tokens": 8391488030.0, "step": 10969 }, { "epoch": 4.0183200512961434, "grad_norm": 0.14895112482304163, "learning_rate": 7.510869353521995e-06, "loss": 0.3624, "num_tokens": 8392185584.0, "step": 10970 }, { "epoch": 4.018686452322067, "grad_norm": 0.14380090210460644, "learning_rate": 7.508334619641864e-06, "loss": 0.3537, "num_tokens": 8393003851.0, "step": 10971 }, { "epoch": 4.019052853347989, "grad_norm": 0.14337920289515005, "learning_rate": 7.505800702287977e-06, "loss": 0.3637, "num_tokens": 8393788400.0, "step": 10972 }, { "epoch": 4.019419254373912, "grad_norm": 0.1571560570558839, "learning_rate": 7.503267601603115e-06, "loss": 0.3244, "num_tokens": 8394450227.0, "step": 10973 }, { "epoch": 4.0197856553998355, "grad_norm": 0.13728255822644295, "learning_rate": 7.500735317729996e-06, "loss": 0.3563, "num_tokens": 8395318989.0, "step": 10974 }, { "epoch": 4.020152056425758, "grad_norm": 0.16060492479788996, "learning_rate": 7.498203850811299e-06, "loss": 0.3886, "num_tokens": 8396026163.0, "step": 10975 }, { "epoch": 4.020518457451681, "grad_norm": 0.14837963533071247, "learning_rate": 7.495673200989669e-06, "loss": 0.3526, "num_tokens": 8396792555.0, "step": 10976 }, { "epoch": 4.020884858477603, "grad_norm": 0.1580897309976183, "learning_rate": 7.493143368407687e-06, "loss": 0.3757, "num_tokens": 8397531659.0, "step": 10977 }, { "epoch": 4.021251259503527, "grad_norm": 0.16098557542116648, "learning_rate": 7.490614353207888e-06, "loss": 0.3669, "num_tokens": 8398193885.0, "step": 10978 }, { "epoch": 4.02161766052945, "grad_norm": 0.16040214733901412, "learning_rate": 7.488086155532786e-06, "loss": 0.359, "num_tokens": 8398890916.0, "step": 10979 }, { "epoch": 4.021984061555372, "grad_norm": 0.1449629748352451, "learning_rate": 7.485558775524812e-06, "loss": 0.3618, "num_tokens": 8399678042.0, "step": 10980 }, { "epoch": 4.022350462581295, "grad_norm": 0.15584939289292918, "learning_rate": 7.4830322133263856e-06, "loss": 0.3233, "num_tokens": 8400340084.0, "step": 10981 }, { "epoch": 4.022716863607218, "grad_norm": 0.16009138827383054, "learning_rate": 7.480506469079856e-06, "loss": 0.3603, "num_tokens": 8400968320.0, "step": 10982 }, { "epoch": 4.023083264633141, "grad_norm": 0.151484747603446, "learning_rate": 7.47798154292754e-06, "loss": 0.353, "num_tokens": 8401700346.0, "step": 10983 }, { "epoch": 4.023449665659064, "grad_norm": 0.16890510580553994, "learning_rate": 7.475457435011699e-06, "loss": 0.3451, "num_tokens": 8402266412.0, "step": 10984 }, { "epoch": 4.0238160666849865, "grad_norm": 0.14789399974099407, "learning_rate": 7.472934145474558e-06, "loss": 0.3959, "num_tokens": 8403101247.0, "step": 10985 }, { "epoch": 4.02418246771091, "grad_norm": 0.15487995992293632, "learning_rate": 7.470411674458284e-06, "loss": 0.3522, "num_tokens": 8403855906.0, "step": 10986 }, { "epoch": 4.024548868736832, "grad_norm": 0.14690678938943594, "learning_rate": 7.46789002210501e-06, "loss": 0.382, "num_tokens": 8404619043.0, "step": 10987 }, { "epoch": 4.024915269762755, "grad_norm": 0.15576311493117484, "learning_rate": 7.465369188556808e-06, "loss": 0.3537, "num_tokens": 8405347795.0, "step": 10988 }, { "epoch": 4.0252816707886785, "grad_norm": 0.15590095428857528, "learning_rate": 7.462849173955732e-06, "loss": 0.3713, "num_tokens": 8406056567.0, "step": 10989 }, { "epoch": 4.025648071814601, "grad_norm": 0.17076329047765462, "learning_rate": 7.460329978443755e-06, "loss": 0.4078, "num_tokens": 8406726165.0, "step": 10990 }, { "epoch": 4.026014472840524, "grad_norm": 0.14319934729873438, "learning_rate": 7.457811602162819e-06, "loss": 0.3655, "num_tokens": 8407527845.0, "step": 10991 }, { "epoch": 4.026380873866446, "grad_norm": 0.1590085924313648, "learning_rate": 7.455294045254835e-06, "loss": 0.3715, "num_tokens": 8408197947.0, "step": 10992 }, { "epoch": 4.02674727489237, "grad_norm": 0.1392461819755917, "learning_rate": 7.452777307861641e-06, "loss": 0.3698, "num_tokens": 8409045613.0, "step": 10993 }, { "epoch": 4.027113675918293, "grad_norm": 0.14176862703663387, "learning_rate": 7.450261390125039e-06, "loss": 0.3556, "num_tokens": 8409830640.0, "step": 10994 }, { "epoch": 4.027480076944215, "grad_norm": 0.14732535974842098, "learning_rate": 7.4477462921867975e-06, "loss": 0.3631, "num_tokens": 8410635690.0, "step": 10995 }, { "epoch": 4.027846477970138, "grad_norm": 0.13904780465398706, "learning_rate": 7.4452320141886315e-06, "loss": 0.3306, "num_tokens": 8411404867.0, "step": 10996 }, { "epoch": 4.028212878996062, "grad_norm": 0.1446534637972814, "learning_rate": 7.4427185562721905e-06, "loss": 0.3556, "num_tokens": 8412203289.0, "step": 10997 }, { "epoch": 4.028579280021984, "grad_norm": 0.16065535267525496, "learning_rate": 7.4402059185791054e-06, "loss": 0.3792, "num_tokens": 8412871972.0, "step": 10998 }, { "epoch": 4.028945681047907, "grad_norm": 0.1350916998246478, "learning_rate": 7.437694101250949e-06, "loss": 0.3387, "num_tokens": 8413688520.0, "step": 10999 }, { "epoch": 4.0293120820738295, "grad_norm": 0.15908127190023388, "learning_rate": 7.435183104429247e-06, "loss": 0.3609, "num_tokens": 8414374098.0, "step": 11000 }, { "epoch": 4.029678483099753, "grad_norm": 0.15698725716718362, "learning_rate": 7.43267292825548e-06, "loss": 0.3605, "num_tokens": 8415164427.0, "step": 11001 }, { "epoch": 4.030044884125676, "grad_norm": 0.15659175546859846, "learning_rate": 7.430163572871083e-06, "loss": 0.3783, "num_tokens": 8415891705.0, "step": 11002 }, { "epoch": 4.030411285151598, "grad_norm": 0.14473585439816833, "learning_rate": 7.427655038417445e-06, "loss": 0.3657, "num_tokens": 8416685162.0, "step": 11003 }, { "epoch": 4.0307776861775215, "grad_norm": 0.14375287393019276, "learning_rate": 7.425147325035907e-06, "loss": 0.3416, "num_tokens": 8417459231.0, "step": 11004 }, { "epoch": 4.031144087203444, "grad_norm": 0.14835995831589713, "learning_rate": 7.422640432867762e-06, "loss": 0.3701, "num_tokens": 8418242465.0, "step": 11005 }, { "epoch": 4.031510488229367, "grad_norm": 0.14898566237607486, "learning_rate": 7.420134362054274e-06, "loss": 0.3091, "num_tokens": 8418891085.0, "step": 11006 }, { "epoch": 4.03187688925529, "grad_norm": 0.13618161675917625, "learning_rate": 7.417629112736626e-06, "loss": 0.3514, "num_tokens": 8419763544.0, "step": 11007 }, { "epoch": 4.032243290281213, "grad_norm": 0.16194474816360535, "learning_rate": 7.41512468505599e-06, "loss": 0.3887, "num_tokens": 8420460067.0, "step": 11008 }, { "epoch": 4.032609691307136, "grad_norm": 0.14786872884163615, "learning_rate": 7.412621079153477e-06, "loss": 0.3854, "num_tokens": 8421219916.0, "step": 11009 }, { "epoch": 4.032976092333058, "grad_norm": 0.14442675141109335, "learning_rate": 7.410118295170136e-06, "loss": 0.3534, "num_tokens": 8422008449.0, "step": 11010 }, { "epoch": 4.033342493358981, "grad_norm": 0.141137981116753, "learning_rate": 7.407616333247e-06, "loss": 0.3736, "num_tokens": 8422853418.0, "step": 11011 }, { "epoch": 4.033708894384905, "grad_norm": 0.13769179313868193, "learning_rate": 7.405115193525037e-06, "loss": 0.388, "num_tokens": 8423695962.0, "step": 11012 }, { "epoch": 4.034075295410827, "grad_norm": 0.15336006349708534, "learning_rate": 7.402614876145171e-06, "loss": 0.3602, "num_tokens": 8424383601.0, "step": 11013 }, { "epoch": 4.03444169643675, "grad_norm": 0.1422069078745559, "learning_rate": 7.400115381248285e-06, "loss": 0.3699, "num_tokens": 8425308784.0, "step": 11014 }, { "epoch": 4.0348080974626725, "grad_norm": 0.1504729608855331, "learning_rate": 7.397616708975203e-06, "loss": 0.3981, "num_tokens": 8426036362.0, "step": 11015 }, { "epoch": 4.035174498488596, "grad_norm": 0.15899774774194866, "learning_rate": 7.3951188594667275e-06, "loss": 0.3677, "num_tokens": 8426685337.0, "step": 11016 }, { "epoch": 4.035540899514519, "grad_norm": 0.14647481347157476, "learning_rate": 7.392621832863584e-06, "loss": 0.365, "num_tokens": 8427451971.0, "step": 11017 }, { "epoch": 4.035907300540441, "grad_norm": 0.16448646097948108, "learning_rate": 7.390125629306465e-06, "loss": 0.3551, "num_tokens": 8428138925.0, "step": 11018 }, { "epoch": 4.036273701566365, "grad_norm": 0.14975276184745506, "learning_rate": 7.387630248936036e-06, "loss": 0.3696, "num_tokens": 8428891869.0, "step": 11019 }, { "epoch": 4.036640102592287, "grad_norm": 0.14856612748247774, "learning_rate": 7.385135691892879e-06, "loss": 0.3862, "num_tokens": 8429632012.0, "step": 11020 }, { "epoch": 4.03700650361821, "grad_norm": 0.15445827209953517, "learning_rate": 7.382641958317555e-06, "loss": 0.37, "num_tokens": 8430357518.0, "step": 11021 }, { "epoch": 4.037372904644133, "grad_norm": 0.16105119145042035, "learning_rate": 7.380149048350576e-06, "loss": 0.3373, "num_tokens": 8431076505.0, "step": 11022 }, { "epoch": 4.037739305670056, "grad_norm": 0.15315866239908418, "learning_rate": 7.377656962132405e-06, "loss": 0.3915, "num_tokens": 8431876647.0, "step": 11023 }, { "epoch": 4.038105706695979, "grad_norm": 0.1399089871415265, "learning_rate": 7.375165699803446e-06, "loss": 0.3503, "num_tokens": 8432709543.0, "step": 11024 }, { "epoch": 4.038472107721901, "grad_norm": 0.14977061862057373, "learning_rate": 7.372675261504078e-06, "loss": 0.3605, "num_tokens": 8433477831.0, "step": 11025 }, { "epoch": 4.0388385087478245, "grad_norm": 0.15688670988691106, "learning_rate": 7.3701856473746235e-06, "loss": 0.3363, "num_tokens": 8434203221.0, "step": 11026 }, { "epoch": 4.039204909773748, "grad_norm": 0.15798646180534837, "learning_rate": 7.367696857555355e-06, "loss": 0.3678, "num_tokens": 8434873249.0, "step": 11027 }, { "epoch": 4.03957131079967, "grad_norm": 0.15754348565191623, "learning_rate": 7.365208892186504e-06, "loss": 0.3416, "num_tokens": 8435509178.0, "step": 11028 }, { "epoch": 4.039937711825593, "grad_norm": 0.14776875022024782, "learning_rate": 7.362721751408255e-06, "loss": 0.3458, "num_tokens": 8436260452.0, "step": 11029 }, { "epoch": 4.040304112851516, "grad_norm": 0.1549550342822407, "learning_rate": 7.3602354353607405e-06, "loss": 0.3653, "num_tokens": 8436978242.0, "step": 11030 }, { "epoch": 4.040670513877439, "grad_norm": 0.17553663503629938, "learning_rate": 7.357749944184054e-06, "loss": 0.3715, "num_tokens": 8437609916.0, "step": 11031 }, { "epoch": 4.041036914903362, "grad_norm": 0.14009780665534885, "learning_rate": 7.355265278018235e-06, "loss": 0.3509, "num_tokens": 8438423399.0, "step": 11032 }, { "epoch": 4.041403315929284, "grad_norm": 0.14744055106459686, "learning_rate": 7.352781437003295e-06, "loss": 0.3619, "num_tokens": 8439181702.0, "step": 11033 }, { "epoch": 4.041769716955208, "grad_norm": 0.15648987517020915, "learning_rate": 7.350298421279165e-06, "loss": 0.3715, "num_tokens": 8439920018.0, "step": 11034 }, { "epoch": 4.04213611798113, "grad_norm": 0.14618973050996378, "learning_rate": 7.3478162309857625e-06, "loss": 0.3802, "num_tokens": 8440729260.0, "step": 11035 }, { "epoch": 4.042502519007053, "grad_norm": 0.1402194794759075, "learning_rate": 7.345334866262948e-06, "loss": 0.3479, "num_tokens": 8441500962.0, "step": 11036 }, { "epoch": 4.042868920032976, "grad_norm": 0.14417426967511676, "learning_rate": 7.3428543272505156e-06, "loss": 0.3576, "num_tokens": 8442282563.0, "step": 11037 }, { "epoch": 4.043235321058899, "grad_norm": 0.14378918568970084, "learning_rate": 7.340374614088248e-06, "loss": 0.3668, "num_tokens": 8443113370.0, "step": 11038 }, { "epoch": 4.043601722084822, "grad_norm": 0.14236480694161707, "learning_rate": 7.337895726915853e-06, "loss": 0.3574, "num_tokens": 8443913178.0, "step": 11039 }, { "epoch": 4.043968123110744, "grad_norm": 0.14944743154207357, "learning_rate": 7.33541766587301e-06, "loss": 0.3484, "num_tokens": 8444643437.0, "step": 11040 }, { "epoch": 4.0443345241366675, "grad_norm": 0.12100687467953211, "learning_rate": 7.33294043109934e-06, "loss": 0.3498, "num_tokens": 8445697054.0, "step": 11041 }, { "epoch": 4.044700925162591, "grad_norm": 0.1589744155554042, "learning_rate": 7.3304640227344204e-06, "loss": 0.4048, "num_tokens": 8446425929.0, "step": 11042 }, { "epoch": 4.045067326188513, "grad_norm": 0.14668849952818946, "learning_rate": 7.327988440917786e-06, "loss": 0.3437, "num_tokens": 8447175751.0, "step": 11043 }, { "epoch": 4.045433727214436, "grad_norm": 0.13579985615260287, "learning_rate": 7.3255136857889205e-06, "loss": 0.3583, "num_tokens": 8448087292.0, "step": 11044 }, { "epoch": 4.0458001282403595, "grad_norm": 0.14882087355910445, "learning_rate": 7.32303975748726e-06, "loss": 0.3532, "num_tokens": 8448863195.0, "step": 11045 }, { "epoch": 4.046166529266282, "grad_norm": 0.14821215822701392, "learning_rate": 7.320566656152209e-06, "loss": 0.3299, "num_tokens": 8449623812.0, "step": 11046 }, { "epoch": 4.046532930292205, "grad_norm": 0.17549789787403813, "learning_rate": 7.3180943819231e-06, "loss": 0.4126, "num_tokens": 8450239404.0, "step": 11047 }, { "epoch": 4.046899331318127, "grad_norm": 0.1445734368978359, "learning_rate": 7.315622934939233e-06, "loss": 0.3829, "num_tokens": 8451065470.0, "step": 11048 }, { "epoch": 4.047265732344051, "grad_norm": 0.1523808775278894, "learning_rate": 7.313152315339873e-06, "loss": 0.3698, "num_tokens": 8451823733.0, "step": 11049 }, { "epoch": 4.047632133369974, "grad_norm": 0.14806437473549997, "learning_rate": 7.310682523264212e-06, "loss": 0.3562, "num_tokens": 8452583594.0, "step": 11050 }, { "epoch": 4.047998534395896, "grad_norm": 0.13964632867893562, "learning_rate": 7.308213558851412e-06, "loss": 0.3514, "num_tokens": 8453400379.0, "step": 11051 }, { "epoch": 4.048364935421819, "grad_norm": 0.1467308268346002, "learning_rate": 7.305745422240593e-06, "loss": 0.362, "num_tokens": 8454144377.0, "step": 11052 }, { "epoch": 4.048731336447742, "grad_norm": 0.14123425615997204, "learning_rate": 7.303278113570813e-06, "loss": 0.3511, "num_tokens": 8454987914.0, "step": 11053 }, { "epoch": 4.049097737473665, "grad_norm": 0.16579042623120127, "learning_rate": 7.3008116329811e-06, "loss": 0.3765, "num_tokens": 8455742443.0, "step": 11054 }, { "epoch": 4.049464138499588, "grad_norm": 0.14794715586024473, "learning_rate": 7.298345980610417e-06, "loss": 0.3635, "num_tokens": 8456556179.0, "step": 11055 }, { "epoch": 4.0498305395255105, "grad_norm": 0.15994015624842395, "learning_rate": 7.295881156597695e-06, "loss": 0.3582, "num_tokens": 8457258713.0, "step": 11056 }, { "epoch": 4.050196940551434, "grad_norm": 0.15014344939103463, "learning_rate": 7.293417161081815e-06, "loss": 0.3451, "num_tokens": 8458019178.0, "step": 11057 }, { "epoch": 4.050563341577356, "grad_norm": 0.14849449394499564, "learning_rate": 7.290953994201607e-06, "loss": 0.344, "num_tokens": 8458751461.0, "step": 11058 }, { "epoch": 4.050929742603279, "grad_norm": 0.1615900993981102, "learning_rate": 7.288491656095856e-06, "loss": 0.3567, "num_tokens": 8459391707.0, "step": 11059 }, { "epoch": 4.0512961436292025, "grad_norm": 0.15345848582467186, "learning_rate": 7.286030146903305e-06, "loss": 0.3527, "num_tokens": 8460096016.0, "step": 11060 }, { "epoch": 4.051662544655125, "grad_norm": 0.14302641763639984, "learning_rate": 7.283569466762643e-06, "loss": 0.3486, "num_tokens": 8460899338.0, "step": 11061 }, { "epoch": 4.052028945681048, "grad_norm": 0.1570540842707584, "learning_rate": 7.281109615812514e-06, "loss": 0.3592, "num_tokens": 8461588234.0, "step": 11062 }, { "epoch": 4.05239534670697, "grad_norm": 0.15400028902519053, "learning_rate": 7.27865059419153e-06, "loss": 0.349, "num_tokens": 8462316415.0, "step": 11063 }, { "epoch": 4.052761747732894, "grad_norm": 0.14738143123769828, "learning_rate": 7.276192402038222e-06, "loss": 0.3634, "num_tokens": 8463079375.0, "step": 11064 }, { "epoch": 4.053128148758817, "grad_norm": 0.16383837763951348, "learning_rate": 7.273735039491116e-06, "loss": 0.3692, "num_tokens": 8463812120.0, "step": 11065 }, { "epoch": 4.053494549784739, "grad_norm": 0.16644874827113987, "learning_rate": 7.271278506688664e-06, "loss": 0.3716, "num_tokens": 8464458742.0, "step": 11066 }, { "epoch": 4.0538609508106624, "grad_norm": 0.15928426249870944, "learning_rate": 7.268822803769269e-06, "loss": 0.3737, "num_tokens": 8465178071.0, "step": 11067 }, { "epoch": 4.054227351836585, "grad_norm": 0.14621078517970065, "learning_rate": 7.26636793087131e-06, "loss": 0.3598, "num_tokens": 8465962036.0, "step": 11068 }, { "epoch": 4.054593752862508, "grad_norm": 0.15167717925395668, "learning_rate": 7.263913888133098e-06, "loss": 0.3345, "num_tokens": 8466660990.0, "step": 11069 }, { "epoch": 4.054960153888431, "grad_norm": 0.158580756124074, "learning_rate": 7.261460675692908e-06, "loss": 0.3881, "num_tokens": 8467381813.0, "step": 11070 }, { "epoch": 4.055326554914354, "grad_norm": 0.15335423745614535, "learning_rate": 7.259008293688966e-06, "loss": 0.3357, "num_tokens": 8468113834.0, "step": 11071 }, { "epoch": 4.055692955940277, "grad_norm": 0.1490399614482343, "learning_rate": 7.256556742259442e-06, "loss": 0.3674, "num_tokens": 8468903959.0, "step": 11072 }, { "epoch": 4.056059356966199, "grad_norm": 0.14524288891301734, "learning_rate": 7.254106021542483e-06, "loss": 0.3775, "num_tokens": 8469755249.0, "step": 11073 }, { "epoch": 4.056425757992122, "grad_norm": 0.13877229654462203, "learning_rate": 7.251656131676162e-06, "loss": 0.37, "num_tokens": 8470545319.0, "step": 11074 }, { "epoch": 4.056792159018046, "grad_norm": 0.16688391429681085, "learning_rate": 7.249207072798511e-06, "loss": 0.3658, "num_tokens": 8471173570.0, "step": 11075 }, { "epoch": 4.057158560043968, "grad_norm": 0.13528180011578236, "learning_rate": 7.246758845047543e-06, "loss": 0.3591, "num_tokens": 8472079808.0, "step": 11076 }, { "epoch": 4.057524961069891, "grad_norm": 0.1547874904713705, "learning_rate": 7.244311448561183e-06, "loss": 0.3743, "num_tokens": 8472814594.0, "step": 11077 }, { "epoch": 4.0578913620958135, "grad_norm": 0.15041963214082082, "learning_rate": 7.241864883477328e-06, "loss": 0.3705, "num_tokens": 8473580129.0, "step": 11078 }, { "epoch": 4.058257763121737, "grad_norm": 0.14872240690957458, "learning_rate": 7.2394191499338415e-06, "loss": 0.3555, "num_tokens": 8474309934.0, "step": 11079 }, { "epoch": 4.05862416414766, "grad_norm": 0.14013034037257283, "learning_rate": 7.236974248068527e-06, "loss": 0.3407, "num_tokens": 8475105412.0, "step": 11080 }, { "epoch": 4.058990565173582, "grad_norm": 0.14930756277093057, "learning_rate": 7.234530178019123e-06, "loss": 0.3443, "num_tokens": 8475837308.0, "step": 11081 }, { "epoch": 4.0593569661995055, "grad_norm": 0.14736319884736196, "learning_rate": 7.232086939923357e-06, "loss": 0.3625, "num_tokens": 8476584072.0, "step": 11082 }, { "epoch": 4.059723367225428, "grad_norm": 0.1524388992451111, "learning_rate": 7.229644533918888e-06, "loss": 0.3833, "num_tokens": 8477309182.0, "step": 11083 }, { "epoch": 4.060089768251351, "grad_norm": 0.14663358654228148, "learning_rate": 7.2272029601433304e-06, "loss": 0.3599, "num_tokens": 8478070322.0, "step": 11084 }, { "epoch": 4.060456169277274, "grad_norm": 0.15523761711718972, "learning_rate": 7.224762218734254e-06, "loss": 0.3758, "num_tokens": 8478824153.0, "step": 11085 }, { "epoch": 4.060822570303197, "grad_norm": 0.13726178384860657, "learning_rate": 7.2223223098291815e-06, "loss": 0.3442, "num_tokens": 8479673756.0, "step": 11086 }, { "epoch": 4.06118897132912, "grad_norm": 0.1346582028730691, "learning_rate": 7.21988323356559e-06, "loss": 0.3427, "num_tokens": 8480525906.0, "step": 11087 }, { "epoch": 4.061555372355042, "grad_norm": 0.15611891189665988, "learning_rate": 7.217444990080904e-06, "loss": 0.3639, "num_tokens": 8481211595.0, "step": 11088 }, { "epoch": 4.061921773380965, "grad_norm": 0.14681302179502098, "learning_rate": 7.215007579512505e-06, "loss": 0.3779, "num_tokens": 8482022244.0, "step": 11089 }, { "epoch": 4.062288174406889, "grad_norm": 0.14688437828337933, "learning_rate": 7.212571001997739e-06, "loss": 0.3332, "num_tokens": 8482736393.0, "step": 11090 }, { "epoch": 4.062654575432811, "grad_norm": 0.149398097232867, "learning_rate": 7.210135257673876e-06, "loss": 0.3494, "num_tokens": 8483589262.0, "step": 11091 }, { "epoch": 4.063020976458734, "grad_norm": 0.14901694640446714, "learning_rate": 7.20770034667817e-06, "loss": 0.4029, "num_tokens": 8484400185.0, "step": 11092 }, { "epoch": 4.063387377484657, "grad_norm": 0.1344074450954831, "learning_rate": 7.205266269147817e-06, "loss": 0.3702, "num_tokens": 8485298612.0, "step": 11093 }, { "epoch": 4.06375377851058, "grad_norm": 0.1609213017270756, "learning_rate": 7.202833025219949e-06, "loss": 0.3877, "num_tokens": 8485998707.0, "step": 11094 }, { "epoch": 4.064120179536503, "grad_norm": 0.15134209391533054, "learning_rate": 7.20040061503168e-06, "loss": 0.3922, "num_tokens": 8486727967.0, "step": 11095 }, { "epoch": 4.064486580562425, "grad_norm": 0.15864393572656396, "learning_rate": 7.1979690387200585e-06, "loss": 0.354, "num_tokens": 8487398324.0, "step": 11096 }, { "epoch": 4.0648529815883485, "grad_norm": 0.14044381064636074, "learning_rate": 7.195538296422089e-06, "loss": 0.3472, "num_tokens": 8488264043.0, "step": 11097 }, { "epoch": 4.065219382614272, "grad_norm": 0.1633137438375896, "learning_rate": 7.193108388274734e-06, "loss": 0.3799, "num_tokens": 8488971602.0, "step": 11098 }, { "epoch": 4.065585783640194, "grad_norm": 0.1563361419352264, "learning_rate": 7.190679314414899e-06, "loss": 0.356, "num_tokens": 8489709656.0, "step": 11099 }, { "epoch": 4.065952184666117, "grad_norm": 0.13453040957336765, "learning_rate": 7.188251074979458e-06, "loss": 0.3954, "num_tokens": 8490652104.0, "step": 11100 }, { "epoch": 4.06631858569204, "grad_norm": 0.158443896886622, "learning_rate": 7.185823670105223e-06, "loss": 0.3645, "num_tokens": 8491349844.0, "step": 11101 }, { "epoch": 4.066684986717963, "grad_norm": 0.14564940414264346, "learning_rate": 7.183397099928962e-06, "loss": 0.3336, "num_tokens": 8492109792.0, "step": 11102 }, { "epoch": 4.067051387743886, "grad_norm": 0.160396235570004, "learning_rate": 7.1809713645874125e-06, "loss": 0.369, "num_tokens": 8492851816.0, "step": 11103 }, { "epoch": 4.067417788769808, "grad_norm": 0.15396912931736192, "learning_rate": 7.178546464217238e-06, "loss": 0.3661, "num_tokens": 8493586780.0, "step": 11104 }, { "epoch": 4.067784189795732, "grad_norm": 0.15115938392644718, "learning_rate": 7.17612239895507e-06, "loss": 0.3633, "num_tokens": 8494283549.0, "step": 11105 }, { "epoch": 4.068150590821654, "grad_norm": 0.13254171842695506, "learning_rate": 7.173699168937496e-06, "loss": 0.3839, "num_tokens": 8495206021.0, "step": 11106 }, { "epoch": 4.068516991847577, "grad_norm": 0.16350415894690892, "learning_rate": 7.171276774301055e-06, "loss": 0.3833, "num_tokens": 8495897498.0, "step": 11107 }, { "epoch": 4.0688833928735, "grad_norm": 0.15261912526419158, "learning_rate": 7.168855215182223e-06, "loss": 0.361, "num_tokens": 8496676719.0, "step": 11108 }, { "epoch": 4.069249793899423, "grad_norm": 0.14644701007994645, "learning_rate": 7.166434491717453e-06, "loss": 0.3567, "num_tokens": 8497435300.0, "step": 11109 }, { "epoch": 4.069616194925346, "grad_norm": 0.13489731052347578, "learning_rate": 7.164014604043137e-06, "loss": 0.3807, "num_tokens": 8498314203.0, "step": 11110 }, { "epoch": 4.069982595951268, "grad_norm": 0.1587729429911102, "learning_rate": 7.16159555229562e-06, "loss": 0.3566, "num_tokens": 8498976929.0, "step": 11111 }, { "epoch": 4.0703489969771915, "grad_norm": 0.1549927008954268, "learning_rate": 7.159177336611205e-06, "loss": 0.3804, "num_tokens": 8499701871.0, "step": 11112 }, { "epoch": 4.070715398003115, "grad_norm": 0.14755287862400235, "learning_rate": 7.156759957126141e-06, "loss": 0.3851, "num_tokens": 8500552517.0, "step": 11113 }, { "epoch": 4.071081799029037, "grad_norm": 0.1362776081612452, "learning_rate": 7.154343413976638e-06, "loss": 0.3658, "num_tokens": 8501466139.0, "step": 11114 }, { "epoch": 4.07144820005496, "grad_norm": 0.15003258431756786, "learning_rate": 7.151927707298858e-06, "loss": 0.3544, "num_tokens": 8502207459.0, "step": 11115 }, { "epoch": 4.071814601080883, "grad_norm": 0.1541695792426534, "learning_rate": 7.149512837228907e-06, "loss": 0.3471, "num_tokens": 8502988031.0, "step": 11116 }, { "epoch": 4.072181002106806, "grad_norm": 0.149095173473088, "learning_rate": 7.14709880390285e-06, "loss": 0.3977, "num_tokens": 8503727029.0, "step": 11117 }, { "epoch": 4.072547403132729, "grad_norm": 0.15084708675795439, "learning_rate": 7.144685607456707e-06, "loss": 0.3555, "num_tokens": 8504480626.0, "step": 11118 }, { "epoch": 4.0729138041586515, "grad_norm": 0.16114202924415647, "learning_rate": 7.142273248026446e-06, "loss": 0.3781, "num_tokens": 8505142891.0, "step": 11119 }, { "epoch": 4.073280205184575, "grad_norm": 0.1603469270207439, "learning_rate": 7.139861725747999e-06, "loss": 0.3921, "num_tokens": 8505814670.0, "step": 11120 }, { "epoch": 4.073646606210497, "grad_norm": 0.151198013961634, "learning_rate": 7.137451040757226e-06, "loss": 0.3454, "num_tokens": 8506495688.0, "step": 11121 }, { "epoch": 4.07401300723642, "grad_norm": 0.14312330271055484, "learning_rate": 7.135041193189971e-06, "loss": 0.3891, "num_tokens": 8507332054.0, "step": 11122 }, { "epoch": 4.0743794082623435, "grad_norm": 0.1595748113544021, "learning_rate": 7.132632183182011e-06, "loss": 0.3767, "num_tokens": 8508046868.0, "step": 11123 }, { "epoch": 4.074745809288266, "grad_norm": 0.14467585592562474, "learning_rate": 7.130224010869076e-06, "loss": 0.3625, "num_tokens": 8508833442.0, "step": 11124 }, { "epoch": 4.075112210314189, "grad_norm": 0.1391798681247968, "learning_rate": 7.127816676386856e-06, "loss": 0.3475, "num_tokens": 8509619871.0, "step": 11125 }, { "epoch": 4.075478611340111, "grad_norm": 0.15190165697316296, "learning_rate": 7.125410179870995e-06, "loss": 0.3669, "num_tokens": 8510315054.0, "step": 11126 }, { "epoch": 4.075845012366035, "grad_norm": 0.15459271821034218, "learning_rate": 7.123004521457082e-06, "loss": 0.3789, "num_tokens": 8511036391.0, "step": 11127 }, { "epoch": 4.076211413391958, "grad_norm": 0.1439246813387647, "learning_rate": 7.120599701280664e-06, "loss": 0.3642, "num_tokens": 8511904880.0, "step": 11128 }, { "epoch": 4.07657781441788, "grad_norm": 0.15089825217124467, "learning_rate": 7.118195719477235e-06, "loss": 0.3623, "num_tokens": 8512685910.0, "step": 11129 }, { "epoch": 4.076944215443803, "grad_norm": 0.14591547560999749, "learning_rate": 7.1157925761822585e-06, "loss": 0.384, "num_tokens": 8513437694.0, "step": 11130 }, { "epoch": 4.077310616469726, "grad_norm": 0.16487906221639212, "learning_rate": 7.113390271531126e-06, "loss": 0.3961, "num_tokens": 8514191451.0, "step": 11131 }, { "epoch": 4.077677017495649, "grad_norm": 0.1421959677343209, "learning_rate": 7.110988805659196e-06, "loss": 0.3511, "num_tokens": 8514956974.0, "step": 11132 }, { "epoch": 4.078043418521572, "grad_norm": 0.1412003467263834, "learning_rate": 7.108588178701787e-06, "loss": 0.3637, "num_tokens": 8515819947.0, "step": 11133 }, { "epoch": 4.0784098195474945, "grad_norm": 0.14885838949574462, "learning_rate": 7.106188390794152e-06, "loss": 0.3557, "num_tokens": 8516595813.0, "step": 11134 }, { "epoch": 4.078776220573418, "grad_norm": 0.14969259274087568, "learning_rate": 7.103789442071505e-06, "loss": 0.3531, "num_tokens": 8517296649.0, "step": 11135 }, { "epoch": 4.07914262159934, "grad_norm": 0.15276042344867025, "learning_rate": 7.101391332669022e-06, "loss": 0.3857, "num_tokens": 8518109628.0, "step": 11136 }, { "epoch": 4.079509022625263, "grad_norm": 0.14653369293361654, "learning_rate": 7.098994062721818e-06, "loss": 0.3561, "num_tokens": 8518871302.0, "step": 11137 }, { "epoch": 4.0798754236511865, "grad_norm": 0.1481983478977061, "learning_rate": 7.096597632364968e-06, "loss": 0.3562, "num_tokens": 8519639755.0, "step": 11138 }, { "epoch": 4.080241824677109, "grad_norm": 0.1470764123376354, "learning_rate": 7.094202041733493e-06, "loss": 0.3792, "num_tokens": 8520402888.0, "step": 11139 }, { "epoch": 4.080608225703032, "grad_norm": 0.141590397224793, "learning_rate": 7.091807290962381e-06, "loss": 0.356, "num_tokens": 8521290266.0, "step": 11140 }, { "epoch": 4.080974626728954, "grad_norm": 0.16478084568848173, "learning_rate": 7.089413380186554e-06, "loss": 0.3922, "num_tokens": 8521979827.0, "step": 11141 }, { "epoch": 4.081341027754878, "grad_norm": 0.16676747109984985, "learning_rate": 7.0870203095409e-06, "loss": 0.3458, "num_tokens": 8522623614.0, "step": 11142 }, { "epoch": 4.081707428780801, "grad_norm": 0.15775223024647322, "learning_rate": 7.084628079160254e-06, "loss": 0.3738, "num_tokens": 8523299938.0, "step": 11143 }, { "epoch": 4.082073829806723, "grad_norm": 0.15039693338263418, "learning_rate": 7.082236689179406e-06, "loss": 0.3594, "num_tokens": 8524032325.0, "step": 11144 }, { "epoch": 4.082440230832646, "grad_norm": 0.14642453627497, "learning_rate": 7.079846139733095e-06, "loss": 0.3533, "num_tokens": 8524766285.0, "step": 11145 }, { "epoch": 4.08280663185857, "grad_norm": 0.14913217209882776, "learning_rate": 7.077456430956018e-06, "loss": 0.359, "num_tokens": 8525519841.0, "step": 11146 }, { "epoch": 4.083173032884492, "grad_norm": 0.13947721951968453, "learning_rate": 7.075067562982829e-06, "loss": 0.3628, "num_tokens": 8526393531.0, "step": 11147 }, { "epoch": 4.083539433910415, "grad_norm": 0.15717840008446773, "learning_rate": 7.072679535948113e-06, "loss": 0.3438, "num_tokens": 8527032120.0, "step": 11148 }, { "epoch": 4.0839058349363375, "grad_norm": 0.16604548722827422, "learning_rate": 7.070292349986431e-06, "loss": 0.3915, "num_tokens": 8527657110.0, "step": 11149 }, { "epoch": 4.084272235962261, "grad_norm": 0.1540794036779594, "learning_rate": 7.067906005232292e-06, "loss": 0.3765, "num_tokens": 8528345987.0, "step": 11150 }, { "epoch": 4.084638636988184, "grad_norm": 0.15011159767182333, "learning_rate": 7.0655205018201414e-06, "loss": 0.38, "num_tokens": 8529198985.0, "step": 11151 }, { "epoch": 4.085005038014106, "grad_norm": 0.13988266658689127, "learning_rate": 7.063135839884401e-06, "loss": 0.3582, "num_tokens": 8530029541.0, "step": 11152 }, { "epoch": 4.0853714390400295, "grad_norm": 0.15275628402415828, "learning_rate": 7.060752019559428e-06, "loss": 0.3801, "num_tokens": 8530783753.0, "step": 11153 }, { "epoch": 4.085737840065952, "grad_norm": 0.163607756572921, "learning_rate": 7.058369040979538e-06, "loss": 0.3802, "num_tokens": 8531429716.0, "step": 11154 }, { "epoch": 4.086104241091875, "grad_norm": 0.14191477181742118, "learning_rate": 7.055986904279e-06, "loss": 0.3781, "num_tokens": 8532269038.0, "step": 11155 }, { "epoch": 4.086470642117798, "grad_norm": 0.16031344120961424, "learning_rate": 7.053605609592033e-06, "loss": 0.3396, "num_tokens": 8532933279.0, "step": 11156 }, { "epoch": 4.086837043143721, "grad_norm": 0.1394657529297658, "learning_rate": 7.051225157052809e-06, "loss": 0.3747, "num_tokens": 8533773966.0, "step": 11157 }, { "epoch": 4.087203444169644, "grad_norm": 0.15003915140945775, "learning_rate": 7.048845546795457e-06, "loss": 0.3605, "num_tokens": 8534503016.0, "step": 11158 }, { "epoch": 4.087569845195566, "grad_norm": 0.13484685825673676, "learning_rate": 7.0464667789540486e-06, "loss": 0.3684, "num_tokens": 8535423276.0, "step": 11159 }, { "epoch": 4.087936246221489, "grad_norm": 0.13741168761076947, "learning_rate": 7.044088853662628e-06, "loss": 0.3694, "num_tokens": 8536278147.0, "step": 11160 }, { "epoch": 4.088302647247413, "grad_norm": 0.14339293858284016, "learning_rate": 7.041711771055164e-06, "loss": 0.3717, "num_tokens": 8537105039.0, "step": 11161 }, { "epoch": 4.088669048273335, "grad_norm": 0.14466790768671345, "learning_rate": 7.039335531265594e-06, "loss": 0.3577, "num_tokens": 8537913041.0, "step": 11162 }, { "epoch": 4.089035449299258, "grad_norm": 0.14783988325441194, "learning_rate": 7.036960134427813e-06, "loss": 0.3713, "num_tokens": 8538666544.0, "step": 11163 }, { "epoch": 4.0894018503251806, "grad_norm": 0.14537732680702434, "learning_rate": 7.034585580675662e-06, "loss": 0.3597, "num_tokens": 8539450596.0, "step": 11164 }, { "epoch": 4.089768251351104, "grad_norm": 0.15936592275696954, "learning_rate": 7.032211870142924e-06, "loss": 0.3864, "num_tokens": 8540217514.0, "step": 11165 }, { "epoch": 4.090134652377027, "grad_norm": 0.1604408032508568, "learning_rate": 7.029839002963354e-06, "loss": 0.3406, "num_tokens": 8540919459.0, "step": 11166 }, { "epoch": 4.090501053402949, "grad_norm": 0.1524319263202094, "learning_rate": 7.027466979270647e-06, "loss": 0.3737, "num_tokens": 8541677018.0, "step": 11167 }, { "epoch": 4.090867454428873, "grad_norm": 0.15037459226400715, "learning_rate": 7.0250957991984535e-06, "loss": 0.3781, "num_tokens": 8542388405.0, "step": 11168 }, { "epoch": 4.091233855454795, "grad_norm": 0.15271768425996343, "learning_rate": 7.02272546288038e-06, "loss": 0.3793, "num_tokens": 8543213228.0, "step": 11169 }, { "epoch": 4.091600256480718, "grad_norm": 0.140673239508453, "learning_rate": 7.020355970449974e-06, "loss": 0.3526, "num_tokens": 8544051074.0, "step": 11170 }, { "epoch": 4.091966657506641, "grad_norm": 0.16259987910775597, "learning_rate": 7.0179873220407515e-06, "loss": 0.367, "num_tokens": 8544718104.0, "step": 11171 }, { "epoch": 4.092333058532564, "grad_norm": 0.1434864327602001, "learning_rate": 7.0156195177861675e-06, "loss": 0.341, "num_tokens": 8545484296.0, "step": 11172 }, { "epoch": 4.092699459558487, "grad_norm": 0.14975387420444897, "learning_rate": 7.013252557819639e-06, "loss": 0.3518, "num_tokens": 8546245908.0, "step": 11173 }, { "epoch": 4.093065860584409, "grad_norm": 0.16405351810688798, "learning_rate": 7.0108864422745274e-06, "loss": 0.3723, "num_tokens": 8546917708.0, "step": 11174 }, { "epoch": 4.0934322616103325, "grad_norm": 0.15418935650604187, "learning_rate": 7.0085211712841505e-06, "loss": 0.4113, "num_tokens": 8547710113.0, "step": 11175 }, { "epoch": 4.093798662636256, "grad_norm": 0.15474845100444287, "learning_rate": 7.006156744981782e-06, "loss": 0.3625, "num_tokens": 8548508447.0, "step": 11176 }, { "epoch": 4.094165063662178, "grad_norm": 0.1476766740335003, "learning_rate": 7.003793163500649e-06, "loss": 0.4009, "num_tokens": 8549344975.0, "step": 11177 }, { "epoch": 4.094531464688101, "grad_norm": 0.1354963686917617, "learning_rate": 7.00143042697391e-06, "loss": 0.3468, "num_tokens": 8550193809.0, "step": 11178 }, { "epoch": 4.094897865714024, "grad_norm": 0.15478205431277225, "learning_rate": 6.999068535534708e-06, "loss": 0.383, "num_tokens": 8550931892.0, "step": 11179 }, { "epoch": 4.095264266739947, "grad_norm": 0.15938525511920582, "learning_rate": 6.996707489316119e-06, "loss": 0.3776, "num_tokens": 8551629785.0, "step": 11180 }, { "epoch": 4.09563066776587, "grad_norm": 0.15095150252153167, "learning_rate": 6.994347288451167e-06, "loss": 0.3957, "num_tokens": 8552434889.0, "step": 11181 }, { "epoch": 4.095997068791792, "grad_norm": 0.1404792551864487, "learning_rate": 6.9919879330728434e-06, "loss": 0.3625, "num_tokens": 8553280095.0, "step": 11182 }, { "epoch": 4.096363469817716, "grad_norm": 0.16361131097782347, "learning_rate": 6.989629423314087e-06, "loss": 0.3664, "num_tokens": 8554002930.0, "step": 11183 }, { "epoch": 4.096729870843638, "grad_norm": 0.15273178584087996, "learning_rate": 6.987271759307784e-06, "loss": 0.3852, "num_tokens": 8554826990.0, "step": 11184 }, { "epoch": 4.097096271869561, "grad_norm": 0.14546591131036862, "learning_rate": 6.9849149411867755e-06, "loss": 0.3735, "num_tokens": 8555669810.0, "step": 11185 }, { "epoch": 4.097462672895484, "grad_norm": 0.14517828785611553, "learning_rate": 6.982558969083852e-06, "loss": 0.3686, "num_tokens": 8556501039.0, "step": 11186 }, { "epoch": 4.097829073921407, "grad_norm": 0.15235283447545558, "learning_rate": 6.980203843131772e-06, "loss": 0.3514, "num_tokens": 8557236812.0, "step": 11187 }, { "epoch": 4.09819547494733, "grad_norm": 0.14729669031409096, "learning_rate": 6.97784956346322e-06, "loss": 0.376, "num_tokens": 8557999203.0, "step": 11188 }, { "epoch": 4.098561875973253, "grad_norm": 0.1693999986127191, "learning_rate": 6.975496130210848e-06, "loss": 0.3946, "num_tokens": 8558628278.0, "step": 11189 }, { "epoch": 4.0989282769991755, "grad_norm": 0.14914340425600325, "learning_rate": 6.973143543507274e-06, "loss": 0.3525, "num_tokens": 8559414839.0, "step": 11190 }, { "epoch": 4.099294678025099, "grad_norm": 0.14055637265241344, "learning_rate": 6.970791803485038e-06, "loss": 0.3849, "num_tokens": 8560242841.0, "step": 11191 }, { "epoch": 4.099661079051021, "grad_norm": 0.14872317733514331, "learning_rate": 6.968440910276648e-06, "loss": 0.3485, "num_tokens": 8561004181.0, "step": 11192 }, { "epoch": 4.100027480076944, "grad_norm": 0.14706629340071758, "learning_rate": 6.9660908640145715e-06, "loss": 0.36, "num_tokens": 8561780515.0, "step": 11193 }, { "epoch": 4.1003938811028675, "grad_norm": 0.1496792986833796, "learning_rate": 6.963741664831219e-06, "loss": 0.3547, "num_tokens": 8562492508.0, "step": 11194 }, { "epoch": 4.10076028212879, "grad_norm": 0.1466195491346033, "learning_rate": 6.961393312858953e-06, "loss": 0.3403, "num_tokens": 8563266333.0, "step": 11195 }, { "epoch": 4.101126683154713, "grad_norm": 0.14184085585383108, "learning_rate": 6.959045808230093e-06, "loss": 0.3567, "num_tokens": 8564058297.0, "step": 11196 }, { "epoch": 4.101493084180635, "grad_norm": 0.15199672774910378, "learning_rate": 6.9566991510769045e-06, "loss": 0.3621, "num_tokens": 8564777846.0, "step": 11197 }, { "epoch": 4.101859485206559, "grad_norm": 0.15736949651406115, "learning_rate": 6.95435334153161e-06, "loss": 0.3796, "num_tokens": 8565487292.0, "step": 11198 }, { "epoch": 4.102225886232482, "grad_norm": 0.15640368079356462, "learning_rate": 6.952008379726387e-06, "loss": 0.3695, "num_tokens": 8566187990.0, "step": 11199 }, { "epoch": 4.102592287258404, "grad_norm": 0.15993656628916575, "learning_rate": 6.949664265793355e-06, "loss": 0.3463, "num_tokens": 8566833930.0, "step": 11200 }, { "epoch": 4.102958688284327, "grad_norm": 0.13573977454767114, "learning_rate": 6.947320999864595e-06, "loss": 0.3573, "num_tokens": 8567691424.0, "step": 11201 }, { "epoch": 4.10332508931025, "grad_norm": 0.16255479117804927, "learning_rate": 6.9449785820721384e-06, "loss": 0.3644, "num_tokens": 8568410099.0, "step": 11202 }, { "epoch": 4.103691490336173, "grad_norm": 0.1643531616354483, "learning_rate": 6.942637012547963e-06, "loss": 0.4159, "num_tokens": 8569187204.0, "step": 11203 }, { "epoch": 4.104057891362096, "grad_norm": 0.15732563140488107, "learning_rate": 6.940296291424015e-06, "loss": 0.3787, "num_tokens": 8569927367.0, "step": 11204 }, { "epoch": 4.1044242923880185, "grad_norm": 0.1525212670217817, "learning_rate": 6.937956418832164e-06, "loss": 0.3646, "num_tokens": 8570737185.0, "step": 11205 }, { "epoch": 4.104790693413942, "grad_norm": 0.15576043181533755, "learning_rate": 6.935617394904261e-06, "loss": 0.3774, "num_tokens": 8571425111.0, "step": 11206 }, { "epoch": 4.105157094439864, "grad_norm": 0.13900613162855793, "learning_rate": 6.9332792197721e-06, "loss": 0.3751, "num_tokens": 8572274401.0, "step": 11207 }, { "epoch": 4.105523495465787, "grad_norm": 0.14805426326027807, "learning_rate": 6.9309418935674115e-06, "loss": 0.3635, "num_tokens": 8573002512.0, "step": 11208 }, { "epoch": 4.1058898964917105, "grad_norm": 0.1379717148138194, "learning_rate": 6.9286054164219005e-06, "loss": 0.3477, "num_tokens": 8573860738.0, "step": 11209 }, { "epoch": 4.106256297517633, "grad_norm": 0.13747733293194306, "learning_rate": 6.926269788467215e-06, "loss": 0.3574, "num_tokens": 8574694926.0, "step": 11210 }, { "epoch": 4.106622698543556, "grad_norm": 0.14019747579235536, "learning_rate": 6.923935009834952e-06, "loss": 0.3642, "num_tokens": 8575531650.0, "step": 11211 }, { "epoch": 4.106989099569478, "grad_norm": 0.15108211188089263, "learning_rate": 6.921601080656664e-06, "loss": 0.3442, "num_tokens": 8576280401.0, "step": 11212 }, { "epoch": 4.107355500595402, "grad_norm": 0.1483415530784204, "learning_rate": 6.919268001063855e-06, "loss": 0.3641, "num_tokens": 8577018849.0, "step": 11213 }, { "epoch": 4.107721901621325, "grad_norm": 0.15940353334564952, "learning_rate": 6.91693577118798e-06, "loss": 0.3909, "num_tokens": 8577689236.0, "step": 11214 }, { "epoch": 4.108088302647247, "grad_norm": 0.14897269228654628, "learning_rate": 6.914604391160451e-06, "loss": 0.3508, "num_tokens": 8578504128.0, "step": 11215 }, { "epoch": 4.1084547036731704, "grad_norm": 0.14875411412866574, "learning_rate": 6.912273861112623e-06, "loss": 0.348, "num_tokens": 8579298719.0, "step": 11216 }, { "epoch": 4.108821104699093, "grad_norm": 0.146806044423449, "learning_rate": 6.909944181175821e-06, "loss": 0.3805, "num_tokens": 8580048460.0, "step": 11217 }, { "epoch": 4.109187505725016, "grad_norm": 0.14614079874145885, "learning_rate": 6.9076153514812935e-06, "loss": 0.355, "num_tokens": 8580832149.0, "step": 11218 }, { "epoch": 4.109553906750939, "grad_norm": 0.1430015239583534, "learning_rate": 6.905287372160263e-06, "loss": 0.3474, "num_tokens": 8581637313.0, "step": 11219 }, { "epoch": 4.109920307776862, "grad_norm": 0.16247035341109456, "learning_rate": 6.902960243343906e-06, "loss": 0.3452, "num_tokens": 8582360730.0, "step": 11220 }, { "epoch": 4.110286708802785, "grad_norm": 0.15455005922435197, "learning_rate": 6.900633965163339e-06, "loss": 0.4014, "num_tokens": 8583119801.0, "step": 11221 }, { "epoch": 4.110653109828707, "grad_norm": 0.16574437987523793, "learning_rate": 6.898308537749627e-06, "loss": 0.3768, "num_tokens": 8583819198.0, "step": 11222 }, { "epoch": 4.11101951085463, "grad_norm": 0.14942216787471008, "learning_rate": 6.895983961233805e-06, "loss": 0.3666, "num_tokens": 8584583584.0, "step": 11223 }, { "epoch": 4.111385911880554, "grad_norm": 0.1365242749784724, "learning_rate": 6.893660235746846e-06, "loss": 0.3479, "num_tokens": 8585469538.0, "step": 11224 }, { "epoch": 4.111752312906476, "grad_norm": 0.16356270919479107, "learning_rate": 6.8913373614196835e-06, "loss": 0.3731, "num_tokens": 8586194747.0, "step": 11225 }, { "epoch": 4.112118713932399, "grad_norm": 0.14895658500170275, "learning_rate": 6.8890153383831934e-06, "loss": 0.3801, "num_tokens": 8587022661.0, "step": 11226 }, { "epoch": 4.1124851149583215, "grad_norm": 0.15302696256708087, "learning_rate": 6.886694166768212e-06, "loss": 0.377, "num_tokens": 8587789869.0, "step": 11227 }, { "epoch": 4.112851515984245, "grad_norm": 0.14662812936782518, "learning_rate": 6.884373846705524e-06, "loss": 0.3703, "num_tokens": 8588617581.0, "step": 11228 }, { "epoch": 4.113217917010168, "grad_norm": 0.15901961325158534, "learning_rate": 6.882054378325866e-06, "loss": 0.3703, "num_tokens": 8589309568.0, "step": 11229 }, { "epoch": 4.11358431803609, "grad_norm": 0.13781289940014138, "learning_rate": 6.879735761759932e-06, "loss": 0.3696, "num_tokens": 8590188237.0, "step": 11230 }, { "epoch": 4.1139507190620135, "grad_norm": 0.16333499629968695, "learning_rate": 6.877417997138354e-06, "loss": 0.3493, "num_tokens": 8590895990.0, "step": 11231 }, { "epoch": 4.114317120087936, "grad_norm": 0.1537525418312244, "learning_rate": 6.87510108459173e-06, "loss": 0.3553, "num_tokens": 8591619932.0, "step": 11232 }, { "epoch": 4.114683521113859, "grad_norm": 0.14368158910014808, "learning_rate": 6.87278502425061e-06, "loss": 0.3674, "num_tokens": 8592447503.0, "step": 11233 }, { "epoch": 4.115049922139782, "grad_norm": 0.14676567773931845, "learning_rate": 6.870469816245491e-06, "loss": 0.3583, "num_tokens": 8593211394.0, "step": 11234 }, { "epoch": 4.115416323165705, "grad_norm": 0.1448834604773498, "learning_rate": 6.868155460706811e-06, "loss": 0.3673, "num_tokens": 8594021397.0, "step": 11235 }, { "epoch": 4.115782724191628, "grad_norm": 0.16139764742173776, "learning_rate": 6.865841957764983e-06, "loss": 0.3856, "num_tokens": 8594691188.0, "step": 11236 }, { "epoch": 4.11614912521755, "grad_norm": 0.137143539626084, "learning_rate": 6.863529307550358e-06, "loss": 0.3646, "num_tokens": 8595571547.0, "step": 11237 }, { "epoch": 4.116515526243473, "grad_norm": 0.15064834383375664, "learning_rate": 6.861217510193238e-06, "loss": 0.3854, "num_tokens": 8596383560.0, "step": 11238 }, { "epoch": 4.116881927269397, "grad_norm": 0.15613397477366492, "learning_rate": 6.8589065658238815e-06, "loss": 0.3438, "num_tokens": 8597115517.0, "step": 11239 }, { "epoch": 4.117248328295319, "grad_norm": 0.13800335630642213, "learning_rate": 6.8565964745725e-06, "loss": 0.382, "num_tokens": 8598029133.0, "step": 11240 }, { "epoch": 4.117614729321242, "grad_norm": 0.14793088902002266, "learning_rate": 6.854287236569252e-06, "loss": 0.4015, "num_tokens": 8598816788.0, "step": 11241 }, { "epoch": 4.117981130347165, "grad_norm": 0.14649977496865738, "learning_rate": 6.851978851944252e-06, "loss": 0.3524, "num_tokens": 8599573383.0, "step": 11242 }, { "epoch": 4.118347531373088, "grad_norm": 0.14145928085126463, "learning_rate": 6.849671320827555e-06, "loss": 0.3416, "num_tokens": 8600341867.0, "step": 11243 }, { "epoch": 4.118713932399011, "grad_norm": 0.13754067484511398, "learning_rate": 6.847364643349202e-06, "loss": 0.3627, "num_tokens": 8601222403.0, "step": 11244 }, { "epoch": 4.119080333424933, "grad_norm": 0.15133948254495594, "learning_rate": 6.845058819639137e-06, "loss": 0.3421, "num_tokens": 8602014375.0, "step": 11245 }, { "epoch": 4.1194467344508565, "grad_norm": 0.13932679792850047, "learning_rate": 6.842753849827286e-06, "loss": 0.3717, "num_tokens": 8602872616.0, "step": 11246 }, { "epoch": 4.11981313547678, "grad_norm": 0.17488310318376557, "learning_rate": 6.840449734043535e-06, "loss": 0.3571, "num_tokens": 8603501534.0, "step": 11247 }, { "epoch": 4.120179536502702, "grad_norm": 0.15345859937415185, "learning_rate": 6.838146472417695e-06, "loss": 0.3948, "num_tokens": 8604223536.0, "step": 11248 }, { "epoch": 4.120545937528625, "grad_norm": 0.15674762878988727, "learning_rate": 6.835844065079542e-06, "loss": 0.3732, "num_tokens": 8604924042.0, "step": 11249 }, { "epoch": 4.120912338554548, "grad_norm": 0.1428978513735413, "learning_rate": 6.833542512158812e-06, "loss": 0.3616, "num_tokens": 8605778603.0, "step": 11250 }, { "epoch": 4.121278739580471, "grad_norm": 0.15853893956327553, "learning_rate": 6.83124181378518e-06, "loss": 0.3549, "num_tokens": 8606489894.0, "step": 11251 }, { "epoch": 4.121645140606394, "grad_norm": 0.15489760002271746, "learning_rate": 6.828941970088279e-06, "loss": 0.3838, "num_tokens": 8607242331.0, "step": 11252 }, { "epoch": 4.122011541632316, "grad_norm": 0.14978679672694414, "learning_rate": 6.826642981197694e-06, "loss": 0.3902, "num_tokens": 8608015509.0, "step": 11253 }, { "epoch": 4.12237794265824, "grad_norm": 0.16671485714721862, "learning_rate": 6.824344847242958e-06, "loss": 0.3874, "num_tokens": 8608737055.0, "step": 11254 }, { "epoch": 4.122744343684162, "grad_norm": 0.16303489351177056, "learning_rate": 6.822047568353558e-06, "loss": 0.3755, "num_tokens": 8609353080.0, "step": 11255 }, { "epoch": 4.123110744710085, "grad_norm": 0.15242086913291344, "learning_rate": 6.819751144658936e-06, "loss": 0.3556, "num_tokens": 8610045342.0, "step": 11256 }, { "epoch": 4.123477145736008, "grad_norm": 0.14679444154790242, "learning_rate": 6.817455576288481e-06, "loss": 0.3382, "num_tokens": 8610804120.0, "step": 11257 }, { "epoch": 4.123843546761931, "grad_norm": 0.14867810273619878, "learning_rate": 6.815160863371535e-06, "loss": 0.3855, "num_tokens": 8611572960.0, "step": 11258 }, { "epoch": 4.124209947787854, "grad_norm": 0.14993601231991133, "learning_rate": 6.812867006037389e-06, "loss": 0.3718, "num_tokens": 8612366724.0, "step": 11259 }, { "epoch": 4.124576348813776, "grad_norm": 0.1617044680858772, "learning_rate": 6.8105740044153e-06, "loss": 0.3633, "num_tokens": 8613113399.0, "step": 11260 }, { "epoch": 4.1249427498396996, "grad_norm": 0.14232936888426212, "learning_rate": 6.808281858634464e-06, "loss": 0.3661, "num_tokens": 8613880011.0, "step": 11261 }, { "epoch": 4.125309150865623, "grad_norm": 0.164779466425723, "learning_rate": 6.805990568824017e-06, "loss": 0.3587, "num_tokens": 8614540744.0, "step": 11262 }, { "epoch": 4.125675551891545, "grad_norm": 0.15007785887733735, "learning_rate": 6.803700135113076e-06, "loss": 0.3651, "num_tokens": 8615347429.0, "step": 11263 }, { "epoch": 4.126041952917468, "grad_norm": 0.14271759191550087, "learning_rate": 6.801410557630692e-06, "loss": 0.3599, "num_tokens": 8616179671.0, "step": 11264 }, { "epoch": 4.126408353943391, "grad_norm": 0.1508416108793113, "learning_rate": 6.79912183650586e-06, "loss": 0.3649, "num_tokens": 8616949400.0, "step": 11265 }, { "epoch": 4.126774754969314, "grad_norm": 0.14689782928972364, "learning_rate": 6.796833971867547e-06, "loss": 0.3813, "num_tokens": 8617723728.0, "step": 11266 }, { "epoch": 4.127141155995237, "grad_norm": 0.14014057874705338, "learning_rate": 6.79454696384466e-06, "loss": 0.3546, "num_tokens": 8618552999.0, "step": 11267 }, { "epoch": 4.1275075570211595, "grad_norm": 0.1587631607246286, "learning_rate": 6.7922608125660585e-06, "loss": 0.3914, "num_tokens": 8619320372.0, "step": 11268 }, { "epoch": 4.127873958047083, "grad_norm": 0.16831357251298232, "learning_rate": 6.7899755181605565e-06, "loss": 0.3457, "num_tokens": 8619920174.0, "step": 11269 }, { "epoch": 4.128240359073005, "grad_norm": 0.15734659866322326, "learning_rate": 6.787691080756913e-06, "loss": 0.3636, "num_tokens": 8620681773.0, "step": 11270 }, { "epoch": 4.128606760098928, "grad_norm": 0.1549891419326732, "learning_rate": 6.785407500483845e-06, "loss": 0.3777, "num_tokens": 8621411895.0, "step": 11271 }, { "epoch": 4.1289731611248515, "grad_norm": 0.14591686770350817, "learning_rate": 6.7831247774700236e-06, "loss": 0.3842, "num_tokens": 8622230869.0, "step": 11272 }, { "epoch": 4.129339562150774, "grad_norm": 0.14867431971149486, "learning_rate": 6.780842911844061e-06, "loss": 0.3766, "num_tokens": 8622998063.0, "step": 11273 }, { "epoch": 4.129705963176697, "grad_norm": 0.1538470229092371, "learning_rate": 6.7785619037345395e-06, "loss": 0.3754, "num_tokens": 8623793716.0, "step": 11274 }, { "epoch": 4.130072364202619, "grad_norm": 0.1540345156241731, "learning_rate": 6.776281753269973e-06, "loss": 0.3615, "num_tokens": 8624556909.0, "step": 11275 }, { "epoch": 4.130438765228543, "grad_norm": 0.18315787891353497, "learning_rate": 6.774002460578829e-06, "loss": 0.3622, "num_tokens": 8625143827.0, "step": 11276 }, { "epoch": 4.130805166254466, "grad_norm": 0.13921799291791756, "learning_rate": 6.7717240257895475e-06, "loss": 0.3711, "num_tokens": 8625962909.0, "step": 11277 }, { "epoch": 4.131171567280388, "grad_norm": 0.14694249915166815, "learning_rate": 6.769446449030499e-06, "loss": 0.388, "num_tokens": 8626730688.0, "step": 11278 }, { "epoch": 4.131537968306311, "grad_norm": 0.15016441256448348, "learning_rate": 6.767169730430012e-06, "loss": 0.3551, "num_tokens": 8627536733.0, "step": 11279 }, { "epoch": 4.131904369332234, "grad_norm": 0.1473305242438887, "learning_rate": 6.764893870116367e-06, "loss": 0.3472, "num_tokens": 8628314282.0, "step": 11280 }, { "epoch": 4.132270770358157, "grad_norm": 0.13863430982605518, "learning_rate": 6.762618868217798e-06, "loss": 0.3686, "num_tokens": 8629156097.0, "step": 11281 }, { "epoch": 4.13263717138408, "grad_norm": 0.15829797132807083, "learning_rate": 6.760344724862489e-06, "loss": 0.3886, "num_tokens": 8629902776.0, "step": 11282 }, { "epoch": 4.1330035724100025, "grad_norm": 0.14608973810019646, "learning_rate": 6.758071440178575e-06, "loss": 0.363, "num_tokens": 8630733821.0, "step": 11283 }, { "epoch": 4.133369973435926, "grad_norm": 0.15293602117352648, "learning_rate": 6.75579901429414e-06, "loss": 0.3843, "num_tokens": 8631519177.0, "step": 11284 }, { "epoch": 4.133736374461849, "grad_norm": 0.14317928010288233, "learning_rate": 6.753527447337229e-06, "loss": 0.3615, "num_tokens": 8632369936.0, "step": 11285 }, { "epoch": 4.134102775487771, "grad_norm": 0.15037207195295157, "learning_rate": 6.751256739435829e-06, "loss": 0.4018, "num_tokens": 8633100889.0, "step": 11286 }, { "epoch": 4.1344691765136945, "grad_norm": 0.1483989972846133, "learning_rate": 6.748986890717881e-06, "loss": 0.3599, "num_tokens": 8633848546.0, "step": 11287 }, { "epoch": 4.134835577539617, "grad_norm": 0.14821523778653056, "learning_rate": 6.746717901311279e-06, "loss": 0.3584, "num_tokens": 8634638619.0, "step": 11288 }, { "epoch": 4.13520197856554, "grad_norm": 0.15110432187606623, "learning_rate": 6.744449771343866e-06, "loss": 0.3576, "num_tokens": 8635384644.0, "step": 11289 }, { "epoch": 4.135568379591463, "grad_norm": 0.15026165729543692, "learning_rate": 6.742182500943446e-06, "loss": 0.3671, "num_tokens": 8636132984.0, "step": 11290 }, { "epoch": 4.135934780617386, "grad_norm": 0.16150959627835543, "learning_rate": 6.7399160902377666e-06, "loss": 0.3741, "num_tokens": 8636882019.0, "step": 11291 }, { "epoch": 4.136301181643309, "grad_norm": 0.15796977974656626, "learning_rate": 6.737650539354515e-06, "loss": 0.3854, "num_tokens": 8637589626.0, "step": 11292 }, { "epoch": 4.136667582669231, "grad_norm": 0.15061362734991485, "learning_rate": 6.7353858484213585e-06, "loss": 0.3237, "num_tokens": 8638270546.0, "step": 11293 }, { "epoch": 4.137033983695154, "grad_norm": 0.1582439420807718, "learning_rate": 6.7331220175658905e-06, "loss": 0.3537, "num_tokens": 8639015196.0, "step": 11294 }, { "epoch": 4.137400384721078, "grad_norm": 0.15612045477371053, "learning_rate": 6.73085904691567e-06, "loss": 0.3906, "num_tokens": 8639786575.0, "step": 11295 }, { "epoch": 4.137766785747, "grad_norm": 0.13345042967614731, "learning_rate": 6.728596936598201e-06, "loss": 0.3584, "num_tokens": 8640758656.0, "step": 11296 }, { "epoch": 4.138133186772923, "grad_norm": 0.13433906488446787, "learning_rate": 6.72633568674094e-06, "loss": 0.3705, "num_tokens": 8641661034.0, "step": 11297 }, { "epoch": 4.1384995877988455, "grad_norm": 0.1489264289546196, "learning_rate": 6.724075297471299e-06, "loss": 0.3718, "num_tokens": 8642398223.0, "step": 11298 }, { "epoch": 4.138865988824769, "grad_norm": 0.1503383184880255, "learning_rate": 6.721815768916637e-06, "loss": 0.376, "num_tokens": 8643134224.0, "step": 11299 }, { "epoch": 4.139232389850692, "grad_norm": 0.14862149016784867, "learning_rate": 6.71955710120426e-06, "loss": 0.4068, "num_tokens": 8643958335.0, "step": 11300 }, { "epoch": 4.139598790876614, "grad_norm": 0.14411051514645168, "learning_rate": 6.717299294461448e-06, "loss": 0.3431, "num_tokens": 8644760413.0, "step": 11301 }, { "epoch": 4.1399651919025375, "grad_norm": 0.15422999121762745, "learning_rate": 6.7150423488154e-06, "loss": 0.3738, "num_tokens": 8645512776.0, "step": 11302 }, { "epoch": 4.14033159292846, "grad_norm": 0.1480882446106005, "learning_rate": 6.712786264393285e-06, "loss": 0.3944, "num_tokens": 8646283429.0, "step": 11303 }, { "epoch": 4.140697993954383, "grad_norm": 0.14255799281055492, "learning_rate": 6.710531041322235e-06, "loss": 0.3489, "num_tokens": 8647084316.0, "step": 11304 }, { "epoch": 4.141064394980306, "grad_norm": 0.15138652591789015, "learning_rate": 6.708276679729303e-06, "loss": 0.3516, "num_tokens": 8647784548.0, "step": 11305 }, { "epoch": 4.141430796006229, "grad_norm": 0.12908991510872056, "learning_rate": 6.706023179741511e-06, "loss": 0.3503, "num_tokens": 8648690103.0, "step": 11306 }, { "epoch": 4.141797197032152, "grad_norm": 0.14928291202103436, "learning_rate": 6.703770541485843e-06, "loss": 0.3657, "num_tokens": 8649472029.0, "step": 11307 }, { "epoch": 4.142163598058074, "grad_norm": 0.14233750982642093, "learning_rate": 6.701518765089214e-06, "loss": 0.3871, "num_tokens": 8650294573.0, "step": 11308 }, { "epoch": 4.142529999083997, "grad_norm": 0.14971057024836948, "learning_rate": 6.699267850678502e-06, "loss": 0.3807, "num_tokens": 8651063883.0, "step": 11309 }, { "epoch": 4.142896400109921, "grad_norm": 0.1417285735226768, "learning_rate": 6.697017798380533e-06, "loss": 0.3619, "num_tokens": 8651864825.0, "step": 11310 }, { "epoch": 4.143262801135843, "grad_norm": 0.14640871319490756, "learning_rate": 6.694768608322085e-06, "loss": 0.35, "num_tokens": 8652658219.0, "step": 11311 }, { "epoch": 4.143629202161766, "grad_norm": 0.16475103167457344, "learning_rate": 6.69252028062989e-06, "loss": 0.3756, "num_tokens": 8653341887.0, "step": 11312 }, { "epoch": 4.143995603187689, "grad_norm": 0.1472618372771876, "learning_rate": 6.690272815430624e-06, "loss": 0.3605, "num_tokens": 8654082932.0, "step": 11313 }, { "epoch": 4.144362004213612, "grad_norm": 0.14138179717832572, "learning_rate": 6.688026212850924e-06, "loss": 0.3593, "num_tokens": 8654901056.0, "step": 11314 }, { "epoch": 4.144728405239535, "grad_norm": 0.14584299808651324, "learning_rate": 6.685780473017374e-06, "loss": 0.3572, "num_tokens": 8655722641.0, "step": 11315 }, { "epoch": 4.145094806265457, "grad_norm": 0.13772794283787848, "learning_rate": 6.683535596056501e-06, "loss": 0.3506, "num_tokens": 8656540860.0, "step": 11316 }, { "epoch": 4.145461207291381, "grad_norm": 0.16161863886846528, "learning_rate": 6.6812915820948025e-06, "loss": 0.3619, "num_tokens": 8657215869.0, "step": 11317 }, { "epoch": 4.145827608317303, "grad_norm": 0.15815886200759352, "learning_rate": 6.679048431258717e-06, "loss": 0.3714, "num_tokens": 8657860750.0, "step": 11318 }, { "epoch": 4.146194009343226, "grad_norm": 0.1417506311618088, "learning_rate": 6.67680614367462e-06, "loss": 0.3699, "num_tokens": 8658635866.0, "step": 11319 }, { "epoch": 4.146560410369149, "grad_norm": 0.15075343388636453, "learning_rate": 6.674564719468863e-06, "loss": 0.3609, "num_tokens": 8659356764.0, "step": 11320 }, { "epoch": 4.146926811395072, "grad_norm": 0.13554992053534298, "learning_rate": 6.672324158767742e-06, "loss": 0.3746, "num_tokens": 8660251565.0, "step": 11321 }, { "epoch": 4.147293212420995, "grad_norm": 0.16881458064150354, "learning_rate": 6.670084461697486e-06, "loss": 0.3904, "num_tokens": 8660863768.0, "step": 11322 }, { "epoch": 4.147659613446917, "grad_norm": 0.13410317782023842, "learning_rate": 6.667845628384303e-06, "loss": 0.3675, "num_tokens": 8661734637.0, "step": 11323 }, { "epoch": 4.1480260144728405, "grad_norm": 0.16897399267813104, "learning_rate": 6.6656076589543315e-06, "loss": 0.3913, "num_tokens": 8662363440.0, "step": 11324 }, { "epoch": 4.148392415498764, "grad_norm": 0.1666049769169323, "learning_rate": 6.663370553533675e-06, "loss": 0.3883, "num_tokens": 8663074364.0, "step": 11325 }, { "epoch": 4.148758816524686, "grad_norm": 0.14870857172384633, "learning_rate": 6.661134312248374e-06, "loss": 0.3782, "num_tokens": 8663850438.0, "step": 11326 }, { "epoch": 4.149125217550609, "grad_norm": 0.13866017056708937, "learning_rate": 6.658898935224436e-06, "loss": 0.3508, "num_tokens": 8664725299.0, "step": 11327 }, { "epoch": 4.149491618576532, "grad_norm": 0.16227342043127246, "learning_rate": 6.656664422587809e-06, "loss": 0.3461, "num_tokens": 8665459033.0, "step": 11328 }, { "epoch": 4.149858019602455, "grad_norm": 0.14948651025108953, "learning_rate": 6.654430774464395e-06, "loss": 0.3722, "num_tokens": 8666319431.0, "step": 11329 }, { "epoch": 4.150224420628378, "grad_norm": 0.14098886595392385, "learning_rate": 6.652197990980045e-06, "loss": 0.3446, "num_tokens": 8667113066.0, "step": 11330 }, { "epoch": 4.1505908216543, "grad_norm": 0.14452625599812088, "learning_rate": 6.6499660722605774e-06, "loss": 0.3576, "num_tokens": 8667902563.0, "step": 11331 }, { "epoch": 4.150957222680224, "grad_norm": 0.16150225106416943, "learning_rate": 6.647735018431733e-06, "loss": 0.3518, "num_tokens": 8668536073.0, "step": 11332 }, { "epoch": 4.151323623706146, "grad_norm": 0.14398139109864908, "learning_rate": 6.645504829619222e-06, "loss": 0.3704, "num_tokens": 8669342886.0, "step": 11333 }, { "epoch": 4.151690024732069, "grad_norm": 0.14719987847851593, "learning_rate": 6.643275505948712e-06, "loss": 0.3607, "num_tokens": 8670161946.0, "step": 11334 }, { "epoch": 4.152056425757992, "grad_norm": 0.1558732912796229, "learning_rate": 6.64104704754581e-06, "loss": 0.3332, "num_tokens": 8670890436.0, "step": 11335 }, { "epoch": 4.152422826783915, "grad_norm": 0.15729155073221668, "learning_rate": 6.638819454536074e-06, "loss": 0.3595, "num_tokens": 8671591087.0, "step": 11336 }, { "epoch": 4.152789227809838, "grad_norm": 0.13180312566906363, "learning_rate": 6.636592727045017e-06, "loss": 0.3559, "num_tokens": 8672503895.0, "step": 11337 }, { "epoch": 4.15315562883576, "grad_norm": 0.14528435514328347, "learning_rate": 6.634366865198107e-06, "loss": 0.3915, "num_tokens": 8673325420.0, "step": 11338 }, { "epoch": 4.1535220298616835, "grad_norm": 0.148060703053751, "learning_rate": 6.632141869120754e-06, "loss": 0.3835, "num_tokens": 8674158595.0, "step": 11339 }, { "epoch": 4.153888430887607, "grad_norm": 0.14314208650615867, "learning_rate": 6.62991773893833e-06, "loss": 0.3385, "num_tokens": 8674910649.0, "step": 11340 }, { "epoch": 4.154254831913529, "grad_norm": 0.15222647710285206, "learning_rate": 6.6276944747761475e-06, "loss": 0.3922, "num_tokens": 8675673815.0, "step": 11341 }, { "epoch": 4.154621232939452, "grad_norm": 0.15390030468170182, "learning_rate": 6.625472076759477e-06, "loss": 0.3656, "num_tokens": 8676442914.0, "step": 11342 }, { "epoch": 4.1549876339653755, "grad_norm": 0.15888744864089382, "learning_rate": 6.62325054501354e-06, "loss": 0.3923, "num_tokens": 8677223584.0, "step": 11343 }, { "epoch": 4.155354034991298, "grad_norm": 0.15060809573726583, "learning_rate": 6.6210298796635075e-06, "loss": 0.353, "num_tokens": 8677963257.0, "step": 11344 }, { "epoch": 4.155720436017221, "grad_norm": 0.14380397123870642, "learning_rate": 6.618810080834498e-06, "loss": 0.3586, "num_tokens": 8678834654.0, "step": 11345 }, { "epoch": 4.156086837043143, "grad_norm": 0.1505082632262542, "learning_rate": 6.616591148651585e-06, "loss": 0.3811, "num_tokens": 8679641590.0, "step": 11346 }, { "epoch": 4.156453238069067, "grad_norm": 0.1350796496148389, "learning_rate": 6.614373083239801e-06, "loss": 0.3713, "num_tokens": 8680513219.0, "step": 11347 }, { "epoch": 4.15681963909499, "grad_norm": 0.14812339875696942, "learning_rate": 6.6121558847241205e-06, "loss": 0.3619, "num_tokens": 8681258318.0, "step": 11348 }, { "epoch": 4.157186040120912, "grad_norm": 0.15848254343296342, "learning_rate": 6.609939553229461e-06, "loss": 0.3772, "num_tokens": 8681987841.0, "step": 11349 }, { "epoch": 4.157552441146835, "grad_norm": 0.1446729642327397, "learning_rate": 6.607724088880708e-06, "loss": 0.339, "num_tokens": 8682741515.0, "step": 11350 }, { "epoch": 4.157918842172758, "grad_norm": 0.16485981327598614, "learning_rate": 6.605509491802689e-06, "loss": 0.3696, "num_tokens": 8683428014.0, "step": 11351 }, { "epoch": 4.158285243198681, "grad_norm": 0.14853809426119485, "learning_rate": 6.603295762120188e-06, "loss": 0.3423, "num_tokens": 8684208695.0, "step": 11352 }, { "epoch": 4.158651644224604, "grad_norm": 0.13826226042975934, "learning_rate": 6.6010828999579325e-06, "loss": 0.3656, "num_tokens": 8685068645.0, "step": 11353 }, { "epoch": 4.1590180452505265, "grad_norm": 0.1447530219594101, "learning_rate": 6.5988709054406065e-06, "loss": 0.361, "num_tokens": 8685878215.0, "step": 11354 }, { "epoch": 4.15938444627645, "grad_norm": 0.1436055045170197, "learning_rate": 6.596659778692844e-06, "loss": 0.354, "num_tokens": 8686697923.0, "step": 11355 }, { "epoch": 4.159750847302372, "grad_norm": 0.15034203737604765, "learning_rate": 6.594449519839228e-06, "loss": 0.3578, "num_tokens": 8687490007.0, "step": 11356 }, { "epoch": 4.160117248328295, "grad_norm": 0.15238440638150744, "learning_rate": 6.592240129004295e-06, "loss": 0.3476, "num_tokens": 8688217386.0, "step": 11357 }, { "epoch": 4.1604836493542185, "grad_norm": 0.15766982255370326, "learning_rate": 6.590031606312537e-06, "loss": 0.3656, "num_tokens": 8688894393.0, "step": 11358 }, { "epoch": 4.160850050380141, "grad_norm": 0.15330899315514582, "learning_rate": 6.587823951888386e-06, "loss": 0.3776, "num_tokens": 8689599289.0, "step": 11359 }, { "epoch": 4.161216451406064, "grad_norm": 0.15321569919976302, "learning_rate": 6.585617165856232e-06, "loss": 0.3799, "num_tokens": 8690360365.0, "step": 11360 }, { "epoch": 4.1615828524319864, "grad_norm": 0.16081224758736662, "learning_rate": 6.583411248340423e-06, "loss": 0.3809, "num_tokens": 8691067709.0, "step": 11361 }, { "epoch": 4.16194925345791, "grad_norm": 0.1455478002530935, "learning_rate": 6.581206199465242e-06, "loss": 0.3526, "num_tokens": 8691829294.0, "step": 11362 }, { "epoch": 4.162315654483833, "grad_norm": 0.14132740072876326, "learning_rate": 6.579002019354928e-06, "loss": 0.3738, "num_tokens": 8692663919.0, "step": 11363 }, { "epoch": 4.162682055509755, "grad_norm": 0.15877280166887825, "learning_rate": 6.576798708133685e-06, "loss": 0.3586, "num_tokens": 8693343970.0, "step": 11364 }, { "epoch": 4.1630484565356785, "grad_norm": 0.17086085764416611, "learning_rate": 6.574596265925654e-06, "loss": 0.3898, "num_tokens": 8694010352.0, "step": 11365 }, { "epoch": 4.163414857561601, "grad_norm": 0.15211756452537825, "learning_rate": 6.572394692854929e-06, "loss": 0.3954, "num_tokens": 8694769722.0, "step": 11366 }, { "epoch": 4.163781258587524, "grad_norm": 0.13592275504020185, "learning_rate": 6.570193989045557e-06, "loss": 0.349, "num_tokens": 8695655751.0, "step": 11367 }, { "epoch": 4.164147659613447, "grad_norm": 0.15160983314884374, "learning_rate": 6.567994154621536e-06, "loss": 0.4089, "num_tokens": 8696476698.0, "step": 11368 }, { "epoch": 4.16451406063937, "grad_norm": 0.14165887594407373, "learning_rate": 6.5657951897068135e-06, "loss": 0.3465, "num_tokens": 8697288892.0, "step": 11369 }, { "epoch": 4.164880461665293, "grad_norm": 0.16161680699409509, "learning_rate": 6.563597094425291e-06, "loss": 0.3947, "num_tokens": 8697993193.0, "step": 11370 }, { "epoch": 4.165246862691215, "grad_norm": 0.15186558609065406, "learning_rate": 6.56139986890082e-06, "loss": 0.3696, "num_tokens": 8698737006.0, "step": 11371 }, { "epoch": 4.165613263717138, "grad_norm": 0.14962349417102097, "learning_rate": 6.5592035132572e-06, "loss": 0.3485, "num_tokens": 8699517435.0, "step": 11372 }, { "epoch": 4.165979664743062, "grad_norm": 0.14446967105038017, "learning_rate": 6.557008027618178e-06, "loss": 0.3459, "num_tokens": 8700350517.0, "step": 11373 }, { "epoch": 4.166346065768984, "grad_norm": 0.14733048910909394, "learning_rate": 6.55481341210747e-06, "loss": 0.3836, "num_tokens": 8701125216.0, "step": 11374 }, { "epoch": 4.166712466794907, "grad_norm": 0.1551408446664978, "learning_rate": 6.552619666848728e-06, "loss": 0.3668, "num_tokens": 8701901584.0, "step": 11375 }, { "epoch": 4.1670788678208295, "grad_norm": 0.14355212787274024, "learning_rate": 6.550426791965545e-06, "loss": 0.3666, "num_tokens": 8702738872.0, "step": 11376 }, { "epoch": 4.167445268846753, "grad_norm": 0.15374378179670664, "learning_rate": 6.548234787581491e-06, "loss": 0.3942, "num_tokens": 8703548565.0, "step": 11377 }, { "epoch": 4.167811669872676, "grad_norm": 0.14985849711974944, "learning_rate": 6.546043653820075e-06, "loss": 0.3609, "num_tokens": 8704292494.0, "step": 11378 }, { "epoch": 4.168178070898598, "grad_norm": 0.14025911205194241, "learning_rate": 6.543853390804739e-06, "loss": 0.3393, "num_tokens": 8705134781.0, "step": 11379 }, { "epoch": 4.1685444719245215, "grad_norm": 0.13854486210064704, "learning_rate": 6.541663998658907e-06, "loss": 0.3622, "num_tokens": 8705988781.0, "step": 11380 }, { "epoch": 4.168910872950445, "grad_norm": 0.15881587335943484, "learning_rate": 6.539475477505936e-06, "loss": 0.363, "num_tokens": 8706703608.0, "step": 11381 }, { "epoch": 4.169277273976367, "grad_norm": 0.1506659213049354, "learning_rate": 6.537287827469135e-06, "loss": 0.3696, "num_tokens": 8707464920.0, "step": 11382 }, { "epoch": 4.16964367500229, "grad_norm": 0.16171668679430934, "learning_rate": 6.535101048671771e-06, "loss": 0.3488, "num_tokens": 8708106279.0, "step": 11383 }, { "epoch": 4.170010076028213, "grad_norm": 0.15349926257830163, "learning_rate": 6.532915141237046e-06, "loss": 0.3632, "num_tokens": 8708929100.0, "step": 11384 }, { "epoch": 4.170376477054136, "grad_norm": 0.1650038812065278, "learning_rate": 6.530730105288139e-06, "loss": 0.3989, "num_tokens": 8709665689.0, "step": 11385 }, { "epoch": 4.170742878080059, "grad_norm": 0.15309816151632819, "learning_rate": 6.5285459409481564e-06, "loss": 0.3568, "num_tokens": 8710336971.0, "step": 11386 }, { "epoch": 4.171109279105981, "grad_norm": 0.15060999896136693, "learning_rate": 6.526362648340159e-06, "loss": 0.3817, "num_tokens": 8711044191.0, "step": 11387 }, { "epoch": 4.171475680131905, "grad_norm": 0.14107648558797262, "learning_rate": 6.524180227587178e-06, "loss": 0.3656, "num_tokens": 8711850556.0, "step": 11388 }, { "epoch": 4.171842081157827, "grad_norm": 0.16889169438988547, "learning_rate": 6.521998678812167e-06, "loss": 0.3737, "num_tokens": 8712451908.0, "step": 11389 }, { "epoch": 4.17220848218375, "grad_norm": 0.14732175327083355, "learning_rate": 6.519818002138045e-06, "loss": 0.3807, "num_tokens": 8713267233.0, "step": 11390 }, { "epoch": 4.172574883209673, "grad_norm": 0.1520951502504411, "learning_rate": 6.517638197687692e-06, "loss": 0.3958, "num_tokens": 8714027484.0, "step": 11391 }, { "epoch": 4.172941284235596, "grad_norm": 0.14095861939574664, "learning_rate": 6.515459265583921e-06, "loss": 0.3427, "num_tokens": 8714850181.0, "step": 11392 }, { "epoch": 4.173307685261519, "grad_norm": 0.15028589047920454, "learning_rate": 6.513281205949506e-06, "loss": 0.3873, "num_tokens": 8715637056.0, "step": 11393 }, { "epoch": 4.173674086287441, "grad_norm": 0.15714433484534504, "learning_rate": 6.511104018907164e-06, "loss": 0.3719, "num_tokens": 8716325399.0, "step": 11394 }, { "epoch": 4.1740404873133645, "grad_norm": 0.14690544567990071, "learning_rate": 6.508927704579568e-06, "loss": 0.3911, "num_tokens": 8717151991.0, "step": 11395 }, { "epoch": 4.174406888339288, "grad_norm": 0.15173885448385255, "learning_rate": 6.506752263089349e-06, "loss": 0.3669, "num_tokens": 8717942321.0, "step": 11396 }, { "epoch": 4.17477328936521, "grad_norm": 0.14410940414723736, "learning_rate": 6.504577694559071e-06, "loss": 0.3405, "num_tokens": 8718746482.0, "step": 11397 }, { "epoch": 4.175139690391133, "grad_norm": 0.162369569499134, "learning_rate": 6.502403999111267e-06, "loss": 0.362, "num_tokens": 8719412155.0, "step": 11398 }, { "epoch": 4.175506091417056, "grad_norm": 0.15275277364836795, "learning_rate": 6.500231176868408e-06, "loss": 0.354, "num_tokens": 8720099673.0, "step": 11399 }, { "epoch": 4.175872492442979, "grad_norm": 0.15682968131772082, "learning_rate": 6.498059227952919e-06, "loss": 0.374, "num_tokens": 8720861732.0, "step": 11400 }, { "epoch": 4.176238893468902, "grad_norm": 0.14675060882452878, "learning_rate": 6.495888152487191e-06, "loss": 0.3463, "num_tokens": 8721669580.0, "step": 11401 }, { "epoch": 4.176605294494824, "grad_norm": 0.15720520794932247, "learning_rate": 6.493717950593537e-06, "loss": 0.3752, "num_tokens": 8722422673.0, "step": 11402 }, { "epoch": 4.176971695520748, "grad_norm": 0.15090024355698228, "learning_rate": 6.4915486223942345e-06, "loss": 0.362, "num_tokens": 8723142830.0, "step": 11403 }, { "epoch": 4.17733809654667, "grad_norm": 0.15319336990991625, "learning_rate": 6.489380168011527e-06, "loss": 0.3706, "num_tokens": 8723882888.0, "step": 11404 }, { "epoch": 4.177704497572593, "grad_norm": 0.14056576111136493, "learning_rate": 6.48721258756759e-06, "loss": 0.3291, "num_tokens": 8724722732.0, "step": 11405 }, { "epoch": 4.178070898598516, "grad_norm": 0.14340353496552277, "learning_rate": 6.485045881184548e-06, "loss": 0.3676, "num_tokens": 8725574514.0, "step": 11406 }, { "epoch": 4.178437299624439, "grad_norm": 0.1775768609909666, "learning_rate": 6.48288004898449e-06, "loss": 0.3168, "num_tokens": 8726190790.0, "step": 11407 }, { "epoch": 4.178803700650362, "grad_norm": 0.16491517681463463, "learning_rate": 6.480715091089445e-06, "loss": 0.3579, "num_tokens": 8726781560.0, "step": 11408 }, { "epoch": 4.179170101676284, "grad_norm": 0.14337729952641426, "learning_rate": 6.478551007621402e-06, "loss": 0.3839, "num_tokens": 8727632961.0, "step": 11409 }, { "epoch": 4.179536502702208, "grad_norm": 0.14305849028412698, "learning_rate": 6.47638779870229e-06, "loss": 0.3649, "num_tokens": 8728450925.0, "step": 11410 }, { "epoch": 4.179902903728131, "grad_norm": 0.14876885337100187, "learning_rate": 6.474225464453996e-06, "loss": 0.3408, "num_tokens": 8729179766.0, "step": 11411 }, { "epoch": 4.180269304754053, "grad_norm": 0.14673838100989464, "learning_rate": 6.472064004998355e-06, "loss": 0.3781, "num_tokens": 8729963012.0, "step": 11412 }, { "epoch": 4.180635705779976, "grad_norm": 0.14837433012745938, "learning_rate": 6.4699034204571545e-06, "loss": 0.3633, "num_tokens": 8730772474.0, "step": 11413 }, { "epoch": 4.181002106805899, "grad_norm": 0.13634665206458427, "learning_rate": 6.4677437109521255e-06, "loss": 0.3449, "num_tokens": 8731692660.0, "step": 11414 }, { "epoch": 4.181368507831822, "grad_norm": 0.14889626756505486, "learning_rate": 6.465584876604972e-06, "loss": 0.37, "num_tokens": 8732430789.0, "step": 11415 }, { "epoch": 4.181734908857745, "grad_norm": 0.1509763074712144, "learning_rate": 6.463426917537315e-06, "loss": 0.3523, "num_tokens": 8733216531.0, "step": 11416 }, { "epoch": 4.1821013098836675, "grad_norm": 0.14097779327661875, "learning_rate": 6.4612698338707495e-06, "loss": 0.3595, "num_tokens": 8734038865.0, "step": 11417 }, { "epoch": 4.182467710909591, "grad_norm": 0.16251907243305216, "learning_rate": 6.459113625726822e-06, "loss": 0.3554, "num_tokens": 8734721599.0, "step": 11418 }, { "epoch": 4.182834111935513, "grad_norm": 0.1468058356184088, "learning_rate": 6.456958293227011e-06, "loss": 0.3497, "num_tokens": 8735468610.0, "step": 11419 }, { "epoch": 4.183200512961436, "grad_norm": 0.1506918359068948, "learning_rate": 6.454803836492767e-06, "loss": 0.3837, "num_tokens": 8736255947.0, "step": 11420 }, { "epoch": 4.1835669139873595, "grad_norm": 0.15308591140089148, "learning_rate": 6.45265025564548e-06, "loss": 0.3654, "num_tokens": 8737043098.0, "step": 11421 }, { "epoch": 4.183933315013282, "grad_norm": 0.14853546024365502, "learning_rate": 6.450497550806494e-06, "loss": 0.3617, "num_tokens": 8737806708.0, "step": 11422 }, { "epoch": 4.184299716039205, "grad_norm": 0.1641155767727704, "learning_rate": 6.448345722097096e-06, "loss": 0.3684, "num_tokens": 8738493476.0, "step": 11423 }, { "epoch": 4.184666117065127, "grad_norm": 0.1519644782508195, "learning_rate": 6.446194769638535e-06, "loss": 0.398, "num_tokens": 8739303079.0, "step": 11424 }, { "epoch": 4.185032518091051, "grad_norm": 0.1407181880092195, "learning_rate": 6.444044693552004e-06, "loss": 0.3667, "num_tokens": 8740135765.0, "step": 11425 }, { "epoch": 4.185398919116974, "grad_norm": 0.1516352889952899, "learning_rate": 6.441895493958649e-06, "loss": 0.3825, "num_tokens": 8740916902.0, "step": 11426 }, { "epoch": 4.185765320142896, "grad_norm": 0.1527357239565709, "learning_rate": 6.439747170979564e-06, "loss": 0.3444, "num_tokens": 8741670467.0, "step": 11427 }, { "epoch": 4.186131721168819, "grad_norm": 0.15258750825555573, "learning_rate": 6.437599724735797e-06, "loss": 0.3427, "num_tokens": 8742411669.0, "step": 11428 }, { "epoch": 4.186498122194742, "grad_norm": 0.17501057202840062, "learning_rate": 6.435453155348343e-06, "loss": 0.4005, "num_tokens": 8743036225.0, "step": 11429 }, { "epoch": 4.186864523220665, "grad_norm": 0.14900121697683183, "learning_rate": 6.433307462938147e-06, "loss": 0.3498, "num_tokens": 8743779290.0, "step": 11430 }, { "epoch": 4.187230924246588, "grad_norm": 0.14573589018064553, "learning_rate": 6.431162647626115e-06, "loss": 0.3429, "num_tokens": 8744557166.0, "step": 11431 }, { "epoch": 4.1875973252725105, "grad_norm": 0.16987038839402152, "learning_rate": 6.4290187095330935e-06, "loss": 0.3517, "num_tokens": 8745217020.0, "step": 11432 }, { "epoch": 4.187963726298434, "grad_norm": 0.15944025997357159, "learning_rate": 6.4268756487798726e-06, "loss": 0.3964, "num_tokens": 8745963825.0, "step": 11433 }, { "epoch": 4.188330127324356, "grad_norm": 0.15708218584936187, "learning_rate": 6.424733465487212e-06, "loss": 0.3744, "num_tokens": 8746653390.0, "step": 11434 }, { "epoch": 4.188696528350279, "grad_norm": 0.15392920555017664, "learning_rate": 6.422592159775812e-06, "loss": 0.3635, "num_tokens": 8747386663.0, "step": 11435 }, { "epoch": 4.1890629293762025, "grad_norm": 0.14797050945728962, "learning_rate": 6.420451731766315e-06, "loss": 0.3832, "num_tokens": 8748259207.0, "step": 11436 }, { "epoch": 4.189429330402125, "grad_norm": 0.14352812529234338, "learning_rate": 6.418312181579329e-06, "loss": 0.3557, "num_tokens": 8749064459.0, "step": 11437 }, { "epoch": 4.189795731428048, "grad_norm": 0.1557317409361132, "learning_rate": 6.4161735093354064e-06, "loss": 0.3542, "num_tokens": 8749744614.0, "step": 11438 }, { "epoch": 4.190162132453971, "grad_norm": 0.15514456905745694, "learning_rate": 6.414035715155047e-06, "loss": 0.3792, "num_tokens": 8750438414.0, "step": 11439 }, { "epoch": 4.190528533479894, "grad_norm": 0.1348768002594147, "learning_rate": 6.411898799158708e-06, "loss": 0.3526, "num_tokens": 8751271919.0, "step": 11440 }, { "epoch": 4.190894934505817, "grad_norm": 0.14019270339222817, "learning_rate": 6.409762761466781e-06, "loss": 0.3815, "num_tokens": 8752097968.0, "step": 11441 }, { "epoch": 4.191261335531739, "grad_norm": 0.15752126216524362, "learning_rate": 6.40762760219964e-06, "loss": 0.3495, "num_tokens": 8752851150.0, "step": 11442 }, { "epoch": 4.191627736557662, "grad_norm": 0.15490363438348725, "learning_rate": 6.405493321477575e-06, "loss": 0.4013, "num_tokens": 8753648447.0, "step": 11443 }, { "epoch": 4.191994137583586, "grad_norm": 0.15018920831307433, "learning_rate": 6.403359919420839e-06, "loss": 0.3773, "num_tokens": 8754454464.0, "step": 11444 }, { "epoch": 4.192360538609508, "grad_norm": 0.1457669671250058, "learning_rate": 6.401227396149651e-06, "loss": 0.3759, "num_tokens": 8755194024.0, "step": 11445 }, { "epoch": 4.192726939635431, "grad_norm": 0.15187092069848554, "learning_rate": 6.399095751784156e-06, "loss": 0.3353, "num_tokens": 8755974479.0, "step": 11446 }, { "epoch": 4.1930933406613535, "grad_norm": 0.1432864502915568, "learning_rate": 6.396964986444459e-06, "loss": 0.3514, "num_tokens": 8756757873.0, "step": 11447 }, { "epoch": 4.193459741687277, "grad_norm": 0.14970600786956578, "learning_rate": 6.394835100250626e-06, "loss": 0.3452, "num_tokens": 8757505959.0, "step": 11448 }, { "epoch": 4.1938261427132, "grad_norm": 0.14610669416625618, "learning_rate": 6.39270609332266e-06, "loss": 0.403, "num_tokens": 8758340786.0, "step": 11449 }, { "epoch": 4.194192543739122, "grad_norm": 0.16113723580101638, "learning_rate": 6.39057796578052e-06, "loss": 0.3727, "num_tokens": 8759030151.0, "step": 11450 }, { "epoch": 4.1945589447650455, "grad_norm": 0.1402998099363099, "learning_rate": 6.388450717744112e-06, "loss": 0.358, "num_tokens": 8759848858.0, "step": 11451 }, { "epoch": 4.194925345790968, "grad_norm": 0.17811836589882735, "learning_rate": 6.386324349333296e-06, "loss": 0.3815, "num_tokens": 8760488776.0, "step": 11452 }, { "epoch": 4.195291746816891, "grad_norm": 0.13777306289746893, "learning_rate": 6.3841988606678816e-06, "loss": 0.3543, "num_tokens": 8761364236.0, "step": 11453 }, { "epoch": 4.195658147842814, "grad_norm": 0.13245824754543958, "learning_rate": 6.38207425186763e-06, "loss": 0.3522, "num_tokens": 8762259430.0, "step": 11454 }, { "epoch": 4.196024548868737, "grad_norm": 0.13983741323067714, "learning_rate": 6.3799505230522485e-06, "loss": 0.3619, "num_tokens": 8763140387.0, "step": 11455 }, { "epoch": 4.19639094989466, "grad_norm": 0.1501675458987489, "learning_rate": 6.3778276743413996e-06, "loss": 0.3944, "num_tokens": 8763873105.0, "step": 11456 }, { "epoch": 4.196757350920582, "grad_norm": 0.1518458446235569, "learning_rate": 6.37570570585469e-06, "loss": 0.3952, "num_tokens": 8764620506.0, "step": 11457 }, { "epoch": 4.197123751946505, "grad_norm": 0.1512796121129767, "learning_rate": 6.37358461771169e-06, "loss": 0.3651, "num_tokens": 8765310333.0, "step": 11458 }, { "epoch": 4.197490152972429, "grad_norm": 0.14565151202308863, "learning_rate": 6.371464410031902e-06, "loss": 0.3457, "num_tokens": 8766067081.0, "step": 11459 }, { "epoch": 4.197856553998351, "grad_norm": 0.1504833300220626, "learning_rate": 6.369345082934791e-06, "loss": 0.3717, "num_tokens": 8766781316.0, "step": 11460 }, { "epoch": 4.198222955024274, "grad_norm": 0.13918381587622458, "learning_rate": 6.367226636539773e-06, "loss": 0.3537, "num_tokens": 8767633821.0, "step": 11461 }, { "epoch": 4.198589356050197, "grad_norm": 0.15555065225815495, "learning_rate": 6.365109070966211e-06, "loss": 0.3672, "num_tokens": 8768311588.0, "step": 11462 }, { "epoch": 4.19895575707612, "grad_norm": 0.16412069433909698, "learning_rate": 6.362992386333408e-06, "loss": 0.3576, "num_tokens": 8768973551.0, "step": 11463 }, { "epoch": 4.199322158102043, "grad_norm": 0.1380745876547622, "learning_rate": 6.36087658276064e-06, "loss": 0.3483, "num_tokens": 8769828474.0, "step": 11464 }, { "epoch": 4.199688559127965, "grad_norm": 0.15801457125187063, "learning_rate": 6.358761660367118e-06, "loss": 0.3761, "num_tokens": 8770608656.0, "step": 11465 }, { "epoch": 4.200054960153889, "grad_norm": 0.14266996890743278, "learning_rate": 6.356647619272002e-06, "loss": 0.3489, "num_tokens": 8771385747.0, "step": 11466 }, { "epoch": 4.200421361179811, "grad_norm": 0.15268873485462922, "learning_rate": 6.35453445959441e-06, "loss": 0.3419, "num_tokens": 8772114152.0, "step": 11467 }, { "epoch": 4.200787762205734, "grad_norm": 0.15118927254572534, "learning_rate": 6.352422181453406e-06, "loss": 0.3519, "num_tokens": 8772833517.0, "step": 11468 }, { "epoch": 4.201154163231657, "grad_norm": 0.1673406902331774, "learning_rate": 6.350310784968007e-06, "loss": 0.3941, "num_tokens": 8773509630.0, "step": 11469 }, { "epoch": 4.20152056425758, "grad_norm": 0.14932153005640122, "learning_rate": 6.348200270257175e-06, "loss": 0.3694, "num_tokens": 8774260598.0, "step": 11470 }, { "epoch": 4.201886965283503, "grad_norm": 0.16312215009022832, "learning_rate": 6.346090637439826e-06, "loss": 0.3843, "num_tokens": 8774977314.0, "step": 11471 }, { "epoch": 4.202253366309425, "grad_norm": 0.14809449731280527, "learning_rate": 6.343981886634835e-06, "loss": 0.3783, "num_tokens": 8775770462.0, "step": 11472 }, { "epoch": 4.2026197673353485, "grad_norm": 0.15285987978260654, "learning_rate": 6.341874017961012e-06, "loss": 0.3565, "num_tokens": 8776486370.0, "step": 11473 }, { "epoch": 4.202986168361272, "grad_norm": 0.15618420015582382, "learning_rate": 6.3397670315371165e-06, "loss": 0.3726, "num_tokens": 8777201936.0, "step": 11474 }, { "epoch": 4.203352569387194, "grad_norm": 0.14009062941717113, "learning_rate": 6.337660927481881e-06, "loss": 0.3753, "num_tokens": 8778019041.0, "step": 11475 }, { "epoch": 4.203718970413117, "grad_norm": 0.1716594450404035, "learning_rate": 6.335555705913959e-06, "loss": 0.3705, "num_tokens": 8778612996.0, "step": 11476 }, { "epoch": 4.2040853714390405, "grad_norm": 0.15879651732932815, "learning_rate": 6.333451366951977e-06, "loss": 0.3639, "num_tokens": 8779323969.0, "step": 11477 }, { "epoch": 4.204451772464963, "grad_norm": 0.14700804264665726, "learning_rate": 6.331347910714501e-06, "loss": 0.3664, "num_tokens": 8780159622.0, "step": 11478 }, { "epoch": 4.204818173490886, "grad_norm": 0.15247074293607685, "learning_rate": 6.329245337320048e-06, "loss": 0.3848, "num_tokens": 8780930301.0, "step": 11479 }, { "epoch": 4.205184574516808, "grad_norm": 0.14367790421626975, "learning_rate": 6.327143646887088e-06, "loss": 0.3468, "num_tokens": 8781723682.0, "step": 11480 }, { "epoch": 4.205550975542732, "grad_norm": 0.15039675133414607, "learning_rate": 6.325042839534041e-06, "loss": 0.3633, "num_tokens": 8782450998.0, "step": 11481 }, { "epoch": 4.205917376568655, "grad_norm": 0.14478258407373545, "learning_rate": 6.322942915379271e-06, "loss": 0.3568, "num_tokens": 8783210863.0, "step": 11482 }, { "epoch": 4.206283777594577, "grad_norm": 0.1514469489004927, "learning_rate": 6.320843874541102e-06, "loss": 0.3517, "num_tokens": 8783982710.0, "step": 11483 }, { "epoch": 4.2066501786205, "grad_norm": 0.1407486817295046, "learning_rate": 6.318745717137796e-06, "loss": 0.3761, "num_tokens": 8784839961.0, "step": 11484 }, { "epoch": 4.207016579646423, "grad_norm": 0.15341779855612672, "learning_rate": 6.316648443287588e-06, "loss": 0.3546, "num_tokens": 8785572906.0, "step": 11485 }, { "epoch": 4.207382980672346, "grad_norm": 0.14332582226306112, "learning_rate": 6.3145520531086355e-06, "loss": 0.3744, "num_tokens": 8786397598.0, "step": 11486 }, { "epoch": 4.207749381698269, "grad_norm": 0.1498402081071574, "learning_rate": 6.312456546719057e-06, "loss": 0.3652, "num_tokens": 8787146403.0, "step": 11487 }, { "epoch": 4.2081157827241915, "grad_norm": 0.1621966292701081, "learning_rate": 6.3103619242369305e-06, "loss": 0.3698, "num_tokens": 8787814200.0, "step": 11488 }, { "epoch": 4.208482183750115, "grad_norm": 0.16020995532912774, "learning_rate": 6.308268185780279e-06, "loss": 0.3548, "num_tokens": 8788539620.0, "step": 11489 }, { "epoch": 4.208848584776037, "grad_norm": 0.14271391108593334, "learning_rate": 6.306175331467059e-06, "loss": 0.3454, "num_tokens": 8789319957.0, "step": 11490 }, { "epoch": 4.20921498580196, "grad_norm": 0.1549364333317358, "learning_rate": 6.304083361415203e-06, "loss": 0.3247, "num_tokens": 8789973487.0, "step": 11491 }, { "epoch": 4.2095813868278835, "grad_norm": 0.1435151065601723, "learning_rate": 6.301992275742585e-06, "loss": 0.3527, "num_tokens": 8790811783.0, "step": 11492 }, { "epoch": 4.209947787853806, "grad_norm": 0.150676454354976, "learning_rate": 6.2999020745670125e-06, "loss": 0.3416, "num_tokens": 8791580242.0, "step": 11493 }, { "epoch": 4.210314188879729, "grad_norm": 0.15478125724954392, "learning_rate": 6.297812758006267e-06, "loss": 0.3499, "num_tokens": 8792381270.0, "step": 11494 }, { "epoch": 4.210680589905651, "grad_norm": 0.1555058677412237, "learning_rate": 6.29572432617807e-06, "loss": 0.3383, "num_tokens": 8793074269.0, "step": 11495 }, { "epoch": 4.211046990931575, "grad_norm": 0.15392746883466382, "learning_rate": 6.293636779200092e-06, "loss": 0.3533, "num_tokens": 8793787916.0, "step": 11496 }, { "epoch": 4.211413391957498, "grad_norm": 0.15075826143133977, "learning_rate": 6.291550117189953e-06, "loss": 0.3606, "num_tokens": 8794544314.0, "step": 11497 }, { "epoch": 4.21177979298342, "grad_norm": 0.15070565297049393, "learning_rate": 6.289464340265223e-06, "loss": 0.3765, "num_tokens": 8795344084.0, "step": 11498 }, { "epoch": 4.212146194009343, "grad_norm": 0.13930324405502167, "learning_rate": 6.287379448543434e-06, "loss": 0.351, "num_tokens": 8796164065.0, "step": 11499 }, { "epoch": 4.212512595035266, "grad_norm": 0.15386194659378583, "learning_rate": 6.285295442142047e-06, "loss": 0.3433, "num_tokens": 8796892161.0, "step": 11500 }, { "epoch": 4.212878996061189, "grad_norm": 0.15514926435574752, "learning_rate": 6.2832123211784855e-06, "loss": 0.3513, "num_tokens": 8797586851.0, "step": 11501 }, { "epoch": 4.213245397087112, "grad_norm": 0.1683487742602798, "learning_rate": 6.281130085770134e-06, "loss": 0.3586, "num_tokens": 8798234861.0, "step": 11502 }, { "epoch": 4.2136117981130345, "grad_norm": 0.1490378180702417, "learning_rate": 6.279048736034299e-06, "loss": 0.3528, "num_tokens": 8798983563.0, "step": 11503 }, { "epoch": 4.213978199138958, "grad_norm": 0.14406910959633323, "learning_rate": 6.276968272088266e-06, "loss": 0.3476, "num_tokens": 8799754413.0, "step": 11504 }, { "epoch": 4.21434460016488, "grad_norm": 0.1573715785619023, "learning_rate": 6.2748886940492504e-06, "loss": 0.3553, "num_tokens": 8800461879.0, "step": 11505 }, { "epoch": 4.214711001190803, "grad_norm": 0.1564679933399756, "learning_rate": 6.272810002034428e-06, "loss": 0.3814, "num_tokens": 8801225481.0, "step": 11506 }, { "epoch": 4.2150774022167266, "grad_norm": 0.14967651459304546, "learning_rate": 6.2707321961609204e-06, "loss": 0.3783, "num_tokens": 8801961515.0, "step": 11507 }, { "epoch": 4.215443803242649, "grad_norm": 0.14123876040891273, "learning_rate": 6.268655276545801e-06, "loss": 0.3637, "num_tokens": 8802761756.0, "step": 11508 }, { "epoch": 4.215810204268572, "grad_norm": 0.1413015369597013, "learning_rate": 6.2665792433060945e-06, "loss": 0.3579, "num_tokens": 8803533447.0, "step": 11509 }, { "epoch": 4.2161766052944945, "grad_norm": 0.15319670607714053, "learning_rate": 6.264504096558772e-06, "loss": 0.3798, "num_tokens": 8804244449.0, "step": 11510 }, { "epoch": 4.216543006320418, "grad_norm": 0.14887972814439596, "learning_rate": 6.262429836420756e-06, "loss": 0.3355, "num_tokens": 8805064454.0, "step": 11511 }, { "epoch": 4.216909407346341, "grad_norm": 0.13678978524360058, "learning_rate": 6.260356463008923e-06, "loss": 0.3704, "num_tokens": 8805944129.0, "step": 11512 }, { "epoch": 4.217275808372263, "grad_norm": 0.15085311226419085, "learning_rate": 6.258283976440094e-06, "loss": 0.3695, "num_tokens": 8806702923.0, "step": 11513 }, { "epoch": 4.2176422093981865, "grad_norm": 0.14104001876133018, "learning_rate": 6.256212376831038e-06, "loss": 0.3513, "num_tokens": 8807517709.0, "step": 11514 }, { "epoch": 4.218008610424109, "grad_norm": 0.15189198926888497, "learning_rate": 6.254141664298488e-06, "loss": 0.3554, "num_tokens": 8808230653.0, "step": 11515 }, { "epoch": 4.218375011450032, "grad_norm": 0.15729186773122225, "learning_rate": 6.252071838959113e-06, "loss": 0.3516, "num_tokens": 8808931745.0, "step": 11516 }, { "epoch": 4.218741412475955, "grad_norm": 0.153729337825595, "learning_rate": 6.250002900929532e-06, "loss": 0.3557, "num_tokens": 8809715636.0, "step": 11517 }, { "epoch": 4.219107813501878, "grad_norm": 0.14030879275412547, "learning_rate": 6.247934850326324e-06, "loss": 0.3908, "num_tokens": 8810603684.0, "step": 11518 }, { "epoch": 4.219474214527801, "grad_norm": 0.14771637094559914, "learning_rate": 6.245867687266015e-06, "loss": 0.3512, "num_tokens": 8811390689.0, "step": 11519 }, { "epoch": 4.219840615553723, "grad_norm": 0.15940153063142096, "learning_rate": 6.243801411865065e-06, "loss": 0.3483, "num_tokens": 8812102318.0, "step": 11520 }, { "epoch": 4.220207016579646, "grad_norm": 0.1313847973811951, "learning_rate": 6.241736024239912e-06, "loss": 0.3479, "num_tokens": 8812987701.0, "step": 11521 }, { "epoch": 4.22057341760557, "grad_norm": 0.13582065753167566, "learning_rate": 6.23967152450692e-06, "loss": 0.3542, "num_tokens": 8813871447.0, "step": 11522 }, { "epoch": 4.220939818631492, "grad_norm": 0.14926514320380962, "learning_rate": 6.237607912782419e-06, "loss": 0.3666, "num_tokens": 8814645912.0, "step": 11523 }, { "epoch": 4.221306219657415, "grad_norm": 0.16562455459131897, "learning_rate": 6.235545189182678e-06, "loss": 0.3636, "num_tokens": 8815262890.0, "step": 11524 }, { "epoch": 4.2216726206833375, "grad_norm": 0.13209672484895157, "learning_rate": 6.233483353823919e-06, "loss": 0.3459, "num_tokens": 8816218034.0, "step": 11525 }, { "epoch": 4.222039021709261, "grad_norm": 0.14007676328163676, "learning_rate": 6.23142240682232e-06, "loss": 0.3648, "num_tokens": 8817029969.0, "step": 11526 }, { "epoch": 4.222405422735184, "grad_norm": 0.14419568511175038, "learning_rate": 6.229362348294001e-06, "loss": 0.3918, "num_tokens": 8817873412.0, "step": 11527 }, { "epoch": 4.222771823761106, "grad_norm": 0.14615264005056056, "learning_rate": 6.2273031783550304e-06, "loss": 0.3712, "num_tokens": 8818647966.0, "step": 11528 }, { "epoch": 4.2231382247870295, "grad_norm": 0.16348125911993522, "learning_rate": 6.225244897121447e-06, "loss": 0.3742, "num_tokens": 8819369356.0, "step": 11529 }, { "epoch": 4.223504625812952, "grad_norm": 0.14125706618828565, "learning_rate": 6.2231875047092075e-06, "loss": 0.3571, "num_tokens": 8820227814.0, "step": 11530 }, { "epoch": 4.223871026838875, "grad_norm": 0.15312710932971624, "learning_rate": 6.221131001234235e-06, "loss": 0.36, "num_tokens": 8820944862.0, "step": 11531 }, { "epoch": 4.224237427864798, "grad_norm": 0.14270781443820935, "learning_rate": 6.219075386812416e-06, "loss": 0.364, "num_tokens": 8821887137.0, "step": 11532 }, { "epoch": 4.224603828890721, "grad_norm": 0.14822369212685746, "learning_rate": 6.217020661559558e-06, "loss": 0.354, "num_tokens": 8822717569.0, "step": 11533 }, { "epoch": 4.224970229916644, "grad_norm": 0.1520826228503055, "learning_rate": 6.214966825591443e-06, "loss": 0.3651, "num_tokens": 8823478090.0, "step": 11534 }, { "epoch": 4.225336630942567, "grad_norm": 0.15445239582725206, "learning_rate": 6.212913879023792e-06, "loss": 0.3605, "num_tokens": 8824209214.0, "step": 11535 }, { "epoch": 4.225703031968489, "grad_norm": 0.16014771992457766, "learning_rate": 6.210861821972274e-06, "loss": 0.3461, "num_tokens": 8824941637.0, "step": 11536 }, { "epoch": 4.226069432994413, "grad_norm": 0.14372628539807883, "learning_rate": 6.208810654552515e-06, "loss": 0.3403, "num_tokens": 8825774189.0, "step": 11537 }, { "epoch": 4.226435834020335, "grad_norm": 0.1476518328807774, "learning_rate": 6.206760376880084e-06, "loss": 0.3659, "num_tokens": 8826574278.0, "step": 11538 }, { "epoch": 4.226802235046258, "grad_norm": 0.14614867933072714, "learning_rate": 6.204710989070506e-06, "loss": 0.3668, "num_tokens": 8827439529.0, "step": 11539 }, { "epoch": 4.227168636072181, "grad_norm": 0.15366910706793252, "learning_rate": 6.20266249123925e-06, "loss": 0.3788, "num_tokens": 8828217535.0, "step": 11540 }, { "epoch": 4.227535037098104, "grad_norm": 0.15912783038043352, "learning_rate": 6.200614883501734e-06, "loss": 0.3561, "num_tokens": 8828883076.0, "step": 11541 }, { "epoch": 4.227901438124027, "grad_norm": 0.1491818089627072, "learning_rate": 6.198568165973343e-06, "loss": 0.3636, "num_tokens": 8829619825.0, "step": 11542 }, { "epoch": 4.228267839149949, "grad_norm": 0.14996950325851488, "learning_rate": 6.196522338769384e-06, "loss": 0.3882, "num_tokens": 8830442362.0, "step": 11543 }, { "epoch": 4.2286342401758725, "grad_norm": 0.15649245311387175, "learning_rate": 6.194477402005131e-06, "loss": 0.4148, "num_tokens": 8831212396.0, "step": 11544 }, { "epoch": 4.229000641201796, "grad_norm": 0.14158505771299343, "learning_rate": 6.192433355795813e-06, "loss": 0.3472, "num_tokens": 8832056158.0, "step": 11545 }, { "epoch": 4.229367042227718, "grad_norm": 0.16004386274709417, "learning_rate": 6.190390200256599e-06, "loss": 0.3607, "num_tokens": 8832719094.0, "step": 11546 }, { "epoch": 4.229733443253641, "grad_norm": 0.13311164286644694, "learning_rate": 6.1883479355026e-06, "loss": 0.3581, "num_tokens": 8833606336.0, "step": 11547 }, { "epoch": 4.230099844279564, "grad_norm": 0.14505763351044307, "learning_rate": 6.186306561648894e-06, "loss": 0.3615, "num_tokens": 8834384036.0, "step": 11548 }, { "epoch": 4.230466245305487, "grad_norm": 0.14243995690418307, "learning_rate": 6.184266078810505e-06, "loss": 0.3456, "num_tokens": 8835197373.0, "step": 11549 }, { "epoch": 4.23083264633141, "grad_norm": 0.15332100998510145, "learning_rate": 6.182226487102389e-06, "loss": 0.3867, "num_tokens": 8835958177.0, "step": 11550 }, { "epoch": 4.231199047357332, "grad_norm": 0.16141740121110304, "learning_rate": 6.180187786639482e-06, "loss": 0.3889, "num_tokens": 8836643549.0, "step": 11551 }, { "epoch": 4.231565448383256, "grad_norm": 0.13873242284834203, "learning_rate": 6.178149977536645e-06, "loss": 0.3675, "num_tokens": 8837541354.0, "step": 11552 }, { "epoch": 4.231931849409178, "grad_norm": 0.14722521595221613, "learning_rate": 6.1761130599086996e-06, "loss": 0.3726, "num_tokens": 8838360939.0, "step": 11553 }, { "epoch": 4.232298250435101, "grad_norm": 0.16896158809192968, "learning_rate": 6.1740770338704135e-06, "loss": 0.3822, "num_tokens": 8839042291.0, "step": 11554 }, { "epoch": 4.232664651461024, "grad_norm": 0.16349465808911565, "learning_rate": 6.172041899536504e-06, "loss": 0.3769, "num_tokens": 8839701266.0, "step": 11555 }, { "epoch": 4.233031052486947, "grad_norm": 0.15050215412893084, "learning_rate": 6.17000765702165e-06, "loss": 0.3805, "num_tokens": 8840482945.0, "step": 11556 }, { "epoch": 4.23339745351287, "grad_norm": 0.1626077062951677, "learning_rate": 6.167974306440457e-06, "loss": 0.368, "num_tokens": 8841176510.0, "step": 11557 }, { "epoch": 4.233763854538792, "grad_norm": 0.15116871320411152, "learning_rate": 6.165941847907497e-06, "loss": 0.3678, "num_tokens": 8841989228.0, "step": 11558 }, { "epoch": 4.234130255564716, "grad_norm": 0.1515419166562894, "learning_rate": 6.163910281537295e-06, "loss": 0.3782, "num_tokens": 8842828893.0, "step": 11559 }, { "epoch": 4.234496656590639, "grad_norm": 0.14139238618649266, "learning_rate": 6.161879607444307e-06, "loss": 0.3404, "num_tokens": 8843665361.0, "step": 11560 }, { "epoch": 4.234863057616561, "grad_norm": 0.156470268144422, "learning_rate": 6.159849825742961e-06, "loss": 0.3621, "num_tokens": 8844409701.0, "step": 11561 }, { "epoch": 4.235229458642484, "grad_norm": 0.15440090862082503, "learning_rate": 6.157820936547618e-06, "loss": 0.3791, "num_tokens": 8845111916.0, "step": 11562 }, { "epoch": 4.235595859668407, "grad_norm": 0.1480440238688963, "learning_rate": 6.155792939972598e-06, "loss": 0.3559, "num_tokens": 8845895507.0, "step": 11563 }, { "epoch": 4.23596226069433, "grad_norm": 0.1493027815715179, "learning_rate": 6.153765836132165e-06, "loss": 0.3729, "num_tokens": 8846607845.0, "step": 11564 }, { "epoch": 4.236328661720253, "grad_norm": 0.15042903276758132, "learning_rate": 6.151739625140539e-06, "loss": 0.3603, "num_tokens": 8847380937.0, "step": 11565 }, { "epoch": 4.2366950627461755, "grad_norm": 0.14371349673495942, "learning_rate": 6.1497143071118804e-06, "loss": 0.3702, "num_tokens": 8848199713.0, "step": 11566 }, { "epoch": 4.237061463772099, "grad_norm": 0.14520070241908625, "learning_rate": 6.14768988216031e-06, "loss": 0.3675, "num_tokens": 8849005137.0, "step": 11567 }, { "epoch": 4.237427864798021, "grad_norm": 0.15723272349568812, "learning_rate": 6.145666350399888e-06, "loss": 0.4073, "num_tokens": 8849784998.0, "step": 11568 }, { "epoch": 4.237794265823944, "grad_norm": 0.14406286630128853, "learning_rate": 6.1436437119446355e-06, "loss": 0.3769, "num_tokens": 8850586707.0, "step": 11569 }, { "epoch": 4.2381606668498675, "grad_norm": 0.15655937676956608, "learning_rate": 6.141621966908511e-06, "loss": 0.3879, "num_tokens": 8851318060.0, "step": 11570 }, { "epoch": 4.23852706787579, "grad_norm": 0.15418156721085893, "learning_rate": 6.139601115405429e-06, "loss": 0.3864, "num_tokens": 8852121034.0, "step": 11571 }, { "epoch": 4.238893468901713, "grad_norm": 0.14252837887815364, "learning_rate": 6.137581157549261e-06, "loss": 0.3442, "num_tokens": 8852917736.0, "step": 11572 }, { "epoch": 4.239259869927635, "grad_norm": 0.14790457610634555, "learning_rate": 6.135562093453817e-06, "loss": 0.3393, "num_tokens": 8853646317.0, "step": 11573 }, { "epoch": 4.239626270953559, "grad_norm": 0.13744561085620297, "learning_rate": 6.1335439232328544e-06, "loss": 0.331, "num_tokens": 8854462069.0, "step": 11574 }, { "epoch": 4.239992671979482, "grad_norm": 0.15688446246420407, "learning_rate": 6.13152664700009e-06, "loss": 0.3805, "num_tokens": 8855161510.0, "step": 11575 }, { "epoch": 4.240359073005404, "grad_norm": 0.14821472595128227, "learning_rate": 6.129510264869194e-06, "loss": 0.3427, "num_tokens": 8855929302.0, "step": 11576 }, { "epoch": 4.240725474031327, "grad_norm": 0.1524570181746871, "learning_rate": 6.127494776953764e-06, "loss": 0.3627, "num_tokens": 8856674580.0, "step": 11577 }, { "epoch": 4.241091875057251, "grad_norm": 0.14667801435390782, "learning_rate": 6.125480183367372e-06, "loss": 0.369, "num_tokens": 8857442571.0, "step": 11578 }, { "epoch": 4.241458276083173, "grad_norm": 0.171505916497578, "learning_rate": 6.123466484223527e-06, "loss": 0.4305, "num_tokens": 8858085189.0, "step": 11579 }, { "epoch": 4.241824677109096, "grad_norm": 0.1513833281660754, "learning_rate": 6.121453679635688e-06, "loss": 0.3514, "num_tokens": 8858839163.0, "step": 11580 }, { "epoch": 4.2421910781350185, "grad_norm": 0.14740021676558213, "learning_rate": 6.11944176971727e-06, "loss": 0.3787, "num_tokens": 8859641322.0, "step": 11581 }, { "epoch": 4.242557479160942, "grad_norm": 0.15001511987974062, "learning_rate": 6.117430754581627e-06, "loss": 0.3519, "num_tokens": 8860425724.0, "step": 11582 }, { "epoch": 4.242923880186865, "grad_norm": 0.1367053953936047, "learning_rate": 6.115420634342073e-06, "loss": 0.3878, "num_tokens": 8861279047.0, "step": 11583 }, { "epoch": 4.243290281212787, "grad_norm": 0.15291404622835, "learning_rate": 6.113411409111867e-06, "loss": 0.3518, "num_tokens": 8861996642.0, "step": 11584 }, { "epoch": 4.2436566822387105, "grad_norm": 0.1356422742064192, "learning_rate": 6.111403079004213e-06, "loss": 0.3634, "num_tokens": 8862843736.0, "step": 11585 }, { "epoch": 4.244023083264633, "grad_norm": 0.15115575466071468, "learning_rate": 6.109395644132281e-06, "loss": 0.3875, "num_tokens": 8863653628.0, "step": 11586 }, { "epoch": 4.244389484290556, "grad_norm": 0.14541501555188832, "learning_rate": 6.107389104609166e-06, "loss": 0.3799, "num_tokens": 8864453835.0, "step": 11587 }, { "epoch": 4.244755885316479, "grad_norm": 0.15111703720370748, "learning_rate": 6.105383460547931e-06, "loss": 0.3646, "num_tokens": 8865194584.0, "step": 11588 }, { "epoch": 4.245122286342402, "grad_norm": 0.1484380450106189, "learning_rate": 6.1033787120615875e-06, "loss": 0.3718, "num_tokens": 8865971649.0, "step": 11589 }, { "epoch": 4.245488687368325, "grad_norm": 0.15135428317411595, "learning_rate": 6.101374859263083e-06, "loss": 0.3739, "num_tokens": 8866708206.0, "step": 11590 }, { "epoch": 4.245855088394247, "grad_norm": 0.1529022859828295, "learning_rate": 6.099371902265328e-06, "loss": 0.3866, "num_tokens": 8867437337.0, "step": 11591 }, { "epoch": 4.24622148942017, "grad_norm": 0.14451811926647437, "learning_rate": 6.0973698411811825e-06, "loss": 0.3592, "num_tokens": 8868269962.0, "step": 11592 }, { "epoch": 4.246587890446094, "grad_norm": 0.15198838047834723, "learning_rate": 6.095368676123446e-06, "loss": 0.362, "num_tokens": 8869058319.0, "step": 11593 }, { "epoch": 4.246954291472016, "grad_norm": 0.15742240269507557, "learning_rate": 6.093368407204876e-06, "loss": 0.391, "num_tokens": 8869744000.0, "step": 11594 }, { "epoch": 4.247320692497939, "grad_norm": 0.15333318971044882, "learning_rate": 6.091369034538176e-06, "loss": 0.3647, "num_tokens": 8870484965.0, "step": 11595 }, { "epoch": 4.2476870935238615, "grad_norm": 0.14943005159759512, "learning_rate": 6.0893705582359984e-06, "loss": 0.3767, "num_tokens": 8871251259.0, "step": 11596 }, { "epoch": 4.248053494549785, "grad_norm": 0.13451141072227307, "learning_rate": 6.087372978410948e-06, "loss": 0.3713, "num_tokens": 8872144281.0, "step": 11597 }, { "epoch": 4.248419895575708, "grad_norm": 0.1432956224915428, "learning_rate": 6.085376295175575e-06, "loss": 0.3544, "num_tokens": 8872907978.0, "step": 11598 }, { "epoch": 4.24878629660163, "grad_norm": 0.15090821002279314, "learning_rate": 6.08338050864239e-06, "loss": 0.382, "num_tokens": 8873694927.0, "step": 11599 }, { "epoch": 4.2491526976275535, "grad_norm": 0.16175700921213376, "learning_rate": 6.081385618923834e-06, "loss": 0.3807, "num_tokens": 8874456046.0, "step": 11600 }, { "epoch": 4.249519098653476, "grad_norm": 0.16222519251414289, "learning_rate": 6.07939162613231e-06, "loss": 0.3726, "num_tokens": 8875182179.0, "step": 11601 }, { "epoch": 4.249885499679399, "grad_norm": 0.16234635316365348, "learning_rate": 6.077398530380173e-06, "loss": 0.3606, "num_tokens": 8875810903.0, "step": 11602 }, { "epoch": 4.250251900705322, "grad_norm": 0.14585296549989285, "learning_rate": 6.0754063317797265e-06, "loss": 0.3818, "num_tokens": 8876640159.0, "step": 11603 }, { "epoch": 4.250618301731245, "grad_norm": 0.1423146129695785, "learning_rate": 6.073415030443209e-06, "loss": 0.3738, "num_tokens": 8877471697.0, "step": 11604 }, { "epoch": 4.250984702757168, "grad_norm": 0.16547936775873143, "learning_rate": 6.071424626482827e-06, "loss": 0.3935, "num_tokens": 8878108633.0, "step": 11605 }, { "epoch": 4.25135110378309, "grad_norm": 0.15779527444086933, "learning_rate": 6.069435120010726e-06, "loss": 0.3977, "num_tokens": 8878833117.0, "step": 11606 }, { "epoch": 4.2517175048090134, "grad_norm": 0.1408068779315942, "learning_rate": 6.067446511139006e-06, "loss": 0.3672, "num_tokens": 8879700796.0, "step": 11607 }, { "epoch": 4.252083905834937, "grad_norm": 0.14849926616506717, "learning_rate": 6.065458799979713e-06, "loss": 0.3812, "num_tokens": 8880535780.0, "step": 11608 }, { "epoch": 4.252450306860859, "grad_norm": 0.14489373921608995, "learning_rate": 6.063471986644845e-06, "loss": 0.3614, "num_tokens": 8881316142.0, "step": 11609 }, { "epoch": 4.252816707886782, "grad_norm": 0.16242013260995833, "learning_rate": 6.061486071246346e-06, "loss": 0.3929, "num_tokens": 8882013342.0, "step": 11610 }, { "epoch": 4.253183108912705, "grad_norm": 0.15474695790438453, "learning_rate": 6.059501053896112e-06, "loss": 0.3555, "num_tokens": 8882705173.0, "step": 11611 }, { "epoch": 4.253549509938628, "grad_norm": 0.14957999383067366, "learning_rate": 6.057516934705986e-06, "loss": 0.3966, "num_tokens": 8883502804.0, "step": 11612 }, { "epoch": 4.253915910964551, "grad_norm": 0.1321358683819934, "learning_rate": 6.055533713787771e-06, "loss": 0.3587, "num_tokens": 8884442249.0, "step": 11613 }, { "epoch": 4.254282311990473, "grad_norm": 0.1477064945470068, "learning_rate": 6.053551391253202e-06, "loss": 0.3833, "num_tokens": 8885234578.0, "step": 11614 }, { "epoch": 4.254648713016397, "grad_norm": 0.15698969540756708, "learning_rate": 6.0515699672139705e-06, "loss": 0.375, "num_tokens": 8885955962.0, "step": 11615 }, { "epoch": 4.255015114042319, "grad_norm": 0.17056008022437244, "learning_rate": 6.049589441781727e-06, "loss": 0.3701, "num_tokens": 8886635191.0, "step": 11616 }, { "epoch": 4.255381515068242, "grad_norm": 0.15571940701039377, "learning_rate": 6.047609815068054e-06, "loss": 0.3697, "num_tokens": 8887422332.0, "step": 11617 }, { "epoch": 4.255747916094165, "grad_norm": 0.15371268337562033, "learning_rate": 6.0456310871845015e-06, "loss": 0.3397, "num_tokens": 8888105118.0, "step": 11618 }, { "epoch": 4.256114317120088, "grad_norm": 0.16277918164389665, "learning_rate": 6.043653258242558e-06, "loss": 0.4, "num_tokens": 8888750781.0, "step": 11619 }, { "epoch": 4.256480718146011, "grad_norm": 0.14198047528255156, "learning_rate": 6.041676328353658e-06, "loss": 0.3685, "num_tokens": 8889582047.0, "step": 11620 }, { "epoch": 4.256847119171933, "grad_norm": 0.15324663364353075, "learning_rate": 6.039700297629198e-06, "loss": 0.3689, "num_tokens": 8890352740.0, "step": 11621 }, { "epoch": 4.2572135201978565, "grad_norm": 0.16050062639945686, "learning_rate": 6.03772516618051e-06, "loss": 0.3679, "num_tokens": 8891028298.0, "step": 11622 }, { "epoch": 4.25757992122378, "grad_norm": 0.172224636013477, "learning_rate": 6.035750934118887e-06, "loss": 0.3423, "num_tokens": 8891622761.0, "step": 11623 }, { "epoch": 4.257946322249702, "grad_norm": 0.14702037909703644, "learning_rate": 6.033777601555561e-06, "loss": 0.3591, "num_tokens": 8892391340.0, "step": 11624 }, { "epoch": 4.258312723275625, "grad_norm": 0.13894581230544598, "learning_rate": 6.031805168601721e-06, "loss": 0.3763, "num_tokens": 8893278939.0, "step": 11625 }, { "epoch": 4.258679124301548, "grad_norm": 0.15508244624168185, "learning_rate": 6.02983363536851e-06, "loss": 0.3689, "num_tokens": 8893996647.0, "step": 11626 }, { "epoch": 4.259045525327471, "grad_norm": 0.16843885830263966, "learning_rate": 6.027863001967002e-06, "loss": 0.3812, "num_tokens": 8894738851.0, "step": 11627 }, { "epoch": 4.259411926353394, "grad_norm": 0.14733202026477515, "learning_rate": 6.0258932685082315e-06, "loss": 0.3645, "num_tokens": 8895490097.0, "step": 11628 }, { "epoch": 4.259778327379316, "grad_norm": 0.14030938816362679, "learning_rate": 6.023924435103191e-06, "loss": 0.3655, "num_tokens": 8896342824.0, "step": 11629 }, { "epoch": 4.26014472840524, "grad_norm": 0.16709358802826832, "learning_rate": 6.021956501862811e-06, "loss": 0.3822, "num_tokens": 8896972217.0, "step": 11630 }, { "epoch": 4.260511129431162, "grad_norm": 0.1604310761172339, "learning_rate": 6.019989468897969e-06, "loss": 0.3508, "num_tokens": 8897658338.0, "step": 11631 }, { "epoch": 4.260877530457085, "grad_norm": 0.16767989058284702, "learning_rate": 6.018023336319501e-06, "loss": 0.3781, "num_tokens": 8898369169.0, "step": 11632 }, { "epoch": 4.261243931483008, "grad_norm": 0.15124556555749452, "learning_rate": 6.016058104238187e-06, "loss": 0.3271, "num_tokens": 8899128186.0, "step": 11633 }, { "epoch": 4.261610332508931, "grad_norm": 0.15820605802971735, "learning_rate": 6.0140937727647534e-06, "loss": 0.3668, "num_tokens": 8899866672.0, "step": 11634 }, { "epoch": 4.261976733534854, "grad_norm": 0.13975061219193105, "learning_rate": 6.012130342009883e-06, "loss": 0.3728, "num_tokens": 8900725560.0, "step": 11635 }, { "epoch": 4.262343134560777, "grad_norm": 0.14365950405386504, "learning_rate": 6.010167812084208e-06, "loss": 0.3351, "num_tokens": 8901487823.0, "step": 11636 }, { "epoch": 4.2627095355866995, "grad_norm": 0.161602646859634, "learning_rate": 6.008206183098299e-06, "loss": 0.3776, "num_tokens": 8902191531.0, "step": 11637 }, { "epoch": 4.263075936612623, "grad_norm": 0.14953781485374687, "learning_rate": 6.006245455162686e-06, "loss": 0.382, "num_tokens": 8902959713.0, "step": 11638 }, { "epoch": 4.263442337638545, "grad_norm": 0.14295782299919912, "learning_rate": 6.004285628387847e-06, "loss": 0.3843, "num_tokens": 8903796418.0, "step": 11639 }, { "epoch": 4.263808738664468, "grad_norm": 0.1578198909663403, "learning_rate": 6.002326702884207e-06, "loss": 0.3621, "num_tokens": 8904499795.0, "step": 11640 }, { "epoch": 4.2641751396903915, "grad_norm": 0.14048203106334525, "learning_rate": 6.0003686787621385e-06, "loss": 0.3777, "num_tokens": 8905335294.0, "step": 11641 }, { "epoch": 4.264541540716314, "grad_norm": 0.15352121020389373, "learning_rate": 5.998411556131964e-06, "loss": 0.3371, "num_tokens": 8906064746.0, "step": 11642 }, { "epoch": 4.264907941742237, "grad_norm": 0.1446383674459692, "learning_rate": 5.996455335103967e-06, "loss": 0.3369, "num_tokens": 8906817056.0, "step": 11643 }, { "epoch": 4.265274342768159, "grad_norm": 0.14609160055552647, "learning_rate": 5.994500015788356e-06, "loss": 0.3438, "num_tokens": 8907529211.0, "step": 11644 }, { "epoch": 4.265640743794083, "grad_norm": 0.1821378761303869, "learning_rate": 5.99254559829531e-06, "loss": 0.372, "num_tokens": 8908092408.0, "step": 11645 }, { "epoch": 4.266007144820006, "grad_norm": 0.16965099983877943, "learning_rate": 5.990592082734951e-06, "loss": 0.3589, "num_tokens": 8908760267.0, "step": 11646 }, { "epoch": 4.266373545845928, "grad_norm": 0.1595721309994886, "learning_rate": 5.9886394692173474e-06, "loss": 0.3637, "num_tokens": 8909464280.0, "step": 11647 }, { "epoch": 4.266739946871851, "grad_norm": 0.16624050706614446, "learning_rate": 5.986687757852516e-06, "loss": 0.3726, "num_tokens": 8910259486.0, "step": 11648 }, { "epoch": 4.267106347897774, "grad_norm": 0.15792349897794933, "learning_rate": 5.984736948750427e-06, "loss": 0.3765, "num_tokens": 8911100936.0, "step": 11649 }, { "epoch": 4.267472748923697, "grad_norm": 0.16253991190730074, "learning_rate": 5.982787042020996e-06, "loss": 0.3426, "num_tokens": 8911717835.0, "step": 11650 }, { "epoch": 4.26783914994962, "grad_norm": 0.16061187402662203, "learning_rate": 5.980838037774095e-06, "loss": 0.3587, "num_tokens": 8912399510.0, "step": 11651 }, { "epoch": 4.2682055509755426, "grad_norm": 0.16689015489354356, "learning_rate": 5.978889936119534e-06, "loss": 0.3543, "num_tokens": 8913098573.0, "step": 11652 }, { "epoch": 4.268571952001466, "grad_norm": 0.1629944401874464, "learning_rate": 5.97694273716708e-06, "loss": 0.3622, "num_tokens": 8913788407.0, "step": 11653 }, { "epoch": 4.268938353027388, "grad_norm": 0.16141099364730208, "learning_rate": 5.974996441026446e-06, "loss": 0.3811, "num_tokens": 8914508157.0, "step": 11654 }, { "epoch": 4.269304754053311, "grad_norm": 0.15689845263113963, "learning_rate": 5.973051047807295e-06, "loss": 0.3757, "num_tokens": 8915230657.0, "step": 11655 }, { "epoch": 4.269671155079235, "grad_norm": 0.15053642267644887, "learning_rate": 5.971106557619247e-06, "loss": 0.3423, "num_tokens": 8915979218.0, "step": 11656 }, { "epoch": 4.270037556105157, "grad_norm": 0.13992383253099833, "learning_rate": 5.96916297057185e-06, "loss": 0.3699, "num_tokens": 8916858378.0, "step": 11657 }, { "epoch": 4.27040395713108, "grad_norm": 0.16132561447308855, "learning_rate": 5.967220286774619e-06, "loss": 0.3577, "num_tokens": 8917557719.0, "step": 11658 }, { "epoch": 4.2707703581570025, "grad_norm": 0.14292441651287566, "learning_rate": 5.965278506337021e-06, "loss": 0.3557, "num_tokens": 8918359769.0, "step": 11659 }, { "epoch": 4.271136759182926, "grad_norm": 0.14524243706484463, "learning_rate": 5.963337629368462e-06, "loss": 0.3473, "num_tokens": 8919146037.0, "step": 11660 }, { "epoch": 4.271503160208849, "grad_norm": 0.16325269820244195, "learning_rate": 5.961397655978289e-06, "loss": 0.3697, "num_tokens": 8919865075.0, "step": 11661 }, { "epoch": 4.271869561234771, "grad_norm": 0.1539097662589714, "learning_rate": 5.959458586275823e-06, "loss": 0.3591, "num_tokens": 8920562440.0, "step": 11662 }, { "epoch": 4.2722359622606945, "grad_norm": 0.14574708140021633, "learning_rate": 5.9575204203703144e-06, "loss": 0.36, "num_tokens": 8921403418.0, "step": 11663 }, { "epoch": 4.272602363286617, "grad_norm": 0.1486320304218108, "learning_rate": 5.955583158370968e-06, "loss": 0.3617, "num_tokens": 8922270995.0, "step": 11664 }, { "epoch": 4.27296876431254, "grad_norm": 0.15898714257668473, "learning_rate": 5.953646800386938e-06, "loss": 0.4138, "num_tokens": 8923021404.0, "step": 11665 }, { "epoch": 4.273335165338463, "grad_norm": 0.15058989372122505, "learning_rate": 5.951711346527327e-06, "loss": 0.3538, "num_tokens": 8923860846.0, "step": 11666 }, { "epoch": 4.273701566364386, "grad_norm": 0.14592826907471004, "learning_rate": 5.9497767969011875e-06, "loss": 0.343, "num_tokens": 8924616535.0, "step": 11667 }, { "epoch": 4.274067967390309, "grad_norm": 0.13838776137409806, "learning_rate": 5.947843151617525e-06, "loss": 0.3622, "num_tokens": 8925462597.0, "step": 11668 }, { "epoch": 4.274434368416232, "grad_norm": 0.1757803168383562, "learning_rate": 5.945910410785279e-06, "loss": 0.4112, "num_tokens": 8926123171.0, "step": 11669 }, { "epoch": 4.274800769442154, "grad_norm": 0.16374408771265278, "learning_rate": 5.943978574513366e-06, "loss": 0.3643, "num_tokens": 8926767347.0, "step": 11670 }, { "epoch": 4.275167170468078, "grad_norm": 0.15042923503830605, "learning_rate": 5.942047642910618e-06, "loss": 0.3186, "num_tokens": 8927486932.0, "step": 11671 }, { "epoch": 4.275533571494, "grad_norm": 0.14118027203495312, "learning_rate": 5.940117616085836e-06, "loss": 0.3616, "num_tokens": 8928386498.0, "step": 11672 }, { "epoch": 4.275899972519923, "grad_norm": 0.1549121928353992, "learning_rate": 5.938188494147778e-06, "loss": 0.3796, "num_tokens": 8929145882.0, "step": 11673 }, { "epoch": 4.276266373545846, "grad_norm": 0.15722641866395787, "learning_rate": 5.936260277205123e-06, "loss": 0.3812, "num_tokens": 8929875963.0, "step": 11674 }, { "epoch": 4.276632774571769, "grad_norm": 0.14891936792955404, "learning_rate": 5.934332965366527e-06, "loss": 0.367, "num_tokens": 8930642700.0, "step": 11675 }, { "epoch": 4.276999175597692, "grad_norm": 0.15086161426922953, "learning_rate": 5.932406558740581e-06, "loss": 0.3624, "num_tokens": 8931357152.0, "step": 11676 }, { "epoch": 4.277365576623614, "grad_norm": 0.15113202525203673, "learning_rate": 5.930481057435825e-06, "loss": 0.3794, "num_tokens": 8932121098.0, "step": 11677 }, { "epoch": 4.2777319776495375, "grad_norm": 0.14086180464655376, "learning_rate": 5.928556461560752e-06, "loss": 0.3854, "num_tokens": 8932969038.0, "step": 11678 }, { "epoch": 4.278098378675461, "grad_norm": 0.1675790052699389, "learning_rate": 5.926632771223804e-06, "loss": 0.3613, "num_tokens": 8933604398.0, "step": 11679 }, { "epoch": 4.278464779701383, "grad_norm": 0.13539647629280305, "learning_rate": 5.924709986533368e-06, "loss": 0.3578, "num_tokens": 8934467158.0, "step": 11680 }, { "epoch": 4.278831180727306, "grad_norm": 0.15018538040352086, "learning_rate": 5.922788107597782e-06, "loss": 0.3625, "num_tokens": 8935261499.0, "step": 11681 }, { "epoch": 4.279197581753229, "grad_norm": 0.15606616899592168, "learning_rate": 5.920867134525333e-06, "loss": 0.3599, "num_tokens": 8935978741.0, "step": 11682 }, { "epoch": 4.279563982779152, "grad_norm": 0.15216288996574373, "learning_rate": 5.918947067424264e-06, "loss": 0.3677, "num_tokens": 8936718655.0, "step": 11683 }, { "epoch": 4.279930383805075, "grad_norm": 0.15559492886089496, "learning_rate": 5.9170279064027545e-06, "loss": 0.3656, "num_tokens": 8937465461.0, "step": 11684 }, { "epoch": 4.280296784830997, "grad_norm": 0.14029647780464832, "learning_rate": 5.915109651568936e-06, "loss": 0.3677, "num_tokens": 8938369043.0, "step": 11685 }, { "epoch": 4.280663185856921, "grad_norm": 0.16494073027736833, "learning_rate": 5.9131923030308965e-06, "loss": 0.3574, "num_tokens": 8939021102.0, "step": 11686 }, { "epoch": 4.281029586882843, "grad_norm": 0.14289310135593047, "learning_rate": 5.9112758608966705e-06, "loss": 0.3876, "num_tokens": 8939853921.0, "step": 11687 }, { "epoch": 4.281395987908766, "grad_norm": 0.14403026124915305, "learning_rate": 5.909360325274229e-06, "loss": 0.3447, "num_tokens": 8940639337.0, "step": 11688 }, { "epoch": 4.281762388934689, "grad_norm": 0.14813394552440556, "learning_rate": 5.907445696271513e-06, "loss": 0.3456, "num_tokens": 8941375626.0, "step": 11689 }, { "epoch": 4.282128789960612, "grad_norm": 0.15065212257370617, "learning_rate": 5.905531973996399e-06, "loss": 0.3623, "num_tokens": 8942140494.0, "step": 11690 }, { "epoch": 4.282495190986535, "grad_norm": 0.16934900155048574, "learning_rate": 5.903619158556707e-06, "loss": 0.3817, "num_tokens": 8942827273.0, "step": 11691 }, { "epoch": 4.282861592012457, "grad_norm": 0.1416504605439222, "learning_rate": 5.901707250060221e-06, "loss": 0.3656, "num_tokens": 8943638506.0, "step": 11692 }, { "epoch": 4.2832279930383805, "grad_norm": 0.15674920579544416, "learning_rate": 5.899796248614666e-06, "loss": 0.3341, "num_tokens": 8944342530.0, "step": 11693 }, { "epoch": 4.283594394064304, "grad_norm": 0.15673925173972528, "learning_rate": 5.897886154327716e-06, "loss": 0.3732, "num_tokens": 8945100305.0, "step": 11694 }, { "epoch": 4.283960795090226, "grad_norm": 0.14981164268127461, "learning_rate": 5.8959769673069935e-06, "loss": 0.3701, "num_tokens": 8945872748.0, "step": 11695 }, { "epoch": 4.284327196116149, "grad_norm": 0.16479226148040452, "learning_rate": 5.89406868766007e-06, "loss": 0.3393, "num_tokens": 8946539008.0, "step": 11696 }, { "epoch": 4.284693597142072, "grad_norm": 0.1624980005453143, "learning_rate": 5.892161315494468e-06, "loss": 0.3601, "num_tokens": 8947189409.0, "step": 11697 }, { "epoch": 4.285059998167995, "grad_norm": 0.16025913765521316, "learning_rate": 5.890254850917658e-06, "loss": 0.363, "num_tokens": 8947882625.0, "step": 11698 }, { "epoch": 4.285426399193918, "grad_norm": 0.1355357290987722, "learning_rate": 5.888349294037054e-06, "loss": 0.3605, "num_tokens": 8948759622.0, "step": 11699 }, { "epoch": 4.28579280021984, "grad_norm": 0.14073672715018284, "learning_rate": 5.8864446449600354e-06, "loss": 0.3583, "num_tokens": 8949552401.0, "step": 11700 }, { "epoch": 4.286159201245764, "grad_norm": 0.15267693574015645, "learning_rate": 5.884540903793905e-06, "loss": 0.3626, "num_tokens": 8950247518.0, "step": 11701 }, { "epoch": 4.286525602271686, "grad_norm": 0.1584175603220921, "learning_rate": 5.882638070645936e-06, "loss": 0.3933, "num_tokens": 8951026359.0, "step": 11702 }, { "epoch": 4.286892003297609, "grad_norm": 0.15086518536966828, "learning_rate": 5.880736145623342e-06, "loss": 0.3757, "num_tokens": 8951806569.0, "step": 11703 }, { "epoch": 4.2872584043235324, "grad_norm": 0.17325320071266662, "learning_rate": 5.8788351288332855e-06, "loss": 0.3791, "num_tokens": 8952523273.0, "step": 11704 }, { "epoch": 4.287624805349455, "grad_norm": 0.1610715307930322, "learning_rate": 5.8769350203828775e-06, "loss": 0.3959, "num_tokens": 8953199335.0, "step": 11705 }, { "epoch": 4.287991206375378, "grad_norm": 0.16042130865774853, "learning_rate": 5.8750358203791815e-06, "loss": 0.4091, "num_tokens": 8953884665.0, "step": 11706 }, { "epoch": 4.2883576074013, "grad_norm": 0.1410866694876706, "learning_rate": 5.873137528929202e-06, "loss": 0.3933, "num_tokens": 8954707745.0, "step": 11707 }, { "epoch": 4.288724008427224, "grad_norm": 0.14957515638507193, "learning_rate": 5.871240146139901e-06, "loss": 0.333, "num_tokens": 8955424314.0, "step": 11708 }, { "epoch": 4.289090409453147, "grad_norm": 0.15437444656325952, "learning_rate": 5.869343672118186e-06, "loss": 0.3636, "num_tokens": 8956138268.0, "step": 11709 }, { "epoch": 4.289456810479069, "grad_norm": 0.14247495918793476, "learning_rate": 5.867448106970911e-06, "loss": 0.3529, "num_tokens": 8956952753.0, "step": 11710 }, { "epoch": 4.289823211504992, "grad_norm": 0.13906426788239928, "learning_rate": 5.865553450804882e-06, "loss": 0.3676, "num_tokens": 8957814354.0, "step": 11711 }, { "epoch": 4.290189612530915, "grad_norm": 0.13467865391048486, "learning_rate": 5.8636597037268495e-06, "loss": 0.3509, "num_tokens": 8958688632.0, "step": 11712 }, { "epoch": 4.290556013556838, "grad_norm": 0.1496059398193658, "learning_rate": 5.861766865843525e-06, "loss": 0.3929, "num_tokens": 8959451694.0, "step": 11713 }, { "epoch": 4.290922414582761, "grad_norm": 0.14124385391288144, "learning_rate": 5.859874937261548e-06, "loss": 0.3823, "num_tokens": 8960351671.0, "step": 11714 }, { "epoch": 4.2912888156086835, "grad_norm": 0.16020421897151402, "learning_rate": 5.8579839180875205e-06, "loss": 0.3971, "num_tokens": 8961083765.0, "step": 11715 }, { "epoch": 4.291655216634607, "grad_norm": 0.15229870077207874, "learning_rate": 5.856093808427999e-06, "loss": 0.3897, "num_tokens": 8961860188.0, "step": 11716 }, { "epoch": 4.292021617660529, "grad_norm": 0.16834154651265115, "learning_rate": 5.85420460838948e-06, "loss": 0.3697, "num_tokens": 8962557949.0, "step": 11717 }, { "epoch": 4.292388018686452, "grad_norm": 0.16885414345589028, "learning_rate": 5.852316318078396e-06, "loss": 0.3945, "num_tokens": 8963255368.0, "step": 11718 }, { "epoch": 4.2927544197123755, "grad_norm": 0.15925918800695793, "learning_rate": 5.8504289376011556e-06, "loss": 0.3849, "num_tokens": 8964050979.0, "step": 11719 }, { "epoch": 4.293120820738298, "grad_norm": 0.15076749078421015, "learning_rate": 5.8485424670641e-06, "loss": 0.3642, "num_tokens": 8964809837.0, "step": 11720 }, { "epoch": 4.293487221764221, "grad_norm": 0.15704691217018318, "learning_rate": 5.846656906573518e-06, "loss": 0.3867, "num_tokens": 8965537062.0, "step": 11721 }, { "epoch": 4.293853622790143, "grad_norm": 0.16044533121696408, "learning_rate": 5.844772256235653e-06, "loss": 0.3887, "num_tokens": 8966185848.0, "step": 11722 }, { "epoch": 4.294220023816067, "grad_norm": 0.15213039211085416, "learning_rate": 5.8428885161566925e-06, "loss": 0.3882, "num_tokens": 8966945725.0, "step": 11723 }, { "epoch": 4.29458642484199, "grad_norm": 0.14583841872024866, "learning_rate": 5.841005686442778e-06, "loss": 0.3727, "num_tokens": 8967715579.0, "step": 11724 }, { "epoch": 4.294952825867912, "grad_norm": 0.15489448138388048, "learning_rate": 5.839123767199995e-06, "loss": 0.3592, "num_tokens": 8968471134.0, "step": 11725 }, { "epoch": 4.295319226893835, "grad_norm": 0.15286252485847632, "learning_rate": 5.837242758534377e-06, "loss": 0.3717, "num_tokens": 8969204185.0, "step": 11726 }, { "epoch": 4.295685627919758, "grad_norm": 0.14390849605380554, "learning_rate": 5.835362660551918e-06, "loss": 0.3508, "num_tokens": 8970026624.0, "step": 11727 }, { "epoch": 4.296052028945681, "grad_norm": 0.1707163721730481, "learning_rate": 5.833483473358537e-06, "loss": 0.3963, "num_tokens": 8970712136.0, "step": 11728 }, { "epoch": 4.296418429971604, "grad_norm": 0.1322081872656568, "learning_rate": 5.831605197060126e-06, "loss": 0.3704, "num_tokens": 8971651681.0, "step": 11729 }, { "epoch": 4.2967848309975265, "grad_norm": 0.1703887411658016, "learning_rate": 5.829727831762516e-06, "loss": 0.3781, "num_tokens": 8972301071.0, "step": 11730 }, { "epoch": 4.29715123202345, "grad_norm": 0.14122609231272962, "learning_rate": 5.8278513775714786e-06, "loss": 0.3705, "num_tokens": 8973126821.0, "step": 11731 }, { "epoch": 4.297517633049373, "grad_norm": 0.15521256226479238, "learning_rate": 5.825975834592748e-06, "loss": 0.3609, "num_tokens": 8973828268.0, "step": 11732 }, { "epoch": 4.297884034075295, "grad_norm": 0.1518664805380843, "learning_rate": 5.824101202932e-06, "loss": 0.3856, "num_tokens": 8974603401.0, "step": 11733 }, { "epoch": 4.2982504351012185, "grad_norm": 0.15332024072773667, "learning_rate": 5.8222274826948595e-06, "loss": 0.3707, "num_tokens": 8975356565.0, "step": 11734 }, { "epoch": 4.298616836127141, "grad_norm": 0.14737294910989426, "learning_rate": 5.820354673986899e-06, "loss": 0.3538, "num_tokens": 8976088357.0, "step": 11735 }, { "epoch": 4.298983237153064, "grad_norm": 0.1508088168662736, "learning_rate": 5.818482776913643e-06, "loss": 0.3539, "num_tokens": 8976820818.0, "step": 11736 }, { "epoch": 4.299349638178987, "grad_norm": 0.17389830502241596, "learning_rate": 5.816611791580561e-06, "loss": 0.4036, "num_tokens": 8977490870.0, "step": 11737 }, { "epoch": 4.29971603920491, "grad_norm": 0.15382145557083238, "learning_rate": 5.814741718093076e-06, "loss": 0.3736, "num_tokens": 8978222925.0, "step": 11738 }, { "epoch": 4.300082440230833, "grad_norm": 0.14734497863595544, "learning_rate": 5.812872556556547e-06, "loss": 0.3562, "num_tokens": 8978980345.0, "step": 11739 }, { "epoch": 4.300448841256755, "grad_norm": 0.1421874488828427, "learning_rate": 5.811004307076307e-06, "loss": 0.3943, "num_tokens": 8979813101.0, "step": 11740 }, { "epoch": 4.300815242282678, "grad_norm": 0.16457251928210867, "learning_rate": 5.809136969757607e-06, "loss": 0.3859, "num_tokens": 8980559438.0, "step": 11741 }, { "epoch": 4.301181643308602, "grad_norm": 0.14167341028876815, "learning_rate": 5.807270544705667e-06, "loss": 0.3572, "num_tokens": 8981351099.0, "step": 11742 }, { "epoch": 4.301548044334524, "grad_norm": 0.1450382178990829, "learning_rate": 5.805405032025651e-06, "loss": 0.3365, "num_tokens": 8982177201.0, "step": 11743 }, { "epoch": 4.301914445360447, "grad_norm": 0.16242416303964333, "learning_rate": 5.8035404318226725e-06, "loss": 0.3787, "num_tokens": 8982913679.0, "step": 11744 }, { "epoch": 4.3022808463863695, "grad_norm": 0.17150187939804293, "learning_rate": 5.801676744201783e-06, "loss": 0.3641, "num_tokens": 8983570541.0, "step": 11745 }, { "epoch": 4.302647247412293, "grad_norm": 0.14894254262893614, "learning_rate": 5.799813969268e-06, "loss": 0.3642, "num_tokens": 8984347151.0, "step": 11746 }, { "epoch": 4.303013648438216, "grad_norm": 0.14576335943106114, "learning_rate": 5.797952107126274e-06, "loss": 0.3368, "num_tokens": 8985163821.0, "step": 11747 }, { "epoch": 4.303380049464138, "grad_norm": 0.14936637411898318, "learning_rate": 5.796091157881517e-06, "loss": 0.368, "num_tokens": 8985943243.0, "step": 11748 }, { "epoch": 4.3037464504900615, "grad_norm": 0.15399502708651172, "learning_rate": 5.794231121638581e-06, "loss": 0.3561, "num_tokens": 8986696780.0, "step": 11749 }, { "epoch": 4.304112851515984, "grad_norm": 0.1544522515066741, "learning_rate": 5.792371998502266e-06, "loss": 0.3945, "num_tokens": 8987507202.0, "step": 11750 }, { "epoch": 4.304479252541907, "grad_norm": 0.16104913670341625, "learning_rate": 5.790513788577325e-06, "loss": 0.3343, "num_tokens": 8988220511.0, "step": 11751 }, { "epoch": 4.30484565356783, "grad_norm": 0.14186997948029909, "learning_rate": 5.788656491968461e-06, "loss": 0.3667, "num_tokens": 8989063931.0, "step": 11752 }, { "epoch": 4.305212054593753, "grad_norm": 0.16713324744754937, "learning_rate": 5.786800108780318e-06, "loss": 0.3913, "num_tokens": 8989770871.0, "step": 11753 }, { "epoch": 4.305578455619676, "grad_norm": 0.16033033053917534, "learning_rate": 5.7849446391174955e-06, "loss": 0.3826, "num_tokens": 8990491886.0, "step": 11754 }, { "epoch": 4.305944856645598, "grad_norm": 0.1584990428658376, "learning_rate": 5.78309008308454e-06, "loss": 0.3865, "num_tokens": 8991215430.0, "step": 11755 }, { "epoch": 4.3063112576715215, "grad_norm": 0.139570460180429, "learning_rate": 5.78123644078594e-06, "loss": 0.3566, "num_tokens": 8992052256.0, "step": 11756 }, { "epoch": 4.306677658697445, "grad_norm": 0.17676886816953963, "learning_rate": 5.7793837123261495e-06, "loss": 0.3778, "num_tokens": 8992654913.0, "step": 11757 }, { "epoch": 4.307044059723367, "grad_norm": 0.14160183420210476, "learning_rate": 5.777531897809546e-06, "loss": 0.3497, "num_tokens": 8993534685.0, "step": 11758 }, { "epoch": 4.30741046074929, "grad_norm": 0.1631381475617089, "learning_rate": 5.775680997340479e-06, "loss": 0.363, "num_tokens": 8994245813.0, "step": 11759 }, { "epoch": 4.307776861775213, "grad_norm": 0.14601007241816663, "learning_rate": 5.773831011023235e-06, "loss": 0.358, "num_tokens": 8995057881.0, "step": 11760 }, { "epoch": 4.308143262801136, "grad_norm": 0.15675193925622136, "learning_rate": 5.771981938962048e-06, "loss": 0.3775, "num_tokens": 8995748325.0, "step": 11761 }, { "epoch": 4.308509663827059, "grad_norm": 0.15386620370171383, "learning_rate": 5.770133781261103e-06, "loss": 0.3677, "num_tokens": 8996549395.0, "step": 11762 }, { "epoch": 4.308876064852981, "grad_norm": 0.1658627405324945, "learning_rate": 5.768286538024536e-06, "loss": 0.3791, "num_tokens": 8997375672.0, "step": 11763 }, { "epoch": 4.309242465878905, "grad_norm": 0.15746077367887434, "learning_rate": 5.766440209356426e-06, "loss": 0.3594, "num_tokens": 8998102965.0, "step": 11764 }, { "epoch": 4.309608866904827, "grad_norm": 0.13963712678057538, "learning_rate": 5.764594795360807e-06, "loss": 0.3708, "num_tokens": 8998969829.0, "step": 11765 }, { "epoch": 4.30997526793075, "grad_norm": 0.15233037333719907, "learning_rate": 5.762750296141654e-06, "loss": 0.3743, "num_tokens": 8999719821.0, "step": 11766 }, { "epoch": 4.310341668956673, "grad_norm": 0.1552092012935175, "learning_rate": 5.760906711802901e-06, "loss": 0.3457, "num_tokens": 9000506101.0, "step": 11767 }, { "epoch": 4.310708069982596, "grad_norm": 0.15969565600105542, "learning_rate": 5.759064042448418e-06, "loss": 0.3567, "num_tokens": 9001204374.0, "step": 11768 }, { "epoch": 4.311074471008519, "grad_norm": 0.1461152617884319, "learning_rate": 5.7572222881820275e-06, "loss": 0.3388, "num_tokens": 9001890108.0, "step": 11769 }, { "epoch": 4.311440872034442, "grad_norm": 0.15376703914139844, "learning_rate": 5.755381449107512e-06, "loss": 0.411, "num_tokens": 9002666409.0, "step": 11770 }, { "epoch": 4.3118072730603645, "grad_norm": 0.16354361046392252, "learning_rate": 5.753541525328585e-06, "loss": 0.3568, "num_tokens": 9003309239.0, "step": 11771 }, { "epoch": 4.312173674086288, "grad_norm": 0.18053081407565566, "learning_rate": 5.751702516948913e-06, "loss": 0.3777, "num_tokens": 9003897721.0, "step": 11772 }, { "epoch": 4.31254007511221, "grad_norm": 0.1529673302978222, "learning_rate": 5.749864424072123e-06, "loss": 0.383, "num_tokens": 9004698711.0, "step": 11773 }, { "epoch": 4.312906476138133, "grad_norm": 0.14495105633177546, "learning_rate": 5.748027246801781e-06, "loss": 0.3622, "num_tokens": 9005533388.0, "step": 11774 }, { "epoch": 4.3132728771640565, "grad_norm": 0.1452288591720857, "learning_rate": 5.74619098524139e-06, "loss": 0.3746, "num_tokens": 9006328896.0, "step": 11775 }, { "epoch": 4.313639278189979, "grad_norm": 0.14964352693224367, "learning_rate": 5.744355639494425e-06, "loss": 0.3759, "num_tokens": 9007117369.0, "step": 11776 }, { "epoch": 4.314005679215902, "grad_norm": 0.1574949314905056, "learning_rate": 5.742521209664296e-06, "loss": 0.3691, "num_tokens": 9007815604.0, "step": 11777 }, { "epoch": 4.314372080241824, "grad_norm": 0.1610125313116774, "learning_rate": 5.740687695854361e-06, "loss": 0.3901, "num_tokens": 9008508106.0, "step": 11778 }, { "epoch": 4.314738481267748, "grad_norm": 0.15978499598313875, "learning_rate": 5.738855098167929e-06, "loss": 0.3764, "num_tokens": 9009321499.0, "step": 11779 }, { "epoch": 4.315104882293671, "grad_norm": 0.1411175252489171, "learning_rate": 5.7370234167082565e-06, "loss": 0.3718, "num_tokens": 9010212860.0, "step": 11780 }, { "epoch": 4.315471283319593, "grad_norm": 0.14203570581835362, "learning_rate": 5.735192651578548e-06, "loss": 0.3388, "num_tokens": 9011035057.0, "step": 11781 }, { "epoch": 4.315837684345516, "grad_norm": 0.14497196137431587, "learning_rate": 5.73336280288196e-06, "loss": 0.3685, "num_tokens": 9011856363.0, "step": 11782 }, { "epoch": 4.316204085371439, "grad_norm": 0.1573397254219382, "learning_rate": 5.7315338707215886e-06, "loss": 0.3748, "num_tokens": 9012549942.0, "step": 11783 }, { "epoch": 4.316570486397362, "grad_norm": 0.14160046766826295, "learning_rate": 5.729705855200494e-06, "loss": 0.3535, "num_tokens": 9013384374.0, "step": 11784 }, { "epoch": 4.316936887423285, "grad_norm": 0.16615323316134797, "learning_rate": 5.7278787564216634e-06, "loss": 0.3528, "num_tokens": 9014107508.0, "step": 11785 }, { "epoch": 4.3173032884492075, "grad_norm": 0.15493304670693547, "learning_rate": 5.726052574488055e-06, "loss": 0.3844, "num_tokens": 9014851963.0, "step": 11786 }, { "epoch": 4.317669689475131, "grad_norm": 0.14274980193811304, "learning_rate": 5.724227309502559e-06, "loss": 0.3703, "num_tokens": 9015651596.0, "step": 11787 }, { "epoch": 4.318036090501053, "grad_norm": 0.15061130085403523, "learning_rate": 5.722402961568012e-06, "loss": 0.3852, "num_tokens": 9016451441.0, "step": 11788 }, { "epoch": 4.318402491526976, "grad_norm": 0.15660101338837867, "learning_rate": 5.720579530787219e-06, "loss": 0.3736, "num_tokens": 9017167454.0, "step": 11789 }, { "epoch": 4.3187688925528995, "grad_norm": 0.1549259463765079, "learning_rate": 5.718757017262913e-06, "loss": 0.3459, "num_tokens": 9017877338.0, "step": 11790 }, { "epoch": 4.319135293578822, "grad_norm": 0.1554690886303353, "learning_rate": 5.716935421097782e-06, "loss": 0.3954, "num_tokens": 9018594786.0, "step": 11791 }, { "epoch": 4.319501694604745, "grad_norm": 0.13862585598938518, "learning_rate": 5.715114742394468e-06, "loss": 0.3479, "num_tokens": 9019383505.0, "step": 11792 }, { "epoch": 4.319868095630667, "grad_norm": 0.15535460014648744, "learning_rate": 5.713294981255552e-06, "loss": 0.3857, "num_tokens": 9020078585.0, "step": 11793 }, { "epoch": 4.320234496656591, "grad_norm": 0.14706014891997482, "learning_rate": 5.7114761377835694e-06, "loss": 0.3358, "num_tokens": 9020829649.0, "step": 11794 }, { "epoch": 4.320600897682514, "grad_norm": 0.15006659034565178, "learning_rate": 5.709658212081002e-06, "loss": 0.3516, "num_tokens": 9021601218.0, "step": 11795 }, { "epoch": 4.320967298708436, "grad_norm": 0.15357174078059482, "learning_rate": 5.707841204250274e-06, "loss": 0.385, "num_tokens": 9022389174.0, "step": 11796 }, { "epoch": 4.321333699734359, "grad_norm": 0.15751381773039502, "learning_rate": 5.706025114393778e-06, "loss": 0.3754, "num_tokens": 9023113292.0, "step": 11797 }, { "epoch": 4.321700100760282, "grad_norm": 0.14794555267900958, "learning_rate": 5.704209942613829e-06, "loss": 0.3523, "num_tokens": 9023871495.0, "step": 11798 }, { "epoch": 4.322066501786205, "grad_norm": 0.15213731996829533, "learning_rate": 5.702395689012701e-06, "loss": 0.3542, "num_tokens": 9024608314.0, "step": 11799 }, { "epoch": 4.322432902812128, "grad_norm": 0.14585368037868077, "learning_rate": 5.7005823536926256e-06, "loss": 0.3694, "num_tokens": 9025389726.0, "step": 11800 }, { "epoch": 4.3227993038380506, "grad_norm": 0.16200780624775735, "learning_rate": 5.6987699367557725e-06, "loss": 0.3661, "num_tokens": 9026050443.0, "step": 11801 }, { "epoch": 4.323165704863974, "grad_norm": 0.14738091654073684, "learning_rate": 5.696958438304254e-06, "loss": 0.383, "num_tokens": 9026913031.0, "step": 11802 }, { "epoch": 4.323532105889896, "grad_norm": 0.14495009169993675, "learning_rate": 5.695147858440145e-06, "loss": 0.3421, "num_tokens": 9027738740.0, "step": 11803 }, { "epoch": 4.323898506915819, "grad_norm": 0.1471552099587436, "learning_rate": 5.693338197265461e-06, "loss": 0.3879, "num_tokens": 9028548153.0, "step": 11804 }, { "epoch": 4.324264907941743, "grad_norm": 0.15547512960286478, "learning_rate": 5.691529454882169e-06, "loss": 0.358, "num_tokens": 9029247639.0, "step": 11805 }, { "epoch": 4.324631308967665, "grad_norm": 0.15299454640147953, "learning_rate": 5.689721631392175e-06, "loss": 0.3805, "num_tokens": 9029967951.0, "step": 11806 }, { "epoch": 4.324997709993588, "grad_norm": 0.1668907403489729, "learning_rate": 5.687914726897343e-06, "loss": 0.3274, "num_tokens": 9030592012.0, "step": 11807 }, { "epoch": 4.3253641110195105, "grad_norm": 0.1448959375079899, "learning_rate": 5.686108741499484e-06, "loss": 0.3532, "num_tokens": 9031450397.0, "step": 11808 }, { "epoch": 4.325730512045434, "grad_norm": 0.15078204335568365, "learning_rate": 5.6843036753003535e-06, "loss": 0.3659, "num_tokens": 9032190601.0, "step": 11809 }, { "epoch": 4.326096913071357, "grad_norm": 0.14316597768467765, "learning_rate": 5.682499528401659e-06, "loss": 0.3804, "num_tokens": 9033047531.0, "step": 11810 }, { "epoch": 4.326463314097279, "grad_norm": 0.1464791458298019, "learning_rate": 5.680696300905052e-06, "loss": 0.3584, "num_tokens": 9033819009.0, "step": 11811 }, { "epoch": 4.3268297151232025, "grad_norm": 0.1480324507839501, "learning_rate": 5.678893992912138e-06, "loss": 0.3455, "num_tokens": 9034565841.0, "step": 11812 }, { "epoch": 4.327196116149125, "grad_norm": 0.1532755860790131, "learning_rate": 5.677092604524459e-06, "loss": 0.3573, "num_tokens": 9035308577.0, "step": 11813 }, { "epoch": 4.327562517175048, "grad_norm": 0.1578590496905785, "learning_rate": 5.675292135843524e-06, "loss": 0.3431, "num_tokens": 9036020541.0, "step": 11814 }, { "epoch": 4.327928918200971, "grad_norm": 0.14113910682513556, "learning_rate": 5.6734925869707726e-06, "loss": 0.3581, "num_tokens": 9036870770.0, "step": 11815 }, { "epoch": 4.328295319226894, "grad_norm": 0.15457143519927408, "learning_rate": 5.671693958007603e-06, "loss": 0.3757, "num_tokens": 9037596986.0, "step": 11816 }, { "epoch": 4.328661720252817, "grad_norm": 0.14188128946035297, "learning_rate": 5.669896249055357e-06, "loss": 0.363, "num_tokens": 9038403943.0, "step": 11817 }, { "epoch": 4.329028121278739, "grad_norm": 0.14218677641949307, "learning_rate": 5.668099460215325e-06, "loss": 0.3784, "num_tokens": 9039224455.0, "step": 11818 }, { "epoch": 4.329394522304662, "grad_norm": 0.13264379832213205, "learning_rate": 5.666303591588747e-06, "loss": 0.358, "num_tokens": 9040106378.0, "step": 11819 }, { "epoch": 4.329760923330586, "grad_norm": 0.1498185082138331, "learning_rate": 5.66450864327681e-06, "loss": 0.3868, "num_tokens": 9040882807.0, "step": 11820 }, { "epoch": 4.330127324356508, "grad_norm": 0.15386445503088442, "learning_rate": 5.6627146153806526e-06, "loss": 0.3242, "num_tokens": 9041597581.0, "step": 11821 }, { "epoch": 4.330493725382431, "grad_norm": 0.16676283983313062, "learning_rate": 5.660921508001353e-06, "loss": 0.3983, "num_tokens": 9042236006.0, "step": 11822 }, { "epoch": 4.3308601264083535, "grad_norm": 0.14255682107822956, "learning_rate": 5.659129321239942e-06, "loss": 0.3658, "num_tokens": 9043039864.0, "step": 11823 }, { "epoch": 4.331226527434277, "grad_norm": 0.14500828281552278, "learning_rate": 5.65733805519741e-06, "loss": 0.3445, "num_tokens": 9043865262.0, "step": 11824 }, { "epoch": 4.3315929284602, "grad_norm": 0.1601445971036802, "learning_rate": 5.655547709974676e-06, "loss": 0.3809, "num_tokens": 9044564025.0, "step": 11825 }, { "epoch": 4.331959329486122, "grad_norm": 0.15629235323986035, "learning_rate": 5.6537582856726155e-06, "loss": 0.381, "num_tokens": 9045278212.0, "step": 11826 }, { "epoch": 4.3323257305120455, "grad_norm": 0.14774335712225703, "learning_rate": 5.65196978239206e-06, "loss": 0.3715, "num_tokens": 9046095905.0, "step": 11827 }, { "epoch": 4.332692131537968, "grad_norm": 0.15086121572783953, "learning_rate": 5.650182200233777e-06, "loss": 0.3527, "num_tokens": 9046845389.0, "step": 11828 }, { "epoch": 4.333058532563891, "grad_norm": 0.1477016425512143, "learning_rate": 5.6483955392984826e-06, "loss": 0.3533, "num_tokens": 9047525785.0, "step": 11829 }, { "epoch": 4.333424933589814, "grad_norm": 0.1493266575239156, "learning_rate": 5.6466097996868556e-06, "loss": 0.3443, "num_tokens": 9048324903.0, "step": 11830 }, { "epoch": 4.333791334615737, "grad_norm": 0.14897245769104364, "learning_rate": 5.644824981499506e-06, "loss": 0.389, "num_tokens": 9049126218.0, "step": 11831 }, { "epoch": 4.33415773564166, "grad_norm": 0.14382026904953202, "learning_rate": 5.643041084837e-06, "loss": 0.3579, "num_tokens": 9049916088.0, "step": 11832 }, { "epoch": 4.334524136667583, "grad_norm": 0.1564900556687178, "learning_rate": 5.641258109799851e-06, "loss": 0.361, "num_tokens": 9050595665.0, "step": 11833 }, { "epoch": 4.334890537693505, "grad_norm": 0.14150251805595562, "learning_rate": 5.639476056488519e-06, "loss": 0.3391, "num_tokens": 9051387660.0, "step": 11834 }, { "epoch": 4.335256938719429, "grad_norm": 0.1359852817179788, "learning_rate": 5.637694925003413e-06, "loss": 0.3591, "num_tokens": 9052274594.0, "step": 11835 }, { "epoch": 4.335623339745351, "grad_norm": 0.16036851850062167, "learning_rate": 5.635914715444891e-06, "loss": 0.3555, "num_tokens": 9053023048.0, "step": 11836 }, { "epoch": 4.335989740771274, "grad_norm": 0.1502349649796186, "learning_rate": 5.634135427913257e-06, "loss": 0.3879, "num_tokens": 9053746488.0, "step": 11837 }, { "epoch": 4.336356141797197, "grad_norm": 0.1554680115331758, "learning_rate": 5.632357062508766e-06, "loss": 0.3826, "num_tokens": 9054568266.0, "step": 11838 }, { "epoch": 4.33672254282312, "grad_norm": 0.14933414167766676, "learning_rate": 5.630579619331617e-06, "loss": 0.3823, "num_tokens": 9055377414.0, "step": 11839 }, { "epoch": 4.337088943849043, "grad_norm": 0.1451563393855085, "learning_rate": 5.628803098481956e-06, "loss": 0.3663, "num_tokens": 9056189496.0, "step": 11840 }, { "epoch": 4.337455344874965, "grad_norm": 0.1599881460387221, "learning_rate": 5.627027500059894e-06, "loss": 0.3646, "num_tokens": 9056883605.0, "step": 11841 }, { "epoch": 4.3378217459008885, "grad_norm": 0.1443650531613158, "learning_rate": 5.625252824165458e-06, "loss": 0.3757, "num_tokens": 9057677989.0, "step": 11842 }, { "epoch": 4.338188146926812, "grad_norm": 0.14685193883951977, "learning_rate": 5.623479070898653e-06, "loss": 0.3674, "num_tokens": 9058438840.0, "step": 11843 }, { "epoch": 4.338554547952734, "grad_norm": 0.15547688821652, "learning_rate": 5.62170624035942e-06, "loss": 0.3782, "num_tokens": 9059124833.0, "step": 11844 }, { "epoch": 4.338920948978657, "grad_norm": 0.14527403844875725, "learning_rate": 5.619934332647641e-06, "loss": 0.393, "num_tokens": 9059967219.0, "step": 11845 }, { "epoch": 4.33928735000458, "grad_norm": 0.14822679061620084, "learning_rate": 5.61816334786316e-06, "loss": 0.3852, "num_tokens": 9060826711.0, "step": 11846 }, { "epoch": 4.339653751030503, "grad_norm": 0.1438995476640616, "learning_rate": 5.616393286105758e-06, "loss": 0.3648, "num_tokens": 9061649831.0, "step": 11847 }, { "epoch": 4.340020152056426, "grad_norm": 0.14163371786220977, "learning_rate": 5.614624147475174e-06, "loss": 0.3854, "num_tokens": 9062495890.0, "step": 11848 }, { "epoch": 4.340386553082348, "grad_norm": 0.15650058528371988, "learning_rate": 5.612855932071084e-06, "loss": 0.3851, "num_tokens": 9063251472.0, "step": 11849 }, { "epoch": 4.340752954108272, "grad_norm": 0.15822649025378918, "learning_rate": 5.611088639993116e-06, "loss": 0.3741, "num_tokens": 9063964051.0, "step": 11850 }, { "epoch": 4.341119355134194, "grad_norm": 0.14595187887700067, "learning_rate": 5.609322271340858e-06, "loss": 0.3589, "num_tokens": 9064714476.0, "step": 11851 }, { "epoch": 4.341485756160117, "grad_norm": 0.15568617827350534, "learning_rate": 5.607556826213823e-06, "loss": 0.416, "num_tokens": 9065475401.0, "step": 11852 }, { "epoch": 4.3418521571860405, "grad_norm": 0.15037748929208078, "learning_rate": 5.605792304711485e-06, "loss": 0.3543, "num_tokens": 9066220302.0, "step": 11853 }, { "epoch": 4.342218558211963, "grad_norm": 0.14529013112626224, "learning_rate": 5.604028706933278e-06, "loss": 0.3614, "num_tokens": 9066992676.0, "step": 11854 }, { "epoch": 4.342584959237886, "grad_norm": 0.1598941444310731, "learning_rate": 5.602266032978556e-06, "loss": 0.3658, "num_tokens": 9067760332.0, "step": 11855 }, { "epoch": 4.342951360263808, "grad_norm": 0.14089871208159774, "learning_rate": 5.600504282946639e-06, "loss": 0.3977, "num_tokens": 9068621075.0, "step": 11856 }, { "epoch": 4.343317761289732, "grad_norm": 0.14672697356859257, "learning_rate": 5.598743456936802e-06, "loss": 0.3486, "num_tokens": 9069416678.0, "step": 11857 }, { "epoch": 4.343684162315655, "grad_norm": 0.15507275612841384, "learning_rate": 5.596983555048251e-06, "loss": 0.3779, "num_tokens": 9070204123.0, "step": 11858 }, { "epoch": 4.344050563341577, "grad_norm": 0.1605316894728454, "learning_rate": 5.595224577380141e-06, "loss": 0.3839, "num_tokens": 9070894936.0, "step": 11859 }, { "epoch": 4.3444169643675, "grad_norm": 0.15847020873344028, "learning_rate": 5.593466524031591e-06, "loss": 0.384, "num_tokens": 9071638577.0, "step": 11860 }, { "epoch": 4.344783365393423, "grad_norm": 0.15117684815701443, "learning_rate": 5.591709395101651e-06, "loss": 0.3592, "num_tokens": 9072445526.0, "step": 11861 }, { "epoch": 4.345149766419346, "grad_norm": 0.15502280743266472, "learning_rate": 5.589953190689332e-06, "loss": 0.357, "num_tokens": 9073166514.0, "step": 11862 }, { "epoch": 4.345516167445269, "grad_norm": 0.15234152004494264, "learning_rate": 5.58819791089358e-06, "loss": 0.4063, "num_tokens": 9074007910.0, "step": 11863 }, { "epoch": 4.3458825684711915, "grad_norm": 0.1524551654015603, "learning_rate": 5.586443555813298e-06, "loss": 0.3859, "num_tokens": 9074759671.0, "step": 11864 }, { "epoch": 4.346248969497115, "grad_norm": 0.1497377479208481, "learning_rate": 5.5846901255473355e-06, "loss": 0.3576, "num_tokens": 9075481451.0, "step": 11865 }, { "epoch": 4.346615370523038, "grad_norm": 0.1499868413711719, "learning_rate": 5.5829376201944864e-06, "loss": 0.384, "num_tokens": 9076337175.0, "step": 11866 }, { "epoch": 4.34698177154896, "grad_norm": 0.14183175464444522, "learning_rate": 5.581186039853495e-06, "loss": 0.3409, "num_tokens": 9077084343.0, "step": 11867 }, { "epoch": 4.3473481725748835, "grad_norm": 0.14093633967483246, "learning_rate": 5.579435384623055e-06, "loss": 0.3694, "num_tokens": 9077929531.0, "step": 11868 }, { "epoch": 4.347714573600806, "grad_norm": 0.13921778260254547, "learning_rate": 5.577685654601806e-06, "loss": 0.3562, "num_tokens": 9078747204.0, "step": 11869 }, { "epoch": 4.348080974626729, "grad_norm": 0.14649358376818608, "learning_rate": 5.575936849888334e-06, "loss": 0.3721, "num_tokens": 9079544244.0, "step": 11870 }, { "epoch": 4.348447375652652, "grad_norm": 0.1541071195697, "learning_rate": 5.574188970581181e-06, "loss": 0.3797, "num_tokens": 9080278391.0, "step": 11871 }, { "epoch": 4.348813776678575, "grad_norm": 0.13286225691417672, "learning_rate": 5.572442016778818e-06, "loss": 0.3463, "num_tokens": 9081211500.0, "step": 11872 }, { "epoch": 4.349180177704498, "grad_norm": 0.1373007871242905, "learning_rate": 5.5706959885796875e-06, "loss": 0.365, "num_tokens": 9082064697.0, "step": 11873 }, { "epoch": 4.34954657873042, "grad_norm": 0.14184862902487289, "learning_rate": 5.568950886082165e-06, "loss": 0.3616, "num_tokens": 9082878963.0, "step": 11874 }, { "epoch": 4.349912979756343, "grad_norm": 0.16734624909581564, "learning_rate": 5.567206709384579e-06, "loss": 0.3836, "num_tokens": 9083519353.0, "step": 11875 }, { "epoch": 4.350279380782267, "grad_norm": 0.14112221742883702, "learning_rate": 5.565463458585199e-06, "loss": 0.3618, "num_tokens": 9084324200.0, "step": 11876 }, { "epoch": 4.350645781808189, "grad_norm": 0.15293562021554621, "learning_rate": 5.563721133782254e-06, "loss": 0.3682, "num_tokens": 9085062335.0, "step": 11877 }, { "epoch": 4.351012182834112, "grad_norm": 0.13873856838063583, "learning_rate": 5.5619797350739105e-06, "loss": 0.3698, "num_tokens": 9085919094.0, "step": 11878 }, { "epoch": 4.3513785838600345, "grad_norm": 0.13922146425365864, "learning_rate": 5.560239262558289e-06, "loss": 0.3901, "num_tokens": 9086757416.0, "step": 11879 }, { "epoch": 4.351744984885958, "grad_norm": 0.14712716478480375, "learning_rate": 5.558499716333451e-06, "loss": 0.3583, "num_tokens": 9087518165.0, "step": 11880 }, { "epoch": 4.352111385911881, "grad_norm": 0.15737982774387244, "learning_rate": 5.55676109649742e-06, "loss": 0.3925, "num_tokens": 9088247025.0, "step": 11881 }, { "epoch": 4.352477786937803, "grad_norm": 0.14951709823856005, "learning_rate": 5.55502340314815e-06, "loss": 0.3661, "num_tokens": 9089094927.0, "step": 11882 }, { "epoch": 4.3528441879637265, "grad_norm": 0.13412859630421792, "learning_rate": 5.5532866363835484e-06, "loss": 0.3483, "num_tokens": 9089943696.0, "step": 11883 }, { "epoch": 4.353210588989649, "grad_norm": 0.1475398935257331, "learning_rate": 5.551550796301483e-06, "loss": 0.3697, "num_tokens": 9090688404.0, "step": 11884 }, { "epoch": 4.353576990015572, "grad_norm": 0.15692449060054703, "learning_rate": 5.549815882999749e-06, "loss": 0.3754, "num_tokens": 9091397436.0, "step": 11885 }, { "epoch": 4.353943391041495, "grad_norm": 0.13779022255259732, "learning_rate": 5.548081896576101e-06, "loss": 0.353, "num_tokens": 9092231022.0, "step": 11886 }, { "epoch": 4.354309792067418, "grad_norm": 0.1473180643587141, "learning_rate": 5.546348837128242e-06, "loss": 0.3662, "num_tokens": 9093077095.0, "step": 11887 }, { "epoch": 4.354676193093341, "grad_norm": 0.1405002183457358, "learning_rate": 5.5446167047538244e-06, "loss": 0.3617, "num_tokens": 9093911358.0, "step": 11888 }, { "epoch": 4.355042594119263, "grad_norm": 0.14062708163479193, "learning_rate": 5.542885499550436e-06, "loss": 0.368, "num_tokens": 9094756266.0, "step": 11889 }, { "epoch": 4.355408995145186, "grad_norm": 0.14068736948393545, "learning_rate": 5.541155221615625e-06, "loss": 0.339, "num_tokens": 9095541683.0, "step": 11890 }, { "epoch": 4.35577539617111, "grad_norm": 0.15564645978004196, "learning_rate": 5.539425871046884e-06, "loss": 0.3936, "num_tokens": 9096323062.0, "step": 11891 }, { "epoch": 4.356141797197032, "grad_norm": 0.15135138357197542, "learning_rate": 5.5376974479416526e-06, "loss": 0.3734, "num_tokens": 9097047779.0, "step": 11892 }, { "epoch": 4.356508198222955, "grad_norm": 0.14773709422974296, "learning_rate": 5.535969952397313e-06, "loss": 0.3703, "num_tokens": 9097781072.0, "step": 11893 }, { "epoch": 4.3568745992488775, "grad_norm": 0.1482680939462299, "learning_rate": 5.534243384511207e-06, "loss": 0.3541, "num_tokens": 9098538397.0, "step": 11894 }, { "epoch": 4.357241000274801, "grad_norm": 0.15886680618392876, "learning_rate": 5.532517744380613e-06, "loss": 0.3952, "num_tokens": 9099264587.0, "step": 11895 }, { "epoch": 4.357607401300724, "grad_norm": 0.1641566524441249, "learning_rate": 5.530793032102763e-06, "loss": 0.3736, "num_tokens": 9099865015.0, "step": 11896 }, { "epoch": 4.357973802326646, "grad_norm": 0.15618781996988929, "learning_rate": 5.529069247774829e-06, "loss": 0.3551, "num_tokens": 9100538025.0, "step": 11897 }, { "epoch": 4.3583402033525696, "grad_norm": 0.1509217969124725, "learning_rate": 5.527346391493953e-06, "loss": 0.3277, "num_tokens": 9101265521.0, "step": 11898 }, { "epoch": 4.358706604378492, "grad_norm": 0.16559562990715246, "learning_rate": 5.525624463357189e-06, "loss": 0.3718, "num_tokens": 9101946637.0, "step": 11899 }, { "epoch": 4.359073005404415, "grad_norm": 0.14413478280151498, "learning_rate": 5.523903463461572e-06, "loss": 0.3647, "num_tokens": 9102765373.0, "step": 11900 }, { "epoch": 4.359439406430338, "grad_norm": 0.143713702732701, "learning_rate": 5.522183391904067e-06, "loss": 0.3832, "num_tokens": 9103586981.0, "step": 11901 }, { "epoch": 4.359805807456261, "grad_norm": 0.17618947193673146, "learning_rate": 5.520464248781584e-06, "loss": 0.3778, "num_tokens": 9104213614.0, "step": 11902 }, { "epoch": 4.360172208482184, "grad_norm": 0.1422760313171373, "learning_rate": 5.518746034190997e-06, "loss": 0.3619, "num_tokens": 9105034071.0, "step": 11903 }, { "epoch": 4.360538609508106, "grad_norm": 0.14284848776284562, "learning_rate": 5.517028748229116e-06, "loss": 0.3677, "num_tokens": 9105867050.0, "step": 11904 }, { "epoch": 4.3609050105340295, "grad_norm": 0.15926606871120552, "learning_rate": 5.515312390992695e-06, "loss": 0.3534, "num_tokens": 9106591583.0, "step": 11905 }, { "epoch": 4.361271411559953, "grad_norm": 0.14707740630791993, "learning_rate": 5.513596962578448e-06, "loss": 0.351, "num_tokens": 9107301451.0, "step": 11906 }, { "epoch": 4.361637812585875, "grad_norm": 0.15373664870163428, "learning_rate": 5.511882463083025e-06, "loss": 0.383, "num_tokens": 9108043019.0, "step": 11907 }, { "epoch": 4.362004213611798, "grad_norm": 0.1488270482503779, "learning_rate": 5.510168892603036e-06, "loss": 0.3693, "num_tokens": 9108819167.0, "step": 11908 }, { "epoch": 4.362370614637721, "grad_norm": 0.15186860762441792, "learning_rate": 5.5084562512350225e-06, "loss": 0.3609, "num_tokens": 9109602182.0, "step": 11909 }, { "epoch": 4.362737015663644, "grad_norm": 0.167645584635272, "learning_rate": 5.506744539075483e-06, "loss": 0.3908, "num_tokens": 9110276370.0, "step": 11910 }, { "epoch": 4.363103416689567, "grad_norm": 0.13206461961410587, "learning_rate": 5.505033756220875e-06, "loss": 0.3444, "num_tokens": 9111124230.0, "step": 11911 }, { "epoch": 4.363469817715489, "grad_norm": 0.1666748903237918, "learning_rate": 5.503323902767579e-06, "loss": 0.3672, "num_tokens": 9111816493.0, "step": 11912 }, { "epoch": 4.363836218741413, "grad_norm": 0.17068714334156163, "learning_rate": 5.501614978811934e-06, "loss": 0.3832, "num_tokens": 9112568322.0, "step": 11913 }, { "epoch": 4.364202619767335, "grad_norm": 0.15514378949724736, "learning_rate": 5.499906984450241e-06, "loss": 0.3589, "num_tokens": 9113276111.0, "step": 11914 }, { "epoch": 4.364569020793258, "grad_norm": 0.15035594236105781, "learning_rate": 5.498199919778732e-06, "loss": 0.4052, "num_tokens": 9114076115.0, "step": 11915 }, { "epoch": 4.364935421819181, "grad_norm": 0.15532506897497098, "learning_rate": 5.496493784893583e-06, "loss": 0.3899, "num_tokens": 9114911379.0, "step": 11916 }, { "epoch": 4.365301822845104, "grad_norm": 0.1504986519503502, "learning_rate": 5.494788579890933e-06, "loss": 0.3828, "num_tokens": 9115683694.0, "step": 11917 }, { "epoch": 4.365668223871027, "grad_norm": 0.1513165870208054, "learning_rate": 5.493084304866862e-06, "loss": 0.3609, "num_tokens": 9116410787.0, "step": 11918 }, { "epoch": 4.366034624896949, "grad_norm": 0.15295986225898794, "learning_rate": 5.491380959917391e-06, "loss": 0.3981, "num_tokens": 9117204458.0, "step": 11919 }, { "epoch": 4.3664010259228725, "grad_norm": 0.1538578812277951, "learning_rate": 5.489678545138497e-06, "loss": 0.3522, "num_tokens": 9117950911.0, "step": 11920 }, { "epoch": 4.366767426948796, "grad_norm": 0.149979847993869, "learning_rate": 5.4879770606261005e-06, "loss": 0.3751, "num_tokens": 9118731940.0, "step": 11921 }, { "epoch": 4.367133827974718, "grad_norm": 0.1480975772611784, "learning_rate": 5.486276506476074e-06, "loss": 0.3714, "num_tokens": 9119514676.0, "step": 11922 }, { "epoch": 4.367500229000641, "grad_norm": 0.14915812662178393, "learning_rate": 5.48457688278423e-06, "loss": 0.3715, "num_tokens": 9120377772.0, "step": 11923 }, { "epoch": 4.367866630026564, "grad_norm": 0.1508137123053996, "learning_rate": 5.4828781896463344e-06, "loss": 0.3345, "num_tokens": 9121102819.0, "step": 11924 }, { "epoch": 4.368233031052487, "grad_norm": 0.6229628184285486, "learning_rate": 5.481180427158105e-06, "loss": 0.3974, "num_tokens": 9121908628.0, "step": 11925 }, { "epoch": 4.36859943207841, "grad_norm": 0.15282786922992897, "learning_rate": 5.479483595415192e-06, "loss": 0.3871, "num_tokens": 9122688533.0, "step": 11926 }, { "epoch": 4.368965833104332, "grad_norm": 0.15118711642634106, "learning_rate": 5.477787694513208e-06, "loss": 0.3758, "num_tokens": 9123482561.0, "step": 11927 }, { "epoch": 4.369332234130256, "grad_norm": 0.14747628536148985, "learning_rate": 5.476092724547715e-06, "loss": 0.3752, "num_tokens": 9124262031.0, "step": 11928 }, { "epoch": 4.369698635156179, "grad_norm": 0.1396724136105995, "learning_rate": 5.474398685614195e-06, "loss": 0.3557, "num_tokens": 9125088584.0, "step": 11929 }, { "epoch": 4.370065036182101, "grad_norm": 0.16067611714896254, "learning_rate": 5.472705577808117e-06, "loss": 0.3907, "num_tokens": 9125795293.0, "step": 11930 }, { "epoch": 4.370431437208024, "grad_norm": 0.13295648163432092, "learning_rate": 5.471013401224872e-06, "loss": 0.346, "num_tokens": 9126637087.0, "step": 11931 }, { "epoch": 4.370797838233947, "grad_norm": 0.15868646672504033, "learning_rate": 5.469322155959803e-06, "loss": 0.3507, "num_tokens": 9127295085.0, "step": 11932 }, { "epoch": 4.37116423925987, "grad_norm": 0.8430963792707803, "learning_rate": 5.467631842108205e-06, "loss": 0.3387, "num_tokens": 9128082526.0, "step": 11933 }, { "epoch": 4.371530640285793, "grad_norm": 0.15525285835056576, "learning_rate": 5.465942459765315e-06, "loss": 0.352, "num_tokens": 9128816801.0, "step": 11934 }, { "epoch": 4.3718970413117155, "grad_norm": 0.14080366747914344, "learning_rate": 5.4642540090263265e-06, "loss": 0.3739, "num_tokens": 9129631587.0, "step": 11935 }, { "epoch": 4.372263442337639, "grad_norm": 0.45340243269854, "learning_rate": 5.462566489986366e-06, "loss": 0.3653, "num_tokens": 9130429042.0, "step": 11936 }, { "epoch": 4.372629843363561, "grad_norm": 0.1605283547002475, "learning_rate": 5.460879902740519e-06, "loss": 0.3826, "num_tokens": 9131160656.0, "step": 11937 }, { "epoch": 4.372996244389484, "grad_norm": 0.14631036290399546, "learning_rate": 5.459194247383825e-06, "loss": 0.3537, "num_tokens": 9131943961.0, "step": 11938 }, { "epoch": 4.3733626454154075, "grad_norm": 0.14683977041749754, "learning_rate": 5.457509524011246e-06, "loss": 0.3502, "num_tokens": 9132737005.0, "step": 11939 }, { "epoch": 4.37372904644133, "grad_norm": 0.15771910638688566, "learning_rate": 5.455825732717715e-06, "loss": 0.369, "num_tokens": 9133571536.0, "step": 11940 }, { "epoch": 4.374095447467253, "grad_norm": 0.15606275119450927, "learning_rate": 5.454142873598109e-06, "loss": 0.3485, "num_tokens": 9134310939.0, "step": 11941 }, { "epoch": 4.374461848493175, "grad_norm": 0.16829048712635475, "learning_rate": 5.4524609467472375e-06, "loss": 0.3733, "num_tokens": 9134932355.0, "step": 11942 }, { "epoch": 4.374828249519099, "grad_norm": 0.14315788527625883, "learning_rate": 5.450779952259873e-06, "loss": 0.3781, "num_tokens": 9135790674.0, "step": 11943 }, { "epoch": 4.375194650545022, "grad_norm": 0.14012112023305032, "learning_rate": 5.449099890230732e-06, "loss": 0.3887, "num_tokens": 9136638539.0, "step": 11944 }, { "epoch": 4.375561051570944, "grad_norm": 0.15767868528024662, "learning_rate": 5.447420760754474e-06, "loss": 0.3743, "num_tokens": 9137419025.0, "step": 11945 }, { "epoch": 4.375927452596867, "grad_norm": 0.1494847570165442, "learning_rate": 5.4457425639257136e-06, "loss": 0.3562, "num_tokens": 9138193596.0, "step": 11946 }, { "epoch": 4.37629385362279, "grad_norm": 0.14149761706214292, "learning_rate": 5.444065299839001e-06, "loss": 0.3442, "num_tokens": 9139004796.0, "step": 11947 }, { "epoch": 4.376660254648713, "grad_norm": 0.14911097146384764, "learning_rate": 5.442388968588845e-06, "loss": 0.3746, "num_tokens": 9139722238.0, "step": 11948 }, { "epoch": 4.377026655674636, "grad_norm": 0.13920425159739408, "learning_rate": 5.440713570269699e-06, "loss": 0.3404, "num_tokens": 9140507531.0, "step": 11949 }, { "epoch": 4.377393056700559, "grad_norm": 0.1521517147772522, "learning_rate": 5.4390391049759585e-06, "loss": 0.4103, "num_tokens": 9141334565.0, "step": 11950 }, { "epoch": 4.377759457726482, "grad_norm": 0.15413730072557533, "learning_rate": 5.437365572801975e-06, "loss": 0.3736, "num_tokens": 9142031651.0, "step": 11951 }, { "epoch": 4.378125858752404, "grad_norm": 0.1571977745225044, "learning_rate": 5.435692973842038e-06, "loss": 0.3855, "num_tokens": 9142768262.0, "step": 11952 }, { "epoch": 4.378492259778327, "grad_norm": 0.1494099242818962, "learning_rate": 5.434021308190392e-06, "loss": 0.3665, "num_tokens": 9143508664.0, "step": 11953 }, { "epoch": 4.378858660804251, "grad_norm": 0.15711130789317468, "learning_rate": 5.4323505759412245e-06, "loss": 0.4017, "num_tokens": 9144260330.0, "step": 11954 }, { "epoch": 4.379225061830173, "grad_norm": 0.1530707206514363, "learning_rate": 5.43068077718868e-06, "loss": 0.3546, "num_tokens": 9144943958.0, "step": 11955 }, { "epoch": 4.379591462856096, "grad_norm": 0.1558661551616602, "learning_rate": 5.429011912026829e-06, "loss": 0.3746, "num_tokens": 9145655038.0, "step": 11956 }, { "epoch": 4.3799578638820185, "grad_norm": 0.13998040726042224, "learning_rate": 5.4273439805497155e-06, "loss": 0.3986, "num_tokens": 9146560645.0, "step": 11957 }, { "epoch": 4.380324264907942, "grad_norm": 0.13484362739327788, "learning_rate": 5.425676982851313e-06, "loss": 0.3739, "num_tokens": 9147462329.0, "step": 11958 }, { "epoch": 4.380690665933865, "grad_norm": 0.15439889550412705, "learning_rate": 5.424010919025543e-06, "loss": 0.3954, "num_tokens": 9148203780.0, "step": 11959 }, { "epoch": 4.381057066959787, "grad_norm": 0.155518184426593, "learning_rate": 5.422345789166287e-06, "loss": 0.3495, "num_tokens": 9148941819.0, "step": 11960 }, { "epoch": 4.3814234679857105, "grad_norm": 0.15341391372569593, "learning_rate": 5.420681593367362e-06, "loss": 0.3682, "num_tokens": 9149664748.0, "step": 11961 }, { "epoch": 4.381789869011634, "grad_norm": 0.15044965357915988, "learning_rate": 5.419018331722538e-06, "loss": 0.3807, "num_tokens": 9150396055.0, "step": 11962 }, { "epoch": 4.382156270037556, "grad_norm": 0.15126160437771957, "learning_rate": 5.41735600432553e-06, "loss": 0.363, "num_tokens": 9151116166.0, "step": 11963 }, { "epoch": 4.382522671063479, "grad_norm": 0.1413605719923076, "learning_rate": 5.415694611269998e-06, "loss": 0.3456, "num_tokens": 9151910655.0, "step": 11964 }, { "epoch": 4.382889072089402, "grad_norm": 0.13891427198697903, "learning_rate": 5.41403415264956e-06, "loss": 0.3536, "num_tokens": 9152741530.0, "step": 11965 }, { "epoch": 4.383255473115325, "grad_norm": 0.1749720764813997, "learning_rate": 5.4123746285577686e-06, "loss": 0.3471, "num_tokens": 9153302638.0, "step": 11966 }, { "epoch": 4.383621874141248, "grad_norm": 0.15452253732571328, "learning_rate": 5.410716039088125e-06, "loss": 0.3366, "num_tokens": 9154076537.0, "step": 11967 }, { "epoch": 4.38398827516717, "grad_norm": 0.1448005278192478, "learning_rate": 5.409058384334091e-06, "loss": 0.3765, "num_tokens": 9154864553.0, "step": 11968 }, { "epoch": 4.384354676193094, "grad_norm": 0.15510829621155522, "learning_rate": 5.40740166438906e-06, "loss": 0.3853, "num_tokens": 9155593344.0, "step": 11969 }, { "epoch": 4.384721077219016, "grad_norm": 0.15697253387645776, "learning_rate": 5.405745879346379e-06, "loss": 0.3575, "num_tokens": 9156333286.0, "step": 11970 }, { "epoch": 4.385087478244939, "grad_norm": 0.14168790380159055, "learning_rate": 5.4040910292993466e-06, "loss": 0.3531, "num_tokens": 9157178983.0, "step": 11971 }, { "epoch": 4.385453879270862, "grad_norm": 0.15798801968472123, "learning_rate": 5.402437114341202e-06, "loss": 0.3476, "num_tokens": 9157865528.0, "step": 11972 }, { "epoch": 4.385820280296785, "grad_norm": 0.1520107865689854, "learning_rate": 5.400784134565134e-06, "loss": 0.3287, "num_tokens": 9158510680.0, "step": 11973 }, { "epoch": 4.386186681322708, "grad_norm": 0.16104339546405205, "learning_rate": 5.399132090064278e-06, "loss": 0.3733, "num_tokens": 9159190792.0, "step": 11974 }, { "epoch": 4.38655308234863, "grad_norm": 0.1431501635553128, "learning_rate": 5.397480980931721e-06, "loss": 0.358, "num_tokens": 9160045375.0, "step": 11975 }, { "epoch": 4.3869194833745535, "grad_norm": 0.13642489025486185, "learning_rate": 5.395830807260491e-06, "loss": 0.3607, "num_tokens": 9160900586.0, "step": 11976 }, { "epoch": 4.387285884400477, "grad_norm": 0.15361591350249262, "learning_rate": 5.3941815691435665e-06, "loss": 0.3886, "num_tokens": 9161635446.0, "step": 11977 }, { "epoch": 4.387652285426399, "grad_norm": 0.15472713580520073, "learning_rate": 5.392533266673876e-06, "loss": 0.3821, "num_tokens": 9162362590.0, "step": 11978 }, { "epoch": 4.388018686452322, "grad_norm": 0.1452896467890078, "learning_rate": 5.390885899944289e-06, "loss": 0.3732, "num_tokens": 9163200410.0, "step": 11979 }, { "epoch": 4.388385087478245, "grad_norm": 0.14318216618543642, "learning_rate": 5.389239469047627e-06, "loss": 0.3792, "num_tokens": 9164011120.0, "step": 11980 }, { "epoch": 4.388751488504168, "grad_norm": 0.14369328013621857, "learning_rate": 5.387593974076656e-06, "loss": 0.3531, "num_tokens": 9164833786.0, "step": 11981 }, { "epoch": 4.389117889530091, "grad_norm": 0.15403216685283164, "learning_rate": 5.385949415124097e-06, "loss": 0.3947, "num_tokens": 9165556662.0, "step": 11982 }, { "epoch": 4.389484290556013, "grad_norm": 0.1472293717125052, "learning_rate": 5.384305792282598e-06, "loss": 0.3416, "num_tokens": 9166315048.0, "step": 11983 }, { "epoch": 4.389850691581937, "grad_norm": 0.1378719553936045, "learning_rate": 5.382663105644783e-06, "loss": 0.3488, "num_tokens": 9167192491.0, "step": 11984 }, { "epoch": 4.390217092607859, "grad_norm": 0.15027858191191512, "learning_rate": 5.381021355303205e-06, "loss": 0.3674, "num_tokens": 9168006869.0, "step": 11985 }, { "epoch": 4.390583493633782, "grad_norm": 0.1647925831290778, "learning_rate": 5.379380541350358e-06, "loss": 0.3848, "num_tokens": 9168684910.0, "step": 11986 }, { "epoch": 4.390949894659705, "grad_norm": 0.15203251222041472, "learning_rate": 5.377740663878705e-06, "loss": 0.3861, "num_tokens": 9169485061.0, "step": 11987 }, { "epoch": 4.391316295685628, "grad_norm": 0.1514330867891938, "learning_rate": 5.376101722980639e-06, "loss": 0.3746, "num_tokens": 9170287201.0, "step": 11988 }, { "epoch": 4.391682696711551, "grad_norm": 0.14224073982335428, "learning_rate": 5.374463718748506e-06, "loss": 0.3573, "num_tokens": 9171062852.0, "step": 11989 }, { "epoch": 4.392049097737473, "grad_norm": 0.14222115322935305, "learning_rate": 5.372826651274596e-06, "loss": 0.3542, "num_tokens": 9171824033.0, "step": 11990 }, { "epoch": 4.3924154987633965, "grad_norm": 0.15701300756768272, "learning_rate": 5.3711905206511505e-06, "loss": 0.3625, "num_tokens": 9172563826.0, "step": 11991 }, { "epoch": 4.39278189978932, "grad_norm": 0.1406221378128393, "learning_rate": 5.369555326970359e-06, "loss": 0.3735, "num_tokens": 9173462155.0, "step": 11992 }, { "epoch": 4.393148300815242, "grad_norm": 0.15408580420680928, "learning_rate": 5.367921070324355e-06, "loss": 0.3509, "num_tokens": 9174204510.0, "step": 11993 }, { "epoch": 4.393514701841165, "grad_norm": 0.1636079143229877, "learning_rate": 5.366287750805213e-06, "loss": 0.3775, "num_tokens": 9174877207.0, "step": 11994 }, { "epoch": 4.393881102867088, "grad_norm": 0.1594521337609587, "learning_rate": 5.364655368504974e-06, "loss": 0.3902, "num_tokens": 9175576328.0, "step": 11995 }, { "epoch": 4.394247503893011, "grad_norm": 0.17848422949949158, "learning_rate": 5.363023923515603e-06, "loss": 0.3587, "num_tokens": 9176210827.0, "step": 11996 }, { "epoch": 4.394613904918934, "grad_norm": 0.14974621123113033, "learning_rate": 5.361393415929028e-06, "loss": 0.3474, "num_tokens": 9176936754.0, "step": 11997 }, { "epoch": 4.3949803059448564, "grad_norm": 0.1583169037269033, "learning_rate": 5.35976384583712e-06, "loss": 0.3721, "num_tokens": 9177600356.0, "step": 11998 }, { "epoch": 4.39534670697078, "grad_norm": 0.15362698688084872, "learning_rate": 5.358135213331696e-06, "loss": 0.4028, "num_tokens": 9178341458.0, "step": 11999 }, { "epoch": 4.395713107996702, "grad_norm": 0.15042675646133133, "learning_rate": 5.356507518504514e-06, "loss": 0.3502, "num_tokens": 9179072296.0, "step": 12000 }, { "epoch": 4.396079509022625, "grad_norm": 0.14395495357003826, "learning_rate": 5.354880761447293e-06, "loss": 0.3594, "num_tokens": 9179824582.0, "step": 12001 }, { "epoch": 4.3964459100485485, "grad_norm": 0.1583338307957263, "learning_rate": 5.353254942251691e-06, "loss": 0.3531, "num_tokens": 9180473779.0, "step": 12002 }, { "epoch": 4.396812311074471, "grad_norm": 0.14844120903977182, "learning_rate": 5.351630061009314e-06, "loss": 0.3498, "num_tokens": 9181171791.0, "step": 12003 }, { "epoch": 4.397178712100394, "grad_norm": 0.15756862652472767, "learning_rate": 5.350006117811714e-06, "loss": 0.3477, "num_tokens": 9181859400.0, "step": 12004 }, { "epoch": 4.397545113126316, "grad_norm": 0.16104477787234078, "learning_rate": 5.348383112750388e-06, "loss": 0.3747, "num_tokens": 9182513759.0, "step": 12005 }, { "epoch": 4.39791151415224, "grad_norm": 0.15123862195629112, "learning_rate": 5.34676104591679e-06, "loss": 0.3556, "num_tokens": 9183275664.0, "step": 12006 }, { "epoch": 4.398277915178163, "grad_norm": 0.16500896618481536, "learning_rate": 5.345139917402311e-06, "loss": 0.3795, "num_tokens": 9183951866.0, "step": 12007 }, { "epoch": 4.398644316204085, "grad_norm": 0.14862877700492194, "learning_rate": 5.343519727298294e-06, "loss": 0.3699, "num_tokens": 9184690674.0, "step": 12008 }, { "epoch": 4.399010717230008, "grad_norm": 0.14281756473500037, "learning_rate": 5.341900475696027e-06, "loss": 0.3533, "num_tokens": 9185459138.0, "step": 12009 }, { "epoch": 4.399377118255931, "grad_norm": 0.15245762665747883, "learning_rate": 5.340282162686744e-06, "loss": 0.3705, "num_tokens": 9186215168.0, "step": 12010 }, { "epoch": 4.399743519281854, "grad_norm": 0.13979795630074374, "learning_rate": 5.3386647883616305e-06, "loss": 0.3882, "num_tokens": 9187080346.0, "step": 12011 }, { "epoch": 4.400109920307777, "grad_norm": 0.15565966608650106, "learning_rate": 5.337048352811821e-06, "loss": 0.3537, "num_tokens": 9187864115.0, "step": 12012 }, { "epoch": 4.4004763213336995, "grad_norm": 0.14328919535314694, "learning_rate": 5.335432856128384e-06, "loss": 0.3603, "num_tokens": 9188632376.0, "step": 12013 }, { "epoch": 4.400842722359623, "grad_norm": 0.1405123303014857, "learning_rate": 5.333818298402349e-06, "loss": 0.3588, "num_tokens": 9189412225.0, "step": 12014 }, { "epoch": 4.401209123385545, "grad_norm": 0.16452227440582304, "learning_rate": 5.332204679724691e-06, "loss": 0.3842, "num_tokens": 9190038639.0, "step": 12015 }, { "epoch": 4.401575524411468, "grad_norm": 0.14985792009084803, "learning_rate": 5.330592000186318e-06, "loss": 0.3508, "num_tokens": 9190719767.0, "step": 12016 }, { "epoch": 4.4019419254373915, "grad_norm": 0.15222596573411593, "learning_rate": 5.328980259878103e-06, "loss": 0.3825, "num_tokens": 9191503001.0, "step": 12017 }, { "epoch": 4.402308326463314, "grad_norm": 0.14786876250813094, "learning_rate": 5.32736945889086e-06, "loss": 0.3802, "num_tokens": 9192259802.0, "step": 12018 }, { "epoch": 4.402674727489237, "grad_norm": 0.1587388063147504, "learning_rate": 5.325759597315348e-06, "loss": 0.3989, "num_tokens": 9192993875.0, "step": 12019 }, { "epoch": 4.403041128515159, "grad_norm": 0.16219720001521803, "learning_rate": 5.32415067524227e-06, "loss": 0.3681, "num_tokens": 9193720954.0, "step": 12020 }, { "epoch": 4.403407529541083, "grad_norm": 0.144579576749152, "learning_rate": 5.322542692762281e-06, "loss": 0.3296, "num_tokens": 9194468785.0, "step": 12021 }, { "epoch": 4.403773930567006, "grad_norm": 0.14472158031620094, "learning_rate": 5.320935649965988e-06, "loss": 0.3784, "num_tokens": 9195276054.0, "step": 12022 }, { "epoch": 4.404140331592928, "grad_norm": 0.14509705602520884, "learning_rate": 5.319329546943935e-06, "loss": 0.3492, "num_tokens": 9196054005.0, "step": 12023 }, { "epoch": 4.404506732618851, "grad_norm": 0.16876546653405372, "learning_rate": 5.31772438378661e-06, "loss": 0.37, "num_tokens": 9196707303.0, "step": 12024 }, { "epoch": 4.404873133644775, "grad_norm": 0.14979043049439356, "learning_rate": 5.316120160584469e-06, "loss": 0.3901, "num_tokens": 9197483840.0, "step": 12025 }, { "epoch": 4.405239534670697, "grad_norm": 0.14612256175064603, "learning_rate": 5.314516877427892e-06, "loss": 0.3622, "num_tokens": 9198378647.0, "step": 12026 }, { "epoch": 4.40560593569662, "grad_norm": 0.13562754457540813, "learning_rate": 5.3129145344072145e-06, "loss": 0.3633, "num_tokens": 9199247543.0, "step": 12027 }, { "epoch": 4.4059723367225425, "grad_norm": 0.16264665504462444, "learning_rate": 5.311313131612726e-06, "loss": 0.3595, "num_tokens": 9199910413.0, "step": 12028 }, { "epoch": 4.406338737748466, "grad_norm": 0.14996606009005228, "learning_rate": 5.309712669134653e-06, "loss": 0.3931, "num_tokens": 9200670054.0, "step": 12029 }, { "epoch": 4.406705138774389, "grad_norm": 0.1352239190519887, "learning_rate": 5.308113147063173e-06, "loss": 0.3942, "num_tokens": 9201666408.0, "step": 12030 }, { "epoch": 4.407071539800311, "grad_norm": 0.14879996206088403, "learning_rate": 5.3065145654884095e-06, "loss": 0.3609, "num_tokens": 9202429495.0, "step": 12031 }, { "epoch": 4.4074379408262345, "grad_norm": 0.15557365644128926, "learning_rate": 5.304916924500437e-06, "loss": 0.3529, "num_tokens": 9203120908.0, "step": 12032 }, { "epoch": 4.407804341852157, "grad_norm": 0.1507160352647913, "learning_rate": 5.303320224189273e-06, "loss": 0.3828, "num_tokens": 9203881921.0, "step": 12033 }, { "epoch": 4.40817074287808, "grad_norm": 0.14188672281809042, "learning_rate": 5.301724464644882e-06, "loss": 0.3393, "num_tokens": 9204721707.0, "step": 12034 }, { "epoch": 4.408537143904003, "grad_norm": 0.14433870886485967, "learning_rate": 5.300129645957172e-06, "loss": 0.3698, "num_tokens": 9205573500.0, "step": 12035 }, { "epoch": 4.408903544929926, "grad_norm": 0.14782389172123087, "learning_rate": 5.298535768216009e-06, "loss": 0.3448, "num_tokens": 9206324280.0, "step": 12036 }, { "epoch": 4.409269945955849, "grad_norm": 0.14561414001655548, "learning_rate": 5.296942831511198e-06, "loss": 0.3775, "num_tokens": 9207110756.0, "step": 12037 }, { "epoch": 4.409636346981771, "grad_norm": 0.1382807388487827, "learning_rate": 5.2953508359324865e-06, "loss": 0.3703, "num_tokens": 9207966590.0, "step": 12038 }, { "epoch": 4.410002748007694, "grad_norm": 0.14956074139253953, "learning_rate": 5.293759781569585e-06, "loss": 0.3493, "num_tokens": 9208712880.0, "step": 12039 }, { "epoch": 4.410369149033618, "grad_norm": 0.15133808186683007, "learning_rate": 5.292169668512129e-06, "loss": 0.3529, "num_tokens": 9209469277.0, "step": 12040 }, { "epoch": 4.41073555005954, "grad_norm": 0.1496245772057845, "learning_rate": 5.290580496849724e-06, "loss": 0.3579, "num_tokens": 9210209375.0, "step": 12041 }, { "epoch": 4.411101951085463, "grad_norm": 0.13732143029294905, "learning_rate": 5.2889922666719066e-06, "loss": 0.3515, "num_tokens": 9211042248.0, "step": 12042 }, { "epoch": 4.4114683521113855, "grad_norm": 0.14864657669105555, "learning_rate": 5.2874049780681595e-06, "loss": 0.3422, "num_tokens": 9211776265.0, "step": 12043 }, { "epoch": 4.411834753137309, "grad_norm": 0.14631794254837166, "learning_rate": 5.285818631127924e-06, "loss": 0.3714, "num_tokens": 9212556110.0, "step": 12044 }, { "epoch": 4.412201154163232, "grad_norm": 0.15378857711706553, "learning_rate": 5.284233225940582e-06, "loss": 0.3398, "num_tokens": 9213336427.0, "step": 12045 }, { "epoch": 4.412567555189154, "grad_norm": 0.16693599581006816, "learning_rate": 5.28264876259546e-06, "loss": 0.3571, "num_tokens": 9213983228.0, "step": 12046 }, { "epoch": 4.412933956215078, "grad_norm": 0.1409537055163266, "learning_rate": 5.281065241181834e-06, "loss": 0.3695, "num_tokens": 9214833596.0, "step": 12047 }, { "epoch": 4.413300357241, "grad_norm": 0.16177229026724071, "learning_rate": 5.279482661788929e-06, "loss": 0.3482, "num_tokens": 9215502191.0, "step": 12048 }, { "epoch": 4.413666758266923, "grad_norm": 0.15073953786482563, "learning_rate": 5.2779010245059135e-06, "loss": 0.3816, "num_tokens": 9216286613.0, "step": 12049 }, { "epoch": 4.414033159292846, "grad_norm": 0.15076091585197104, "learning_rate": 5.276320329421902e-06, "loss": 0.3754, "num_tokens": 9216995361.0, "step": 12050 }, { "epoch": 4.414399560318769, "grad_norm": 0.14807796892022168, "learning_rate": 5.274740576625959e-06, "loss": 0.3591, "num_tokens": 9217749389.0, "step": 12051 }, { "epoch": 4.414765961344692, "grad_norm": 0.16148158806373866, "learning_rate": 5.2731617662071014e-06, "loss": 0.3917, "num_tokens": 9218480434.0, "step": 12052 }, { "epoch": 4.415132362370614, "grad_norm": 0.15620161703997823, "learning_rate": 5.271583898254278e-06, "loss": 0.3696, "num_tokens": 9219258138.0, "step": 12053 }, { "epoch": 4.4154987633965375, "grad_norm": 0.14283764388182005, "learning_rate": 5.270006972856394e-06, "loss": 0.3837, "num_tokens": 9220069450.0, "step": 12054 }, { "epoch": 4.415865164422461, "grad_norm": 0.1497403673349771, "learning_rate": 5.268430990102303e-06, "loss": 0.3853, "num_tokens": 9220855474.0, "step": 12055 }, { "epoch": 4.416231565448383, "grad_norm": 0.15407746454399124, "learning_rate": 5.266855950080807e-06, "loss": 0.3773, "num_tokens": 9221587934.0, "step": 12056 }, { "epoch": 4.416597966474306, "grad_norm": 0.15052762706394493, "learning_rate": 5.2652818528806416e-06, "loss": 0.3757, "num_tokens": 9222379097.0, "step": 12057 }, { "epoch": 4.4169643675002295, "grad_norm": 0.1459305591282411, "learning_rate": 5.263708698590506e-06, "loss": 0.381, "num_tokens": 9223197442.0, "step": 12058 }, { "epoch": 4.417330768526152, "grad_norm": 0.146684403003666, "learning_rate": 5.2621364872990355e-06, "loss": 0.3811, "num_tokens": 9224016462.0, "step": 12059 }, { "epoch": 4.417697169552075, "grad_norm": 0.13789656683039042, "learning_rate": 5.260565219094818e-06, "loss": 0.3908, "num_tokens": 9224907724.0, "step": 12060 }, { "epoch": 4.418063570577997, "grad_norm": 0.16755328558757304, "learning_rate": 5.258994894066384e-06, "loss": 0.4016, "num_tokens": 9225526185.0, "step": 12061 }, { "epoch": 4.418429971603921, "grad_norm": 0.15617488054454062, "learning_rate": 5.257425512302213e-06, "loss": 0.3619, "num_tokens": 9226334130.0, "step": 12062 }, { "epoch": 4.418796372629844, "grad_norm": 0.15375904562607948, "learning_rate": 5.255857073890732e-06, "loss": 0.3406, "num_tokens": 9226995144.0, "step": 12063 }, { "epoch": 4.419162773655766, "grad_norm": 0.1668170071566447, "learning_rate": 5.2542895789203105e-06, "loss": 0.4116, "num_tokens": 9227736482.0, "step": 12064 }, { "epoch": 4.419529174681689, "grad_norm": 0.14496901312645705, "learning_rate": 5.252723027479275e-06, "loss": 0.3714, "num_tokens": 9228485855.0, "step": 12065 }, { "epoch": 4.419895575707612, "grad_norm": 0.13983295179769165, "learning_rate": 5.251157419655886e-06, "loss": 0.3594, "num_tokens": 9229345073.0, "step": 12066 }, { "epoch": 4.420261976733535, "grad_norm": 0.15166017875519267, "learning_rate": 5.249592755538358e-06, "loss": 0.3654, "num_tokens": 9230146836.0, "step": 12067 }, { "epoch": 4.420628377759458, "grad_norm": 0.14835510729135754, "learning_rate": 5.248029035214851e-06, "loss": 0.3266, "num_tokens": 9230854780.0, "step": 12068 }, { "epoch": 4.4209947787853805, "grad_norm": 0.16370570209319948, "learning_rate": 5.246466258773481e-06, "loss": 0.3535, "num_tokens": 9231591963.0, "step": 12069 }, { "epoch": 4.421361179811304, "grad_norm": 0.1452157656612784, "learning_rate": 5.244904426302285e-06, "loss": 0.3741, "num_tokens": 9232423405.0, "step": 12070 }, { "epoch": 4.421727580837226, "grad_norm": 0.15696924094967726, "learning_rate": 5.243343537889278e-06, "loss": 0.3697, "num_tokens": 9233200659.0, "step": 12071 }, { "epoch": 4.422093981863149, "grad_norm": 0.14241809433797015, "learning_rate": 5.241783593622406e-06, "loss": 0.369, "num_tokens": 9234039468.0, "step": 12072 }, { "epoch": 4.4224603828890725, "grad_norm": 0.14765121640202825, "learning_rate": 5.240224593589553e-06, "loss": 0.3768, "num_tokens": 9234793198.0, "step": 12073 }, { "epoch": 4.422826783914995, "grad_norm": 0.13909210286301268, "learning_rate": 5.238666537878571e-06, "loss": 0.361, "num_tokens": 9235612668.0, "step": 12074 }, { "epoch": 4.423193184940918, "grad_norm": 0.15418658499905152, "learning_rate": 5.237109426577245e-06, "loss": 0.365, "num_tokens": 9236359916.0, "step": 12075 }, { "epoch": 4.42355958596684, "grad_norm": 0.15682756357497718, "learning_rate": 5.2355532597733075e-06, "loss": 0.3562, "num_tokens": 9237037504.0, "step": 12076 }, { "epoch": 4.423925986992764, "grad_norm": 0.14303078835653885, "learning_rate": 5.2339980375544396e-06, "loss": 0.3487, "num_tokens": 9237803848.0, "step": 12077 }, { "epoch": 4.424292388018687, "grad_norm": 0.1425234718756057, "learning_rate": 5.23244376000827e-06, "loss": 0.3752, "num_tokens": 9238604065.0, "step": 12078 }, { "epoch": 4.424658789044609, "grad_norm": 0.17453395113311562, "learning_rate": 5.230890427222383e-06, "loss": 0.3993, "num_tokens": 9239277679.0, "step": 12079 }, { "epoch": 4.425025190070532, "grad_norm": 0.15531997838253414, "learning_rate": 5.229338039284288e-06, "loss": 0.3873, "num_tokens": 9239977487.0, "step": 12080 }, { "epoch": 4.425391591096455, "grad_norm": 0.14918550376613207, "learning_rate": 5.227786596281453e-06, "loss": 0.3606, "num_tokens": 9240755026.0, "step": 12081 }, { "epoch": 4.425757992122378, "grad_norm": 0.1484421965298629, "learning_rate": 5.226236098301306e-06, "loss": 0.3852, "num_tokens": 9241522014.0, "step": 12082 }, { "epoch": 4.426124393148301, "grad_norm": 0.13510993091607462, "learning_rate": 5.224686545431199e-06, "loss": 0.3423, "num_tokens": 9242395273.0, "step": 12083 }, { "epoch": 4.4264907941742235, "grad_norm": 0.1531190389760783, "learning_rate": 5.22313793775844e-06, "loss": 0.3541, "num_tokens": 9243108658.0, "step": 12084 }, { "epoch": 4.426857195200147, "grad_norm": 0.14519736789673365, "learning_rate": 5.221590275370291e-06, "loss": 0.3576, "num_tokens": 9243883704.0, "step": 12085 }, { "epoch": 4.427223596226069, "grad_norm": 0.16190317866761855, "learning_rate": 5.220043558353951e-06, "loss": 0.3557, "num_tokens": 9244660282.0, "step": 12086 }, { "epoch": 4.427589997251992, "grad_norm": 0.1454215184561152, "learning_rate": 5.218497786796571e-06, "loss": 0.3904, "num_tokens": 9245452297.0, "step": 12087 }, { "epoch": 4.4279563982779155, "grad_norm": 0.14792303011995683, "learning_rate": 5.216952960785242e-06, "loss": 0.3498, "num_tokens": 9246245192.0, "step": 12088 }, { "epoch": 4.428322799303838, "grad_norm": 0.15234741374506194, "learning_rate": 5.215409080407014e-06, "loss": 0.3604, "num_tokens": 9246985858.0, "step": 12089 }, { "epoch": 4.428689200329761, "grad_norm": 0.14062159665923954, "learning_rate": 5.213866145748869e-06, "loss": 0.3591, "num_tokens": 9247823101.0, "step": 12090 }, { "epoch": 4.429055601355683, "grad_norm": 0.18395735426893686, "learning_rate": 5.212324156897746e-06, "loss": 0.4102, "num_tokens": 9248399428.0, "step": 12091 }, { "epoch": 4.429422002381607, "grad_norm": 0.16262879245762604, "learning_rate": 5.210783113940526e-06, "loss": 0.3573, "num_tokens": 9249068823.0, "step": 12092 }, { "epoch": 4.42978840340753, "grad_norm": 0.13972193562815202, "learning_rate": 5.209243016964044e-06, "loss": 0.3381, "num_tokens": 9249871561.0, "step": 12093 }, { "epoch": 4.430154804433452, "grad_norm": 0.1433898020947258, "learning_rate": 5.207703866055065e-06, "loss": 0.3762, "num_tokens": 9250780116.0, "step": 12094 }, { "epoch": 4.4305212054593754, "grad_norm": 0.15871824767263126, "learning_rate": 5.206165661300323e-06, "loss": 0.406, "num_tokens": 9251549640.0, "step": 12095 }, { "epoch": 4.430887606485298, "grad_norm": 0.16028433655237115, "learning_rate": 5.204628402786487e-06, "loss": 0.3732, "num_tokens": 9252232985.0, "step": 12096 }, { "epoch": 4.431254007511221, "grad_norm": 0.1420022631132209, "learning_rate": 5.203092090600164e-06, "loss": 0.3707, "num_tokens": 9253068183.0, "step": 12097 }, { "epoch": 4.431620408537144, "grad_norm": 0.14457849687300373, "learning_rate": 5.2015567248279235e-06, "loss": 0.3594, "num_tokens": 9253880932.0, "step": 12098 }, { "epoch": 4.431986809563067, "grad_norm": 0.13525756091409966, "learning_rate": 5.200022305556275e-06, "loss": 0.3663, "num_tokens": 9254771884.0, "step": 12099 }, { "epoch": 4.43235321058899, "grad_norm": 0.13846099916735338, "learning_rate": 5.1984888328716685e-06, "loss": 0.3886, "num_tokens": 9255646715.0, "step": 12100 }, { "epoch": 4.432719611614912, "grad_norm": 0.1633715601171954, "learning_rate": 5.196956306860517e-06, "loss": 0.3678, "num_tokens": 9256332444.0, "step": 12101 }, { "epoch": 4.433086012640835, "grad_norm": 0.15380740722205405, "learning_rate": 5.1954247276091605e-06, "loss": 0.3716, "num_tokens": 9257116167.0, "step": 12102 }, { "epoch": 4.433452413666759, "grad_norm": 0.15096292065053088, "learning_rate": 5.193894095203901e-06, "loss": 0.357, "num_tokens": 9257839961.0, "step": 12103 }, { "epoch": 4.433818814692681, "grad_norm": 0.14708449474551674, "learning_rate": 5.192364409730979e-06, "loss": 0.3645, "num_tokens": 9258631416.0, "step": 12104 }, { "epoch": 4.434185215718604, "grad_norm": 0.15898764767022508, "learning_rate": 5.190835671276585e-06, "loss": 0.3794, "num_tokens": 9259357674.0, "step": 12105 }, { "epoch": 4.4345516167445265, "grad_norm": 0.1580766041857501, "learning_rate": 5.189307879926854e-06, "loss": 0.3665, "num_tokens": 9260051593.0, "step": 12106 }, { "epoch": 4.43491801777045, "grad_norm": 0.13937539367247576, "learning_rate": 5.187781035767868e-06, "loss": 0.3725, "num_tokens": 9260904713.0, "step": 12107 }, { "epoch": 4.435284418796373, "grad_norm": 0.13409191160313816, "learning_rate": 5.186255138885657e-06, "loss": 0.3643, "num_tokens": 9261767655.0, "step": 12108 }, { "epoch": 4.435650819822295, "grad_norm": 0.13690906154391566, "learning_rate": 5.184730189366201e-06, "loss": 0.3228, "num_tokens": 9262625680.0, "step": 12109 }, { "epoch": 4.4360172208482185, "grad_norm": 0.15449373671016578, "learning_rate": 5.183206187295417e-06, "loss": 0.3823, "num_tokens": 9263341202.0, "step": 12110 }, { "epoch": 4.436383621874141, "grad_norm": 0.15549222880295605, "learning_rate": 5.181683132759172e-06, "loss": 0.3876, "num_tokens": 9264064371.0, "step": 12111 }, { "epoch": 4.436750022900064, "grad_norm": 0.1557225630452819, "learning_rate": 5.180161025843291e-06, "loss": 0.3734, "num_tokens": 9264778258.0, "step": 12112 }, { "epoch": 4.437116423925987, "grad_norm": 0.17090684550776356, "learning_rate": 5.17863986663353e-06, "loss": 0.3802, "num_tokens": 9265392343.0, "step": 12113 }, { "epoch": 4.43748282495191, "grad_norm": 0.14910930991271673, "learning_rate": 5.177119655215599e-06, "loss": 0.3678, "num_tokens": 9266202119.0, "step": 12114 }, { "epoch": 4.437849225977833, "grad_norm": 0.15059303043713745, "learning_rate": 5.175600391675155e-06, "loss": 0.3653, "num_tokens": 9266991030.0, "step": 12115 }, { "epoch": 4.438215627003755, "grad_norm": 0.15320363849741947, "learning_rate": 5.174082076097798e-06, "loss": 0.3951, "num_tokens": 9267727273.0, "step": 12116 }, { "epoch": 4.438582028029678, "grad_norm": 0.1646555697055502, "learning_rate": 5.172564708569079e-06, "loss": 0.3775, "num_tokens": 9268440592.0, "step": 12117 }, { "epoch": 4.438948429055602, "grad_norm": 0.1409202187038197, "learning_rate": 5.1710482891744925e-06, "loss": 0.36, "num_tokens": 9269259546.0, "step": 12118 }, { "epoch": 4.439314830081524, "grad_norm": 0.16104798313215718, "learning_rate": 5.16953281799948e-06, "loss": 0.3797, "num_tokens": 9269956826.0, "step": 12119 }, { "epoch": 4.439681231107447, "grad_norm": 0.1552480591268351, "learning_rate": 5.168018295129432e-06, "loss": 0.3415, "num_tokens": 9270655554.0, "step": 12120 }, { "epoch": 4.4400476321333695, "grad_norm": 0.13900871415348873, "learning_rate": 5.16650472064968e-06, "loss": 0.3435, "num_tokens": 9271459014.0, "step": 12121 }, { "epoch": 4.440414033159293, "grad_norm": 0.16622298089569076, "learning_rate": 5.164992094645508e-06, "loss": 0.3541, "num_tokens": 9272130427.0, "step": 12122 }, { "epoch": 4.440780434185216, "grad_norm": 0.1520405797748504, "learning_rate": 5.163480417202144e-06, "loss": 0.3461, "num_tokens": 9272837901.0, "step": 12123 }, { "epoch": 4.441146835211138, "grad_norm": 0.14859012542730335, "learning_rate": 5.16196968840476e-06, "loss": 0.3665, "num_tokens": 9273629143.0, "step": 12124 }, { "epoch": 4.4415132362370615, "grad_norm": 0.14907579156183906, "learning_rate": 5.1604599083384835e-06, "loss": 0.3451, "num_tokens": 9274363647.0, "step": 12125 }, { "epoch": 4.441879637262985, "grad_norm": 0.14922782044381097, "learning_rate": 5.15895107708838e-06, "loss": 0.3542, "num_tokens": 9275084973.0, "step": 12126 }, { "epoch": 4.442246038288907, "grad_norm": 0.15735918654137793, "learning_rate": 5.157443194739457e-06, "loss": 0.3579, "num_tokens": 9275762470.0, "step": 12127 }, { "epoch": 4.44261243931483, "grad_norm": 0.15562824264637568, "learning_rate": 5.155936261376685e-06, "loss": 0.3499, "num_tokens": 9276430345.0, "step": 12128 }, { "epoch": 4.442978840340753, "grad_norm": 0.15397395716911505, "learning_rate": 5.154430277084971e-06, "loss": 0.3603, "num_tokens": 9277129991.0, "step": 12129 }, { "epoch": 4.443345241366676, "grad_norm": 0.15513387011852006, "learning_rate": 5.152925241949158e-06, "loss": 0.3883, "num_tokens": 9277840048.0, "step": 12130 }, { "epoch": 4.443711642392599, "grad_norm": 0.14889427502893002, "learning_rate": 5.151421156054058e-06, "loss": 0.3719, "num_tokens": 9278605321.0, "step": 12131 }, { "epoch": 4.444078043418521, "grad_norm": 0.1504152893192845, "learning_rate": 5.149918019484415e-06, "loss": 0.3816, "num_tokens": 9279336560.0, "step": 12132 }, { "epoch": 4.444444444444445, "grad_norm": 0.1439453943986617, "learning_rate": 5.148415832324921e-06, "loss": 0.3734, "num_tokens": 9280092623.0, "step": 12133 }, { "epoch": 4.444810845470367, "grad_norm": 0.14257525201484247, "learning_rate": 5.1469145946602165e-06, "loss": 0.3491, "num_tokens": 9280948393.0, "step": 12134 }, { "epoch": 4.44517724649629, "grad_norm": 0.14640090581061757, "learning_rate": 5.145414306574887e-06, "loss": 0.3535, "num_tokens": 9281750137.0, "step": 12135 }, { "epoch": 4.445543647522213, "grad_norm": 0.1463880653250558, "learning_rate": 5.1439149681534714e-06, "loss": 0.386, "num_tokens": 9282579317.0, "step": 12136 }, { "epoch": 4.445910048548136, "grad_norm": 0.15543034950232149, "learning_rate": 5.1424165794804415e-06, "loss": 0.3904, "num_tokens": 9283319173.0, "step": 12137 }, { "epoch": 4.446276449574059, "grad_norm": 0.1424949248859691, "learning_rate": 5.140919140640224e-06, "loss": 0.3958, "num_tokens": 9284167482.0, "step": 12138 }, { "epoch": 4.446642850599981, "grad_norm": 0.1550513784466013, "learning_rate": 5.1394226517171984e-06, "loss": 0.3674, "num_tokens": 9284876012.0, "step": 12139 }, { "epoch": 4.4470092516259045, "grad_norm": 0.1518882256978556, "learning_rate": 5.137927112795678e-06, "loss": 0.3269, "num_tokens": 9285547721.0, "step": 12140 }, { "epoch": 4.447375652651828, "grad_norm": 0.15480422713243872, "learning_rate": 5.136432523959926e-06, "loss": 0.344, "num_tokens": 9286249608.0, "step": 12141 }, { "epoch": 4.44774205367775, "grad_norm": 0.14782872654468254, "learning_rate": 5.134938885294162e-06, "loss": 0.3564, "num_tokens": 9287008160.0, "step": 12142 }, { "epoch": 4.448108454703673, "grad_norm": 0.15862644320009198, "learning_rate": 5.133446196882539e-06, "loss": 0.3511, "num_tokens": 9287670671.0, "step": 12143 }, { "epoch": 4.448474855729596, "grad_norm": 0.1451590893743496, "learning_rate": 5.131954458809162e-06, "loss": 0.3889, "num_tokens": 9288524284.0, "step": 12144 }, { "epoch": 4.448841256755519, "grad_norm": 0.15494007175954796, "learning_rate": 5.130463671158084e-06, "loss": 0.3752, "num_tokens": 9289234921.0, "step": 12145 }, { "epoch": 4.449207657781442, "grad_norm": 0.160203800666223, "learning_rate": 5.128973834013304e-06, "loss": 0.3967, "num_tokens": 9290048729.0, "step": 12146 }, { "epoch": 4.4495740588073645, "grad_norm": 0.15315171480935627, "learning_rate": 5.127484947458762e-06, "loss": 0.3683, "num_tokens": 9290798703.0, "step": 12147 }, { "epoch": 4.449940459833288, "grad_norm": 0.13510988623024442, "learning_rate": 5.125997011578351e-06, "loss": 0.3763, "num_tokens": 9291730334.0, "step": 12148 }, { "epoch": 4.45030686085921, "grad_norm": 0.15743099360941334, "learning_rate": 5.124510026455909e-06, "loss": 0.381, "num_tokens": 9292437714.0, "step": 12149 }, { "epoch": 4.450673261885133, "grad_norm": 0.15356673607933596, "learning_rate": 5.123023992175218e-06, "loss": 0.3913, "num_tokens": 9293150238.0, "step": 12150 }, { "epoch": 4.4510396629110565, "grad_norm": 0.1642207696459477, "learning_rate": 5.121538908820005e-06, "loss": 0.3933, "num_tokens": 9293847449.0, "step": 12151 }, { "epoch": 4.451406063936979, "grad_norm": 0.14701982410120876, "learning_rate": 5.120054776473953e-06, "loss": 0.3976, "num_tokens": 9294672722.0, "step": 12152 }, { "epoch": 4.451772464962902, "grad_norm": 0.1646919665139389, "learning_rate": 5.118571595220683e-06, "loss": 0.3872, "num_tokens": 9295384573.0, "step": 12153 }, { "epoch": 4.452138865988824, "grad_norm": 0.15324883752966414, "learning_rate": 5.117089365143759e-06, "loss": 0.3884, "num_tokens": 9296174446.0, "step": 12154 }, { "epoch": 4.452505267014748, "grad_norm": 0.15780880517862156, "learning_rate": 5.115608086326704e-06, "loss": 0.4031, "num_tokens": 9296935420.0, "step": 12155 }, { "epoch": 4.452871668040671, "grad_norm": 0.15820394904919957, "learning_rate": 5.114127758852976e-06, "loss": 0.3763, "num_tokens": 9297649777.0, "step": 12156 }, { "epoch": 4.453238069066593, "grad_norm": 0.14025300078757041, "learning_rate": 5.112648382805979e-06, "loss": 0.3626, "num_tokens": 9298481626.0, "step": 12157 }, { "epoch": 4.453604470092516, "grad_norm": 0.14811217479960523, "learning_rate": 5.111169958269075e-06, "loss": 0.3956, "num_tokens": 9299286832.0, "step": 12158 }, { "epoch": 4.45397087111844, "grad_norm": 0.15762839368381132, "learning_rate": 5.109692485325564e-06, "loss": 0.3616, "num_tokens": 9300005435.0, "step": 12159 }, { "epoch": 4.454337272144362, "grad_norm": 0.1508213867896557, "learning_rate": 5.108215964058691e-06, "loss": 0.3585, "num_tokens": 9300739544.0, "step": 12160 }, { "epoch": 4.454703673170285, "grad_norm": 0.15505978925503, "learning_rate": 5.10674039455165e-06, "loss": 0.3979, "num_tokens": 9301546967.0, "step": 12161 }, { "epoch": 4.4550700741962075, "grad_norm": 0.16223043849578875, "learning_rate": 5.105265776887583e-06, "loss": 0.3624, "num_tokens": 9302272932.0, "step": 12162 }, { "epoch": 4.455436475222131, "grad_norm": 0.15955391738082741, "learning_rate": 5.103792111149577e-06, "loss": 0.3952, "num_tokens": 9302971530.0, "step": 12163 }, { "epoch": 4.455802876248054, "grad_norm": 0.16136101067089048, "learning_rate": 5.102319397420662e-06, "loss": 0.3692, "num_tokens": 9303711145.0, "step": 12164 }, { "epoch": 4.456169277273976, "grad_norm": 0.13650124895618163, "learning_rate": 5.100847635783818e-06, "loss": 0.3663, "num_tokens": 9304599461.0, "step": 12165 }, { "epoch": 4.4565356782998995, "grad_norm": 0.1553791336361303, "learning_rate": 5.099376826321977e-06, "loss": 0.386, "num_tokens": 9305457378.0, "step": 12166 }, { "epoch": 4.456902079325822, "grad_norm": 0.1404456695769721, "learning_rate": 5.097906969118002e-06, "loss": 0.3659, "num_tokens": 9306294341.0, "step": 12167 }, { "epoch": 4.457268480351745, "grad_norm": 0.15435519380761692, "learning_rate": 5.096438064254713e-06, "loss": 0.3855, "num_tokens": 9307004656.0, "step": 12168 }, { "epoch": 4.457634881377668, "grad_norm": 0.1512220406210516, "learning_rate": 5.094970111814883e-06, "loss": 0.3715, "num_tokens": 9307748682.0, "step": 12169 }, { "epoch": 4.458001282403591, "grad_norm": 0.15144040210182275, "learning_rate": 5.093503111881215e-06, "loss": 0.3832, "num_tokens": 9308498370.0, "step": 12170 }, { "epoch": 4.458367683429514, "grad_norm": 0.1372911425933032, "learning_rate": 5.092037064536368e-06, "loss": 0.3567, "num_tokens": 9309356783.0, "step": 12171 }, { "epoch": 4.458734084455436, "grad_norm": 0.1518607249325814, "learning_rate": 5.090571969862947e-06, "loss": 0.3524, "num_tokens": 9310110577.0, "step": 12172 }, { "epoch": 4.459100485481359, "grad_norm": 0.15751471132516007, "learning_rate": 5.089107827943505e-06, "loss": 0.3622, "num_tokens": 9310874819.0, "step": 12173 }, { "epoch": 4.459466886507283, "grad_norm": 0.14650044659373557, "learning_rate": 5.087644638860532e-06, "loss": 0.3891, "num_tokens": 9311703880.0, "step": 12174 }, { "epoch": 4.459833287533205, "grad_norm": 0.16550019606105465, "learning_rate": 5.086182402696474e-06, "loss": 0.3706, "num_tokens": 9312411823.0, "step": 12175 }, { "epoch": 4.460199688559128, "grad_norm": 0.16639182287630194, "learning_rate": 5.0847211195337195e-06, "loss": 0.391, "num_tokens": 9313079150.0, "step": 12176 }, { "epoch": 4.4605660895850505, "grad_norm": 0.14541069238088555, "learning_rate": 5.083260789454607e-06, "loss": 0.3485, "num_tokens": 9313841827.0, "step": 12177 }, { "epoch": 4.460932490610974, "grad_norm": 0.14937454609596915, "learning_rate": 5.081801412541411e-06, "loss": 0.3597, "num_tokens": 9314588096.0, "step": 12178 }, { "epoch": 4.461298891636897, "grad_norm": 0.14722453493170695, "learning_rate": 5.0803429888763665e-06, "loss": 0.3641, "num_tokens": 9315325313.0, "step": 12179 }, { "epoch": 4.461665292662819, "grad_norm": 0.13852660517972795, "learning_rate": 5.078885518541646e-06, "loss": 0.3537, "num_tokens": 9316167779.0, "step": 12180 }, { "epoch": 4.4620316936887425, "grad_norm": 0.16371427520189688, "learning_rate": 5.077429001619365e-06, "loss": 0.3909, "num_tokens": 9316878993.0, "step": 12181 }, { "epoch": 4.462398094714665, "grad_norm": 0.1439243707152751, "learning_rate": 5.075973438191595e-06, "loss": 0.3686, "num_tokens": 9317690436.0, "step": 12182 }, { "epoch": 4.462764495740588, "grad_norm": 0.1526626287852805, "learning_rate": 5.074518828340353e-06, "loss": 0.3689, "num_tokens": 9318413663.0, "step": 12183 }, { "epoch": 4.463130896766511, "grad_norm": 0.16160428670810953, "learning_rate": 5.073065172147589e-06, "loss": 0.3564, "num_tokens": 9319049292.0, "step": 12184 }, { "epoch": 4.463497297792434, "grad_norm": 0.16791543033764647, "learning_rate": 5.0716124696952125e-06, "loss": 0.3977, "num_tokens": 9319700051.0, "step": 12185 }, { "epoch": 4.463863698818357, "grad_norm": 0.14697374051978085, "learning_rate": 5.070160721065079e-06, "loss": 0.3697, "num_tokens": 9320476245.0, "step": 12186 }, { "epoch": 4.464230099844279, "grad_norm": 0.14865035267084517, "learning_rate": 5.068709926338983e-06, "loss": 0.3728, "num_tokens": 9321285841.0, "step": 12187 }, { "epoch": 4.464596500870202, "grad_norm": 0.15608302451078043, "learning_rate": 5.06726008559867e-06, "loss": 0.3398, "num_tokens": 9321943613.0, "step": 12188 }, { "epoch": 4.464962901896126, "grad_norm": 0.1562816117251467, "learning_rate": 5.06581119892583e-06, "loss": 0.3908, "num_tokens": 9322652344.0, "step": 12189 }, { "epoch": 4.465329302922048, "grad_norm": 0.14488472401979394, "learning_rate": 5.064363266402098e-06, "loss": 0.3678, "num_tokens": 9323454240.0, "step": 12190 }, { "epoch": 4.465695703947971, "grad_norm": 0.13273783275018985, "learning_rate": 5.062916288109061e-06, "loss": 0.3592, "num_tokens": 9324326440.0, "step": 12191 }, { "epoch": 4.4660621049738936, "grad_norm": 0.1317059316863793, "learning_rate": 5.061470264128243e-06, "loss": 0.3342, "num_tokens": 9325183233.0, "step": 12192 }, { "epoch": 4.466428505999817, "grad_norm": 0.16629275713901437, "learning_rate": 5.060025194541129e-06, "loss": 0.3835, "num_tokens": 9325829372.0, "step": 12193 }, { "epoch": 4.46679490702574, "grad_norm": 0.1454101129175694, "learning_rate": 5.058581079429131e-06, "loss": 0.3649, "num_tokens": 9326612905.0, "step": 12194 }, { "epoch": 4.467161308051662, "grad_norm": 0.15324591233331764, "learning_rate": 5.05713791887362e-06, "loss": 0.3592, "num_tokens": 9327345858.0, "step": 12195 }, { "epoch": 4.467527709077586, "grad_norm": 0.14988216665543436, "learning_rate": 5.055695712955915e-06, "loss": 0.3943, "num_tokens": 9328154733.0, "step": 12196 }, { "epoch": 4.467894110103508, "grad_norm": 0.14026715507086224, "learning_rate": 5.054254461757266e-06, "loss": 0.3359, "num_tokens": 9329045940.0, "step": 12197 }, { "epoch": 4.468260511129431, "grad_norm": 0.15042585367541958, "learning_rate": 5.052814165358891e-06, "loss": 0.3686, "num_tokens": 9329792405.0, "step": 12198 }, { "epoch": 4.468626912155354, "grad_norm": 0.14282210023292416, "learning_rate": 5.051374823841938e-06, "loss": 0.3968, "num_tokens": 9330614389.0, "step": 12199 }, { "epoch": 4.468993313181277, "grad_norm": 0.141237511110296, "learning_rate": 5.049936437287504e-06, "loss": 0.3484, "num_tokens": 9331393836.0, "step": 12200 }, { "epoch": 4.4693597142072, "grad_norm": 0.15294375354162054, "learning_rate": 5.048499005776637e-06, "loss": 0.3707, "num_tokens": 9332115440.0, "step": 12201 }, { "epoch": 4.469726115233122, "grad_norm": 0.14212676859037993, "learning_rate": 5.047062529390327e-06, "loss": 0.3671, "num_tokens": 9332959800.0, "step": 12202 }, { "epoch": 4.4700925162590455, "grad_norm": 0.14151044033063184, "learning_rate": 5.045627008209513e-06, "loss": 0.394, "num_tokens": 9333818755.0, "step": 12203 }, { "epoch": 4.470458917284969, "grad_norm": 0.1410418879016936, "learning_rate": 5.044192442315077e-06, "loss": 0.3721, "num_tokens": 9334657853.0, "step": 12204 }, { "epoch": 4.470825318310891, "grad_norm": 0.14520774863884414, "learning_rate": 5.04275883178785e-06, "loss": 0.3618, "num_tokens": 9335413712.0, "step": 12205 }, { "epoch": 4.471191719336814, "grad_norm": 0.1550241394580044, "learning_rate": 5.04132617670861e-06, "loss": 0.3588, "num_tokens": 9336110647.0, "step": 12206 }, { "epoch": 4.471558120362737, "grad_norm": 0.13388958390830824, "learning_rate": 5.0398944771580775e-06, "loss": 0.3537, "num_tokens": 9336991360.0, "step": 12207 }, { "epoch": 4.47192452138866, "grad_norm": 0.14102424304616357, "learning_rate": 5.038463733216915e-06, "loss": 0.3734, "num_tokens": 9337822030.0, "step": 12208 }, { "epoch": 4.472290922414583, "grad_norm": 0.13534104972615907, "learning_rate": 5.037033944965748e-06, "loss": 0.3646, "num_tokens": 9338665938.0, "step": 12209 }, { "epoch": 4.472657323440505, "grad_norm": 0.15341989646137047, "learning_rate": 5.0356051124851335e-06, "loss": 0.3709, "num_tokens": 9339438091.0, "step": 12210 }, { "epoch": 4.473023724466429, "grad_norm": 0.17675243131694623, "learning_rate": 5.034177235855572e-06, "loss": 0.383, "num_tokens": 9340243316.0, "step": 12211 }, { "epoch": 4.473390125492351, "grad_norm": 0.14410600263367937, "learning_rate": 5.032750315157527e-06, "loss": 0.3622, "num_tokens": 9341049337.0, "step": 12212 }, { "epoch": 4.473756526518274, "grad_norm": 0.16067096059330868, "learning_rate": 5.031324350471392e-06, "loss": 0.3669, "num_tokens": 9341723327.0, "step": 12213 }, { "epoch": 4.474122927544197, "grad_norm": 0.15092082669440668, "learning_rate": 5.029899341877507e-06, "loss": 0.3562, "num_tokens": 9342482796.0, "step": 12214 }, { "epoch": 4.47448932857012, "grad_norm": 0.14822846891353297, "learning_rate": 5.028475289456173e-06, "loss": 0.3521, "num_tokens": 9343266441.0, "step": 12215 }, { "epoch": 4.474855729596043, "grad_norm": 0.16222648075511073, "learning_rate": 5.027052193287624e-06, "loss": 0.3885, "num_tokens": 9343960871.0, "step": 12216 }, { "epoch": 4.475222130621965, "grad_norm": 0.13308825291466947, "learning_rate": 5.025630053452042e-06, "loss": 0.3542, "num_tokens": 9344880031.0, "step": 12217 }, { "epoch": 4.4755885316478885, "grad_norm": 0.1405866932614527, "learning_rate": 5.024208870029559e-06, "loss": 0.3636, "num_tokens": 9345708702.0, "step": 12218 }, { "epoch": 4.475954932673812, "grad_norm": 0.16376622836924393, "learning_rate": 5.022788643100251e-06, "loss": 0.4008, "num_tokens": 9346419799.0, "step": 12219 }, { "epoch": 4.476321333699734, "grad_norm": 0.14095268531258598, "learning_rate": 5.021369372744138e-06, "loss": 0.3562, "num_tokens": 9347195403.0, "step": 12220 }, { "epoch": 4.476687734725657, "grad_norm": 0.15676673806597757, "learning_rate": 5.01995105904119e-06, "loss": 0.3699, "num_tokens": 9347923625.0, "step": 12221 }, { "epoch": 4.4770541357515805, "grad_norm": 0.15134257961512423, "learning_rate": 5.018533702071318e-06, "loss": 0.3685, "num_tokens": 9348689656.0, "step": 12222 }, { "epoch": 4.477420536777503, "grad_norm": 0.16033718216539158, "learning_rate": 5.0171173019143905e-06, "loss": 0.3711, "num_tokens": 9349407251.0, "step": 12223 }, { "epoch": 4.477786937803426, "grad_norm": 0.139886325435385, "learning_rate": 5.0157018586502035e-06, "loss": 0.3529, "num_tokens": 9350207019.0, "step": 12224 }, { "epoch": 4.478153338829348, "grad_norm": 0.1681040377068299, "learning_rate": 5.014287372358514e-06, "loss": 0.3899, "num_tokens": 9350854605.0, "step": 12225 }, { "epoch": 4.478519739855272, "grad_norm": 0.15049086417817228, "learning_rate": 5.0128738431190225e-06, "loss": 0.3753, "num_tokens": 9351647040.0, "step": 12226 }, { "epoch": 4.478886140881195, "grad_norm": 0.1458203844725687, "learning_rate": 5.011461271011372e-06, "loss": 0.3339, "num_tokens": 9352412693.0, "step": 12227 }, { "epoch": 4.479252541907117, "grad_norm": 0.15514494780010532, "learning_rate": 5.010049656115155e-06, "loss": 0.363, "num_tokens": 9353234289.0, "step": 12228 }, { "epoch": 4.47961894293304, "grad_norm": 0.17635092368797228, "learning_rate": 5.0086389985099045e-06, "loss": 0.3742, "num_tokens": 9353901324.0, "step": 12229 }, { "epoch": 4.479985343958963, "grad_norm": 0.14922524519591102, "learning_rate": 5.007229298275106e-06, "loss": 0.3647, "num_tokens": 9354698969.0, "step": 12230 }, { "epoch": 4.480351744984886, "grad_norm": 0.1644003580264273, "learning_rate": 5.0058205554901895e-06, "loss": 0.3792, "num_tokens": 9355404261.0, "step": 12231 }, { "epoch": 4.480718146010809, "grad_norm": 0.1452990045790677, "learning_rate": 5.004412770234526e-06, "loss": 0.3567, "num_tokens": 9356228667.0, "step": 12232 }, { "epoch": 4.4810845470367315, "grad_norm": 0.14366229739469863, "learning_rate": 5.0030059425874385e-06, "loss": 0.3656, "num_tokens": 9357010728.0, "step": 12233 }, { "epoch": 4.481450948062655, "grad_norm": 0.14960172653230414, "learning_rate": 5.001600072628197e-06, "loss": 0.3579, "num_tokens": 9357788057.0, "step": 12234 }, { "epoch": 4.481817349088577, "grad_norm": 0.1471904594241344, "learning_rate": 5.000195160436006e-06, "loss": 0.3527, "num_tokens": 9358602732.0, "step": 12235 }, { "epoch": 4.4821837501145, "grad_norm": 0.1430131535103589, "learning_rate": 4.998791206090039e-06, "loss": 0.3761, "num_tokens": 9359433721.0, "step": 12236 }, { "epoch": 4.4825501511404235, "grad_norm": 0.14410549338692896, "learning_rate": 4.997388209669388e-06, "loss": 0.3771, "num_tokens": 9360197090.0, "step": 12237 }, { "epoch": 4.482916552166346, "grad_norm": 0.14783773887084375, "learning_rate": 4.995986171253107e-06, "loss": 0.3793, "num_tokens": 9361001175.0, "step": 12238 }, { "epoch": 4.483282953192269, "grad_norm": 0.14452216635635284, "learning_rate": 4.994585090920198e-06, "loss": 0.3511, "num_tokens": 9361810656.0, "step": 12239 }, { "epoch": 4.483649354218191, "grad_norm": 0.14951737630128004, "learning_rate": 4.993184968749604e-06, "loss": 0.3829, "num_tokens": 9362603661.0, "step": 12240 }, { "epoch": 4.484015755244115, "grad_norm": 0.1359590765508507, "learning_rate": 4.991785804820208e-06, "loss": 0.3377, "num_tokens": 9363468798.0, "step": 12241 }, { "epoch": 4.484382156270038, "grad_norm": 0.14683306654678754, "learning_rate": 4.990387599210851e-06, "loss": 0.353, "num_tokens": 9364249235.0, "step": 12242 }, { "epoch": 4.48474855729596, "grad_norm": 0.1607218220527976, "learning_rate": 4.98899035200031e-06, "loss": 0.3749, "num_tokens": 9364909347.0, "step": 12243 }, { "epoch": 4.4851149583218834, "grad_norm": 0.15457374228173779, "learning_rate": 4.9875940632673165e-06, "loss": 0.352, "num_tokens": 9365607457.0, "step": 12244 }, { "epoch": 4.485481359347806, "grad_norm": 0.15210098126976643, "learning_rate": 4.9861987330905395e-06, "loss": 0.3771, "num_tokens": 9366324119.0, "step": 12245 }, { "epoch": 4.485847760373729, "grad_norm": 0.1364938324394805, "learning_rate": 4.984804361548602e-06, "loss": 0.355, "num_tokens": 9367248232.0, "step": 12246 }, { "epoch": 4.486214161399652, "grad_norm": 0.1635976581709813, "learning_rate": 4.983410948720067e-06, "loss": 0.3816, "num_tokens": 9367955976.0, "step": 12247 }, { "epoch": 4.486580562425575, "grad_norm": 0.1666208759674664, "learning_rate": 4.9820184946834475e-06, "loss": 0.3882, "num_tokens": 9368664592.0, "step": 12248 }, { "epoch": 4.486946963451498, "grad_norm": 0.15770066314727088, "learning_rate": 4.980626999517195e-06, "loss": 0.3491, "num_tokens": 9369375534.0, "step": 12249 }, { "epoch": 4.48731336447742, "grad_norm": 0.1763333012208348, "learning_rate": 4.979236463299724e-06, "loss": 0.4017, "num_tokens": 9370031560.0, "step": 12250 }, { "epoch": 4.487679765503343, "grad_norm": 0.14366185157988554, "learning_rate": 4.977846886109374e-06, "loss": 0.3591, "num_tokens": 9370901626.0, "step": 12251 }, { "epoch": 4.488046166529267, "grad_norm": 0.15210523429844766, "learning_rate": 4.97645826802444e-06, "loss": 0.3659, "num_tokens": 9371645152.0, "step": 12252 }, { "epoch": 4.488412567555189, "grad_norm": 0.15583619769901957, "learning_rate": 4.975070609123172e-06, "loss": 0.3802, "num_tokens": 9372444928.0, "step": 12253 }, { "epoch": 4.488778968581112, "grad_norm": 0.16329086808338256, "learning_rate": 4.973683909483743e-06, "loss": 0.3658, "num_tokens": 9373160808.0, "step": 12254 }, { "epoch": 4.489145369607035, "grad_norm": 0.14603417156946083, "learning_rate": 4.9722981691842985e-06, "loss": 0.3596, "num_tokens": 9373933914.0, "step": 12255 }, { "epoch": 4.489511770632958, "grad_norm": 0.1504737011655144, "learning_rate": 4.9709133883029125e-06, "loss": 0.3375, "num_tokens": 9374656817.0, "step": 12256 }, { "epoch": 4.489878171658881, "grad_norm": 0.1601686050978877, "learning_rate": 4.9695295669176115e-06, "loss": 0.3942, "num_tokens": 9375395584.0, "step": 12257 }, { "epoch": 4.490244572684803, "grad_norm": 0.1572882801212082, "learning_rate": 4.968146705106364e-06, "loss": 0.3629, "num_tokens": 9376114294.0, "step": 12258 }, { "epoch": 4.4906109737107265, "grad_norm": 0.15857716416065515, "learning_rate": 4.9667648029470875e-06, "loss": 0.388, "num_tokens": 9376858500.0, "step": 12259 }, { "epoch": 4.49097737473665, "grad_norm": 0.14759999962165077, "learning_rate": 4.965383860517643e-06, "loss": 0.3858, "num_tokens": 9377716783.0, "step": 12260 }, { "epoch": 4.491343775762572, "grad_norm": 0.14776450993813142, "learning_rate": 4.964003877895843e-06, "loss": 0.3515, "num_tokens": 9378475824.0, "step": 12261 }, { "epoch": 4.491710176788495, "grad_norm": 0.15802385939513644, "learning_rate": 4.96262485515944e-06, "loss": 0.3956, "num_tokens": 9379169821.0, "step": 12262 }, { "epoch": 4.492076577814418, "grad_norm": 0.1367984412544397, "learning_rate": 4.961246792386135e-06, "loss": 0.3701, "num_tokens": 9380054875.0, "step": 12263 }, { "epoch": 4.492442978840341, "grad_norm": 0.15428025396777997, "learning_rate": 4.959869689653572e-06, "loss": 0.3777, "num_tokens": 9380809413.0, "step": 12264 }, { "epoch": 4.492809379866264, "grad_norm": 0.14355211855351363, "learning_rate": 4.958493547039342e-06, "loss": 0.3766, "num_tokens": 9381631714.0, "step": 12265 }, { "epoch": 4.493175780892186, "grad_norm": 0.14819401368504315, "learning_rate": 4.95711836462099e-06, "loss": 0.3834, "num_tokens": 9382412990.0, "step": 12266 }, { "epoch": 4.49354218191811, "grad_norm": 0.16344321705864281, "learning_rate": 4.955744142475997e-06, "loss": 0.3881, "num_tokens": 9383103380.0, "step": 12267 }, { "epoch": 4.493908582944032, "grad_norm": 0.14510126496920978, "learning_rate": 4.954370880681788e-06, "loss": 0.3618, "num_tokens": 9383952503.0, "step": 12268 }, { "epoch": 4.494274983969955, "grad_norm": 0.16150907803586761, "learning_rate": 4.952998579315742e-06, "loss": 0.3727, "num_tokens": 9384651639.0, "step": 12269 }, { "epoch": 4.494641384995878, "grad_norm": 0.14604555428437843, "learning_rate": 4.951627238455187e-06, "loss": 0.3686, "num_tokens": 9385451887.0, "step": 12270 }, { "epoch": 4.495007786021801, "grad_norm": 0.15245507052335275, "learning_rate": 4.950256858177379e-06, "loss": 0.374, "num_tokens": 9386206919.0, "step": 12271 }, { "epoch": 4.495374187047724, "grad_norm": 0.15513813237955543, "learning_rate": 4.94888743855954e-06, "loss": 0.3499, "num_tokens": 9386901636.0, "step": 12272 }, { "epoch": 4.495740588073646, "grad_norm": 0.1632864527351991, "learning_rate": 4.9475189796788274e-06, "loss": 0.3531, "num_tokens": 9387587970.0, "step": 12273 }, { "epoch": 4.4961069890995695, "grad_norm": 0.16931250595228078, "learning_rate": 4.9461514816123445e-06, "loss": 0.3526, "num_tokens": 9388184581.0, "step": 12274 }, { "epoch": 4.496473390125493, "grad_norm": 0.15961213676041885, "learning_rate": 4.9447849444371445e-06, "loss": 0.3501, "num_tokens": 9388831203.0, "step": 12275 }, { "epoch": 4.496839791151415, "grad_norm": 0.14235739214839757, "learning_rate": 4.9434193682302235e-06, "loss": 0.3521, "num_tokens": 9389679088.0, "step": 12276 }, { "epoch": 4.497206192177338, "grad_norm": 0.1624787393029558, "learning_rate": 4.9420547530685234e-06, "loss": 0.3959, "num_tokens": 9390399974.0, "step": 12277 }, { "epoch": 4.497572593203261, "grad_norm": 0.15795884596499254, "learning_rate": 4.940691099028934e-06, "loss": 0.3822, "num_tokens": 9391078513.0, "step": 12278 }, { "epoch": 4.497938994229184, "grad_norm": 0.15118697531143246, "learning_rate": 4.939328406188287e-06, "loss": 0.3449, "num_tokens": 9391845786.0, "step": 12279 }, { "epoch": 4.498305395255107, "grad_norm": 0.1465444275033739, "learning_rate": 4.93796667462337e-06, "loss": 0.3525, "num_tokens": 9392566143.0, "step": 12280 }, { "epoch": 4.498671796281029, "grad_norm": 0.15347473541856185, "learning_rate": 4.936605904410901e-06, "loss": 0.3561, "num_tokens": 9393269421.0, "step": 12281 }, { "epoch": 4.499038197306953, "grad_norm": 0.13493999825805475, "learning_rate": 4.935246095627553e-06, "loss": 0.3447, "num_tokens": 9394213745.0, "step": 12282 }, { "epoch": 4.499404598332875, "grad_norm": 0.14628371914845284, "learning_rate": 4.933887248349948e-06, "loss": 0.371, "num_tokens": 9395038251.0, "step": 12283 }, { "epoch": 4.499770999358798, "grad_norm": 0.14643539275663073, "learning_rate": 4.932529362654648e-06, "loss": 0.3771, "num_tokens": 9395809190.0, "step": 12284 }, { "epoch": 4.500137400384721, "grad_norm": 0.15798757492860158, "learning_rate": 4.931172438618162e-06, "loss": 0.339, "num_tokens": 9396524252.0, "step": 12285 }, { "epoch": 4.500503801410644, "grad_norm": 0.15115194371814109, "learning_rate": 4.929816476316945e-06, "loss": 0.3636, "num_tokens": 9397265197.0, "step": 12286 }, { "epoch": 4.500870202436567, "grad_norm": 0.14942188146209437, "learning_rate": 4.928461475827398e-06, "loss": 0.3445, "num_tokens": 9398013781.0, "step": 12287 }, { "epoch": 4.501236603462489, "grad_norm": 0.14868179149778127, "learning_rate": 4.9271074372258685e-06, "loss": 0.3774, "num_tokens": 9398762056.0, "step": 12288 }, { "epoch": 4.5016030044884126, "grad_norm": 0.13753102103064585, "learning_rate": 4.925754360588651e-06, "loss": 0.3572, "num_tokens": 9399611823.0, "step": 12289 }, { "epoch": 4.501969405514336, "grad_norm": 0.14600147548745365, "learning_rate": 4.9244022459919775e-06, "loss": 0.3782, "num_tokens": 9400441182.0, "step": 12290 }, { "epoch": 4.502335806540258, "grad_norm": 0.14825572882478788, "learning_rate": 4.92305109351204e-06, "loss": 0.3538, "num_tokens": 9401174022.0, "step": 12291 }, { "epoch": 4.502702207566181, "grad_norm": 0.14000178362244717, "learning_rate": 4.921700903224962e-06, "loss": 0.3594, "num_tokens": 9401998504.0, "step": 12292 }, { "epoch": 4.503068608592104, "grad_norm": 0.1562388923449026, "learning_rate": 4.920351675206828e-06, "loss": 0.3949, "num_tokens": 9402707056.0, "step": 12293 }, { "epoch": 4.503435009618027, "grad_norm": 0.15251602206414988, "learning_rate": 4.919003409533652e-06, "loss": 0.3562, "num_tokens": 9403388851.0, "step": 12294 }, { "epoch": 4.50380141064395, "grad_norm": 0.1440132685431696, "learning_rate": 4.9176561062814e-06, "loss": 0.3632, "num_tokens": 9404219597.0, "step": 12295 }, { "epoch": 4.5041678116698725, "grad_norm": 0.16050137522761615, "learning_rate": 4.91630976552599e-06, "loss": 0.3683, "num_tokens": 9404943526.0, "step": 12296 }, { "epoch": 4.504534212695796, "grad_norm": 0.1539607584046287, "learning_rate": 4.914964387343284e-06, "loss": 0.3733, "num_tokens": 9405641410.0, "step": 12297 }, { "epoch": 4.504900613721718, "grad_norm": 0.14691849039668914, "learning_rate": 4.913619971809077e-06, "loss": 0.3754, "num_tokens": 9406411570.0, "step": 12298 }, { "epoch": 4.505267014747641, "grad_norm": 0.14189506780229352, "learning_rate": 4.912276518999128e-06, "loss": 0.3603, "num_tokens": 9407275700.0, "step": 12299 }, { "epoch": 4.5056334157735645, "grad_norm": 0.14781272415640334, "learning_rate": 4.910934028989131e-06, "loss": 0.351, "num_tokens": 9408062486.0, "step": 12300 }, { "epoch": 4.505999816799487, "grad_norm": 0.17222849295201087, "learning_rate": 4.909592501854726e-06, "loss": 0.4198, "num_tokens": 9408765601.0, "step": 12301 }, { "epoch": 4.50636621782541, "grad_norm": 0.15601069209440582, "learning_rate": 4.908251937671503e-06, "loss": 0.3412, "num_tokens": 9409450391.0, "step": 12302 }, { "epoch": 4.506732618851332, "grad_norm": 0.15379286684220575, "learning_rate": 4.906912336514992e-06, "loss": 0.3726, "num_tokens": 9410214763.0, "step": 12303 }, { "epoch": 4.507099019877256, "grad_norm": 0.17812089269263534, "learning_rate": 4.905573698460675e-06, "loss": 0.3795, "num_tokens": 9410881620.0, "step": 12304 }, { "epoch": 4.507465420903179, "grad_norm": 0.1528074640791648, "learning_rate": 4.904236023583979e-06, "loss": 0.379, "num_tokens": 9411639949.0, "step": 12305 }, { "epoch": 4.507831821929101, "grad_norm": 0.13991554958641786, "learning_rate": 4.9028993119602675e-06, "loss": 0.3573, "num_tokens": 9412510516.0, "step": 12306 }, { "epoch": 4.508198222955024, "grad_norm": 0.14320113711670432, "learning_rate": 4.901563563664869e-06, "loss": 0.3411, "num_tokens": 9413335566.0, "step": 12307 }, { "epoch": 4.508564623980947, "grad_norm": 0.14301248537510436, "learning_rate": 4.900228778773034e-06, "loss": 0.3646, "num_tokens": 9414140488.0, "step": 12308 }, { "epoch": 4.50893102500687, "grad_norm": 0.14622888714297266, "learning_rate": 4.898894957359973e-06, "loss": 0.3673, "num_tokens": 9414908848.0, "step": 12309 }, { "epoch": 4.509297426032793, "grad_norm": 0.14666301544944157, "learning_rate": 4.897562099500844e-06, "loss": 0.3741, "num_tokens": 9415703146.0, "step": 12310 }, { "epoch": 4.5096638270587155, "grad_norm": 0.14612490883698548, "learning_rate": 4.896230205270739e-06, "loss": 0.3662, "num_tokens": 9416498801.0, "step": 12311 }, { "epoch": 4.510030228084639, "grad_norm": 0.14327073548719244, "learning_rate": 4.894899274744712e-06, "loss": 0.3673, "num_tokens": 9417283512.0, "step": 12312 }, { "epoch": 4.510396629110561, "grad_norm": 0.17186117774236334, "learning_rate": 4.893569307997748e-06, "loss": 0.3943, "num_tokens": 9417976628.0, "step": 12313 }, { "epoch": 4.510763030136484, "grad_norm": 0.14641050448771678, "learning_rate": 4.892240305104784e-06, "loss": 0.3788, "num_tokens": 9418840531.0, "step": 12314 }, { "epoch": 4.5111294311624075, "grad_norm": 0.15244066317384092, "learning_rate": 4.890912266140703e-06, "loss": 0.372, "num_tokens": 9419651385.0, "step": 12315 }, { "epoch": 4.51149583218833, "grad_norm": 0.13772859419644987, "learning_rate": 4.889585191180331e-06, "loss": 0.3698, "num_tokens": 9420500844.0, "step": 12316 }, { "epoch": 4.511862233214253, "grad_norm": 0.1579553650766781, "learning_rate": 4.888259080298444e-06, "loss": 0.389, "num_tokens": 9421195089.0, "step": 12317 }, { "epoch": 4.512228634240175, "grad_norm": 0.14418222465458932, "learning_rate": 4.88693393356976e-06, "loss": 0.3632, "num_tokens": 9422011233.0, "step": 12318 }, { "epoch": 4.512595035266099, "grad_norm": 0.1436378803874224, "learning_rate": 4.885609751068939e-06, "loss": 0.3976, "num_tokens": 9422838706.0, "step": 12319 }, { "epoch": 4.512961436292022, "grad_norm": 0.1499427684420391, "learning_rate": 4.884286532870603e-06, "loss": 0.3925, "num_tokens": 9423629854.0, "step": 12320 }, { "epoch": 4.513327837317944, "grad_norm": 0.14014848521460782, "learning_rate": 4.882964279049298e-06, "loss": 0.3478, "num_tokens": 9424472957.0, "step": 12321 }, { "epoch": 4.513694238343867, "grad_norm": 0.14467398437020568, "learning_rate": 4.881642989679527e-06, "loss": 0.3303, "num_tokens": 9425211104.0, "step": 12322 }, { "epoch": 4.51406063936979, "grad_norm": 0.16606684930684204, "learning_rate": 4.8803226648357426e-06, "loss": 0.3647, "num_tokens": 9425846682.0, "step": 12323 }, { "epoch": 4.514427040395713, "grad_norm": 0.1390087167100911, "learning_rate": 4.8790033045923356e-06, "loss": 0.3434, "num_tokens": 9426656140.0, "step": 12324 }, { "epoch": 4.514793441421636, "grad_norm": 0.16030157079540552, "learning_rate": 4.877684909023639e-06, "loss": 0.4176, "num_tokens": 9427390969.0, "step": 12325 }, { "epoch": 4.5151598424475585, "grad_norm": 0.16732351371381163, "learning_rate": 4.876367478203945e-06, "loss": 0.3771, "num_tokens": 9428072981.0, "step": 12326 }, { "epoch": 4.515526243473482, "grad_norm": 0.14874602739403114, "learning_rate": 4.8750510122074814e-06, "loss": 0.3672, "num_tokens": 9428847614.0, "step": 12327 }, { "epoch": 4.515892644499405, "grad_norm": 0.147472446262228, "learning_rate": 4.87373551110842e-06, "loss": 0.3338, "num_tokens": 9429622825.0, "step": 12328 }, { "epoch": 4.516259045525327, "grad_norm": 0.14611789176425438, "learning_rate": 4.872420974980889e-06, "loss": 0.3651, "num_tokens": 9430455815.0, "step": 12329 }, { "epoch": 4.5166254465512505, "grad_norm": 0.14337570523198453, "learning_rate": 4.871107403898948e-06, "loss": 0.3644, "num_tokens": 9431304537.0, "step": 12330 }, { "epoch": 4.516991847577173, "grad_norm": 0.12873744361319403, "learning_rate": 4.869794797936615e-06, "loss": 0.3382, "num_tokens": 9432205734.0, "step": 12331 }, { "epoch": 4.517358248603096, "grad_norm": 0.1636395766095345, "learning_rate": 4.868483157167846e-06, "loss": 0.3779, "num_tokens": 9432879288.0, "step": 12332 }, { "epoch": 4.517724649629019, "grad_norm": 0.15289773190677838, "learning_rate": 4.8671724816665465e-06, "loss": 0.3776, "num_tokens": 9433618023.0, "step": 12333 }, { "epoch": 4.518091050654942, "grad_norm": 0.16089610250684405, "learning_rate": 4.865862771506564e-06, "loss": 0.3824, "num_tokens": 9434298163.0, "step": 12334 }, { "epoch": 4.518457451680865, "grad_norm": 0.16649186919515316, "learning_rate": 4.864554026761693e-06, "loss": 0.3584, "num_tokens": 9434986180.0, "step": 12335 }, { "epoch": 4.518823852706787, "grad_norm": 0.15399223088613467, "learning_rate": 4.863246247505673e-06, "loss": 0.3779, "num_tokens": 9435781155.0, "step": 12336 }, { "epoch": 4.51919025373271, "grad_norm": 0.15343165257575406, "learning_rate": 4.861939433812197e-06, "loss": 0.4081, "num_tokens": 9436532261.0, "step": 12337 }, { "epoch": 4.519556654758634, "grad_norm": 0.14718101769002576, "learning_rate": 4.860633585754888e-06, "loss": 0.3659, "num_tokens": 9437338102.0, "step": 12338 }, { "epoch": 4.519923055784556, "grad_norm": 0.14442402505127272, "learning_rate": 4.8593287034073295e-06, "loss": 0.3534, "num_tokens": 9438095335.0, "step": 12339 }, { "epoch": 4.520289456810479, "grad_norm": 0.1507211501250341, "learning_rate": 4.8580247868430415e-06, "loss": 0.3608, "num_tokens": 9438812266.0, "step": 12340 }, { "epoch": 4.520655857836402, "grad_norm": 0.14387050749109126, "learning_rate": 4.856721836135497e-06, "loss": 0.3714, "num_tokens": 9439573099.0, "step": 12341 }, { "epoch": 4.521022258862325, "grad_norm": 0.1343375354314589, "learning_rate": 4.855419851358104e-06, "loss": 0.348, "num_tokens": 9440460950.0, "step": 12342 }, { "epoch": 4.521388659888248, "grad_norm": 0.14321355429189225, "learning_rate": 4.854118832584226e-06, "loss": 0.3497, "num_tokens": 9441283001.0, "step": 12343 }, { "epoch": 4.52175506091417, "grad_norm": 0.16981424393710573, "learning_rate": 4.852818779887167e-06, "loss": 0.3751, "num_tokens": 9441990833.0, "step": 12344 }, { "epoch": 4.522121461940094, "grad_norm": 0.1479418012905814, "learning_rate": 4.85151969334018e-06, "loss": 0.3889, "num_tokens": 9442802666.0, "step": 12345 }, { "epoch": 4.522487862966017, "grad_norm": 0.14287832888329108, "learning_rate": 4.850221573016458e-06, "loss": 0.3813, "num_tokens": 9443623240.0, "step": 12346 }, { "epoch": 4.522854263991939, "grad_norm": 0.16190073713831452, "learning_rate": 4.848924418989145e-06, "loss": 0.3803, "num_tokens": 9444321333.0, "step": 12347 }, { "epoch": 4.523220665017862, "grad_norm": 0.16455312525779034, "learning_rate": 4.847628231331328e-06, "loss": 0.3967, "num_tokens": 9445027331.0, "step": 12348 }, { "epoch": 4.523587066043785, "grad_norm": 0.1576514596404308, "learning_rate": 4.846333010116037e-06, "loss": 0.3578, "num_tokens": 9445683434.0, "step": 12349 }, { "epoch": 4.523953467069708, "grad_norm": 0.14362247056127006, "learning_rate": 4.845038755416261e-06, "loss": 0.3478, "num_tokens": 9446450515.0, "step": 12350 }, { "epoch": 4.524319868095631, "grad_norm": 0.13576655465116125, "learning_rate": 4.8437454673049155e-06, "loss": 0.3503, "num_tokens": 9447301340.0, "step": 12351 }, { "epoch": 4.5246862691215535, "grad_norm": 0.14325280481181446, "learning_rate": 4.842453145854867e-06, "loss": 0.3702, "num_tokens": 9448101762.0, "step": 12352 }, { "epoch": 4.525052670147477, "grad_norm": 0.14030149870212275, "learning_rate": 4.841161791138938e-06, "loss": 0.3736, "num_tokens": 9448986060.0, "step": 12353 }, { "epoch": 4.525419071173399, "grad_norm": 0.14384887232967677, "learning_rate": 4.839871403229891e-06, "loss": 0.3754, "num_tokens": 9449759109.0, "step": 12354 }, { "epoch": 4.525785472199322, "grad_norm": 0.15053248956160262, "learning_rate": 4.838581982200423e-06, "loss": 0.3815, "num_tokens": 9450574309.0, "step": 12355 }, { "epoch": 4.5261518732252455, "grad_norm": 0.14828185739472186, "learning_rate": 4.837293528123192e-06, "loss": 0.3826, "num_tokens": 9451372968.0, "step": 12356 }, { "epoch": 4.526518274251168, "grad_norm": 0.14892090690492674, "learning_rate": 4.836006041070798e-06, "loss": 0.3581, "num_tokens": 9452180408.0, "step": 12357 }, { "epoch": 4.526884675277091, "grad_norm": 0.14682581743965822, "learning_rate": 4.834719521115777e-06, "loss": 0.3634, "num_tokens": 9452979295.0, "step": 12358 }, { "epoch": 4.527251076303013, "grad_norm": 0.1808138227778923, "learning_rate": 4.833433968330621e-06, "loss": 0.3592, "num_tokens": 9453607417.0, "step": 12359 }, { "epoch": 4.527617477328937, "grad_norm": 0.14660737033323398, "learning_rate": 4.832149382787765e-06, "loss": 0.3279, "num_tokens": 9454351243.0, "step": 12360 }, { "epoch": 4.52798387835486, "grad_norm": 0.1435981571145343, "learning_rate": 4.830865764559586e-06, "loss": 0.3526, "num_tokens": 9455187861.0, "step": 12361 }, { "epoch": 4.528350279380782, "grad_norm": 0.15925108675796681, "learning_rate": 4.8295831137184105e-06, "loss": 0.3884, "num_tokens": 9455922905.0, "step": 12362 }, { "epoch": 4.528716680406705, "grad_norm": 0.14487252309457838, "learning_rate": 4.8283014303365045e-06, "loss": 0.3627, "num_tokens": 9456713632.0, "step": 12363 }, { "epoch": 4.529083081432628, "grad_norm": 0.14895697816407807, "learning_rate": 4.827020714486093e-06, "loss": 0.3652, "num_tokens": 9457433513.0, "step": 12364 }, { "epoch": 4.529449482458551, "grad_norm": 0.1475674711384926, "learning_rate": 4.825740966239331e-06, "loss": 0.3581, "num_tokens": 9458220667.0, "step": 12365 }, { "epoch": 4.529815883484474, "grad_norm": 0.15282729973648382, "learning_rate": 4.824462185668323e-06, "loss": 0.3609, "num_tokens": 9458900407.0, "step": 12366 }, { "epoch": 4.5301822845103965, "grad_norm": 0.14615794883841166, "learning_rate": 4.8231843728451274e-06, "loss": 0.364, "num_tokens": 9459741654.0, "step": 12367 }, { "epoch": 4.53054868553632, "grad_norm": 0.14380507593287933, "learning_rate": 4.821907527841737e-06, "loss": 0.3356, "num_tokens": 9460534194.0, "step": 12368 }, { "epoch": 4.530915086562242, "grad_norm": 0.13676400448318232, "learning_rate": 4.8206316507300995e-06, "loss": 0.3622, "num_tokens": 9461403234.0, "step": 12369 }, { "epoch": 4.531281487588165, "grad_norm": 0.14469986724381664, "learning_rate": 4.819356741582099e-06, "loss": 0.3338, "num_tokens": 9462123080.0, "step": 12370 }, { "epoch": 4.5316478886140885, "grad_norm": 0.1605716715487634, "learning_rate": 4.818082800469575e-06, "loss": 0.3543, "num_tokens": 9462798933.0, "step": 12371 }, { "epoch": 4.532014289640011, "grad_norm": 0.15267404537851578, "learning_rate": 4.816809827464302e-06, "loss": 0.3448, "num_tokens": 9463537450.0, "step": 12372 }, { "epoch": 4.532380690665934, "grad_norm": 0.16391848909893628, "learning_rate": 4.815537822638009e-06, "loss": 0.4, "num_tokens": 9464226883.0, "step": 12373 }, { "epoch": 4.532747091691856, "grad_norm": 0.13804385812239026, "learning_rate": 4.814266786062363e-06, "loss": 0.3329, "num_tokens": 9465030147.0, "step": 12374 }, { "epoch": 4.53311349271778, "grad_norm": 0.13871014546053242, "learning_rate": 4.812996717808982e-06, "loss": 0.3447, "num_tokens": 9465872206.0, "step": 12375 }, { "epoch": 4.533479893743703, "grad_norm": 0.1454324738180561, "learning_rate": 4.811727617949424e-06, "loss": 0.3797, "num_tokens": 9466677573.0, "step": 12376 }, { "epoch": 4.533846294769625, "grad_norm": 0.14698392274797048, "learning_rate": 4.810459486555205e-06, "loss": 0.3389, "num_tokens": 9467409180.0, "step": 12377 }, { "epoch": 4.534212695795548, "grad_norm": 0.14086402240680942, "learning_rate": 4.809192323697769e-06, "loss": 0.3608, "num_tokens": 9468232428.0, "step": 12378 }, { "epoch": 4.534579096821471, "grad_norm": 0.13424478470718765, "learning_rate": 4.807926129448513e-06, "loss": 0.3658, "num_tokens": 9469076257.0, "step": 12379 }, { "epoch": 4.534945497847394, "grad_norm": 0.15564362256421246, "learning_rate": 4.806660903878785e-06, "loss": 0.3535, "num_tokens": 9469742309.0, "step": 12380 }, { "epoch": 4.535311898873317, "grad_norm": 0.14586313603346887, "learning_rate": 4.805396647059876e-06, "loss": 0.3672, "num_tokens": 9470506254.0, "step": 12381 }, { "epoch": 4.5356782998992395, "grad_norm": 0.15048163857642471, "learning_rate": 4.804133359063008e-06, "loss": 0.3689, "num_tokens": 9471226821.0, "step": 12382 }, { "epoch": 4.536044700925163, "grad_norm": 0.14310877586586124, "learning_rate": 4.802871039959373e-06, "loss": 0.368, "num_tokens": 9472035717.0, "step": 12383 }, { "epoch": 4.536411101951085, "grad_norm": 0.15130069429856044, "learning_rate": 4.80160968982009e-06, "loss": 0.3905, "num_tokens": 9472830418.0, "step": 12384 }, { "epoch": 4.536777502977008, "grad_norm": 0.14211437220242576, "learning_rate": 4.8003493087162275e-06, "loss": 0.3487, "num_tokens": 9473605151.0, "step": 12385 }, { "epoch": 4.5371439040029315, "grad_norm": 0.15787107242198417, "learning_rate": 4.799089896718806e-06, "loss": 0.3871, "num_tokens": 9474320963.0, "step": 12386 }, { "epoch": 4.537510305028854, "grad_norm": 0.15700495964658023, "learning_rate": 4.797831453898785e-06, "loss": 0.3839, "num_tokens": 9475052158.0, "step": 12387 }, { "epoch": 4.537876706054777, "grad_norm": 0.15817764306583684, "learning_rate": 4.796573980327069e-06, "loss": 0.381, "num_tokens": 9475773017.0, "step": 12388 }, { "epoch": 4.5382431070806994, "grad_norm": 0.1507708573257589, "learning_rate": 4.79531747607451e-06, "loss": 0.3516, "num_tokens": 9476583003.0, "step": 12389 }, { "epoch": 4.538609508106623, "grad_norm": 0.15959970694853837, "learning_rate": 4.794061941211904e-06, "loss": 0.3592, "num_tokens": 9477246373.0, "step": 12390 }, { "epoch": 4.538975909132546, "grad_norm": 0.16125611418368382, "learning_rate": 4.792807375809999e-06, "loss": 0.374, "num_tokens": 9477919573.0, "step": 12391 }, { "epoch": 4.539342310158468, "grad_norm": 0.14775127760848483, "learning_rate": 4.79155377993948e-06, "loss": 0.3748, "num_tokens": 9478708081.0, "step": 12392 }, { "epoch": 4.5397087111843915, "grad_norm": 0.1576867108362059, "learning_rate": 4.7903011536709746e-06, "loss": 0.3528, "num_tokens": 9479433590.0, "step": 12393 }, { "epoch": 4.540075112210314, "grad_norm": 0.15553126951898646, "learning_rate": 4.789049497075072e-06, "loss": 0.3664, "num_tokens": 9480237753.0, "step": 12394 }, { "epoch": 4.540441513236237, "grad_norm": 0.1513192817712806, "learning_rate": 4.787798810222286e-06, "loss": 0.3648, "num_tokens": 9480968302.0, "step": 12395 }, { "epoch": 4.54080791426216, "grad_norm": 0.1551569717164274, "learning_rate": 4.786549093183092e-06, "loss": 0.3773, "num_tokens": 9481734199.0, "step": 12396 }, { "epoch": 4.541174315288083, "grad_norm": 0.13545200229480306, "learning_rate": 4.7853003460279054e-06, "loss": 0.3587, "num_tokens": 9482624237.0, "step": 12397 }, { "epoch": 4.541540716314006, "grad_norm": 0.13488667196765858, "learning_rate": 4.784052568827082e-06, "loss": 0.3616, "num_tokens": 9483529419.0, "step": 12398 }, { "epoch": 4.541907117339928, "grad_norm": 0.14655037501149756, "learning_rate": 4.7828057616509315e-06, "loss": 0.3522, "num_tokens": 9484334176.0, "step": 12399 }, { "epoch": 4.542273518365851, "grad_norm": 0.1448928868502295, "learning_rate": 4.781559924569701e-06, "loss": 0.4004, "num_tokens": 9485211503.0, "step": 12400 }, { "epoch": 4.542639919391775, "grad_norm": 0.16003817487531755, "learning_rate": 4.780315057653591e-06, "loss": 0.3719, "num_tokens": 9485943071.0, "step": 12401 }, { "epoch": 4.543006320417697, "grad_norm": 0.1543524309051291, "learning_rate": 4.779071160972739e-06, "loss": 0.3875, "num_tokens": 9486693935.0, "step": 12402 }, { "epoch": 4.54337272144362, "grad_norm": 0.14066025753555927, "learning_rate": 4.77782823459723e-06, "loss": 0.349, "num_tokens": 9487478735.0, "step": 12403 }, { "epoch": 4.5437391224695425, "grad_norm": 0.1596973051017025, "learning_rate": 4.776586278597102e-06, "loss": 0.367, "num_tokens": 9488200612.0, "step": 12404 }, { "epoch": 4.544105523495466, "grad_norm": 0.1566125235110819, "learning_rate": 4.77534529304233e-06, "loss": 0.3612, "num_tokens": 9488904464.0, "step": 12405 }, { "epoch": 4.544471924521389, "grad_norm": 0.1512637123260421, "learning_rate": 4.774105278002835e-06, "loss": 0.3616, "num_tokens": 9489612959.0, "step": 12406 }, { "epoch": 4.544838325547311, "grad_norm": 0.15537885283377934, "learning_rate": 4.77286623354849e-06, "loss": 0.3832, "num_tokens": 9490389287.0, "step": 12407 }, { "epoch": 4.5452047265732345, "grad_norm": 0.1504022000525421, "learning_rate": 4.7716281597491e-06, "loss": 0.3869, "num_tokens": 9491199553.0, "step": 12408 }, { "epoch": 4.545571127599157, "grad_norm": 0.1363518335737707, "learning_rate": 4.7703910566744295e-06, "loss": 0.3594, "num_tokens": 9492077954.0, "step": 12409 }, { "epoch": 4.54593752862508, "grad_norm": 0.13793060670133991, "learning_rate": 4.7691549243941824e-06, "loss": 0.3875, "num_tokens": 9492980989.0, "step": 12410 }, { "epoch": 4.546303929651003, "grad_norm": 0.14942767436392457, "learning_rate": 4.76791976297801e-06, "loss": 0.3856, "num_tokens": 9493757220.0, "step": 12411 }, { "epoch": 4.546670330676926, "grad_norm": 0.1755258497860101, "learning_rate": 4.766685572495498e-06, "loss": 0.4078, "num_tokens": 9494435436.0, "step": 12412 }, { "epoch": 4.547036731702849, "grad_norm": 0.15289885848181023, "learning_rate": 4.765452353016198e-06, "loss": 0.363, "num_tokens": 9495212530.0, "step": 12413 }, { "epoch": 4.547403132728771, "grad_norm": 0.15592571746443065, "learning_rate": 4.764220104609586e-06, "loss": 0.3676, "num_tokens": 9495878745.0, "step": 12414 }, { "epoch": 4.547769533754694, "grad_norm": 0.1484231919810744, "learning_rate": 4.762988827345098e-06, "loss": 0.3499, "num_tokens": 9496632054.0, "step": 12415 }, { "epoch": 4.548135934780618, "grad_norm": 0.15260999825902105, "learning_rate": 4.761758521292107e-06, "loss": 0.3599, "num_tokens": 9497380224.0, "step": 12416 }, { "epoch": 4.54850233580654, "grad_norm": 0.15340482882329332, "learning_rate": 4.760529186519935e-06, "loss": 0.3642, "num_tokens": 9498172682.0, "step": 12417 }, { "epoch": 4.548868736832463, "grad_norm": 0.14679278836092052, "learning_rate": 4.759300823097849e-06, "loss": 0.3737, "num_tokens": 9498918113.0, "step": 12418 }, { "epoch": 4.5492351378583855, "grad_norm": 0.15214737575998052, "learning_rate": 4.758073431095059e-06, "loss": 0.3662, "num_tokens": 9499663872.0, "step": 12419 }, { "epoch": 4.549601538884309, "grad_norm": 0.14228557454263194, "learning_rate": 4.756847010580721e-06, "loss": 0.3565, "num_tokens": 9500530957.0, "step": 12420 }, { "epoch": 4.549967939910232, "grad_norm": 0.1539652920289289, "learning_rate": 4.755621561623941e-06, "loss": 0.3327, "num_tokens": 9501203190.0, "step": 12421 }, { "epoch": 4.550334340936154, "grad_norm": 0.14143135028033219, "learning_rate": 4.754397084293763e-06, "loss": 0.3595, "num_tokens": 9502033607.0, "step": 12422 }, { "epoch": 4.5507007419620775, "grad_norm": 0.14830991744137761, "learning_rate": 4.75317357865918e-06, "loss": 0.3654, "num_tokens": 9502841034.0, "step": 12423 }, { "epoch": 4.551067142988001, "grad_norm": 0.1570582811151681, "learning_rate": 4.751951044789132e-06, "loss": 0.384, "num_tokens": 9503526974.0, "step": 12424 }, { "epoch": 4.551433544013923, "grad_norm": 0.15109306579583662, "learning_rate": 4.750729482752498e-06, "loss": 0.3504, "num_tokens": 9504219094.0, "step": 12425 }, { "epoch": 4.551799945039846, "grad_norm": 0.1549409606617478, "learning_rate": 4.74950889261811e-06, "loss": 0.3602, "num_tokens": 9504922165.0, "step": 12426 }, { "epoch": 4.552166346065769, "grad_norm": 0.14405696738358584, "learning_rate": 4.748289274454743e-06, "loss": 0.3434, "num_tokens": 9505711216.0, "step": 12427 }, { "epoch": 4.552532747091692, "grad_norm": 0.13722432373514595, "learning_rate": 4.7470706283311115e-06, "loss": 0.3509, "num_tokens": 9506584253.0, "step": 12428 }, { "epoch": 4.552899148117615, "grad_norm": 0.14474897086142385, "learning_rate": 4.745852954315881e-06, "loss": 0.385, "num_tokens": 9507425423.0, "step": 12429 }, { "epoch": 4.553265549143537, "grad_norm": 0.14364693431267586, "learning_rate": 4.744636252477662e-06, "loss": 0.3708, "num_tokens": 9508306034.0, "step": 12430 }, { "epoch": 4.553631950169461, "grad_norm": 0.1499792814886671, "learning_rate": 4.743420522885009e-06, "loss": 0.3457, "num_tokens": 9509026009.0, "step": 12431 }, { "epoch": 4.553998351195383, "grad_norm": 0.1562875513358708, "learning_rate": 4.742205765606419e-06, "loss": 0.3464, "num_tokens": 9509785007.0, "step": 12432 }, { "epoch": 4.554364752221306, "grad_norm": 0.14480725365599748, "learning_rate": 4.740991980710339e-06, "loss": 0.3884, "num_tokens": 9510580418.0, "step": 12433 }, { "epoch": 4.554731153247229, "grad_norm": 0.13716513821054194, "learning_rate": 4.73977916826516e-06, "loss": 0.3724, "num_tokens": 9511423612.0, "step": 12434 }, { "epoch": 4.555097554273152, "grad_norm": 0.14626147371655007, "learning_rate": 4.738567328339216e-06, "loss": 0.3754, "num_tokens": 9512206102.0, "step": 12435 }, { "epoch": 4.555463955299075, "grad_norm": 0.14095828595484286, "learning_rate": 4.737356461000787e-06, "loss": 0.3825, "num_tokens": 9513093890.0, "step": 12436 }, { "epoch": 4.555830356324997, "grad_norm": 0.15184659394722386, "learning_rate": 4.736146566318101e-06, "loss": 0.3398, "num_tokens": 9513790142.0, "step": 12437 }, { "epoch": 4.556196757350921, "grad_norm": 0.15622524217175382, "learning_rate": 4.734937644359331e-06, "loss": 0.3954, "num_tokens": 9514588411.0, "step": 12438 }, { "epoch": 4.556563158376844, "grad_norm": 0.1642909040451507, "learning_rate": 4.733729695192584e-06, "loss": 0.3964, "num_tokens": 9515303754.0, "step": 12439 }, { "epoch": 4.556929559402766, "grad_norm": 0.1680944400133411, "learning_rate": 4.73252271888593e-06, "loss": 0.363, "num_tokens": 9515937182.0, "step": 12440 }, { "epoch": 4.557295960428689, "grad_norm": 0.12991253570556632, "learning_rate": 4.7313167155073716e-06, "loss": 0.345, "num_tokens": 9516853536.0, "step": 12441 }, { "epoch": 4.557662361454613, "grad_norm": 0.14273768924879487, "learning_rate": 4.730111685124861e-06, "loss": 0.3819, "num_tokens": 9517693974.0, "step": 12442 }, { "epoch": 4.558028762480535, "grad_norm": 0.15443726846691658, "learning_rate": 4.728907627806299e-06, "loss": 0.3739, "num_tokens": 9518433949.0, "step": 12443 }, { "epoch": 4.558395163506458, "grad_norm": 0.15458835059787177, "learning_rate": 4.727704543619522e-06, "loss": 0.3866, "num_tokens": 9519205909.0, "step": 12444 }, { "epoch": 4.5587615645323805, "grad_norm": 0.13314308568807018, "learning_rate": 4.726502432632319e-06, "loss": 0.3562, "num_tokens": 9520122862.0, "step": 12445 }, { "epoch": 4.559127965558304, "grad_norm": 0.14231997313549102, "learning_rate": 4.725301294912426e-06, "loss": 0.3623, "num_tokens": 9520930647.0, "step": 12446 }, { "epoch": 4.559494366584227, "grad_norm": 0.1571051663620753, "learning_rate": 4.724101130527512e-06, "loss": 0.3883, "num_tokens": 9521658633.0, "step": 12447 }, { "epoch": 4.559860767610149, "grad_norm": 0.15807992519037986, "learning_rate": 4.722901939545211e-06, "loss": 0.392, "num_tokens": 9522367989.0, "step": 12448 }, { "epoch": 4.5602271686360725, "grad_norm": 0.1506304056646256, "learning_rate": 4.7217037220330815e-06, "loss": 0.3383, "num_tokens": 9523145585.0, "step": 12449 }, { "epoch": 4.560593569661995, "grad_norm": 0.15179069381531207, "learning_rate": 4.72050647805864e-06, "loss": 0.3665, "num_tokens": 9523946100.0, "step": 12450 }, { "epoch": 4.560959970687918, "grad_norm": 0.15657863782919934, "learning_rate": 4.719310207689349e-06, "loss": 0.3866, "num_tokens": 9524661381.0, "step": 12451 }, { "epoch": 4.561326371713841, "grad_norm": 0.15569411144613016, "learning_rate": 4.718114910992603e-06, "loss": 0.4064, "num_tokens": 9525454373.0, "step": 12452 }, { "epoch": 4.561692772739764, "grad_norm": 0.15807358602509386, "learning_rate": 4.716920588035757e-06, "loss": 0.3813, "num_tokens": 9526236285.0, "step": 12453 }, { "epoch": 4.562059173765687, "grad_norm": 0.15689681454369483, "learning_rate": 4.715727238886102e-06, "loss": 0.3794, "num_tokens": 9526909549.0, "step": 12454 }, { "epoch": 4.562425574791609, "grad_norm": 0.1387652406994816, "learning_rate": 4.71453486361088e-06, "loss": 0.3445, "num_tokens": 9527723606.0, "step": 12455 }, { "epoch": 4.562791975817532, "grad_norm": 0.15790081550850604, "learning_rate": 4.713343462277268e-06, "loss": 0.3675, "num_tokens": 9528480224.0, "step": 12456 }, { "epoch": 4.563158376843456, "grad_norm": 0.14925484377488796, "learning_rate": 4.712153034952401e-06, "loss": 0.3743, "num_tokens": 9529230959.0, "step": 12457 }, { "epoch": 4.563524777869378, "grad_norm": 0.13631881184249114, "learning_rate": 4.710963581703352e-06, "loss": 0.3674, "num_tokens": 9530145453.0, "step": 12458 }, { "epoch": 4.563891178895301, "grad_norm": 0.1554777581507561, "learning_rate": 4.709775102597139e-06, "loss": 0.3738, "num_tokens": 9530855793.0, "step": 12459 }, { "epoch": 4.5642575799212235, "grad_norm": 0.14841893280525367, "learning_rate": 4.708587597700725e-06, "loss": 0.3601, "num_tokens": 9531611524.0, "step": 12460 }, { "epoch": 4.564623980947147, "grad_norm": 0.16093637244797102, "learning_rate": 4.707401067081024e-06, "loss": 0.3711, "num_tokens": 9532422375.0, "step": 12461 }, { "epoch": 4.56499038197307, "grad_norm": 0.13245159061137346, "learning_rate": 4.706215510804888e-06, "loss": 0.3349, "num_tokens": 9533326948.0, "step": 12462 }, { "epoch": 4.565356782998992, "grad_norm": 0.14200731680578996, "learning_rate": 4.7050309289391124e-06, "loss": 0.3474, "num_tokens": 9534103498.0, "step": 12463 }, { "epoch": 4.5657231840249155, "grad_norm": 0.15039692805871901, "learning_rate": 4.703847321550447e-06, "loss": 0.3647, "num_tokens": 9534833153.0, "step": 12464 }, { "epoch": 4.566089585050838, "grad_norm": 0.15105376100149226, "learning_rate": 4.702664688705586e-06, "loss": 0.364, "num_tokens": 9535595609.0, "step": 12465 }, { "epoch": 4.566455986076761, "grad_norm": 0.14810228612639403, "learning_rate": 4.70148303047115e-06, "loss": 0.3495, "num_tokens": 9536326005.0, "step": 12466 }, { "epoch": 4.566822387102684, "grad_norm": 0.14427598319836343, "learning_rate": 4.700302346913733e-06, "loss": 0.3673, "num_tokens": 9537126420.0, "step": 12467 }, { "epoch": 4.567188788128607, "grad_norm": 0.14403611636514502, "learning_rate": 4.6991226380998565e-06, "loss": 0.3805, "num_tokens": 9537970816.0, "step": 12468 }, { "epoch": 4.56755518915453, "grad_norm": 0.1498094199316569, "learning_rate": 4.697943904095983e-06, "loss": 0.3862, "num_tokens": 9538766319.0, "step": 12469 }, { "epoch": 4.567921590180452, "grad_norm": 0.14664849045977169, "learning_rate": 4.696766144968537e-06, "loss": 0.3632, "num_tokens": 9539534910.0, "step": 12470 }, { "epoch": 4.568287991206375, "grad_norm": 0.13029323941512727, "learning_rate": 4.695589360783877e-06, "loss": 0.3381, "num_tokens": 9540447958.0, "step": 12471 }, { "epoch": 4.568654392232299, "grad_norm": 0.14427395583426125, "learning_rate": 4.694413551608305e-06, "loss": 0.3647, "num_tokens": 9541224613.0, "step": 12472 }, { "epoch": 4.569020793258221, "grad_norm": 0.1413383384070638, "learning_rate": 4.693238717508073e-06, "loss": 0.3477, "num_tokens": 9541983858.0, "step": 12473 }, { "epoch": 4.569387194284144, "grad_norm": 0.18203531083481203, "learning_rate": 4.692064858549377e-06, "loss": 0.4378, "num_tokens": 9542614802.0, "step": 12474 }, { "epoch": 4.5697535953100665, "grad_norm": 0.1414422197463698, "learning_rate": 4.690891974798359e-06, "loss": 0.3558, "num_tokens": 9543451122.0, "step": 12475 }, { "epoch": 4.57011999633599, "grad_norm": 0.14510413758935667, "learning_rate": 4.689720066321102e-06, "loss": 0.3461, "num_tokens": 9544208282.0, "step": 12476 }, { "epoch": 4.570486397361913, "grad_norm": 0.1497158792319658, "learning_rate": 4.6885491331836345e-06, "loss": 0.3674, "num_tokens": 9544969682.0, "step": 12477 }, { "epoch": 4.570852798387835, "grad_norm": 0.14563020843382127, "learning_rate": 4.687379175451939e-06, "loss": 0.3641, "num_tokens": 9545791677.0, "step": 12478 }, { "epoch": 4.5712191994137585, "grad_norm": 0.14560273780930785, "learning_rate": 4.68621019319193e-06, "loss": 0.3387, "num_tokens": 9546530297.0, "step": 12479 }, { "epoch": 4.571585600439681, "grad_norm": 0.17811573970332734, "learning_rate": 4.685042186469477e-06, "loss": 0.3778, "num_tokens": 9547305673.0, "step": 12480 }, { "epoch": 4.571952001465604, "grad_norm": 0.14414171740631754, "learning_rate": 4.683875155350393e-06, "loss": 0.3666, "num_tokens": 9548102818.0, "step": 12481 }, { "epoch": 4.572318402491527, "grad_norm": 0.1646752800720324, "learning_rate": 4.682709099900424e-06, "loss": 0.3684, "num_tokens": 9548776751.0, "step": 12482 }, { "epoch": 4.57268480351745, "grad_norm": 0.14755254034185597, "learning_rate": 4.681544020185282e-06, "loss": 0.3613, "num_tokens": 9549601940.0, "step": 12483 }, { "epoch": 4.573051204543373, "grad_norm": 0.14959242645300191, "learning_rate": 4.6803799162706055e-06, "loss": 0.3709, "num_tokens": 9550354320.0, "step": 12484 }, { "epoch": 4.573417605569295, "grad_norm": 0.1589708992823005, "learning_rate": 4.679216788221988e-06, "loss": 0.3919, "num_tokens": 9551052035.0, "step": 12485 }, { "epoch": 4.573784006595218, "grad_norm": 0.1399418749021697, "learning_rate": 4.678054636104968e-06, "loss": 0.3425, "num_tokens": 9551864406.0, "step": 12486 }, { "epoch": 4.574150407621142, "grad_norm": 0.15109963688758715, "learning_rate": 4.676893459985023e-06, "loss": 0.3693, "num_tokens": 9552656845.0, "step": 12487 }, { "epoch": 4.574516808647064, "grad_norm": 0.15047788346479846, "learning_rate": 4.675733259927579e-06, "loss": 0.3703, "num_tokens": 9553436708.0, "step": 12488 }, { "epoch": 4.574883209672987, "grad_norm": 0.1680889805740107, "learning_rate": 4.674574035998007e-06, "loss": 0.4062, "num_tokens": 9554138699.0, "step": 12489 }, { "epoch": 4.57524961069891, "grad_norm": 0.15371640003785403, "learning_rate": 4.673415788261624e-06, "loss": 0.3946, "num_tokens": 9554917934.0, "step": 12490 }, { "epoch": 4.575616011724833, "grad_norm": 0.1429646835494798, "learning_rate": 4.672258516783693e-06, "loss": 0.3874, "num_tokens": 9555794239.0, "step": 12491 }, { "epoch": 4.575982412750756, "grad_norm": 0.15781623506897943, "learning_rate": 4.671102221629415e-06, "loss": 0.356, "num_tokens": 9556522643.0, "step": 12492 }, { "epoch": 4.576348813776678, "grad_norm": 0.16902191647625572, "learning_rate": 4.669946902863943e-06, "loss": 0.3452, "num_tokens": 9557258878.0, "step": 12493 }, { "epoch": 4.576715214802602, "grad_norm": 0.15204005728545528, "learning_rate": 4.668792560552374e-06, "loss": 0.3672, "num_tokens": 9557984258.0, "step": 12494 }, { "epoch": 4.577081615828524, "grad_norm": 0.14638136924732645, "learning_rate": 4.667639194759752e-06, "loss": 0.3564, "num_tokens": 9558771395.0, "step": 12495 }, { "epoch": 4.577448016854447, "grad_norm": 0.16687809216734864, "learning_rate": 4.666486805551053e-06, "loss": 0.3971, "num_tokens": 9559475306.0, "step": 12496 }, { "epoch": 4.57781441788037, "grad_norm": 0.1622600947080243, "learning_rate": 4.665335392991218e-06, "loss": 0.3767, "num_tokens": 9560183531.0, "step": 12497 }, { "epoch": 4.578180818906293, "grad_norm": 0.14192898063912832, "learning_rate": 4.664184957145118e-06, "loss": 0.3388, "num_tokens": 9560975519.0, "step": 12498 }, { "epoch": 4.578547219932216, "grad_norm": 0.14488162580143993, "learning_rate": 4.6630354980775746e-06, "loss": 0.3743, "num_tokens": 9561729057.0, "step": 12499 }, { "epoch": 4.578913620958138, "grad_norm": 0.15607416508433408, "learning_rate": 4.6618870158533546e-06, "loss": 0.3724, "num_tokens": 9562493081.0, "step": 12500 }, { "epoch": 4.5792800219840615, "grad_norm": 0.16011203787280587, "learning_rate": 4.660739510537167e-06, "loss": 0.3618, "num_tokens": 9563205368.0, "step": 12501 }, { "epoch": 4.579646423009985, "grad_norm": 0.149417386831696, "learning_rate": 4.659592982193667e-06, "loss": 0.3612, "num_tokens": 9563973411.0, "step": 12502 }, { "epoch": 4.580012824035907, "grad_norm": 0.13623617305898833, "learning_rate": 4.658447430887457e-06, "loss": 0.3584, "num_tokens": 9564889799.0, "step": 12503 }, { "epoch": 4.58037922506183, "grad_norm": 0.14644005492433265, "learning_rate": 4.65730285668308e-06, "loss": 0.366, "num_tokens": 9565674211.0, "step": 12504 }, { "epoch": 4.580745626087753, "grad_norm": 0.1425774353805693, "learning_rate": 4.656159259645031e-06, "loss": 0.3787, "num_tokens": 9566479012.0, "step": 12505 }, { "epoch": 4.581112027113676, "grad_norm": 0.15419866601751958, "learning_rate": 4.655016639837743e-06, "loss": 0.4015, "num_tokens": 9567245226.0, "step": 12506 }, { "epoch": 4.581478428139599, "grad_norm": 0.1492416599352516, "learning_rate": 4.653874997325593e-06, "loss": 0.3431, "num_tokens": 9568012590.0, "step": 12507 }, { "epoch": 4.581844829165521, "grad_norm": 0.1701938637402031, "learning_rate": 4.652734332172915e-06, "loss": 0.3832, "num_tokens": 9568621433.0, "step": 12508 }, { "epoch": 4.582211230191445, "grad_norm": 0.16336265749187154, "learning_rate": 4.651594644443968e-06, "loss": 0.389, "num_tokens": 9569292848.0, "step": 12509 }, { "epoch": 4.582577631217367, "grad_norm": 0.15385535650954696, "learning_rate": 4.6504559342029764e-06, "loss": 0.3449, "num_tokens": 9570031204.0, "step": 12510 }, { "epoch": 4.58294403224329, "grad_norm": 0.16391622835295286, "learning_rate": 4.649318201514096e-06, "loss": 0.3735, "num_tokens": 9570722681.0, "step": 12511 }, { "epoch": 4.583310433269213, "grad_norm": 0.1494587325469717, "learning_rate": 4.648181446441433e-06, "loss": 0.3449, "num_tokens": 9571444640.0, "step": 12512 }, { "epoch": 4.583676834295136, "grad_norm": 0.14812989049003128, "learning_rate": 4.647045669049037e-06, "loss": 0.3689, "num_tokens": 9572199863.0, "step": 12513 }, { "epoch": 4.584043235321059, "grad_norm": 0.1382626334343223, "learning_rate": 4.645910869400903e-06, "loss": 0.3666, "num_tokens": 9573092795.0, "step": 12514 }, { "epoch": 4.584409636346981, "grad_norm": 0.1616275020386637, "learning_rate": 4.644777047560969e-06, "loss": 0.3733, "num_tokens": 9573763077.0, "step": 12515 }, { "epoch": 4.5847760373729045, "grad_norm": 0.1411957411485009, "learning_rate": 4.643644203593124e-06, "loss": 0.3577, "num_tokens": 9574650600.0, "step": 12516 }, { "epoch": 4.585142438398828, "grad_norm": 0.1571566467740542, "learning_rate": 4.642512337561192e-06, "loss": 0.3546, "num_tokens": 9575399487.0, "step": 12517 }, { "epoch": 4.58550883942475, "grad_norm": 0.15580548154987509, "learning_rate": 4.641381449528954e-06, "loss": 0.3841, "num_tokens": 9576124875.0, "step": 12518 }, { "epoch": 4.585875240450673, "grad_norm": 0.14703156848575832, "learning_rate": 4.640251539560124e-06, "loss": 0.3722, "num_tokens": 9576910582.0, "step": 12519 }, { "epoch": 4.5862416414765965, "grad_norm": 0.15593244378196652, "learning_rate": 4.639122607718366e-06, "loss": 0.3574, "num_tokens": 9577608036.0, "step": 12520 }, { "epoch": 4.586608042502519, "grad_norm": 0.14685367607086466, "learning_rate": 4.637994654067293e-06, "loss": 0.3718, "num_tokens": 9578449803.0, "step": 12521 }, { "epoch": 4.586974443528442, "grad_norm": 0.1501248524458595, "learning_rate": 4.63686767867046e-06, "loss": 0.3725, "num_tokens": 9579200258.0, "step": 12522 }, { "epoch": 4.587340844554364, "grad_norm": 0.15668112018898928, "learning_rate": 4.635741681591359e-06, "loss": 0.3668, "num_tokens": 9579892164.0, "step": 12523 }, { "epoch": 4.587707245580288, "grad_norm": 0.1390889935376271, "learning_rate": 4.634616662893439e-06, "loss": 0.3592, "num_tokens": 9580690436.0, "step": 12524 }, { "epoch": 4.588073646606211, "grad_norm": 0.15235623836173365, "learning_rate": 4.633492622640091e-06, "loss": 0.347, "num_tokens": 9581415970.0, "step": 12525 }, { "epoch": 4.588440047632133, "grad_norm": 0.14446541676867639, "learning_rate": 4.632369560894641e-06, "loss": 0.391, "num_tokens": 9582205673.0, "step": 12526 }, { "epoch": 4.588806448658056, "grad_norm": 0.1725846282832719, "learning_rate": 4.631247477720372e-06, "loss": 0.3829, "num_tokens": 9582823202.0, "step": 12527 }, { "epoch": 4.589172849683979, "grad_norm": 0.14352215669748652, "learning_rate": 4.630126373180508e-06, "loss": 0.3782, "num_tokens": 9583626669.0, "step": 12528 }, { "epoch": 4.589539250709902, "grad_norm": 0.15054249401531897, "learning_rate": 4.6290062473382156e-06, "loss": 0.3718, "num_tokens": 9584381580.0, "step": 12529 }, { "epoch": 4.589905651735825, "grad_norm": 0.15723229576302297, "learning_rate": 4.6278871002566065e-06, "loss": 0.4188, "num_tokens": 9585130136.0, "step": 12530 }, { "epoch": 4.5902720527617475, "grad_norm": 0.17038580446932447, "learning_rate": 4.626768931998743e-06, "loss": 0.356, "num_tokens": 9585736424.0, "step": 12531 }, { "epoch": 4.590638453787671, "grad_norm": 0.14277686285268493, "learning_rate": 4.625651742627623e-06, "loss": 0.387, "num_tokens": 9586560586.0, "step": 12532 }, { "epoch": 4.591004854813593, "grad_norm": 0.16895641421104854, "learning_rate": 4.624535532206197e-06, "loss": 0.3656, "num_tokens": 9587196857.0, "step": 12533 }, { "epoch": 4.591371255839516, "grad_norm": 0.14386478011563208, "learning_rate": 4.623420300797354e-06, "loss": 0.3695, "num_tokens": 9588076430.0, "step": 12534 }, { "epoch": 4.5917376568654396, "grad_norm": 0.14619379521906103, "learning_rate": 4.6223060484639366e-06, "loss": 0.3612, "num_tokens": 9588892878.0, "step": 12535 }, { "epoch": 4.592104057891362, "grad_norm": 0.15974236922162718, "learning_rate": 4.621192775268718e-06, "loss": 0.3467, "num_tokens": 9589596814.0, "step": 12536 }, { "epoch": 4.592470458917285, "grad_norm": 0.14966599361960994, "learning_rate": 4.620080481274437e-06, "loss": 0.3636, "num_tokens": 9590369335.0, "step": 12537 }, { "epoch": 4.592836859943208, "grad_norm": 0.14436958608009084, "learning_rate": 4.618969166543758e-06, "loss": 0.3611, "num_tokens": 9591129070.0, "step": 12538 }, { "epoch": 4.593203260969131, "grad_norm": 0.1439647603477226, "learning_rate": 4.617858831139295e-06, "loss": 0.3578, "num_tokens": 9591944368.0, "step": 12539 }, { "epoch": 4.593569661995054, "grad_norm": 0.15332092191093902, "learning_rate": 4.616749475123616e-06, "loss": 0.3939, "num_tokens": 9592697075.0, "step": 12540 }, { "epoch": 4.593936063020976, "grad_norm": 0.1648235986946531, "learning_rate": 4.615641098559225e-06, "loss": 0.3888, "num_tokens": 9593380294.0, "step": 12541 }, { "epoch": 4.5943024640468995, "grad_norm": 0.15316143074201974, "learning_rate": 4.614533701508569e-06, "loss": 0.3616, "num_tokens": 9594195769.0, "step": 12542 }, { "epoch": 4.594668865072823, "grad_norm": 0.1403111984764094, "learning_rate": 4.613427284034048e-06, "loss": 0.3825, "num_tokens": 9594993598.0, "step": 12543 }, { "epoch": 4.595035266098745, "grad_norm": 0.15746009501036098, "learning_rate": 4.612321846198002e-06, "loss": 0.3709, "num_tokens": 9595671432.0, "step": 12544 }, { "epoch": 4.595401667124668, "grad_norm": 0.1428990187724138, "learning_rate": 4.611217388062718e-06, "loss": 0.3685, "num_tokens": 9596434398.0, "step": 12545 }, { "epoch": 4.595768068150591, "grad_norm": 0.15511146585523652, "learning_rate": 4.6101139096904204e-06, "loss": 0.351, "num_tokens": 9597159176.0, "step": 12546 }, { "epoch": 4.596134469176514, "grad_norm": 0.1445689820112623, "learning_rate": 4.609011411143288e-06, "loss": 0.3741, "num_tokens": 9597970533.0, "step": 12547 }, { "epoch": 4.596500870202437, "grad_norm": 0.15836409697490747, "learning_rate": 4.607909892483443e-06, "loss": 0.3905, "num_tokens": 9598677909.0, "step": 12548 }, { "epoch": 4.596867271228359, "grad_norm": 0.1565254715035816, "learning_rate": 4.606809353772947e-06, "loss": 0.353, "num_tokens": 9599342921.0, "step": 12549 }, { "epoch": 4.597233672254283, "grad_norm": 0.1512616737058953, "learning_rate": 4.605709795073806e-06, "loss": 0.3349, "num_tokens": 9600070505.0, "step": 12550 }, { "epoch": 4.597600073280205, "grad_norm": 0.1614664105176374, "learning_rate": 4.604611216447981e-06, "loss": 0.3958, "num_tokens": 9600706070.0, "step": 12551 }, { "epoch": 4.597966474306128, "grad_norm": 0.16024561761859132, "learning_rate": 4.603513617957369e-06, "loss": 0.3471, "num_tokens": 9601341424.0, "step": 12552 }, { "epoch": 4.598332875332051, "grad_norm": 0.1482930928524436, "learning_rate": 4.602416999663809e-06, "loss": 0.3534, "num_tokens": 9602091274.0, "step": 12553 }, { "epoch": 4.598699276357974, "grad_norm": 0.14299884380090974, "learning_rate": 4.601321361629092e-06, "loss": 0.3769, "num_tokens": 9602901049.0, "step": 12554 }, { "epoch": 4.599065677383897, "grad_norm": 0.15050516257145843, "learning_rate": 4.600226703914955e-06, "loss": 0.3519, "num_tokens": 9603735556.0, "step": 12555 }, { "epoch": 4.599432078409819, "grad_norm": 0.1450232122449423, "learning_rate": 4.599133026583073e-06, "loss": 0.3853, "num_tokens": 9604534904.0, "step": 12556 }, { "epoch": 4.5997984794357425, "grad_norm": 0.14696106995557523, "learning_rate": 4.598040329695068e-06, "loss": 0.3792, "num_tokens": 9605312769.0, "step": 12557 }, { "epoch": 4.600164880461666, "grad_norm": 0.15231462596657866, "learning_rate": 4.5969486133125095e-06, "loss": 0.3566, "num_tokens": 9606121082.0, "step": 12558 }, { "epoch": 4.600531281487588, "grad_norm": 0.14322584604232969, "learning_rate": 4.595857877496908e-06, "loss": 0.369, "num_tokens": 9606908450.0, "step": 12559 }, { "epoch": 4.600897682513511, "grad_norm": 0.1505264582008911, "learning_rate": 4.594768122309721e-06, "loss": 0.3454, "num_tokens": 9607600743.0, "step": 12560 }, { "epoch": 4.601264083539434, "grad_norm": 0.1498924692132622, "learning_rate": 4.593679347812347e-06, "loss": 0.3776, "num_tokens": 9608461753.0, "step": 12561 }, { "epoch": 4.601630484565357, "grad_norm": 0.1483165804522152, "learning_rate": 4.592591554066142e-06, "loss": 0.3676, "num_tokens": 9609205719.0, "step": 12562 }, { "epoch": 4.60199688559128, "grad_norm": 0.15174767296873923, "learning_rate": 4.5915047411323855e-06, "loss": 0.3823, "num_tokens": 9610010404.0, "step": 12563 }, { "epoch": 4.602363286617202, "grad_norm": 0.1520421724732622, "learning_rate": 4.590418909072321e-06, "loss": 0.3776, "num_tokens": 9610802193.0, "step": 12564 }, { "epoch": 4.602729687643126, "grad_norm": 0.1617005309186371, "learning_rate": 4.589334057947131e-06, "loss": 0.3841, "num_tokens": 9611540694.0, "step": 12565 }, { "epoch": 4.603096088669048, "grad_norm": 0.16093785678437314, "learning_rate": 4.588250187817933e-06, "loss": 0.4145, "num_tokens": 9612317491.0, "step": 12566 }, { "epoch": 4.603462489694971, "grad_norm": 0.15366414842985976, "learning_rate": 4.587167298745802e-06, "loss": 0.3625, "num_tokens": 9613062396.0, "step": 12567 }, { "epoch": 4.603828890720894, "grad_norm": 0.15650291543669206, "learning_rate": 4.586085390791753e-06, "loss": 0.3902, "num_tokens": 9613782295.0, "step": 12568 }, { "epoch": 4.604195291746817, "grad_norm": 0.14250262953319365, "learning_rate": 4.585004464016744e-06, "loss": 0.3942, "num_tokens": 9614639086.0, "step": 12569 }, { "epoch": 4.60456169277274, "grad_norm": 0.16152850117381215, "learning_rate": 4.58392451848168e-06, "loss": 0.3833, "num_tokens": 9615359438.0, "step": 12570 }, { "epoch": 4.604928093798662, "grad_norm": 0.1421612129156344, "learning_rate": 4.582845554247412e-06, "loss": 0.3549, "num_tokens": 9616166094.0, "step": 12571 }, { "epoch": 4.6052944948245855, "grad_norm": 0.15262824334650435, "learning_rate": 4.58176757137473e-06, "loss": 0.3553, "num_tokens": 9616922319.0, "step": 12572 }, { "epoch": 4.605660895850509, "grad_norm": 0.1584857924080039, "learning_rate": 4.5806905699243755e-06, "loss": 0.3724, "num_tokens": 9617678986.0, "step": 12573 }, { "epoch": 4.606027296876431, "grad_norm": 0.16057541305338838, "learning_rate": 4.57961454995703e-06, "loss": 0.3618, "num_tokens": 9618363894.0, "step": 12574 }, { "epoch": 4.606393697902354, "grad_norm": 0.14454923454062268, "learning_rate": 4.578539511533325e-06, "loss": 0.3538, "num_tokens": 9619137111.0, "step": 12575 }, { "epoch": 4.606760098928277, "grad_norm": 0.13752339833345192, "learning_rate": 4.577465454713828e-06, "loss": 0.3163, "num_tokens": 9619926837.0, "step": 12576 }, { "epoch": 4.6071264999542, "grad_norm": 0.14010529868110994, "learning_rate": 4.576392379559054e-06, "loss": 0.3749, "num_tokens": 9620759131.0, "step": 12577 }, { "epoch": 4.607492900980123, "grad_norm": 0.16331739257492386, "learning_rate": 4.5753202861294725e-06, "loss": 0.3657, "num_tokens": 9621522968.0, "step": 12578 }, { "epoch": 4.607859302006045, "grad_norm": 0.15147089365432387, "learning_rate": 4.574249174485489e-06, "loss": 0.3717, "num_tokens": 9622361312.0, "step": 12579 }, { "epoch": 4.608225703031969, "grad_norm": 0.15496027620383868, "learning_rate": 4.57317904468745e-06, "loss": 0.382, "num_tokens": 9623107326.0, "step": 12580 }, { "epoch": 4.608592104057891, "grad_norm": 0.15761731321111505, "learning_rate": 4.572109896795655e-06, "loss": 0.3861, "num_tokens": 9623789635.0, "step": 12581 }, { "epoch": 4.608958505083814, "grad_norm": 0.14920363613816257, "learning_rate": 4.571041730870344e-06, "loss": 0.3899, "num_tokens": 9624572337.0, "step": 12582 }, { "epoch": 4.609324906109737, "grad_norm": 0.15458196023500664, "learning_rate": 4.5699745469717e-06, "loss": 0.3789, "num_tokens": 9625340203.0, "step": 12583 }, { "epoch": 4.60969130713566, "grad_norm": 0.16962713057007792, "learning_rate": 4.568908345159856e-06, "loss": 0.404, "num_tokens": 9626073922.0, "step": 12584 }, { "epoch": 4.610057708161583, "grad_norm": 0.15924288474693396, "learning_rate": 4.567843125494886e-06, "loss": 0.3559, "num_tokens": 9626776057.0, "step": 12585 }, { "epoch": 4.610424109187505, "grad_norm": 0.13876002421636727, "learning_rate": 4.566778888036809e-06, "loss": 0.3635, "num_tokens": 9627635102.0, "step": 12586 }, { "epoch": 4.610790510213429, "grad_norm": 0.1552207430562366, "learning_rate": 4.56571563284559e-06, "loss": 0.3813, "num_tokens": 9628392404.0, "step": 12587 }, { "epoch": 4.611156911239352, "grad_norm": 0.15153484789845817, "learning_rate": 4.564653359981136e-06, "loss": 0.3998, "num_tokens": 9629100379.0, "step": 12588 }, { "epoch": 4.611523312265274, "grad_norm": 0.13632875924209886, "learning_rate": 4.563592069503299e-06, "loss": 0.364, "num_tokens": 9630053496.0, "step": 12589 }, { "epoch": 4.611889713291197, "grad_norm": 0.15526824484735816, "learning_rate": 4.5625317614718815e-06, "loss": 0.3568, "num_tokens": 9630741475.0, "step": 12590 }, { "epoch": 4.61225611431712, "grad_norm": 0.16675052942227964, "learning_rate": 4.561472435946618e-06, "loss": 0.358, "num_tokens": 9631424582.0, "step": 12591 }, { "epoch": 4.612622515343043, "grad_norm": 0.1554820854741358, "learning_rate": 4.560414092987208e-06, "loss": 0.4006, "num_tokens": 9632148656.0, "step": 12592 }, { "epoch": 4.612988916368966, "grad_norm": 0.16046883506135026, "learning_rate": 4.559356732653271e-06, "loss": 0.3453, "num_tokens": 9632894150.0, "step": 12593 }, { "epoch": 4.6133553173948885, "grad_norm": 0.13674062853584826, "learning_rate": 4.558300355004391e-06, "loss": 0.3306, "num_tokens": 9633696064.0, "step": 12594 }, { "epoch": 4.613721718420812, "grad_norm": 0.15391279833477692, "learning_rate": 4.557244960100089e-06, "loss": 0.3709, "num_tokens": 9634439680.0, "step": 12595 }, { "epoch": 4.614088119446734, "grad_norm": 0.15034335731933, "learning_rate": 4.556190547999827e-06, "loss": 0.378, "num_tokens": 9635189987.0, "step": 12596 }, { "epoch": 4.614454520472657, "grad_norm": 0.15723432945179117, "learning_rate": 4.555137118763016e-06, "loss": 0.3622, "num_tokens": 9635877465.0, "step": 12597 }, { "epoch": 4.6148209214985805, "grad_norm": 0.14870319940712828, "learning_rate": 4.554084672449013e-06, "loss": 0.363, "num_tokens": 9636621075.0, "step": 12598 }, { "epoch": 4.615187322524503, "grad_norm": 0.14885814984712606, "learning_rate": 4.553033209117117e-06, "loss": 0.3463, "num_tokens": 9637363100.0, "step": 12599 }, { "epoch": 4.615553723550426, "grad_norm": 0.14877506690442038, "learning_rate": 4.551982728826572e-06, "loss": 0.3706, "num_tokens": 9638139016.0, "step": 12600 }, { "epoch": 4.615920124576348, "grad_norm": 0.15290528093269828, "learning_rate": 4.550933231636563e-06, "loss": 0.3699, "num_tokens": 9638924592.0, "step": 12601 }, { "epoch": 4.616286525602272, "grad_norm": 0.14938255494439828, "learning_rate": 4.549884717606233e-06, "loss": 0.3635, "num_tokens": 9639704832.0, "step": 12602 }, { "epoch": 4.616652926628195, "grad_norm": 0.14191509171441075, "learning_rate": 4.548837186794652e-06, "loss": 0.3789, "num_tokens": 9640514534.0, "step": 12603 }, { "epoch": 4.617019327654117, "grad_norm": 0.14806976609453915, "learning_rate": 4.5477906392608424e-06, "loss": 0.37, "num_tokens": 9641289355.0, "step": 12604 }, { "epoch": 4.61738572868004, "grad_norm": 0.15021664447575983, "learning_rate": 4.546745075063778e-06, "loss": 0.3847, "num_tokens": 9642148190.0, "step": 12605 }, { "epoch": 4.617752129705963, "grad_norm": 0.17500680369022226, "learning_rate": 4.545700494262366e-06, "loss": 0.344, "num_tokens": 9642783870.0, "step": 12606 }, { "epoch": 4.618118530731886, "grad_norm": 0.158636388773697, "learning_rate": 4.544656896915461e-06, "loss": 0.363, "num_tokens": 9643441941.0, "step": 12607 }, { "epoch": 4.618484931757809, "grad_norm": 0.15812009448882908, "learning_rate": 4.543614283081866e-06, "loss": 0.3552, "num_tokens": 9644081134.0, "step": 12608 }, { "epoch": 4.6188513327837315, "grad_norm": 0.16135857002245899, "learning_rate": 4.542572652820331e-06, "loss": 0.3748, "num_tokens": 9644791168.0, "step": 12609 }, { "epoch": 4.619217733809655, "grad_norm": 0.14305902319542427, "learning_rate": 4.541532006189537e-06, "loss": 0.3647, "num_tokens": 9645583763.0, "step": 12610 }, { "epoch": 4.619584134835577, "grad_norm": 0.16152299595268074, "learning_rate": 4.540492343248128e-06, "loss": 0.3947, "num_tokens": 9646286058.0, "step": 12611 }, { "epoch": 4.6199505358615, "grad_norm": 0.14597585742038485, "learning_rate": 4.539453664054677e-06, "loss": 0.3675, "num_tokens": 9647110377.0, "step": 12612 }, { "epoch": 4.6203169368874235, "grad_norm": 0.1561948294919859, "learning_rate": 4.538415968667711e-06, "loss": 0.3722, "num_tokens": 9647898400.0, "step": 12613 }, { "epoch": 4.620683337913346, "grad_norm": 0.15367629494873394, "learning_rate": 4.537379257145698e-06, "loss": 0.3464, "num_tokens": 9648617776.0, "step": 12614 }, { "epoch": 4.621049738939269, "grad_norm": 0.1471282874775398, "learning_rate": 4.53634352954705e-06, "loss": 0.3432, "num_tokens": 9649410220.0, "step": 12615 }, { "epoch": 4.621416139965191, "grad_norm": 0.1434241104271852, "learning_rate": 4.535308785930126e-06, "loss": 0.3501, "num_tokens": 9650230152.0, "step": 12616 }, { "epoch": 4.621782540991115, "grad_norm": 0.14682968165250745, "learning_rate": 4.534275026353227e-06, "loss": 0.3267, "num_tokens": 9650963840.0, "step": 12617 }, { "epoch": 4.622148942017038, "grad_norm": 0.13870133919812283, "learning_rate": 4.533242250874598e-06, "loss": 0.3881, "num_tokens": 9651825567.0, "step": 12618 }, { "epoch": 4.62251534304296, "grad_norm": 0.16150182605912297, "learning_rate": 4.532210459552436e-06, "loss": 0.3942, "num_tokens": 9652526449.0, "step": 12619 }, { "epoch": 4.622881744068883, "grad_norm": 0.16983882739137685, "learning_rate": 4.53117965244487e-06, "loss": 0.355, "num_tokens": 9653117495.0, "step": 12620 }, { "epoch": 4.623248145094807, "grad_norm": 0.14986693147026864, "learning_rate": 4.530149829609987e-06, "loss": 0.3559, "num_tokens": 9653848288.0, "step": 12621 }, { "epoch": 4.623614546120729, "grad_norm": 0.14976500144176816, "learning_rate": 4.529120991105807e-06, "loss": 0.3214, "num_tokens": 9654584084.0, "step": 12622 }, { "epoch": 4.623980947146652, "grad_norm": 0.1523715295284541, "learning_rate": 4.528093136990299e-06, "loss": 0.3899, "num_tokens": 9655357287.0, "step": 12623 }, { "epoch": 4.6243473481725745, "grad_norm": 0.14571833697978565, "learning_rate": 4.52706626732138e-06, "loss": 0.3498, "num_tokens": 9656124194.0, "step": 12624 }, { "epoch": 4.624713749198498, "grad_norm": 0.15385563105049782, "learning_rate": 4.5260403821569075e-06, "loss": 0.3813, "num_tokens": 9656829339.0, "step": 12625 }, { "epoch": 4.625080150224421, "grad_norm": 0.14252758778061828, "learning_rate": 4.525015481554685e-06, "loss": 0.3635, "num_tokens": 9657605909.0, "step": 12626 }, { "epoch": 4.625446551250343, "grad_norm": 0.15141065863798484, "learning_rate": 4.52399156557246e-06, "loss": 0.3756, "num_tokens": 9658323069.0, "step": 12627 }, { "epoch": 4.6258129522762665, "grad_norm": 0.14747692173745636, "learning_rate": 4.522968634267922e-06, "loss": 0.3567, "num_tokens": 9659116846.0, "step": 12628 }, { "epoch": 4.626179353302189, "grad_norm": 0.13836719468494638, "learning_rate": 4.521946687698712e-06, "loss": 0.3593, "num_tokens": 9660001555.0, "step": 12629 }, { "epoch": 4.626545754328112, "grad_norm": 0.16262827958897275, "learning_rate": 4.5209257259224056e-06, "loss": 0.3661, "num_tokens": 9660695511.0, "step": 12630 }, { "epoch": 4.626912155354035, "grad_norm": 0.14365267927263461, "learning_rate": 4.519905748996532e-06, "loss": 0.376, "num_tokens": 9661559911.0, "step": 12631 }, { "epoch": 4.627278556379958, "grad_norm": 0.148485386991437, "learning_rate": 4.5188867569785635e-06, "loss": 0.3502, "num_tokens": 9662314430.0, "step": 12632 }, { "epoch": 4.627644957405881, "grad_norm": 0.16463687532171173, "learning_rate": 4.517868749925912e-06, "loss": 0.3953, "num_tokens": 9662937798.0, "step": 12633 }, { "epoch": 4.628011358431803, "grad_norm": 0.14383943127399837, "learning_rate": 4.516851727895934e-06, "loss": 0.3531, "num_tokens": 9663699094.0, "step": 12634 }, { "epoch": 4.6283777594577264, "grad_norm": 0.1667443036781416, "learning_rate": 4.515835690945937e-06, "loss": 0.3768, "num_tokens": 9664390598.0, "step": 12635 }, { "epoch": 4.62874416048365, "grad_norm": 0.14066958983674888, "learning_rate": 4.514820639133171e-06, "loss": 0.3651, "num_tokens": 9665232805.0, "step": 12636 }, { "epoch": 4.629110561509572, "grad_norm": 0.14952254085929115, "learning_rate": 4.5138065725148194e-06, "loss": 0.3783, "num_tokens": 9666052223.0, "step": 12637 }, { "epoch": 4.629476962535495, "grad_norm": 0.14290116739585876, "learning_rate": 4.51279349114803e-06, "loss": 0.3629, "num_tokens": 9666839004.0, "step": 12638 }, { "epoch": 4.6298433635614185, "grad_norm": 0.14738759465592727, "learning_rate": 4.51178139508988e-06, "loss": 0.3438, "num_tokens": 9667574186.0, "step": 12639 }, { "epoch": 4.630209764587341, "grad_norm": 0.16303672933938315, "learning_rate": 4.510770284397395e-06, "loss": 0.3976, "num_tokens": 9668293935.0, "step": 12640 }, { "epoch": 4.630576165613264, "grad_norm": 0.15099182391113225, "learning_rate": 4.509760159127546e-06, "loss": 0.3739, "num_tokens": 9669051137.0, "step": 12641 }, { "epoch": 4.630942566639186, "grad_norm": 0.14060088925607234, "learning_rate": 4.508751019337247e-06, "loss": 0.3662, "num_tokens": 9669884232.0, "step": 12642 }, { "epoch": 4.63130896766511, "grad_norm": 0.13092435418334086, "learning_rate": 4.50774286508336e-06, "loss": 0.3625, "num_tokens": 9670782733.0, "step": 12643 }, { "epoch": 4.631675368691033, "grad_norm": 0.14322409964795013, "learning_rate": 4.506735696422687e-06, "loss": 0.3699, "num_tokens": 9671542452.0, "step": 12644 }, { "epoch": 4.632041769716955, "grad_norm": 0.1560595218350295, "learning_rate": 4.505729513411976e-06, "loss": 0.373, "num_tokens": 9672290414.0, "step": 12645 }, { "epoch": 4.632408170742878, "grad_norm": 0.21178390976945718, "learning_rate": 4.504724316107923e-06, "loss": 0.3516, "num_tokens": 9673106513.0, "step": 12646 }, { "epoch": 4.632774571768801, "grad_norm": 0.14135036266964393, "learning_rate": 4.503720104567162e-06, "loss": 0.3648, "num_tokens": 9673974815.0, "step": 12647 }, { "epoch": 4.633140972794724, "grad_norm": 0.14182029421476522, "learning_rate": 4.5027168788462746e-06, "loss": 0.3921, "num_tokens": 9674815144.0, "step": 12648 }, { "epoch": 4.633507373820647, "grad_norm": 0.15388722445853156, "learning_rate": 4.501714639001793e-06, "loss": 0.3564, "num_tokens": 9675549750.0, "step": 12649 }, { "epoch": 4.6338737748465695, "grad_norm": 0.15065986785149185, "learning_rate": 4.50071338509018e-06, "loss": 0.3736, "num_tokens": 9676267075.0, "step": 12650 }, { "epoch": 4.634240175872493, "grad_norm": 0.14462157521292687, "learning_rate": 4.499713117167857e-06, "loss": 0.3489, "num_tokens": 9677050439.0, "step": 12651 }, { "epoch": 4.634606576898415, "grad_norm": 0.14985862579120068, "learning_rate": 4.498713835291179e-06, "loss": 0.3691, "num_tokens": 9677796717.0, "step": 12652 }, { "epoch": 4.634972977924338, "grad_norm": 0.14077442648962252, "learning_rate": 4.497715539516454e-06, "loss": 0.3591, "num_tokens": 9678666263.0, "step": 12653 }, { "epoch": 4.6353393789502615, "grad_norm": 0.15005145832505698, "learning_rate": 4.496718229899926e-06, "loss": 0.3866, "num_tokens": 9679472251.0, "step": 12654 }, { "epoch": 4.635705779976184, "grad_norm": 0.15618115016852346, "learning_rate": 4.4957219064977924e-06, "loss": 0.3648, "num_tokens": 9680204751.0, "step": 12655 }, { "epoch": 4.636072181002107, "grad_norm": 0.13658534371637196, "learning_rate": 4.494726569366189e-06, "loss": 0.3476, "num_tokens": 9681037764.0, "step": 12656 }, { "epoch": 4.636438582028029, "grad_norm": 0.15325885291323363, "learning_rate": 4.493732218561196e-06, "loss": 0.3861, "num_tokens": 9681825940.0, "step": 12657 }, { "epoch": 4.636804983053953, "grad_norm": 0.15598670132092063, "learning_rate": 4.492738854138841e-06, "loss": 0.3802, "num_tokens": 9682493116.0, "step": 12658 }, { "epoch": 4.637171384079876, "grad_norm": 0.14943471667058045, "learning_rate": 4.491746476155099e-06, "loss": 0.3584, "num_tokens": 9683279974.0, "step": 12659 }, { "epoch": 4.637537785105798, "grad_norm": 0.14465525289550235, "learning_rate": 4.490755084665877e-06, "loss": 0.3495, "num_tokens": 9684011547.0, "step": 12660 }, { "epoch": 4.637904186131721, "grad_norm": 0.16184683178296355, "learning_rate": 4.489764679727038e-06, "loss": 0.3646, "num_tokens": 9684665569.0, "step": 12661 }, { "epoch": 4.638270587157644, "grad_norm": 0.1469898495771189, "learning_rate": 4.488775261394387e-06, "loss": 0.3436, "num_tokens": 9685465768.0, "step": 12662 }, { "epoch": 4.638636988183567, "grad_norm": 0.15558907853151122, "learning_rate": 4.487786829723673e-06, "loss": 0.3985, "num_tokens": 9686218346.0, "step": 12663 }, { "epoch": 4.63900338920949, "grad_norm": 0.15131888504740884, "learning_rate": 4.486799384770585e-06, "loss": 0.3805, "num_tokens": 9686985134.0, "step": 12664 }, { "epoch": 4.6393697902354125, "grad_norm": 0.14031311638443447, "learning_rate": 4.485812926590763e-06, "loss": 0.3592, "num_tokens": 9687825343.0, "step": 12665 }, { "epoch": 4.639736191261336, "grad_norm": 0.15724594852742602, "learning_rate": 4.484827455239788e-06, "loss": 0.3761, "num_tokens": 9688508907.0, "step": 12666 }, { "epoch": 4.640102592287258, "grad_norm": 0.1484221122422681, "learning_rate": 4.483842970773186e-06, "loss": 0.3844, "num_tokens": 9689326057.0, "step": 12667 }, { "epoch": 4.640468993313181, "grad_norm": 0.15198042836254888, "learning_rate": 4.482859473246428e-06, "loss": 0.3666, "num_tokens": 9690050879.0, "step": 12668 }, { "epoch": 4.6408353943391045, "grad_norm": 0.148500617767499, "learning_rate": 4.481876962714927e-06, "loss": 0.3422, "num_tokens": 9690746359.0, "step": 12669 }, { "epoch": 4.641201795365027, "grad_norm": 0.13645594645579256, "learning_rate": 4.480895439234043e-06, "loss": 0.3791, "num_tokens": 9691596010.0, "step": 12670 }, { "epoch": 4.64156819639095, "grad_norm": 0.14870245155662642, "learning_rate": 4.47991490285908e-06, "loss": 0.3459, "num_tokens": 9692375535.0, "step": 12671 }, { "epoch": 4.641934597416872, "grad_norm": 0.15761414204217036, "learning_rate": 4.478935353645286e-06, "loss": 0.3691, "num_tokens": 9693083183.0, "step": 12672 }, { "epoch": 4.642300998442796, "grad_norm": 0.14952462282022164, "learning_rate": 4.47795679164785e-06, "loss": 0.3553, "num_tokens": 9693836878.0, "step": 12673 }, { "epoch": 4.642667399468719, "grad_norm": 0.1572990366634285, "learning_rate": 4.476979216921914e-06, "loss": 0.3779, "num_tokens": 9694591924.0, "step": 12674 }, { "epoch": 4.643033800494641, "grad_norm": 0.15096798282176402, "learning_rate": 4.476002629522553e-06, "loss": 0.3371, "num_tokens": 9695352743.0, "step": 12675 }, { "epoch": 4.643400201520564, "grad_norm": 0.14106701196833057, "learning_rate": 4.475027029504801e-06, "loss": 0.3667, "num_tokens": 9696184258.0, "step": 12676 }, { "epoch": 4.643766602546487, "grad_norm": 0.14079968512228014, "learning_rate": 4.474052416923618e-06, "loss": 0.3468, "num_tokens": 9697038597.0, "step": 12677 }, { "epoch": 4.64413300357241, "grad_norm": 0.1418700618011092, "learning_rate": 4.473078791833923e-06, "loss": 0.3571, "num_tokens": 9697834299.0, "step": 12678 }, { "epoch": 4.644499404598333, "grad_norm": 0.1478658417433234, "learning_rate": 4.472106154290577e-06, "loss": 0.3505, "num_tokens": 9698609965.0, "step": 12679 }, { "epoch": 4.6448658056242556, "grad_norm": 0.15715366058484956, "learning_rate": 4.471134504348375e-06, "loss": 0.3579, "num_tokens": 9699295053.0, "step": 12680 }, { "epoch": 4.645232206650179, "grad_norm": 0.13629379268979006, "learning_rate": 4.470163842062072e-06, "loss": 0.3731, "num_tokens": 9700242634.0, "step": 12681 }, { "epoch": 4.645598607676101, "grad_norm": 0.1375938950245981, "learning_rate": 4.4691941674863545e-06, "loss": 0.3594, "num_tokens": 9701073066.0, "step": 12682 }, { "epoch": 4.645965008702024, "grad_norm": 0.1598478013279829, "learning_rate": 4.468225480675864e-06, "loss": 0.3829, "num_tokens": 9701784534.0, "step": 12683 }, { "epoch": 4.646331409727948, "grad_norm": 0.1359920566635806, "learning_rate": 4.467257781685174e-06, "loss": 0.3711, "num_tokens": 9702669027.0, "step": 12684 }, { "epoch": 4.64669781075387, "grad_norm": 0.14611830445397703, "learning_rate": 4.466291070568811e-06, "loss": 0.3968, "num_tokens": 9703435114.0, "step": 12685 }, { "epoch": 4.647064211779793, "grad_norm": 0.13237624159153283, "learning_rate": 4.465325347381248e-06, "loss": 0.3433, "num_tokens": 9704297716.0, "step": 12686 }, { "epoch": 4.6474306128057155, "grad_norm": 0.16593089335587083, "learning_rate": 4.464360612176894e-06, "loss": 0.3657, "num_tokens": 9704957403.0, "step": 12687 }, { "epoch": 4.647797013831639, "grad_norm": 0.1513847813026378, "learning_rate": 4.463396865010105e-06, "loss": 0.3707, "num_tokens": 9705651272.0, "step": 12688 }, { "epoch": 4.648163414857562, "grad_norm": 0.13247138832414243, "learning_rate": 4.462434105935191e-06, "loss": 0.3676, "num_tokens": 9706521249.0, "step": 12689 }, { "epoch": 4.648529815883484, "grad_norm": 0.15613903795842216, "learning_rate": 4.4614723350063905e-06, "loss": 0.392, "num_tokens": 9707261819.0, "step": 12690 }, { "epoch": 4.6488962169094075, "grad_norm": 0.1516610302021603, "learning_rate": 4.460511552277894e-06, "loss": 0.3579, "num_tokens": 9708041276.0, "step": 12691 }, { "epoch": 4.64926261793533, "grad_norm": 0.1482589126300255, "learning_rate": 4.45955175780384e-06, "loss": 0.3745, "num_tokens": 9708842086.0, "step": 12692 }, { "epoch": 4.649629018961253, "grad_norm": 0.14993275310862586, "learning_rate": 4.458592951638309e-06, "loss": 0.3715, "num_tokens": 9709598236.0, "step": 12693 }, { "epoch": 4.649995419987176, "grad_norm": 0.15043755562697153, "learning_rate": 4.45763513383532e-06, "loss": 0.3678, "num_tokens": 9710323433.0, "step": 12694 }, { "epoch": 4.650361821013099, "grad_norm": 0.152783685150573, "learning_rate": 4.456678304448841e-06, "loss": 0.3622, "num_tokens": 9711082315.0, "step": 12695 }, { "epoch": 4.650728222039022, "grad_norm": 0.14964552862835367, "learning_rate": 4.455722463532788e-06, "loss": 0.3663, "num_tokens": 9711831230.0, "step": 12696 }, { "epoch": 4.651094623064944, "grad_norm": 0.15457950480631746, "learning_rate": 4.454767611141015e-06, "loss": 0.3713, "num_tokens": 9712524765.0, "step": 12697 }, { "epoch": 4.651461024090867, "grad_norm": 0.1478121813314927, "learning_rate": 4.4538137473273245e-06, "loss": 0.3673, "num_tokens": 9713372096.0, "step": 12698 }, { "epoch": 4.651827425116791, "grad_norm": 0.15145991613230675, "learning_rate": 4.45286087214546e-06, "loss": 0.3601, "num_tokens": 9714111830.0, "step": 12699 }, { "epoch": 4.652193826142713, "grad_norm": 0.15722222461779126, "learning_rate": 4.451908985649108e-06, "loss": 0.3761, "num_tokens": 9714796195.0, "step": 12700 }, { "epoch": 4.652560227168636, "grad_norm": 0.14273257680936063, "learning_rate": 4.450958087891907e-06, "loss": 0.36, "num_tokens": 9715587521.0, "step": 12701 }, { "epoch": 4.6529266281945585, "grad_norm": 0.13262920958845326, "learning_rate": 4.450008178927432e-06, "loss": 0.3383, "num_tokens": 9716465359.0, "step": 12702 }, { "epoch": 4.653293029220482, "grad_norm": 0.14690473969090742, "learning_rate": 4.449059258809207e-06, "loss": 0.3726, "num_tokens": 9717227186.0, "step": 12703 }, { "epoch": 4.653659430246405, "grad_norm": 0.14771088447379913, "learning_rate": 4.448111327590694e-06, "loss": 0.3595, "num_tokens": 9718034553.0, "step": 12704 }, { "epoch": 4.654025831272327, "grad_norm": 0.16015230050319565, "learning_rate": 4.447164385325311e-06, "loss": 0.3425, "num_tokens": 9718673736.0, "step": 12705 }, { "epoch": 4.6543922322982505, "grad_norm": 0.1539861116774248, "learning_rate": 4.446218432066409e-06, "loss": 0.3827, "num_tokens": 9719414174.0, "step": 12706 }, { "epoch": 4.654758633324173, "grad_norm": 0.16181195006892257, "learning_rate": 4.445273467867287e-06, "loss": 0.3959, "num_tokens": 9720159695.0, "step": 12707 }, { "epoch": 4.655125034350096, "grad_norm": 0.14598721571845022, "learning_rate": 4.444329492781189e-06, "loss": 0.4107, "num_tokens": 9721043256.0, "step": 12708 }, { "epoch": 4.655491435376019, "grad_norm": 0.14210794009908298, "learning_rate": 4.443386506861303e-06, "loss": 0.3635, "num_tokens": 9721906741.0, "step": 12709 }, { "epoch": 4.655857836401942, "grad_norm": 0.14341525049370685, "learning_rate": 4.442444510160761e-06, "loss": 0.3644, "num_tokens": 9722719767.0, "step": 12710 }, { "epoch": 4.656224237427865, "grad_norm": 0.14854189500679563, "learning_rate": 4.441503502732639e-06, "loss": 0.3804, "num_tokens": 9723501322.0, "step": 12711 }, { "epoch": 4.656590638453787, "grad_norm": 0.1466964426511647, "learning_rate": 4.440563484629959e-06, "loss": 0.3734, "num_tokens": 9724279141.0, "step": 12712 }, { "epoch": 4.65695703947971, "grad_norm": 0.15736705757740208, "learning_rate": 4.439624455905686e-06, "loss": 0.3936, "num_tokens": 9724944500.0, "step": 12713 }, { "epoch": 4.657323440505634, "grad_norm": 0.15312319889449294, "learning_rate": 4.438686416612728e-06, "loss": 0.3617, "num_tokens": 9725671411.0, "step": 12714 }, { "epoch": 4.657689841531556, "grad_norm": 0.15639019354204645, "learning_rate": 4.4377493668039355e-06, "loss": 0.3368, "num_tokens": 9726376030.0, "step": 12715 }, { "epoch": 4.658056242557479, "grad_norm": 0.15352497688291453, "learning_rate": 4.436813306532113e-06, "loss": 0.3659, "num_tokens": 9727135768.0, "step": 12716 }, { "epoch": 4.658422643583402, "grad_norm": 0.12783120896674752, "learning_rate": 4.435878235849996e-06, "loss": 0.3561, "num_tokens": 9728076588.0, "step": 12717 }, { "epoch": 4.658789044609325, "grad_norm": 0.15406390271248668, "learning_rate": 4.434944154810272e-06, "loss": 0.3858, "num_tokens": 9728804906.0, "step": 12718 }, { "epoch": 4.659155445635248, "grad_norm": 0.15483941772712306, "learning_rate": 4.434011063465578e-06, "loss": 0.375, "num_tokens": 9729523138.0, "step": 12719 }, { "epoch": 4.65952184666117, "grad_norm": 0.1750390304993928, "learning_rate": 4.433078961868479e-06, "loss": 0.3805, "num_tokens": 9730104266.0, "step": 12720 }, { "epoch": 4.6598882476870935, "grad_norm": 0.14759470180278064, "learning_rate": 4.432147850071496e-06, "loss": 0.3357, "num_tokens": 9730841429.0, "step": 12721 }, { "epoch": 4.660254648713017, "grad_norm": 0.15654162134398888, "learning_rate": 4.431217728127098e-06, "loss": 0.3805, "num_tokens": 9731594286.0, "step": 12722 }, { "epoch": 4.660621049738939, "grad_norm": 0.14629943052923622, "learning_rate": 4.430288596087688e-06, "loss": 0.383, "num_tokens": 9732392858.0, "step": 12723 }, { "epoch": 4.660987450764862, "grad_norm": 0.1552006561122343, "learning_rate": 4.429360454005617e-06, "loss": 0.3717, "num_tokens": 9733146510.0, "step": 12724 }, { "epoch": 4.661353851790785, "grad_norm": 0.14412156289265626, "learning_rate": 4.428433301933182e-06, "loss": 0.3601, "num_tokens": 9733948872.0, "step": 12725 }, { "epoch": 4.661720252816708, "grad_norm": 0.15934512949470975, "learning_rate": 4.427507139922622e-06, "loss": 0.3765, "num_tokens": 9734641353.0, "step": 12726 }, { "epoch": 4.662086653842631, "grad_norm": 0.15726003276341882, "learning_rate": 4.426581968026123e-06, "loss": 0.3755, "num_tokens": 9735312151.0, "step": 12727 }, { "epoch": 4.662453054868553, "grad_norm": 0.15631631669965076, "learning_rate": 4.425657786295812e-06, "loss": 0.3576, "num_tokens": 9736081231.0, "step": 12728 }, { "epoch": 4.662819455894477, "grad_norm": 0.14310081992282106, "learning_rate": 4.4247345947837605e-06, "loss": 0.3687, "num_tokens": 9736901011.0, "step": 12729 }, { "epoch": 4.663185856920399, "grad_norm": 0.15129845677476167, "learning_rate": 4.423812393541988e-06, "loss": 0.3922, "num_tokens": 9737695350.0, "step": 12730 }, { "epoch": 4.663552257946322, "grad_norm": 0.16535418272567984, "learning_rate": 4.422891182622453e-06, "loss": 0.3873, "num_tokens": 9738369478.0, "step": 12731 }, { "epoch": 4.6639186589722454, "grad_norm": 0.15325184655424529, "learning_rate": 4.421970962077059e-06, "loss": 0.3671, "num_tokens": 9739144174.0, "step": 12732 }, { "epoch": 4.664285059998168, "grad_norm": 0.14150502731129097, "learning_rate": 4.421051731957664e-06, "loss": 0.3796, "num_tokens": 9739995677.0, "step": 12733 }, { "epoch": 4.664651461024091, "grad_norm": 0.14620761376669486, "learning_rate": 4.420133492316052e-06, "loss": 0.3784, "num_tokens": 9740791636.0, "step": 12734 }, { "epoch": 4.665017862050014, "grad_norm": 0.14055529868073377, "learning_rate": 4.419216243203964e-06, "loss": 0.373, "num_tokens": 9741664574.0, "step": 12735 }, { "epoch": 4.665384263075937, "grad_norm": 0.14315542780689558, "learning_rate": 4.418299984673088e-06, "loss": 0.3647, "num_tokens": 9742434449.0, "step": 12736 }, { "epoch": 4.66575066410186, "grad_norm": 0.14488174379096447, "learning_rate": 4.417384716775038e-06, "loss": 0.3513, "num_tokens": 9743171546.0, "step": 12737 }, { "epoch": 4.666117065127782, "grad_norm": 0.14484781803776853, "learning_rate": 4.416470439561395e-06, "loss": 0.3943, "num_tokens": 9743945900.0, "step": 12738 }, { "epoch": 4.666483466153705, "grad_norm": 0.1454754204368039, "learning_rate": 4.415557153083667e-06, "loss": 0.3418, "num_tokens": 9744753361.0, "step": 12739 }, { "epoch": 4.666849867179629, "grad_norm": 0.1536472618797308, "learning_rate": 4.414644857393317e-06, "loss": 0.3418, "num_tokens": 9745481991.0, "step": 12740 }, { "epoch": 4.667216268205551, "grad_norm": 0.1602144077713096, "learning_rate": 4.413733552541746e-06, "loss": 0.3683, "num_tokens": 9746183516.0, "step": 12741 }, { "epoch": 4.667582669231474, "grad_norm": 0.16275743074464685, "learning_rate": 4.412823238580301e-06, "loss": 0.3462, "num_tokens": 9746841943.0, "step": 12742 }, { "epoch": 4.6679490702573965, "grad_norm": 0.1397289854243406, "learning_rate": 4.411913915560273e-06, "loss": 0.3796, "num_tokens": 9747743132.0, "step": 12743 }, { "epoch": 4.66831547128332, "grad_norm": 0.14897301513792352, "learning_rate": 4.411005583532899e-06, "loss": 0.3675, "num_tokens": 9748503879.0, "step": 12744 }, { "epoch": 4.668681872309243, "grad_norm": 0.14072039435297973, "learning_rate": 4.4100982425493565e-06, "loss": 0.3741, "num_tokens": 9749354097.0, "step": 12745 }, { "epoch": 4.669048273335165, "grad_norm": 0.14135501545092155, "learning_rate": 4.409191892660771e-06, "loss": 0.3797, "num_tokens": 9750226741.0, "step": 12746 }, { "epoch": 4.6694146743610885, "grad_norm": 0.16404061661209257, "learning_rate": 4.40828653391821e-06, "loss": 0.3695, "num_tokens": 9750920550.0, "step": 12747 }, { "epoch": 4.669781075387011, "grad_norm": 0.15987090751797028, "learning_rate": 4.407382166372683e-06, "loss": 0.4101, "num_tokens": 9751627162.0, "step": 12748 }, { "epoch": 4.670147476412934, "grad_norm": 0.13383000792805314, "learning_rate": 4.406478790075149e-06, "loss": 0.3625, "num_tokens": 9752613834.0, "step": 12749 }, { "epoch": 4.670513877438857, "grad_norm": 0.13897291534900916, "learning_rate": 4.405576405076509e-06, "loss": 0.3449, "num_tokens": 9753470268.0, "step": 12750 }, { "epoch": 4.67088027846478, "grad_norm": 0.14572020631013624, "learning_rate": 4.404675011427603e-06, "loss": 0.3496, "num_tokens": 9754218900.0, "step": 12751 }, { "epoch": 4.671246679490703, "grad_norm": 0.15685723246270167, "learning_rate": 4.403774609179224e-06, "loss": 0.3612, "num_tokens": 9754968994.0, "step": 12752 }, { "epoch": 4.671613080516625, "grad_norm": 0.14190219687910471, "learning_rate": 4.402875198382105e-06, "loss": 0.3646, "num_tokens": 9755789173.0, "step": 12753 }, { "epoch": 4.671979481542548, "grad_norm": 0.16296164606213973, "learning_rate": 4.40197677908692e-06, "loss": 0.4057, "num_tokens": 9756547543.0, "step": 12754 }, { "epoch": 4.672345882568472, "grad_norm": 0.15132808396105052, "learning_rate": 4.401079351344291e-06, "loss": 0.3725, "num_tokens": 9757307728.0, "step": 12755 }, { "epoch": 4.672712283594394, "grad_norm": 0.13543550048257338, "learning_rate": 4.4001829152047835e-06, "loss": 0.3552, "num_tokens": 9758158866.0, "step": 12756 }, { "epoch": 4.673078684620317, "grad_norm": 0.1520001312408506, "learning_rate": 4.399287470718906e-06, "loss": 0.3405, "num_tokens": 9758911898.0, "step": 12757 }, { "epoch": 4.6734450856462395, "grad_norm": 0.13838222292632335, "learning_rate": 4.3983930179371146e-06, "loss": 0.3555, "num_tokens": 9759759417.0, "step": 12758 }, { "epoch": 4.673811486672163, "grad_norm": 0.15000704707285445, "learning_rate": 4.397499556909803e-06, "loss": 0.3938, "num_tokens": 9760542888.0, "step": 12759 }, { "epoch": 4.674177887698086, "grad_norm": 0.14656949513387266, "learning_rate": 4.396607087687316e-06, "loss": 0.3674, "num_tokens": 9761310155.0, "step": 12760 }, { "epoch": 4.674544288724008, "grad_norm": 0.14611472092643776, "learning_rate": 4.395715610319935e-06, "loss": 0.3713, "num_tokens": 9762075807.0, "step": 12761 }, { "epoch": 4.6749106897499315, "grad_norm": 0.13661610155550988, "learning_rate": 4.394825124857895e-06, "loss": 0.3856, "num_tokens": 9762947559.0, "step": 12762 }, { "epoch": 4.675277090775854, "grad_norm": 0.13976420749741464, "learning_rate": 4.39393563135137e-06, "loss": 0.3355, "num_tokens": 9763702012.0, "step": 12763 }, { "epoch": 4.675643491801777, "grad_norm": 0.1459003098061871, "learning_rate": 4.393047129850473e-06, "loss": 0.356, "num_tokens": 9764480241.0, "step": 12764 }, { "epoch": 4.6760098928277, "grad_norm": 0.15528289584516464, "learning_rate": 4.39215962040527e-06, "loss": 0.37, "num_tokens": 9765279106.0, "step": 12765 }, { "epoch": 4.676376293853623, "grad_norm": 0.14363951689100438, "learning_rate": 4.391273103065769e-06, "loss": 0.3767, "num_tokens": 9766090075.0, "step": 12766 }, { "epoch": 4.676742694879546, "grad_norm": 0.15853833443863669, "learning_rate": 4.390387577881913e-06, "loss": 0.3565, "num_tokens": 9766771107.0, "step": 12767 }, { "epoch": 4.677109095905468, "grad_norm": 0.14934814441546085, "learning_rate": 4.389503044903606e-06, "loss": 0.3693, "num_tokens": 9767502355.0, "step": 12768 }, { "epoch": 4.677475496931391, "grad_norm": 0.13356758791174903, "learning_rate": 4.388619504180681e-06, "loss": 0.377, "num_tokens": 9768471893.0, "step": 12769 }, { "epoch": 4.677841897957315, "grad_norm": 0.14808222065199605, "learning_rate": 4.387736955762922e-06, "loss": 0.3839, "num_tokens": 9769217340.0, "step": 12770 }, { "epoch": 4.678208298983237, "grad_norm": 0.15227807387181364, "learning_rate": 4.386855399700054e-06, "loss": 0.3755, "num_tokens": 9769957981.0, "step": 12771 }, { "epoch": 4.67857470000916, "grad_norm": 0.15185899827611832, "learning_rate": 4.38597483604175e-06, "loss": 0.3723, "num_tokens": 9770712122.0, "step": 12772 }, { "epoch": 4.6789411010350825, "grad_norm": 0.15091097350514446, "learning_rate": 4.385095264837627e-06, "loss": 0.3546, "num_tokens": 9771481282.0, "step": 12773 }, { "epoch": 4.679307502061006, "grad_norm": 0.16230138187146612, "learning_rate": 4.384216686137239e-06, "loss": 0.3621, "num_tokens": 9772172278.0, "step": 12774 }, { "epoch": 4.679673903086929, "grad_norm": 0.1608836620179088, "learning_rate": 4.38333909999009e-06, "loss": 0.3496, "num_tokens": 9772832905.0, "step": 12775 }, { "epoch": 4.680040304112851, "grad_norm": 0.15576519838582903, "learning_rate": 4.3824625064456335e-06, "loss": 0.3892, "num_tokens": 9773567031.0, "step": 12776 }, { "epoch": 4.6804067051387745, "grad_norm": 0.150464360157746, "learning_rate": 4.381586905553251e-06, "loss": 0.3765, "num_tokens": 9774385971.0, "step": 12777 }, { "epoch": 4.680773106164697, "grad_norm": 0.14675738283143044, "learning_rate": 4.380712297362285e-06, "loss": 0.3531, "num_tokens": 9775222239.0, "step": 12778 }, { "epoch": 4.68113950719062, "grad_norm": 0.16118601719247125, "learning_rate": 4.379838681922012e-06, "loss": 0.3515, "num_tokens": 9775892956.0, "step": 12779 }, { "epoch": 4.681505908216543, "grad_norm": 0.14349099175794866, "learning_rate": 4.378966059281658e-06, "loss": 0.3647, "num_tokens": 9776672903.0, "step": 12780 }, { "epoch": 4.681872309242466, "grad_norm": 0.14962769025882522, "learning_rate": 4.378094429490386e-06, "loss": 0.3458, "num_tokens": 9777423982.0, "step": 12781 }, { "epoch": 4.682238710268389, "grad_norm": 0.1384131978557869, "learning_rate": 4.3772237925973125e-06, "loss": 0.3709, "num_tokens": 9778282070.0, "step": 12782 }, { "epoch": 4.682605111294311, "grad_norm": 0.15376947294354834, "learning_rate": 4.3763541486514906e-06, "loss": 0.3893, "num_tokens": 9779059107.0, "step": 12783 }, { "epoch": 4.6829715123202345, "grad_norm": 0.1473084731814128, "learning_rate": 4.37548549770192e-06, "loss": 0.354, "num_tokens": 9779830078.0, "step": 12784 }, { "epoch": 4.683337913346158, "grad_norm": 0.15699468898267022, "learning_rate": 4.3746178397975455e-06, "loss": 0.3739, "num_tokens": 9780577902.0, "step": 12785 }, { "epoch": 4.68370431437208, "grad_norm": 0.15716125812271384, "learning_rate": 4.373751174987252e-06, "loss": 0.3666, "num_tokens": 9781222524.0, "step": 12786 }, { "epoch": 4.684070715398003, "grad_norm": 0.14166021652413857, "learning_rate": 4.372885503319877e-06, "loss": 0.3579, "num_tokens": 9782012388.0, "step": 12787 }, { "epoch": 4.684437116423926, "grad_norm": 0.14658449811414562, "learning_rate": 4.372020824844189e-06, "loss": 0.3764, "num_tokens": 9782859842.0, "step": 12788 }, { "epoch": 4.684803517449849, "grad_norm": 0.15266039733867837, "learning_rate": 4.3711571396089144e-06, "loss": 0.3479, "num_tokens": 9783575056.0, "step": 12789 }, { "epoch": 4.685169918475772, "grad_norm": 0.15855702461287513, "learning_rate": 4.3702944476627145e-06, "loss": 0.3702, "num_tokens": 9784344349.0, "step": 12790 }, { "epoch": 4.685536319501694, "grad_norm": 0.15883527838281078, "learning_rate": 4.369432749054195e-06, "loss": 0.371, "num_tokens": 9785088399.0, "step": 12791 }, { "epoch": 4.685902720527618, "grad_norm": 0.16938409577484448, "learning_rate": 4.368572043831913e-06, "loss": 0.3904, "num_tokens": 9785714544.0, "step": 12792 }, { "epoch": 4.68626912155354, "grad_norm": 0.1423891569634579, "learning_rate": 4.367712332044361e-06, "loss": 0.3559, "num_tokens": 9786467748.0, "step": 12793 }, { "epoch": 4.686635522579463, "grad_norm": 0.1391903829501339, "learning_rate": 4.366853613739979e-06, "loss": 0.3396, "num_tokens": 9787283398.0, "step": 12794 }, { "epoch": 4.687001923605386, "grad_norm": 0.14131908984262875, "learning_rate": 4.365995888967152e-06, "loss": 0.3679, "num_tokens": 9788119668.0, "step": 12795 }, { "epoch": 4.687368324631309, "grad_norm": 0.1418719674927336, "learning_rate": 4.365139157774207e-06, "loss": 0.3911, "num_tokens": 9788976268.0, "step": 12796 }, { "epoch": 4.687734725657232, "grad_norm": 0.15588572626216698, "learning_rate": 4.364283420209421e-06, "loss": 0.3599, "num_tokens": 9789696678.0, "step": 12797 }, { "epoch": 4.688101126683154, "grad_norm": 0.15295946083082723, "learning_rate": 4.363428676321003e-06, "loss": 0.3624, "num_tokens": 9790414938.0, "step": 12798 }, { "epoch": 4.6884675277090775, "grad_norm": 0.15042405581051388, "learning_rate": 4.3625749261571156e-06, "loss": 0.3496, "num_tokens": 9791143637.0, "step": 12799 }, { "epoch": 4.688833928735001, "grad_norm": 0.1572954895481269, "learning_rate": 4.361722169765867e-06, "loss": 0.3971, "num_tokens": 9791889503.0, "step": 12800 }, { "epoch": 4.689200329760923, "grad_norm": 0.1514780944862532, "learning_rate": 4.3608704071953e-06, "loss": 0.3779, "num_tokens": 9792636825.0, "step": 12801 }, { "epoch": 4.689566730786846, "grad_norm": 0.15359553979986476, "learning_rate": 4.360019638493409e-06, "loss": 0.3324, "num_tokens": 9793329607.0, "step": 12802 }, { "epoch": 4.689933131812769, "grad_norm": 0.14665018236541996, "learning_rate": 4.359169863708132e-06, "loss": 0.3605, "num_tokens": 9794074037.0, "step": 12803 }, { "epoch": 4.690299532838692, "grad_norm": 0.15602370831095147, "learning_rate": 4.358321082887346e-06, "loss": 0.373, "num_tokens": 9794810197.0, "step": 12804 }, { "epoch": 4.690665933864615, "grad_norm": 0.15409056986964273, "learning_rate": 4.357473296078874e-06, "loss": 0.3717, "num_tokens": 9795518453.0, "step": 12805 }, { "epoch": 4.691032334890537, "grad_norm": 0.15403353932481925, "learning_rate": 4.356626503330491e-06, "loss": 0.3457, "num_tokens": 9796251612.0, "step": 12806 }, { "epoch": 4.691398735916461, "grad_norm": 0.16809069987282052, "learning_rate": 4.355780704689902e-06, "loss": 0.3897, "num_tokens": 9796944214.0, "step": 12807 }, { "epoch": 4.691765136942383, "grad_norm": 0.15015193504674357, "learning_rate": 4.354935900204767e-06, "loss": 0.3843, "num_tokens": 9797710425.0, "step": 12808 }, { "epoch": 4.692131537968306, "grad_norm": 0.14100586449913818, "learning_rate": 4.354092089922685e-06, "loss": 0.3823, "num_tokens": 9798555940.0, "step": 12809 }, { "epoch": 4.692497938994229, "grad_norm": 0.1316474834496486, "learning_rate": 4.353249273891199e-06, "loss": 0.3604, "num_tokens": 9799446347.0, "step": 12810 }, { "epoch": 4.692864340020152, "grad_norm": 0.1535205046080981, "learning_rate": 4.3524074521578e-06, "loss": 0.3342, "num_tokens": 9800219414.0, "step": 12811 }, { "epoch": 4.693230741046075, "grad_norm": 0.16275093175177413, "learning_rate": 4.3515666247699166e-06, "loss": 0.3733, "num_tokens": 9800918882.0, "step": 12812 }, { "epoch": 4.693597142071998, "grad_norm": 0.1530869738886808, "learning_rate": 4.350726791774928e-06, "loss": 0.3595, "num_tokens": 9801633282.0, "step": 12813 }, { "epoch": 4.6939635430979205, "grad_norm": 0.14993176918300155, "learning_rate": 4.349887953220151e-06, "loss": 0.3667, "num_tokens": 9802404997.0, "step": 12814 }, { "epoch": 4.694329944123844, "grad_norm": 0.14488532823750636, "learning_rate": 4.349050109152854e-06, "loss": 0.3679, "num_tokens": 9803220470.0, "step": 12815 }, { "epoch": 4.694696345149766, "grad_norm": 0.14401115633693248, "learning_rate": 4.348213259620239e-06, "loss": 0.3639, "num_tokens": 9804035466.0, "step": 12816 }, { "epoch": 4.695062746175689, "grad_norm": 0.15473067640048382, "learning_rate": 4.347377404669464e-06, "loss": 0.3676, "num_tokens": 9804726478.0, "step": 12817 }, { "epoch": 4.6954291472016125, "grad_norm": 0.16667894502632918, "learning_rate": 4.34654254434762e-06, "loss": 0.3727, "num_tokens": 9805391360.0, "step": 12818 }, { "epoch": 4.695795548227535, "grad_norm": 0.14417991933120042, "learning_rate": 4.345708678701748e-06, "loss": 0.3553, "num_tokens": 9806180991.0, "step": 12819 }, { "epoch": 4.696161949253458, "grad_norm": 0.1570286082324569, "learning_rate": 4.3448758077788355e-06, "loss": 0.366, "num_tokens": 9806836274.0, "step": 12820 }, { "epoch": 4.69652835027938, "grad_norm": 0.1447960105775834, "learning_rate": 4.344043931625805e-06, "loss": 0.3631, "num_tokens": 9807634092.0, "step": 12821 }, { "epoch": 4.696894751305304, "grad_norm": 0.14786145232576312, "learning_rate": 4.343213050289532e-06, "loss": 0.3672, "num_tokens": 9808448561.0, "step": 12822 }, { "epoch": 4.697261152331227, "grad_norm": 0.14254931689039416, "learning_rate": 4.3423831638168304e-06, "loss": 0.3532, "num_tokens": 9809277067.0, "step": 12823 }, { "epoch": 4.697627553357149, "grad_norm": 0.16897822382768798, "learning_rate": 4.341554272254458e-06, "loss": 0.3535, "num_tokens": 9809971314.0, "step": 12824 }, { "epoch": 4.697993954383072, "grad_norm": 0.14966527019186862, "learning_rate": 4.340726375649123e-06, "loss": 0.3896, "num_tokens": 9810746454.0, "step": 12825 }, { "epoch": 4.698360355408995, "grad_norm": 0.1335350421302834, "learning_rate": 4.3398994740474696e-06, "loss": 0.3563, "num_tokens": 9811664269.0, "step": 12826 }, { "epoch": 4.698726756434918, "grad_norm": 0.1361368135761237, "learning_rate": 4.339073567496089e-06, "loss": 0.3525, "num_tokens": 9812512833.0, "step": 12827 }, { "epoch": 4.699093157460841, "grad_norm": 0.1448972130119442, "learning_rate": 4.338248656041518e-06, "loss": 0.3651, "num_tokens": 9813324342.0, "step": 12828 }, { "epoch": 4.6994595584867636, "grad_norm": 0.15708858098839043, "learning_rate": 4.337424739730233e-06, "loss": 0.3844, "num_tokens": 9814106740.0, "step": 12829 }, { "epoch": 4.699825959512687, "grad_norm": 0.15415613259129635, "learning_rate": 4.336601818608663e-06, "loss": 0.3688, "num_tokens": 9814865969.0, "step": 12830 }, { "epoch": 4.70019236053861, "grad_norm": 0.14316130789939419, "learning_rate": 4.335779892723168e-06, "loss": 0.3957, "num_tokens": 9815719201.0, "step": 12831 }, { "epoch": 4.700558761564532, "grad_norm": 0.1648274564577832, "learning_rate": 4.334958962120063e-06, "loss": 0.3416, "num_tokens": 9816329375.0, "step": 12832 }, { "epoch": 4.700925162590456, "grad_norm": 0.16881033843021032, "learning_rate": 4.334139026845606e-06, "loss": 0.3983, "num_tokens": 9816983664.0, "step": 12833 }, { "epoch": 4.701291563616378, "grad_norm": 0.1505865309037946, "learning_rate": 4.33332008694599e-06, "loss": 0.3595, "num_tokens": 9817740493.0, "step": 12834 }, { "epoch": 4.701657964642301, "grad_norm": 0.15011809027679543, "learning_rate": 4.332502142467358e-06, "loss": 0.372, "num_tokens": 9818485226.0, "step": 12835 }, { "epoch": 4.702024365668224, "grad_norm": 0.13869094385304062, "learning_rate": 4.331685193455801e-06, "loss": 0.3664, "num_tokens": 9819344314.0, "step": 12836 }, { "epoch": 4.702390766694147, "grad_norm": 0.1474094054713664, "learning_rate": 4.330869239957347e-06, "loss": 0.3947, "num_tokens": 9820208084.0, "step": 12837 }, { "epoch": 4.70275716772007, "grad_norm": 0.1419701800479761, "learning_rate": 4.330054282017971e-06, "loss": 0.3545, "num_tokens": 9821028750.0, "step": 12838 }, { "epoch": 4.703123568745992, "grad_norm": 0.15799388072686718, "learning_rate": 4.329240319683593e-06, "loss": 0.4073, "num_tokens": 9821728349.0, "step": 12839 }, { "epoch": 4.7034899697719155, "grad_norm": 0.1661590991020141, "learning_rate": 4.328427353000071e-06, "loss": 0.354, "num_tokens": 9822342439.0, "step": 12840 }, { "epoch": 4.703856370797839, "grad_norm": 0.14046888857839923, "learning_rate": 4.327615382013215e-06, "loss": 0.3586, "num_tokens": 9823202338.0, "step": 12841 }, { "epoch": 4.704222771823761, "grad_norm": 0.1517152188821583, "learning_rate": 4.3268044067687755e-06, "loss": 0.3613, "num_tokens": 9823930423.0, "step": 12842 }, { "epoch": 4.704589172849684, "grad_norm": 0.1425659177703316, "learning_rate": 4.325994427312444e-06, "loss": 0.3707, "num_tokens": 9824741236.0, "step": 12843 }, { "epoch": 4.704955573875607, "grad_norm": 0.1436504333548436, "learning_rate": 4.3251854436898596e-06, "loss": 0.3736, "num_tokens": 9825527227.0, "step": 12844 }, { "epoch": 4.70532197490153, "grad_norm": 0.14606343930799004, "learning_rate": 4.324377455946603e-06, "loss": 0.3823, "num_tokens": 9826325632.0, "step": 12845 }, { "epoch": 4.705688375927453, "grad_norm": 0.1472741376595969, "learning_rate": 4.323570464128202e-06, "loss": 0.3493, "num_tokens": 9827069872.0, "step": 12846 }, { "epoch": 4.706054776953375, "grad_norm": 0.1462657804060861, "learning_rate": 4.322764468280127e-06, "loss": 0.3835, "num_tokens": 9827883688.0, "step": 12847 }, { "epoch": 4.706421177979299, "grad_norm": 0.14651740288957293, "learning_rate": 4.321959468447785e-06, "loss": 0.3778, "num_tokens": 9828672990.0, "step": 12848 }, { "epoch": 4.706787579005221, "grad_norm": 0.15946006899979961, "learning_rate": 4.321155464676541e-06, "loss": 0.3853, "num_tokens": 9829374652.0, "step": 12849 }, { "epoch": 4.707153980031144, "grad_norm": 0.14403359496784449, "learning_rate": 4.320352457011695e-06, "loss": 0.3948, "num_tokens": 9830202912.0, "step": 12850 }, { "epoch": 4.707520381057067, "grad_norm": 0.16192837203135196, "learning_rate": 4.319550445498488e-06, "loss": 0.3787, "num_tokens": 9830934157.0, "step": 12851 }, { "epoch": 4.70788678208299, "grad_norm": 0.14768164376335022, "learning_rate": 4.318749430182112e-06, "loss": 0.3824, "num_tokens": 9831694034.0, "step": 12852 }, { "epoch": 4.708253183108913, "grad_norm": 0.16117404607548985, "learning_rate": 4.317949411107698e-06, "loss": 0.3588, "num_tokens": 9832335695.0, "step": 12853 }, { "epoch": 4.708619584134835, "grad_norm": 0.14409535240944513, "learning_rate": 4.317150388320323e-06, "loss": 0.3603, "num_tokens": 9833126245.0, "step": 12854 }, { "epoch": 4.7089859851607585, "grad_norm": 0.1535235431128474, "learning_rate": 4.3163523618650105e-06, "loss": 0.3538, "num_tokens": 9833826921.0, "step": 12855 }, { "epoch": 4.709352386186682, "grad_norm": 0.14557925720845838, "learning_rate": 4.315555331786721e-06, "loss": 0.372, "num_tokens": 9834589558.0, "step": 12856 }, { "epoch": 4.709718787212604, "grad_norm": 0.15154915867872432, "learning_rate": 4.314759298130368e-06, "loss": 0.3643, "num_tokens": 9835342135.0, "step": 12857 }, { "epoch": 4.710085188238527, "grad_norm": 0.14672389133258001, "learning_rate": 4.313964260940797e-06, "loss": 0.3686, "num_tokens": 9836129923.0, "step": 12858 }, { "epoch": 4.71045158926445, "grad_norm": 0.14231485160191587, "learning_rate": 4.313170220262808e-06, "loss": 0.3778, "num_tokens": 9836925907.0, "step": 12859 }, { "epoch": 4.710817990290373, "grad_norm": 0.15348565189710858, "learning_rate": 4.312377176141142e-06, "loss": 0.3567, "num_tokens": 9837646759.0, "step": 12860 }, { "epoch": 4.711184391316296, "grad_norm": 0.15901379958204218, "learning_rate": 4.311585128620479e-06, "loss": 0.3899, "num_tokens": 9838392008.0, "step": 12861 }, { "epoch": 4.711550792342218, "grad_norm": 0.1673818072882637, "learning_rate": 4.310794077745447e-06, "loss": 0.412, "num_tokens": 9839109567.0, "step": 12862 }, { "epoch": 4.711917193368142, "grad_norm": 0.15925386459843988, "learning_rate": 4.31000402356062e-06, "loss": 0.3725, "num_tokens": 9839805164.0, "step": 12863 }, { "epoch": 4.712283594394064, "grad_norm": 0.1626023448797924, "learning_rate": 4.309214966110513e-06, "loss": 0.379, "num_tokens": 9840494848.0, "step": 12864 }, { "epoch": 4.712649995419987, "grad_norm": 0.15599254468047222, "learning_rate": 4.308426905439585e-06, "loss": 0.3582, "num_tokens": 9841222372.0, "step": 12865 }, { "epoch": 4.71301639644591, "grad_norm": 0.143199868368574, "learning_rate": 4.307639841592235e-06, "loss": 0.3556, "num_tokens": 9842030072.0, "step": 12866 }, { "epoch": 4.713382797471833, "grad_norm": 0.14578796585776946, "learning_rate": 4.306853774612814e-06, "loss": 0.3827, "num_tokens": 9842806170.0, "step": 12867 }, { "epoch": 4.713749198497756, "grad_norm": 0.1445637230267967, "learning_rate": 4.306068704545613e-06, "loss": 0.3464, "num_tokens": 9843544154.0, "step": 12868 }, { "epoch": 4.714115599523678, "grad_norm": 0.1585579660818439, "learning_rate": 4.305284631434864e-06, "loss": 0.3531, "num_tokens": 9844301404.0, "step": 12869 }, { "epoch": 4.7144820005496015, "grad_norm": 0.15101208213697284, "learning_rate": 4.304501555324746e-06, "loss": 0.3788, "num_tokens": 9845087735.0, "step": 12870 }, { "epoch": 4.714848401575525, "grad_norm": 0.14004135745815516, "learning_rate": 4.303719476259379e-06, "loss": 0.3698, "num_tokens": 9845986354.0, "step": 12871 }, { "epoch": 4.715214802601447, "grad_norm": 0.13615457723078855, "learning_rate": 4.302938394282834e-06, "loss": 0.3708, "num_tokens": 9846898201.0, "step": 12872 }, { "epoch": 4.71558120362737, "grad_norm": 0.137366995319335, "learning_rate": 4.302158309439116e-06, "loss": 0.359, "num_tokens": 9847743330.0, "step": 12873 }, { "epoch": 4.715947604653293, "grad_norm": 0.14970348578760898, "learning_rate": 4.3013792217721806e-06, "loss": 0.3591, "num_tokens": 9848536080.0, "step": 12874 }, { "epoch": 4.716314005679216, "grad_norm": 0.14507364969250844, "learning_rate": 4.300601131325922e-06, "loss": 0.3718, "num_tokens": 9849341962.0, "step": 12875 }, { "epoch": 4.716680406705139, "grad_norm": 0.12927725613395832, "learning_rate": 4.299824038144187e-06, "loss": 0.3422, "num_tokens": 9850263030.0, "step": 12876 }, { "epoch": 4.717046807731061, "grad_norm": 0.1500678900276087, "learning_rate": 4.29904794227076e-06, "loss": 0.3559, "num_tokens": 9850994968.0, "step": 12877 }, { "epoch": 4.717413208756985, "grad_norm": 0.14015738566684402, "learning_rate": 4.298272843749364e-06, "loss": 0.3805, "num_tokens": 9851828577.0, "step": 12878 }, { "epoch": 4.717779609782907, "grad_norm": 0.14534520396866893, "learning_rate": 4.297498742623676e-06, "loss": 0.3745, "num_tokens": 9852599902.0, "step": 12879 }, { "epoch": 4.71814601080883, "grad_norm": 0.15797716143876345, "learning_rate": 4.296725638937311e-06, "loss": 0.3379, "num_tokens": 9853323988.0, "step": 12880 }, { "epoch": 4.7185124118347535, "grad_norm": 0.1551078967216758, "learning_rate": 4.295953532733828e-06, "loss": 0.3704, "num_tokens": 9854075112.0, "step": 12881 }, { "epoch": 4.718878812860676, "grad_norm": 0.15193876319448335, "learning_rate": 4.295182424056735e-06, "loss": 0.3596, "num_tokens": 9854814871.0, "step": 12882 }, { "epoch": 4.719245213886599, "grad_norm": 0.16934675088019055, "learning_rate": 4.294412312949477e-06, "loss": 0.3846, "num_tokens": 9855455208.0, "step": 12883 }, { "epoch": 4.719611614912521, "grad_norm": 0.1535993907179773, "learning_rate": 4.293643199455445e-06, "loss": 0.3465, "num_tokens": 9856139095.0, "step": 12884 }, { "epoch": 4.719978015938445, "grad_norm": 0.15055343324805145, "learning_rate": 4.292875083617975e-06, "loss": 0.3687, "num_tokens": 9856931398.0, "step": 12885 }, { "epoch": 4.720344416964368, "grad_norm": 0.14529675150824656, "learning_rate": 4.292107965480345e-06, "loss": 0.3575, "num_tokens": 9857697274.0, "step": 12886 }, { "epoch": 4.72071081799029, "grad_norm": 0.15672915735006857, "learning_rate": 4.291341845085783e-06, "loss": 0.3685, "num_tokens": 9858395427.0, "step": 12887 }, { "epoch": 4.721077219016213, "grad_norm": 0.15548861891125101, "learning_rate": 4.2905767224774506e-06, "loss": 0.3547, "num_tokens": 9859151272.0, "step": 12888 }, { "epoch": 4.721443620042136, "grad_norm": 0.13981018990736896, "learning_rate": 4.289812597698457e-06, "loss": 0.3674, "num_tokens": 9860022660.0, "step": 12889 }, { "epoch": 4.721810021068059, "grad_norm": 0.15105847831352529, "learning_rate": 4.289049470791862e-06, "loss": 0.352, "num_tokens": 9860797959.0, "step": 12890 }, { "epoch": 4.722176422093982, "grad_norm": 0.14425506385615966, "learning_rate": 4.2882873418006585e-06, "loss": 0.3906, "num_tokens": 9861596126.0, "step": 12891 }, { "epoch": 4.7225428231199045, "grad_norm": 0.14633587448050783, "learning_rate": 4.287526210767791e-06, "loss": 0.3883, "num_tokens": 9862373850.0, "step": 12892 }, { "epoch": 4.722909224145828, "grad_norm": 0.16023536823143794, "learning_rate": 4.286766077736147e-06, "loss": 0.3657, "num_tokens": 9863083509.0, "step": 12893 }, { "epoch": 4.72327562517175, "grad_norm": 0.16926464755254067, "learning_rate": 4.286006942748552e-06, "loss": 0.3939, "num_tokens": 9863752869.0, "step": 12894 }, { "epoch": 4.723642026197673, "grad_norm": 0.16356344828962674, "learning_rate": 4.28524880584778e-06, "loss": 0.3528, "num_tokens": 9864413657.0, "step": 12895 }, { "epoch": 4.7240084272235965, "grad_norm": 0.13664370443376794, "learning_rate": 4.284491667076549e-06, "loss": 0.3558, "num_tokens": 9865307283.0, "step": 12896 }, { "epoch": 4.724374828249519, "grad_norm": 0.13680154270790668, "learning_rate": 4.283735526477518e-06, "loss": 0.3754, "num_tokens": 9866161157.0, "step": 12897 }, { "epoch": 4.724741229275442, "grad_norm": 0.15090643833222936, "learning_rate": 4.282980384093293e-06, "loss": 0.3329, "num_tokens": 9866878625.0, "step": 12898 }, { "epoch": 4.725107630301364, "grad_norm": 0.14465212046299972, "learning_rate": 4.2822262399664225e-06, "loss": 0.3731, "num_tokens": 9867694563.0, "step": 12899 }, { "epoch": 4.725474031327288, "grad_norm": 0.13096076663763198, "learning_rate": 4.281473094139397e-06, "loss": 0.3669, "num_tokens": 9868582674.0, "step": 12900 }, { "epoch": 4.725840432353211, "grad_norm": 0.14284850896110307, "learning_rate": 4.280720946654652e-06, "loss": 0.3746, "num_tokens": 9869408398.0, "step": 12901 }, { "epoch": 4.726206833379133, "grad_norm": 0.14966224694944985, "learning_rate": 4.279969797554568e-06, "loss": 0.3668, "num_tokens": 9870166828.0, "step": 12902 }, { "epoch": 4.726573234405056, "grad_norm": 0.1597547503993021, "learning_rate": 4.2792196468814665e-06, "loss": 0.3937, "num_tokens": 9870853717.0, "step": 12903 }, { "epoch": 4.726939635430979, "grad_norm": 0.14604806623754063, "learning_rate": 4.278470494677619e-06, "loss": 0.3472, "num_tokens": 9871574844.0, "step": 12904 }, { "epoch": 4.727306036456902, "grad_norm": 0.15115757348760833, "learning_rate": 4.27772234098523e-06, "loss": 0.3678, "num_tokens": 9872298942.0, "step": 12905 }, { "epoch": 4.727672437482825, "grad_norm": 0.1434847880798439, "learning_rate": 4.276975185846457e-06, "loss": 0.3642, "num_tokens": 9873082847.0, "step": 12906 }, { "epoch": 4.7280388385087475, "grad_norm": 0.14238151146220604, "learning_rate": 4.2762290293034e-06, "loss": 0.3734, "num_tokens": 9873927250.0, "step": 12907 }, { "epoch": 4.728405239534671, "grad_norm": 0.15799286329111414, "learning_rate": 4.275483871398095e-06, "loss": 0.3818, "num_tokens": 9874646613.0, "step": 12908 }, { "epoch": 4.728771640560593, "grad_norm": 0.15678101057592445, "learning_rate": 4.274739712172535e-06, "loss": 0.3665, "num_tokens": 9875357059.0, "step": 12909 }, { "epoch": 4.729138041586516, "grad_norm": 0.14709436510863222, "learning_rate": 4.273996551668643e-06, "loss": 0.3628, "num_tokens": 9876150406.0, "step": 12910 }, { "epoch": 4.7295044426124395, "grad_norm": 0.14280585325734713, "learning_rate": 4.2732543899282954e-06, "loss": 0.3376, "num_tokens": 9876915014.0, "step": 12911 }, { "epoch": 4.729870843638362, "grad_norm": 0.16642595052325182, "learning_rate": 4.272513226993306e-06, "loss": 0.3752, "num_tokens": 9877598864.0, "step": 12912 }, { "epoch": 4.730237244664285, "grad_norm": 0.1500305728626483, "learning_rate": 4.271773062905438e-06, "loss": 0.3837, "num_tokens": 9878396830.0, "step": 12913 }, { "epoch": 4.730603645690208, "grad_norm": 0.1460608653997777, "learning_rate": 4.2710338977063966e-06, "loss": 0.3645, "num_tokens": 9879179230.0, "step": 12914 }, { "epoch": 4.730970046716131, "grad_norm": 0.15802806246435533, "learning_rate": 4.270295731437829e-06, "loss": 0.3512, "num_tokens": 9879866819.0, "step": 12915 }, { "epoch": 4.731336447742054, "grad_norm": 0.14399251902615845, "learning_rate": 4.269558564141324e-06, "loss": 0.3658, "num_tokens": 9880684298.0, "step": 12916 }, { "epoch": 4.731702848767976, "grad_norm": 0.16787222001221944, "learning_rate": 4.268822395858421e-06, "loss": 0.3894, "num_tokens": 9881351247.0, "step": 12917 }, { "epoch": 4.732069249793899, "grad_norm": 0.15868076400280412, "learning_rate": 4.268087226630594e-06, "loss": 0.3809, "num_tokens": 9882057255.0, "step": 12918 }, { "epoch": 4.732435650819823, "grad_norm": 0.14933898077082045, "learning_rate": 4.267353056499269e-06, "loss": 0.3714, "num_tokens": 9882816443.0, "step": 12919 }, { "epoch": 4.732802051845745, "grad_norm": 0.14311879653444853, "learning_rate": 4.266619885505814e-06, "loss": 0.3457, "num_tokens": 9883580545.0, "step": 12920 }, { "epoch": 4.733168452871668, "grad_norm": 0.14975326809854028, "learning_rate": 4.265887713691537e-06, "loss": 0.3711, "num_tokens": 9884371130.0, "step": 12921 }, { "epoch": 4.7335348538975905, "grad_norm": 0.15232602916652244, "learning_rate": 4.265156541097693e-06, "loss": 0.3615, "num_tokens": 9885078186.0, "step": 12922 }, { "epoch": 4.733901254923514, "grad_norm": 0.1514277932843635, "learning_rate": 4.264426367765477e-06, "loss": 0.3705, "num_tokens": 9885813082.0, "step": 12923 }, { "epoch": 4.734267655949437, "grad_norm": 0.1567925957696083, "learning_rate": 4.263697193736034e-06, "loss": 0.3596, "num_tokens": 9886592050.0, "step": 12924 }, { "epoch": 4.734634056975359, "grad_norm": 0.1659744961008556, "learning_rate": 4.262969019050447e-06, "loss": 0.3864, "num_tokens": 9887249793.0, "step": 12925 }, { "epoch": 4.7350004580012826, "grad_norm": 0.14372430861252095, "learning_rate": 4.262241843749743e-06, "loss": 0.3625, "num_tokens": 9888051803.0, "step": 12926 }, { "epoch": 4.735366859027205, "grad_norm": 0.13975698904653294, "learning_rate": 4.261515667874899e-06, "loss": 0.3628, "num_tokens": 9888881732.0, "step": 12927 }, { "epoch": 4.735733260053128, "grad_norm": 0.14719524822411117, "learning_rate": 4.2607904914668255e-06, "loss": 0.3859, "num_tokens": 9889686052.0, "step": 12928 }, { "epoch": 4.736099661079051, "grad_norm": 0.14017210884308473, "learning_rate": 4.260066314566386e-06, "loss": 0.3288, "num_tokens": 9890468916.0, "step": 12929 }, { "epoch": 4.736466062104974, "grad_norm": 0.15447584500559494, "learning_rate": 4.259343137214381e-06, "loss": 0.3483, "num_tokens": 9891168524.0, "step": 12930 }, { "epoch": 4.736832463130897, "grad_norm": 0.1441114563891324, "learning_rate": 4.258620959451564e-06, "loss": 0.3668, "num_tokens": 9891930486.0, "step": 12931 }, { "epoch": 4.73719886415682, "grad_norm": 0.15139860643834052, "learning_rate": 4.257899781318616e-06, "loss": 0.3494, "num_tokens": 9892650552.0, "step": 12932 }, { "epoch": 4.7375652651827425, "grad_norm": 0.1586821486742624, "learning_rate": 4.257179602856181e-06, "loss": 0.3712, "num_tokens": 9893314538.0, "step": 12933 }, { "epoch": 4.737931666208666, "grad_norm": 0.15664073708798953, "learning_rate": 4.256460424104832e-06, "loss": 0.3798, "num_tokens": 9894013221.0, "step": 12934 }, { "epoch": 4.738298067234588, "grad_norm": 0.15300697876259084, "learning_rate": 4.255742245105089e-06, "loss": 0.3385, "num_tokens": 9894718737.0, "step": 12935 }, { "epoch": 4.738664468260511, "grad_norm": 0.15879573374288863, "learning_rate": 4.255025065897423e-06, "loss": 0.4054, "num_tokens": 9895421474.0, "step": 12936 }, { "epoch": 4.7390308692864345, "grad_norm": 0.15340882859501842, "learning_rate": 4.2543088865222385e-06, "loss": 0.3696, "num_tokens": 9896127051.0, "step": 12937 }, { "epoch": 4.739397270312357, "grad_norm": 0.1753679009214596, "learning_rate": 4.25359370701989e-06, "loss": 0.3528, "num_tokens": 9896699157.0, "step": 12938 }, { "epoch": 4.73976367133828, "grad_norm": 0.13826699716332216, "learning_rate": 4.252879527430675e-06, "loss": 0.3624, "num_tokens": 9897503624.0, "step": 12939 }, { "epoch": 4.740130072364202, "grad_norm": 0.1463196983624061, "learning_rate": 4.252166347794832e-06, "loss": 0.397, "num_tokens": 9898299414.0, "step": 12940 }, { "epoch": 4.740496473390126, "grad_norm": 0.14319980733302842, "learning_rate": 4.251454168152546e-06, "loss": 0.394, "num_tokens": 9899147730.0, "step": 12941 }, { "epoch": 4.740862874416049, "grad_norm": 0.14545773708290402, "learning_rate": 4.250742988543943e-06, "loss": 0.3566, "num_tokens": 9899917243.0, "step": 12942 }, { "epoch": 4.741229275441971, "grad_norm": 0.15373227365373565, "learning_rate": 4.250032809009095e-06, "loss": 0.3863, "num_tokens": 9900702060.0, "step": 12943 }, { "epoch": 4.741595676467894, "grad_norm": 0.14610076400590652, "learning_rate": 4.249323629588017e-06, "loss": 0.3701, "num_tokens": 9901531161.0, "step": 12944 }, { "epoch": 4.741962077493817, "grad_norm": 0.14623199771748102, "learning_rate": 4.248615450320667e-06, "loss": 0.3827, "num_tokens": 9902321185.0, "step": 12945 }, { "epoch": 4.74232847851974, "grad_norm": 0.16845866625462935, "learning_rate": 4.247908271246945e-06, "loss": 0.3744, "num_tokens": 9902920803.0, "step": 12946 }, { "epoch": 4.742694879545663, "grad_norm": 0.1666708520061455, "learning_rate": 4.247202092406703e-06, "loss": 0.3598, "num_tokens": 9903597736.0, "step": 12947 }, { "epoch": 4.7430612805715855, "grad_norm": 0.16513525296008705, "learning_rate": 4.246496913839723e-06, "loss": 0.362, "num_tokens": 9904213322.0, "step": 12948 }, { "epoch": 4.743427681597509, "grad_norm": 0.1480950467851316, "learning_rate": 4.2457927355857424e-06, "loss": 0.3561, "num_tokens": 9904936716.0, "step": 12949 }, { "epoch": 4.743794082623431, "grad_norm": 0.14580146599480517, "learning_rate": 4.245089557684435e-06, "loss": 0.3315, "num_tokens": 9905662282.0, "step": 12950 }, { "epoch": 4.744160483649354, "grad_norm": 0.14474711170397941, "learning_rate": 4.244387380175424e-06, "loss": 0.3816, "num_tokens": 9906504952.0, "step": 12951 }, { "epoch": 4.7445268846752775, "grad_norm": 0.14310258517563715, "learning_rate": 4.243686203098271e-06, "loss": 0.3701, "num_tokens": 9907384298.0, "step": 12952 }, { "epoch": 4.7448932857012, "grad_norm": 0.1477219058043761, "learning_rate": 4.242986026492485e-06, "loss": 0.3989, "num_tokens": 9908242306.0, "step": 12953 }, { "epoch": 4.745259686727123, "grad_norm": 0.14619032400886195, "learning_rate": 4.242286850397515e-06, "loss": 0.3803, "num_tokens": 9909014050.0, "step": 12954 }, { "epoch": 4.745626087753045, "grad_norm": 0.17606203529678033, "learning_rate": 4.2415886748527584e-06, "loss": 0.3833, "num_tokens": 9909698883.0, "step": 12955 }, { "epoch": 4.745992488778969, "grad_norm": 0.17750231328422722, "learning_rate": 4.24089149989755e-06, "loss": 0.3911, "num_tokens": 9910314959.0, "step": 12956 }, { "epoch": 4.746358889804892, "grad_norm": 0.16878430988440413, "learning_rate": 4.2401953255711776e-06, "loss": 0.391, "num_tokens": 9910983908.0, "step": 12957 }, { "epoch": 4.746725290830814, "grad_norm": 0.163632026812156, "learning_rate": 4.239500151912861e-06, "loss": 0.3772, "num_tokens": 9911651183.0, "step": 12958 }, { "epoch": 4.747091691856737, "grad_norm": 0.1322635770499278, "learning_rate": 4.238805978961772e-06, "loss": 0.3548, "num_tokens": 9912561470.0, "step": 12959 }, { "epoch": 4.74745809288266, "grad_norm": 0.14926059015898033, "learning_rate": 4.238112806757022e-06, "loss": 0.3678, "num_tokens": 9913348661.0, "step": 12960 }, { "epoch": 4.747824493908583, "grad_norm": 0.1442315794453037, "learning_rate": 4.237420635337672e-06, "loss": 0.3559, "num_tokens": 9914141318.0, "step": 12961 }, { "epoch": 4.748190894934506, "grad_norm": 0.13674654891407492, "learning_rate": 4.236729464742716e-06, "loss": 0.3564, "num_tokens": 9915024126.0, "step": 12962 }, { "epoch": 4.7485572959604285, "grad_norm": 0.1421350731383758, "learning_rate": 4.236039295011101e-06, "loss": 0.3566, "num_tokens": 9915822698.0, "step": 12963 }, { "epoch": 4.748923696986352, "grad_norm": 0.14609617733142782, "learning_rate": 4.235350126181716e-06, "loss": 0.3601, "num_tokens": 9916575428.0, "step": 12964 }, { "epoch": 4.749290098012274, "grad_norm": 0.15178337337901332, "learning_rate": 4.2346619582933865e-06, "loss": 0.35, "num_tokens": 9917283578.0, "step": 12965 }, { "epoch": 4.749656499038197, "grad_norm": 0.14712079452359111, "learning_rate": 4.233974791384894e-06, "loss": 0.3829, "num_tokens": 9918056767.0, "step": 12966 }, { "epoch": 4.7500229000641205, "grad_norm": 0.1482967699992221, "learning_rate": 4.233288625494949e-06, "loss": 0.3939, "num_tokens": 9918924039.0, "step": 12967 }, { "epoch": 4.750389301090043, "grad_norm": 0.15696869238007602, "learning_rate": 4.232603460662218e-06, "loss": 0.3538, "num_tokens": 9919591250.0, "step": 12968 }, { "epoch": 4.750755702115966, "grad_norm": 0.14275962207553666, "learning_rate": 4.231919296925306e-06, "loss": 0.331, "num_tokens": 9920351786.0, "step": 12969 }, { "epoch": 4.751122103141888, "grad_norm": 0.15163977520542762, "learning_rate": 4.2312361343227605e-06, "loss": 0.3415, "num_tokens": 9921075895.0, "step": 12970 }, { "epoch": 4.751488504167812, "grad_norm": 0.144348522145258, "learning_rate": 4.230553972893077e-06, "loss": 0.3751, "num_tokens": 9921861209.0, "step": 12971 }, { "epoch": 4.751854905193735, "grad_norm": 0.16189270459823762, "learning_rate": 4.229872812674687e-06, "loss": 0.3375, "num_tokens": 9922546755.0, "step": 12972 }, { "epoch": 4.752221306219657, "grad_norm": 0.13571368009456172, "learning_rate": 4.22919265370597e-06, "loss": 0.3534, "num_tokens": 9923428818.0, "step": 12973 }, { "epoch": 4.75258770724558, "grad_norm": 0.14436532598209828, "learning_rate": 4.228513496025256e-06, "loss": 0.3774, "num_tokens": 9924217110.0, "step": 12974 }, { "epoch": 4.752954108271503, "grad_norm": 0.15039384201640232, "learning_rate": 4.227835339670806e-06, "loss": 0.3687, "num_tokens": 9925031823.0, "step": 12975 }, { "epoch": 4.753320509297426, "grad_norm": 0.1632836829045691, "learning_rate": 4.2271581846808306e-06, "loss": 0.4265, "num_tokens": 9925749933.0, "step": 12976 }, { "epoch": 4.753686910323349, "grad_norm": 0.15128989010570298, "learning_rate": 4.226482031093487e-06, "loss": 0.3468, "num_tokens": 9926479368.0, "step": 12977 }, { "epoch": 4.754053311349272, "grad_norm": 0.15650782555242626, "learning_rate": 4.225806878946869e-06, "loss": 0.3984, "num_tokens": 9927190945.0, "step": 12978 }, { "epoch": 4.754419712375195, "grad_norm": 0.14223947568148074, "learning_rate": 4.225132728279021e-06, "loss": 0.3722, "num_tokens": 9927991839.0, "step": 12979 }, { "epoch": 4.754786113401117, "grad_norm": 0.14408851753682617, "learning_rate": 4.2244595791279265e-06, "loss": 0.3671, "num_tokens": 9928807515.0, "step": 12980 }, { "epoch": 4.75515251442704, "grad_norm": 0.1414637396115724, "learning_rate": 4.223787431531514e-06, "loss": 0.3527, "num_tokens": 9929590907.0, "step": 12981 }, { "epoch": 4.755518915452964, "grad_norm": 0.16924330954680455, "learning_rate": 4.223116285527654e-06, "loss": 0.3839, "num_tokens": 9930273757.0, "step": 12982 }, { "epoch": 4.755885316478886, "grad_norm": 0.16669144210034031, "learning_rate": 4.2224461411541625e-06, "loss": 0.3944, "num_tokens": 9930951456.0, "step": 12983 }, { "epoch": 4.756251717504809, "grad_norm": 0.1582111115739138, "learning_rate": 4.2217769984487985e-06, "loss": 0.3612, "num_tokens": 9931675352.0, "step": 12984 }, { "epoch": 4.7566181185307315, "grad_norm": 0.1331621200614687, "learning_rate": 4.221108857449267e-06, "loss": 0.3424, "num_tokens": 9932552893.0, "step": 12985 }, { "epoch": 4.756984519556655, "grad_norm": 0.1625315134629818, "learning_rate": 4.220441718193208e-06, "loss": 0.3957, "num_tokens": 9933238401.0, "step": 12986 }, { "epoch": 4.757350920582578, "grad_norm": 0.13373625945379608, "learning_rate": 4.219775580718218e-06, "loss": 0.3374, "num_tokens": 9934103814.0, "step": 12987 }, { "epoch": 4.7577173216085, "grad_norm": 0.1568706859995371, "learning_rate": 4.219110445061826e-06, "loss": 0.3942, "num_tokens": 9934867495.0, "step": 12988 }, { "epoch": 4.7580837226344235, "grad_norm": 0.15631195302664, "learning_rate": 4.2184463112615105e-06, "loss": 0.3489, "num_tokens": 9935566187.0, "step": 12989 }, { "epoch": 4.758450123660346, "grad_norm": 0.13632821908072396, "learning_rate": 4.217783179354692e-06, "loss": 0.3594, "num_tokens": 9936501571.0, "step": 12990 }, { "epoch": 4.758816524686269, "grad_norm": 0.15098625956345804, "learning_rate": 4.217121049378734e-06, "loss": 0.3801, "num_tokens": 9937250058.0, "step": 12991 }, { "epoch": 4.759182925712192, "grad_norm": 0.14218538836053277, "learning_rate": 4.216459921370942e-06, "loss": 0.361, "num_tokens": 9938108054.0, "step": 12992 }, { "epoch": 4.759549326738115, "grad_norm": 0.15158048949680308, "learning_rate": 4.21579979536857e-06, "loss": 0.3788, "num_tokens": 9938891629.0, "step": 12993 }, { "epoch": 4.759915727764038, "grad_norm": 0.15067154332209579, "learning_rate": 4.2151406714088115e-06, "loss": 0.376, "num_tokens": 9939655656.0, "step": 12994 }, { "epoch": 4.76028212878996, "grad_norm": 0.13763690658116717, "learning_rate": 4.214482549528803e-06, "loss": 0.3667, "num_tokens": 9940545853.0, "step": 12995 }, { "epoch": 4.760648529815883, "grad_norm": 0.15745976778937207, "learning_rate": 4.213825429765628e-06, "loss": 0.3862, "num_tokens": 9941292146.0, "step": 12996 }, { "epoch": 4.761014930841807, "grad_norm": 0.14333019442359404, "learning_rate": 4.21316931215631e-06, "loss": 0.3671, "num_tokens": 9942088707.0, "step": 12997 }, { "epoch": 4.761381331867729, "grad_norm": 0.1621089213330872, "learning_rate": 4.212514196737819e-06, "loss": 0.3751, "num_tokens": 9942836787.0, "step": 12998 }, { "epoch": 4.761747732893652, "grad_norm": 0.14765418722449564, "learning_rate": 4.211860083547067e-06, "loss": 0.3759, "num_tokens": 9943674042.0, "step": 12999 }, { "epoch": 4.7621141339195745, "grad_norm": 0.15913741487253658, "learning_rate": 4.211206972620908e-06, "loss": 0.3894, "num_tokens": 9944414202.0, "step": 13000 }, { "epoch": 4.762480534945498, "grad_norm": 0.15218808106522444, "learning_rate": 4.210554863996145e-06, "loss": 0.383, "num_tokens": 9945178082.0, "step": 13001 }, { "epoch": 4.762846935971421, "grad_norm": 0.1466988160620726, "learning_rate": 4.209903757709516e-06, "loss": 0.3865, "num_tokens": 9946008745.0, "step": 13002 }, { "epoch": 4.763213336997343, "grad_norm": 0.15083727842690475, "learning_rate": 4.209253653797711e-06, "loss": 0.3749, "num_tokens": 9946732387.0, "step": 13003 }, { "epoch": 4.7635797380232665, "grad_norm": 0.16804283312603033, "learning_rate": 4.20860455229736e-06, "loss": 0.3936, "num_tokens": 9947353296.0, "step": 13004 }, { "epoch": 4.763946139049189, "grad_norm": 0.16045244159293975, "learning_rate": 4.20795645324503e-06, "loss": 0.3604, "num_tokens": 9947999131.0, "step": 13005 }, { "epoch": 4.764312540075112, "grad_norm": 0.13328906495604617, "learning_rate": 4.207309356677247e-06, "loss": 0.3486, "num_tokens": 9948922690.0, "step": 13006 }, { "epoch": 4.764678941101035, "grad_norm": 0.1468466351209739, "learning_rate": 4.206663262630465e-06, "loss": 0.3577, "num_tokens": 9949780077.0, "step": 13007 }, { "epoch": 4.765045342126958, "grad_norm": 0.16069606616620724, "learning_rate": 4.206018171141091e-06, "loss": 0.3798, "num_tokens": 9950500962.0, "step": 13008 }, { "epoch": 4.765411743152881, "grad_norm": 0.14222719372063206, "learning_rate": 4.205374082245469e-06, "loss": 0.3673, "num_tokens": 9951336837.0, "step": 13009 }, { "epoch": 4.765778144178804, "grad_norm": 0.15454554631829862, "learning_rate": 4.204730995979896e-06, "loss": 0.373, "num_tokens": 9952062893.0, "step": 13010 }, { "epoch": 4.766144545204726, "grad_norm": 0.15540824022463484, "learning_rate": 4.204088912380599e-06, "loss": 0.3851, "num_tokens": 9952834452.0, "step": 13011 }, { "epoch": 4.76651094623065, "grad_norm": 0.1322293192986458, "learning_rate": 4.20344783148376e-06, "loss": 0.3527, "num_tokens": 9953803954.0, "step": 13012 }, { "epoch": 4.766877347256572, "grad_norm": 0.15466985365109726, "learning_rate": 4.2028077533255e-06, "loss": 0.3735, "num_tokens": 9954589825.0, "step": 13013 }, { "epoch": 4.767243748282495, "grad_norm": 0.1550837921634321, "learning_rate": 4.202168677941885e-06, "loss": 0.3569, "num_tokens": 9955312188.0, "step": 13014 }, { "epoch": 4.767610149308418, "grad_norm": 0.1405239325562625, "learning_rate": 4.201530605368923e-06, "loss": 0.365, "num_tokens": 9956163939.0, "step": 13015 }, { "epoch": 4.767976550334341, "grad_norm": 0.1459655503654637, "learning_rate": 4.200893535642562e-06, "loss": 0.3627, "num_tokens": 9956995530.0, "step": 13016 }, { "epoch": 4.768342951360264, "grad_norm": 0.1508494693575945, "learning_rate": 4.200257468798703e-06, "loss": 0.3626, "num_tokens": 9957743046.0, "step": 13017 }, { "epoch": 4.768709352386186, "grad_norm": 0.15430573460897606, "learning_rate": 4.199622404873182e-06, "loss": 0.3938, "num_tokens": 9958507133.0, "step": 13018 }, { "epoch": 4.7690757534121095, "grad_norm": 0.15210683268468697, "learning_rate": 4.198988343901781e-06, "loss": 0.3716, "num_tokens": 9959276937.0, "step": 13019 }, { "epoch": 4.769442154438033, "grad_norm": 0.1490063495194358, "learning_rate": 4.1983552859202266e-06, "loss": 0.3623, "num_tokens": 9960095909.0, "step": 13020 }, { "epoch": 4.769808555463955, "grad_norm": 0.1629060465110685, "learning_rate": 4.197723230964189e-06, "loss": 0.3884, "num_tokens": 9960815244.0, "step": 13021 }, { "epoch": 4.770174956489878, "grad_norm": 0.1452684141317538, "learning_rate": 4.197092179069279e-06, "loss": 0.3487, "num_tokens": 9961577910.0, "step": 13022 }, { "epoch": 4.770541357515801, "grad_norm": 0.13761913391330766, "learning_rate": 4.196462130271054e-06, "loss": 0.3437, "num_tokens": 9962420145.0, "step": 13023 }, { "epoch": 4.770907758541724, "grad_norm": 0.14476539000391098, "learning_rate": 4.195833084605016e-06, "loss": 0.3595, "num_tokens": 9963171886.0, "step": 13024 }, { "epoch": 4.771274159567647, "grad_norm": 0.13705461361381982, "learning_rate": 4.1952050421066046e-06, "loss": 0.381, "num_tokens": 9964070602.0, "step": 13025 }, { "epoch": 4.7716405605935694, "grad_norm": 0.141795831075507, "learning_rate": 4.1945780028112095e-06, "loss": 0.3744, "num_tokens": 9964868322.0, "step": 13026 }, { "epoch": 4.772006961619493, "grad_norm": 0.16178890293446996, "learning_rate": 4.193951966754158e-06, "loss": 0.3742, "num_tokens": 9965548624.0, "step": 13027 }, { "epoch": 4.772373362645416, "grad_norm": 0.15321936040127992, "learning_rate": 4.193326933970728e-06, "loss": 0.3769, "num_tokens": 9966290101.0, "step": 13028 }, { "epoch": 4.772739763671338, "grad_norm": 0.155002808737816, "learning_rate": 4.1927029044961335e-06, "loss": 0.3722, "num_tokens": 9967056395.0, "step": 13029 }, { "epoch": 4.7731061646972615, "grad_norm": 0.1412410188686932, "learning_rate": 4.192079878365534e-06, "loss": 0.3558, "num_tokens": 9967872991.0, "step": 13030 }, { "epoch": 4.773472565723184, "grad_norm": 0.14983901573997943, "learning_rate": 4.191457855614038e-06, "loss": 0.3693, "num_tokens": 9968599043.0, "step": 13031 }, { "epoch": 4.773838966749107, "grad_norm": 0.14019826469983915, "learning_rate": 4.190836836276687e-06, "loss": 0.3657, "num_tokens": 9969441306.0, "step": 13032 }, { "epoch": 4.77420536777503, "grad_norm": 0.16284876620743488, "learning_rate": 4.190216820388478e-06, "loss": 0.3607, "num_tokens": 9970141862.0, "step": 13033 }, { "epoch": 4.774571768800953, "grad_norm": 0.15882505846324615, "learning_rate": 4.189597807984343e-06, "loss": 0.3684, "num_tokens": 9970888448.0, "step": 13034 }, { "epoch": 4.774938169826876, "grad_norm": 0.14797586582042305, "learning_rate": 4.18897979909916e-06, "loss": 0.3734, "num_tokens": 9971670892.0, "step": 13035 }, { "epoch": 4.775304570852798, "grad_norm": 0.15345523137080028, "learning_rate": 4.18836279376775e-06, "loss": 0.3866, "num_tokens": 9972471861.0, "step": 13036 }, { "epoch": 4.775670971878721, "grad_norm": 0.13818752646895494, "learning_rate": 4.18774679202488e-06, "loss": 0.3692, "num_tokens": 9973300362.0, "step": 13037 }, { "epoch": 4.776037372904645, "grad_norm": 0.15935157248874487, "learning_rate": 4.187131793905256e-06, "loss": 0.3614, "num_tokens": 9974009509.0, "step": 13038 }, { "epoch": 4.776403773930567, "grad_norm": 0.14653141250456447, "learning_rate": 4.186517799443529e-06, "loss": 0.349, "num_tokens": 9974771266.0, "step": 13039 }, { "epoch": 4.77677017495649, "grad_norm": 0.16472858814292826, "learning_rate": 4.185904808674296e-06, "loss": 0.3608, "num_tokens": 9975474282.0, "step": 13040 }, { "epoch": 4.7771365759824125, "grad_norm": 0.1374235854937385, "learning_rate": 4.185292821632098e-06, "loss": 0.3688, "num_tokens": 9976334767.0, "step": 13041 }, { "epoch": 4.777502977008336, "grad_norm": 0.14917546847097396, "learning_rate": 4.184681838351412e-06, "loss": 0.3423, "num_tokens": 9977105340.0, "step": 13042 }, { "epoch": 4.777869378034259, "grad_norm": 0.16256280916462035, "learning_rate": 4.184071858866666e-06, "loss": 0.3816, "num_tokens": 9977822520.0, "step": 13043 }, { "epoch": 4.778235779060181, "grad_norm": 0.16160458532851266, "learning_rate": 4.18346288321223e-06, "loss": 0.3621, "num_tokens": 9978550085.0, "step": 13044 }, { "epoch": 4.7786021800861045, "grad_norm": 0.16405542361562778, "learning_rate": 4.1828549114224185e-06, "loss": 0.3662, "num_tokens": 9979205194.0, "step": 13045 }, { "epoch": 4.778968581112027, "grad_norm": 0.15909564397322873, "learning_rate": 4.18224794353148e-06, "loss": 0.3784, "num_tokens": 9979925448.0, "step": 13046 }, { "epoch": 4.77933498213795, "grad_norm": 0.1322769390811649, "learning_rate": 4.181641979573621e-06, "loss": 0.3685, "num_tokens": 9980848330.0, "step": 13047 }, { "epoch": 4.779701383163873, "grad_norm": 0.15396688522831226, "learning_rate": 4.181037019582981e-06, "loss": 0.3652, "num_tokens": 9981603447.0, "step": 13048 }, { "epoch": 4.780067784189796, "grad_norm": 0.15486480619544202, "learning_rate": 4.1804330635936464e-06, "loss": 0.4022, "num_tokens": 9982349836.0, "step": 13049 }, { "epoch": 4.780434185215719, "grad_norm": 0.14897008929029934, "learning_rate": 4.179830111639647e-06, "loss": 0.384, "num_tokens": 9983220406.0, "step": 13050 }, { "epoch": 4.780800586241641, "grad_norm": 0.14976741206763683, "learning_rate": 4.179228163754956e-06, "loss": 0.3499, "num_tokens": 9983955064.0, "step": 13051 }, { "epoch": 4.781166987267564, "grad_norm": 0.15274727922258455, "learning_rate": 4.178627219973491e-06, "loss": 0.3674, "num_tokens": 9984689808.0, "step": 13052 }, { "epoch": 4.781533388293488, "grad_norm": 0.13940395899757985, "learning_rate": 4.178027280329109e-06, "loss": 0.3723, "num_tokens": 9985521118.0, "step": 13053 }, { "epoch": 4.78189978931941, "grad_norm": 0.14538785678558672, "learning_rate": 4.177428344855616e-06, "loss": 0.3615, "num_tokens": 9986341678.0, "step": 13054 }, { "epoch": 4.782266190345333, "grad_norm": 0.14160207480606365, "learning_rate": 4.176830413586759e-06, "loss": 0.3719, "num_tokens": 9987175933.0, "step": 13055 }, { "epoch": 4.7826325913712555, "grad_norm": 0.1521097493497896, "learning_rate": 4.176233486556226e-06, "loss": 0.3592, "num_tokens": 9987863979.0, "step": 13056 }, { "epoch": 4.782998992397179, "grad_norm": 0.16556827040882882, "learning_rate": 4.175637563797652e-06, "loss": 0.41, "num_tokens": 9988547203.0, "step": 13057 }, { "epoch": 4.783365393423102, "grad_norm": 0.1560795379817199, "learning_rate": 4.175042645344614e-06, "loss": 0.3619, "num_tokens": 9989249872.0, "step": 13058 }, { "epoch": 4.783731794449024, "grad_norm": 0.14494670731501835, "learning_rate": 4.174448731230633e-06, "loss": 0.3723, "num_tokens": 9990070123.0, "step": 13059 }, { "epoch": 4.7840981954749475, "grad_norm": 0.14978964962052568, "learning_rate": 4.173855821489169e-06, "loss": 0.3862, "num_tokens": 9990898266.0, "step": 13060 }, { "epoch": 4.78446459650087, "grad_norm": 0.14518579859146696, "learning_rate": 4.1732639161536335e-06, "loss": 0.3595, "num_tokens": 9991664371.0, "step": 13061 }, { "epoch": 4.784830997526793, "grad_norm": 0.158336189570185, "learning_rate": 4.172673015257376e-06, "loss": 0.3701, "num_tokens": 9992362375.0, "step": 13062 }, { "epoch": 4.785197398552716, "grad_norm": 0.1549915836089138, "learning_rate": 4.17208311883369e-06, "loss": 0.3432, "num_tokens": 9993026875.0, "step": 13063 }, { "epoch": 4.785563799578639, "grad_norm": 0.17466843448695504, "learning_rate": 4.171494226915814e-06, "loss": 0.3937, "num_tokens": 9993653243.0, "step": 13064 }, { "epoch": 4.785930200604562, "grad_norm": 0.1434080628604236, "learning_rate": 4.170906339536926e-06, "loss": 0.361, "num_tokens": 9994434879.0, "step": 13065 }, { "epoch": 4.786296601630484, "grad_norm": 0.14456469817843992, "learning_rate": 4.170319456730154e-06, "loss": 0.3591, "num_tokens": 9995288927.0, "step": 13066 }, { "epoch": 4.786663002656407, "grad_norm": 0.14235972329003635, "learning_rate": 4.169733578528564e-06, "loss": 0.3582, "num_tokens": 9996118788.0, "step": 13067 }, { "epoch": 4.787029403682331, "grad_norm": 0.16544176236877545, "learning_rate": 4.169148704965168e-06, "loss": 0.3828, "num_tokens": 9996841147.0, "step": 13068 }, { "epoch": 4.787395804708253, "grad_norm": 0.1511512950003528, "learning_rate": 4.168564836072918e-06, "loss": 0.3768, "num_tokens": 9997590872.0, "step": 13069 }, { "epoch": 4.787762205734176, "grad_norm": 0.13864429294813885, "learning_rate": 4.167981971884714e-06, "loss": 0.3588, "num_tokens": 9998429825.0, "step": 13070 }, { "epoch": 4.7881286067600985, "grad_norm": 0.1488881576242444, "learning_rate": 4.167400112433394e-06, "loss": 0.3919, "num_tokens": 9999212705.0, "step": 13071 }, { "epoch": 4.788495007786022, "grad_norm": 0.14951680562015146, "learning_rate": 4.16681925775175e-06, "loss": 0.3943, "num_tokens": 9999952605.0, "step": 13072 }, { "epoch": 4.788861408811945, "grad_norm": 0.16799176875433613, "learning_rate": 4.166239407872499e-06, "loss": 0.3925, "num_tokens": 10000582789.0, "step": 13073 }, { "epoch": 4.789227809837867, "grad_norm": 0.13984079056702067, "learning_rate": 4.1656605628283225e-06, "loss": 0.3576, "num_tokens": 10001438543.0, "step": 13074 }, { "epoch": 4.789594210863791, "grad_norm": 0.14600622506993832, "learning_rate": 4.165082722651831e-06, "loss": 0.386, "num_tokens": 10002273475.0, "step": 13075 }, { "epoch": 4.789960611889713, "grad_norm": 0.1431946302708066, "learning_rate": 4.16450588737558e-06, "loss": 0.3867, "num_tokens": 10003101268.0, "step": 13076 }, { "epoch": 4.790327012915636, "grad_norm": 0.14055188476427935, "learning_rate": 4.1639300570320764e-06, "loss": 0.3792, "num_tokens": 10003940627.0, "step": 13077 }, { "epoch": 4.790693413941559, "grad_norm": 0.1461034677388956, "learning_rate": 4.163355231653764e-06, "loss": 0.3713, "num_tokens": 10004715192.0, "step": 13078 }, { "epoch": 4.791059814967482, "grad_norm": 0.15331890949097304, "learning_rate": 4.1627814112730265e-06, "loss": 0.368, "num_tokens": 10005431042.0, "step": 13079 }, { "epoch": 4.791426215993405, "grad_norm": 0.1553350755658619, "learning_rate": 4.1622085959222e-06, "loss": 0.3376, "num_tokens": 10006102990.0, "step": 13080 }, { "epoch": 4.791792617019327, "grad_norm": 0.1506857464130622, "learning_rate": 4.161636785633557e-06, "loss": 0.3897, "num_tokens": 10006897603.0, "step": 13081 }, { "epoch": 4.7921590180452505, "grad_norm": 0.1530927041611002, "learning_rate": 4.161065980439318e-06, "loss": 0.3755, "num_tokens": 10007666608.0, "step": 13082 }, { "epoch": 4.792525419071174, "grad_norm": 0.1517240874531222, "learning_rate": 4.160496180371645e-06, "loss": 0.3743, "num_tokens": 10008419662.0, "step": 13083 }, { "epoch": 4.792891820097096, "grad_norm": 0.1474587951877152, "learning_rate": 4.15992738546264e-06, "loss": 0.3869, "num_tokens": 10009199724.0, "step": 13084 }, { "epoch": 4.793258221123019, "grad_norm": 0.1344013157837911, "learning_rate": 4.159359595744357e-06, "loss": 0.3667, "num_tokens": 10010114199.0, "step": 13085 }, { "epoch": 4.793624622148942, "grad_norm": 0.13741076567350555, "learning_rate": 4.15879281124878e-06, "loss": 0.3515, "num_tokens": 10010927781.0, "step": 13086 }, { "epoch": 4.793991023174865, "grad_norm": 0.15527589887856064, "learning_rate": 4.1582270320078495e-06, "loss": 0.356, "num_tokens": 10011626030.0, "step": 13087 }, { "epoch": 4.794357424200788, "grad_norm": 0.14646874754385955, "learning_rate": 4.1576622580534445e-06, "loss": 0.3719, "num_tokens": 10012418040.0, "step": 13088 }, { "epoch": 4.79472382522671, "grad_norm": 0.1477125482007865, "learning_rate": 4.1570984894173834e-06, "loss": 0.3728, "num_tokens": 10013238197.0, "step": 13089 }, { "epoch": 4.795090226252634, "grad_norm": 0.13813408705247704, "learning_rate": 4.156535726131436e-06, "loss": 0.3498, "num_tokens": 10014040885.0, "step": 13090 }, { "epoch": 4.795456627278556, "grad_norm": 0.15309502567730804, "learning_rate": 4.155973968227308e-06, "loss": 0.3894, "num_tokens": 10014805719.0, "step": 13091 }, { "epoch": 4.795823028304479, "grad_norm": 0.15757555841247914, "learning_rate": 4.15541321573665e-06, "loss": 0.3805, "num_tokens": 10015540705.0, "step": 13092 }, { "epoch": 4.796189429330402, "grad_norm": 0.14573511635600048, "learning_rate": 4.154853468691063e-06, "loss": 0.38, "num_tokens": 10016320618.0, "step": 13093 }, { "epoch": 4.796555830356325, "grad_norm": 0.1602488672631164, "learning_rate": 4.154294727122078e-06, "loss": 0.3849, "num_tokens": 10017066307.0, "step": 13094 }, { "epoch": 4.796922231382248, "grad_norm": 0.147811971222072, "learning_rate": 4.153736991061183e-06, "loss": 0.3449, "num_tokens": 10017782208.0, "step": 13095 }, { "epoch": 4.79728863240817, "grad_norm": 0.14656201352693898, "learning_rate": 4.153180260539801e-06, "loss": 0.3616, "num_tokens": 10018567502.0, "step": 13096 }, { "epoch": 4.7976550334340935, "grad_norm": 0.15701854754281389, "learning_rate": 4.152624535589302e-06, "loss": 0.3773, "num_tokens": 10019370864.0, "step": 13097 }, { "epoch": 4.798021434460017, "grad_norm": 0.16320712148051322, "learning_rate": 4.1520698162409955e-06, "loss": 0.3748, "num_tokens": 10020097640.0, "step": 13098 }, { "epoch": 4.798387835485939, "grad_norm": 0.16048254169210152, "learning_rate": 4.151516102526141e-06, "loss": 0.4156, "num_tokens": 10020908564.0, "step": 13099 }, { "epoch": 4.798754236511862, "grad_norm": 0.1628446712871573, "learning_rate": 4.150963394475931e-06, "loss": 0.3684, "num_tokens": 10021569464.0, "step": 13100 }, { "epoch": 4.799120637537785, "grad_norm": 0.14303491429863827, "learning_rate": 4.150411692121513e-06, "loss": 0.3814, "num_tokens": 10022380876.0, "step": 13101 }, { "epoch": 4.799487038563708, "grad_norm": 0.15543099726514076, "learning_rate": 4.149860995493974e-06, "loss": 0.3819, "num_tokens": 10023122024.0, "step": 13102 }, { "epoch": 4.799853439589631, "grad_norm": 0.15139024006545515, "learning_rate": 4.149311304624336e-06, "loss": 0.3686, "num_tokens": 10023830488.0, "step": 13103 }, { "epoch": 4.800219840615553, "grad_norm": 0.16529313340476154, "learning_rate": 4.148762619543576e-06, "loss": 0.4023, "num_tokens": 10024555890.0, "step": 13104 }, { "epoch": 4.800586241641477, "grad_norm": 0.15786050146828406, "learning_rate": 4.148214940282609e-06, "loss": 0.3896, "num_tokens": 10025321840.0, "step": 13105 }, { "epoch": 4.8009526426674, "grad_norm": 0.1471521159891488, "learning_rate": 4.1476682668722915e-06, "loss": 0.3757, "num_tokens": 10026126223.0, "step": 13106 }, { "epoch": 4.801319043693322, "grad_norm": 0.1589021053980072, "learning_rate": 4.147122599343425e-06, "loss": 0.3962, "num_tokens": 10026850729.0, "step": 13107 }, { "epoch": 4.801685444719245, "grad_norm": 0.14980763931444674, "learning_rate": 4.1465779377267595e-06, "loss": 0.3676, "num_tokens": 10027605728.0, "step": 13108 }, { "epoch": 4.802051845745168, "grad_norm": 0.15167358819441384, "learning_rate": 4.146034282052982e-06, "loss": 0.3428, "num_tokens": 10028354372.0, "step": 13109 }, { "epoch": 4.802418246771091, "grad_norm": 0.14728297608250646, "learning_rate": 4.145491632352722e-06, "loss": 0.3396, "num_tokens": 10029116893.0, "step": 13110 }, { "epoch": 4.802784647797014, "grad_norm": 0.1490713176219514, "learning_rate": 4.144949988656555e-06, "loss": 0.3472, "num_tokens": 10029874084.0, "step": 13111 }, { "epoch": 4.8031510488229365, "grad_norm": 0.139508448951305, "learning_rate": 4.144409350995002e-06, "loss": 0.3558, "num_tokens": 10030675613.0, "step": 13112 }, { "epoch": 4.80351744984886, "grad_norm": 0.15522141092919567, "learning_rate": 4.143869719398526e-06, "loss": 0.3696, "num_tokens": 10031382290.0, "step": 13113 }, { "epoch": 4.803883850874782, "grad_norm": 0.15414637221134994, "learning_rate": 4.143331093897527e-06, "loss": 0.3689, "num_tokens": 10032081735.0, "step": 13114 }, { "epoch": 4.804250251900705, "grad_norm": 0.14929536556750156, "learning_rate": 4.142793474522359e-06, "loss": 0.3728, "num_tokens": 10032846677.0, "step": 13115 }, { "epoch": 4.8046166529266285, "grad_norm": 0.1505202633976432, "learning_rate": 4.142256861303311e-06, "loss": 0.3969, "num_tokens": 10033612502.0, "step": 13116 }, { "epoch": 4.804983053952551, "grad_norm": 0.13770768615927492, "learning_rate": 4.141721254270619e-06, "loss": 0.3354, "num_tokens": 10034456608.0, "step": 13117 }, { "epoch": 4.805349454978474, "grad_norm": 0.14472897320015, "learning_rate": 4.141186653454461e-06, "loss": 0.3374, "num_tokens": 10035186176.0, "step": 13118 }, { "epoch": 4.805715856004396, "grad_norm": 0.15382618160676043, "learning_rate": 4.14065305888496e-06, "loss": 0.3901, "num_tokens": 10035936442.0, "step": 13119 }, { "epoch": 4.80608225703032, "grad_norm": 0.1575561106683269, "learning_rate": 4.1401204705921805e-06, "loss": 0.3805, "num_tokens": 10036668161.0, "step": 13120 }, { "epoch": 4.806448658056243, "grad_norm": 0.1462920311987821, "learning_rate": 4.139588888606133e-06, "loss": 0.3855, "num_tokens": 10037460237.0, "step": 13121 }, { "epoch": 4.806815059082165, "grad_norm": 0.16038693460888054, "learning_rate": 4.1390583129567655e-06, "loss": 0.343, "num_tokens": 10038171978.0, "step": 13122 }, { "epoch": 4.8071814601080884, "grad_norm": 0.13843723409145242, "learning_rate": 4.138528743673973e-06, "loss": 0.3244, "num_tokens": 10038915757.0, "step": 13123 }, { "epoch": 4.807547861134012, "grad_norm": 0.15669012120320247, "learning_rate": 4.138000180787598e-06, "loss": 0.385, "num_tokens": 10039619097.0, "step": 13124 }, { "epoch": 4.807914262159934, "grad_norm": 0.15636201340842032, "learning_rate": 4.137472624327419e-06, "loss": 0.3712, "num_tokens": 10040319739.0, "step": 13125 }, { "epoch": 4.808280663185857, "grad_norm": 0.14639943329165445, "learning_rate": 4.136946074323163e-06, "loss": 0.3653, "num_tokens": 10041102420.0, "step": 13126 }, { "epoch": 4.80864706421178, "grad_norm": 0.1526816623873307, "learning_rate": 4.136420530804494e-06, "loss": 0.3605, "num_tokens": 10041864753.0, "step": 13127 }, { "epoch": 4.809013465237703, "grad_norm": 0.16446029648219915, "learning_rate": 4.135895993801029e-06, "loss": 0.3435, "num_tokens": 10042531264.0, "step": 13128 }, { "epoch": 4.809379866263626, "grad_norm": 0.14372152486234496, "learning_rate": 4.135372463342321e-06, "loss": 0.3723, "num_tokens": 10043315668.0, "step": 13129 }, { "epoch": 4.809746267289548, "grad_norm": 0.1511704794428111, "learning_rate": 4.1348499394578634e-06, "loss": 0.3798, "num_tokens": 10044079677.0, "step": 13130 }, { "epoch": 4.810112668315472, "grad_norm": 0.15521249513278293, "learning_rate": 4.134328422177103e-06, "loss": 0.3519, "num_tokens": 10044757282.0, "step": 13131 }, { "epoch": 4.810479069341394, "grad_norm": 0.1376796025816138, "learning_rate": 4.133807911529426e-06, "loss": 0.3659, "num_tokens": 10045592917.0, "step": 13132 }, { "epoch": 4.810845470367317, "grad_norm": 0.15595431178774524, "learning_rate": 4.133288407544153e-06, "loss": 0.3637, "num_tokens": 10046288247.0, "step": 13133 }, { "epoch": 4.81121187139324, "grad_norm": 0.14877078375513839, "learning_rate": 4.132769910250562e-06, "loss": 0.384, "num_tokens": 10047089093.0, "step": 13134 }, { "epoch": 4.811578272419163, "grad_norm": 0.15799560984260175, "learning_rate": 4.132252419677863e-06, "loss": 0.3702, "num_tokens": 10047875411.0, "step": 13135 }, { "epoch": 4.811944673445086, "grad_norm": 0.15991462516764127, "learning_rate": 4.131735935855218e-06, "loss": 0.34, "num_tokens": 10048513954.0, "step": 13136 }, { "epoch": 4.812311074471008, "grad_norm": 0.13745540330245218, "learning_rate": 4.131220458811723e-06, "loss": 0.3479, "num_tokens": 10049346212.0, "step": 13137 }, { "epoch": 4.8126774754969315, "grad_norm": 0.1643420332710993, "learning_rate": 4.130705988576428e-06, "loss": 0.3704, "num_tokens": 10050055116.0, "step": 13138 }, { "epoch": 4.813043876522855, "grad_norm": 0.14818642286020542, "learning_rate": 4.130192525178316e-06, "loss": 0.3464, "num_tokens": 10050868208.0, "step": 13139 }, { "epoch": 4.813410277548777, "grad_norm": 0.14712657498822224, "learning_rate": 4.1296800686463214e-06, "loss": 0.3572, "num_tokens": 10051651975.0, "step": 13140 }, { "epoch": 4.8137766785747, "grad_norm": 0.14183420750632786, "learning_rate": 4.129168619009314e-06, "loss": 0.3728, "num_tokens": 10052478709.0, "step": 13141 }, { "epoch": 4.814143079600623, "grad_norm": 0.13599821677993898, "learning_rate": 4.128658176296117e-06, "loss": 0.3545, "num_tokens": 10053351039.0, "step": 13142 }, { "epoch": 4.814509480626546, "grad_norm": 0.1563401062032004, "learning_rate": 4.128148740535485e-06, "loss": 0.3558, "num_tokens": 10054044351.0, "step": 13143 }, { "epoch": 4.814875881652469, "grad_norm": 0.14088685355779865, "learning_rate": 4.127640311756123e-06, "loss": 0.3419, "num_tokens": 10054850256.0, "step": 13144 }, { "epoch": 4.815242282678391, "grad_norm": 0.15750768801683657, "learning_rate": 4.127132889986682e-06, "loss": 0.3821, "num_tokens": 10055554243.0, "step": 13145 }, { "epoch": 4.815608683704315, "grad_norm": 0.16075825032857108, "learning_rate": 4.126626475255751e-06, "loss": 0.336, "num_tokens": 10056252180.0, "step": 13146 }, { "epoch": 4.815975084730237, "grad_norm": 0.15698192914675885, "learning_rate": 4.1261210675918605e-06, "loss": 0.3736, "num_tokens": 10057000945.0, "step": 13147 }, { "epoch": 4.81634148575616, "grad_norm": 0.1489972373337999, "learning_rate": 4.125616667023493e-06, "loss": 0.3802, "num_tokens": 10057737685.0, "step": 13148 }, { "epoch": 4.816707886782083, "grad_norm": 0.16527198734820742, "learning_rate": 4.125113273579065e-06, "loss": 0.3713, "num_tokens": 10058407930.0, "step": 13149 }, { "epoch": 4.817074287808006, "grad_norm": 0.16359717677027855, "learning_rate": 4.12461088728694e-06, "loss": 0.3576, "num_tokens": 10059063402.0, "step": 13150 }, { "epoch": 4.817440688833929, "grad_norm": 0.13439830067296266, "learning_rate": 4.124109508175423e-06, "loss": 0.3795, "num_tokens": 10059999500.0, "step": 13151 }, { "epoch": 4.817807089859851, "grad_norm": 0.1545273745587459, "learning_rate": 4.123609136272769e-06, "loss": 0.3802, "num_tokens": 10060798069.0, "step": 13152 }, { "epoch": 4.8181734908857745, "grad_norm": 0.14877560380496618, "learning_rate": 4.123109771607169e-06, "loss": 0.3639, "num_tokens": 10061586663.0, "step": 13153 }, { "epoch": 4.818539891911698, "grad_norm": 0.14721944208990645, "learning_rate": 4.1226114142067584e-06, "loss": 0.3972, "num_tokens": 10062397112.0, "step": 13154 }, { "epoch": 4.81890629293762, "grad_norm": 0.14091463174743024, "learning_rate": 4.122114064099618e-06, "loss": 0.3482, "num_tokens": 10063153007.0, "step": 13155 }, { "epoch": 4.819272693963543, "grad_norm": 0.13543528750411893, "learning_rate": 4.121617721313768e-06, "loss": 0.3633, "num_tokens": 10064027534.0, "step": 13156 }, { "epoch": 4.819639094989466, "grad_norm": 0.14825330067797698, "learning_rate": 4.1211223858771784e-06, "loss": 0.3667, "num_tokens": 10064842365.0, "step": 13157 }, { "epoch": 4.820005496015389, "grad_norm": 0.16268698675379561, "learning_rate": 4.120628057817758e-06, "loss": 0.3656, "num_tokens": 10065485693.0, "step": 13158 }, { "epoch": 4.820371897041312, "grad_norm": 0.13823489130142216, "learning_rate": 4.120134737163358e-06, "loss": 0.3549, "num_tokens": 10066340152.0, "step": 13159 }, { "epoch": 4.820738298067234, "grad_norm": 0.14141452277628677, "learning_rate": 4.119642423941774e-06, "loss": 0.3477, "num_tokens": 10067099935.0, "step": 13160 }, { "epoch": 4.821104699093158, "grad_norm": 0.13622319294963853, "learning_rate": 4.119151118180748e-06, "loss": 0.3624, "num_tokens": 10067986690.0, "step": 13161 }, { "epoch": 4.82147110011908, "grad_norm": 0.14398080950821313, "learning_rate": 4.11866081990796e-06, "loss": 0.3757, "num_tokens": 10068767960.0, "step": 13162 }, { "epoch": 4.821837501145003, "grad_norm": 0.15372047613532278, "learning_rate": 4.118171529151035e-06, "loss": 0.39, "num_tokens": 10069517513.0, "step": 13163 }, { "epoch": 4.822203902170926, "grad_norm": 0.14854311940457274, "learning_rate": 4.117683245937545e-06, "loss": 0.3913, "num_tokens": 10070355672.0, "step": 13164 }, { "epoch": 4.822570303196849, "grad_norm": 0.15938580645280634, "learning_rate": 4.117195970294999e-06, "loss": 0.3918, "num_tokens": 10071013497.0, "step": 13165 }, { "epoch": 4.822936704222772, "grad_norm": 0.14306811709539527, "learning_rate": 4.1167097022508545e-06, "loss": 0.3786, "num_tokens": 10071844606.0, "step": 13166 }, { "epoch": 4.823303105248694, "grad_norm": 0.15246189966429224, "learning_rate": 4.116224441832507e-06, "loss": 0.3641, "num_tokens": 10072598990.0, "step": 13167 }, { "epoch": 4.8236695062746175, "grad_norm": 0.14563830744927908, "learning_rate": 4.115740189067301e-06, "loss": 0.3462, "num_tokens": 10073361875.0, "step": 13168 }, { "epoch": 4.824035907300541, "grad_norm": 0.14965060432004834, "learning_rate": 4.115256943982521e-06, "loss": 0.3765, "num_tokens": 10074243103.0, "step": 13169 }, { "epoch": 4.824402308326463, "grad_norm": 0.16076097486082833, "learning_rate": 4.114774706605397e-06, "loss": 0.3733, "num_tokens": 10074966554.0, "step": 13170 }, { "epoch": 4.824768709352386, "grad_norm": 0.14795143522553364, "learning_rate": 4.114293476963095e-06, "loss": 0.3916, "num_tokens": 10075767606.0, "step": 13171 }, { "epoch": 4.825135110378309, "grad_norm": 0.1502055359247133, "learning_rate": 4.113813255082736e-06, "loss": 0.3658, "num_tokens": 10076510019.0, "step": 13172 }, { "epoch": 4.825501511404232, "grad_norm": 0.16052211354696885, "learning_rate": 4.113334040991373e-06, "loss": 0.3895, "num_tokens": 10077208789.0, "step": 13173 }, { "epoch": 4.825867912430155, "grad_norm": 0.14055115960075135, "learning_rate": 4.1128558347160085e-06, "loss": 0.3548, "num_tokens": 10078051768.0, "step": 13174 }, { "epoch": 4.8262343134560775, "grad_norm": 0.15261191227841636, "learning_rate": 4.112378636283589e-06, "loss": 0.3718, "num_tokens": 10078804372.0, "step": 13175 }, { "epoch": 4.826600714482001, "grad_norm": 0.16226059068152884, "learning_rate": 4.111902445721002e-06, "loss": 0.3777, "num_tokens": 10079529348.0, "step": 13176 }, { "epoch": 4.826967115507923, "grad_norm": 0.13744415329577359, "learning_rate": 4.1114272630550745e-06, "loss": 0.3737, "num_tokens": 10080348322.0, "step": 13177 }, { "epoch": 4.827333516533846, "grad_norm": 0.16333274555799682, "learning_rate": 4.110953088312582e-06, "loss": 0.3853, "num_tokens": 10081000326.0, "step": 13178 }, { "epoch": 4.8276999175597695, "grad_norm": 0.16390060450098937, "learning_rate": 4.110479921520244e-06, "loss": 0.3892, "num_tokens": 10081651123.0, "step": 13179 }, { "epoch": 4.828066318585692, "grad_norm": 0.14908036446201756, "learning_rate": 4.110007762704718e-06, "loss": 0.3614, "num_tokens": 10082422420.0, "step": 13180 }, { "epoch": 4.828432719611615, "grad_norm": 0.1406076061901654, "learning_rate": 4.10953661189261e-06, "loss": 0.3714, "num_tokens": 10083301005.0, "step": 13181 }, { "epoch": 4.828799120637537, "grad_norm": 0.15884347209499988, "learning_rate": 4.1090664691104645e-06, "loss": 0.3457, "num_tokens": 10083928687.0, "step": 13182 }, { "epoch": 4.829165521663461, "grad_norm": 0.15118905848464237, "learning_rate": 4.108597334384772e-06, "loss": 0.354, "num_tokens": 10084681151.0, "step": 13183 }, { "epoch": 4.829531922689384, "grad_norm": 0.15341056970245645, "learning_rate": 4.108129207741966e-06, "loss": 0.3791, "num_tokens": 10085480191.0, "step": 13184 }, { "epoch": 4.829898323715306, "grad_norm": 0.140565038833164, "learning_rate": 4.107662089208425e-06, "loss": 0.3594, "num_tokens": 10086326692.0, "step": 13185 }, { "epoch": 4.830264724741229, "grad_norm": 0.1627718383124486, "learning_rate": 4.1071959788104655e-06, "loss": 0.4039, "num_tokens": 10087076073.0, "step": 13186 }, { "epoch": 4.830631125767152, "grad_norm": 0.1484037989039409, "learning_rate": 4.106730876574352e-06, "loss": 0.3607, "num_tokens": 10087859797.0, "step": 13187 }, { "epoch": 4.830997526793075, "grad_norm": 0.1527301679216648, "learning_rate": 4.106266782526288e-06, "loss": 0.3704, "num_tokens": 10088590446.0, "step": 13188 }, { "epoch": 4.831363927818998, "grad_norm": 0.15700535685109981, "learning_rate": 4.105803696692426e-06, "loss": 0.3797, "num_tokens": 10089340454.0, "step": 13189 }, { "epoch": 4.8317303288449205, "grad_norm": 0.1487992037517259, "learning_rate": 4.105341619098856e-06, "loss": 0.3651, "num_tokens": 10090125199.0, "step": 13190 }, { "epoch": 4.832096729870844, "grad_norm": 0.14425870797527987, "learning_rate": 4.1048805497716164e-06, "loss": 0.3607, "num_tokens": 10090878744.0, "step": 13191 }, { "epoch": 4.832463130896766, "grad_norm": 0.1396484964983831, "learning_rate": 4.104420488736682e-06, "loss": 0.3857, "num_tokens": 10091740552.0, "step": 13192 }, { "epoch": 4.832829531922689, "grad_norm": 0.16057165046383393, "learning_rate": 4.103961436019977e-06, "loss": 0.3593, "num_tokens": 10092393895.0, "step": 13193 }, { "epoch": 4.8331959329486125, "grad_norm": 0.14585884737941404, "learning_rate": 4.1035033916473685e-06, "loss": 0.3786, "num_tokens": 10093178668.0, "step": 13194 }, { "epoch": 4.833562333974535, "grad_norm": 0.14064902223006695, "learning_rate": 4.103046355644661e-06, "loss": 0.3577, "num_tokens": 10093995706.0, "step": 13195 }, { "epoch": 4.833928735000458, "grad_norm": 0.14243326527217248, "learning_rate": 4.1025903280376065e-06, "loss": 0.3524, "num_tokens": 10094818589.0, "step": 13196 }, { "epoch": 4.83429513602638, "grad_norm": 0.14758014003376152, "learning_rate": 4.102135308851902e-06, "loss": 0.3445, "num_tokens": 10095563012.0, "step": 13197 }, { "epoch": 4.834661537052304, "grad_norm": 0.15694975735850922, "learning_rate": 4.101681298113182e-06, "loss": 0.3974, "num_tokens": 10096287088.0, "step": 13198 }, { "epoch": 4.835027938078227, "grad_norm": 0.18142396077592454, "learning_rate": 4.101228295847032e-06, "loss": 0.4046, "num_tokens": 10096873466.0, "step": 13199 }, { "epoch": 4.835394339104149, "grad_norm": 0.14831455386182038, "learning_rate": 4.100776302078972e-06, "loss": 0.3844, "num_tokens": 10097615172.0, "step": 13200 }, { "epoch": 4.835760740130072, "grad_norm": 0.1427848386005734, "learning_rate": 4.100325316834471e-06, "loss": 0.3894, "num_tokens": 10098479722.0, "step": 13201 }, { "epoch": 4.836127141155996, "grad_norm": 0.15251105645169163, "learning_rate": 4.0998753401389424e-06, "loss": 0.3881, "num_tokens": 10099225703.0, "step": 13202 }, { "epoch": 4.836493542181918, "grad_norm": 0.15575316527379937, "learning_rate": 4.099426372017734e-06, "loss": 0.3438, "num_tokens": 10099973727.0, "step": 13203 }, { "epoch": 4.836859943207841, "grad_norm": 0.14252949694305456, "learning_rate": 4.098978412496149e-06, "loss": 0.3692, "num_tokens": 10100802858.0, "step": 13204 }, { "epoch": 4.8372263442337635, "grad_norm": 0.1389362963362788, "learning_rate": 4.098531461599422e-06, "loss": 0.366, "num_tokens": 10101656131.0, "step": 13205 }, { "epoch": 4.837592745259687, "grad_norm": 0.13806220452829085, "learning_rate": 4.09808551935274e-06, "loss": 0.3387, "num_tokens": 10102478200.0, "step": 13206 }, { "epoch": 4.83795914628561, "grad_norm": 0.15295034584780595, "learning_rate": 4.097640585781227e-06, "loss": 0.3533, "num_tokens": 10103219433.0, "step": 13207 }, { "epoch": 4.838325547311532, "grad_norm": 0.15662827428943407, "learning_rate": 4.097196660909955e-06, "loss": 0.3598, "num_tokens": 10103961531.0, "step": 13208 }, { "epoch": 4.8386919483374555, "grad_norm": 0.16469008984066708, "learning_rate": 4.096753744763934e-06, "loss": 0.3482, "num_tokens": 10104650934.0, "step": 13209 }, { "epoch": 4.839058349363378, "grad_norm": 0.1596815220541152, "learning_rate": 4.096311837368122e-06, "loss": 0.3784, "num_tokens": 10105402696.0, "step": 13210 }, { "epoch": 4.839424750389301, "grad_norm": 0.15463497577500793, "learning_rate": 4.095870938747417e-06, "loss": 0.3813, "num_tokens": 10106136376.0, "step": 13211 }, { "epoch": 4.839791151415224, "grad_norm": 0.1600005594260163, "learning_rate": 4.095431048926662e-06, "loss": 0.3786, "num_tokens": 10106824489.0, "step": 13212 }, { "epoch": 4.840157552441147, "grad_norm": 0.15604234838694284, "learning_rate": 4.0949921679306425e-06, "loss": 0.3929, "num_tokens": 10107570812.0, "step": 13213 }, { "epoch": 4.84052395346707, "grad_norm": 0.14526245110354843, "learning_rate": 4.094554295784085e-06, "loss": 0.3766, "num_tokens": 10108366765.0, "step": 13214 }, { "epoch": 4.840890354492992, "grad_norm": 0.1460055625615113, "learning_rate": 4.094117432511664e-06, "loss": 0.3611, "num_tokens": 10109149668.0, "step": 13215 }, { "epoch": 4.841256755518915, "grad_norm": 0.14861928051448198, "learning_rate": 4.093681578137993e-06, "loss": 0.3647, "num_tokens": 10109982607.0, "step": 13216 }, { "epoch": 4.841623156544839, "grad_norm": 0.1402865921762032, "learning_rate": 4.093246732687629e-06, "loss": 0.3679, "num_tokens": 10110783526.0, "step": 13217 }, { "epoch": 4.841989557570761, "grad_norm": 0.14587861202136596, "learning_rate": 4.092812896185075e-06, "loss": 0.3423, "num_tokens": 10111539348.0, "step": 13218 }, { "epoch": 4.842355958596684, "grad_norm": 0.1576846360987644, "learning_rate": 4.092380068654776e-06, "loss": 0.3571, "num_tokens": 10112291812.0, "step": 13219 }, { "epoch": 4.842722359622607, "grad_norm": 0.1444192376994376, "learning_rate": 4.091948250121116e-06, "loss": 0.3565, "num_tokens": 10113091737.0, "step": 13220 }, { "epoch": 4.84308876064853, "grad_norm": 0.162762275414032, "learning_rate": 4.091517440608428e-06, "loss": 0.396, "num_tokens": 10113821707.0, "step": 13221 }, { "epoch": 4.843455161674453, "grad_norm": 0.14634552118269276, "learning_rate": 4.091087640140986e-06, "loss": 0.3866, "num_tokens": 10114615474.0, "step": 13222 }, { "epoch": 4.843821562700375, "grad_norm": 0.1397599382425341, "learning_rate": 4.0906588487430074e-06, "loss": 0.3319, "num_tokens": 10115401109.0, "step": 13223 }, { "epoch": 4.844187963726299, "grad_norm": 0.14425850532406012, "learning_rate": 4.09023106643865e-06, "loss": 0.3761, "num_tokens": 10116246373.0, "step": 13224 }, { "epoch": 4.844554364752222, "grad_norm": 0.16738255073473138, "learning_rate": 4.089804293252017e-06, "loss": 0.3714, "num_tokens": 10116914920.0, "step": 13225 }, { "epoch": 4.844920765778144, "grad_norm": 0.1609666349483126, "learning_rate": 4.089378529207157e-06, "loss": 0.3737, "num_tokens": 10117607342.0, "step": 13226 }, { "epoch": 4.845287166804067, "grad_norm": 0.1523332549669848, "learning_rate": 4.08895377432806e-06, "loss": 0.3972, "num_tokens": 10118362786.0, "step": 13227 }, { "epoch": 4.84565356782999, "grad_norm": 0.16641058261919459, "learning_rate": 4.088530028638654e-06, "loss": 0.3746, "num_tokens": 10119021488.0, "step": 13228 }, { "epoch": 4.846019968855913, "grad_norm": 0.15677083852220589, "learning_rate": 4.088107292162821e-06, "loss": 0.3425, "num_tokens": 10119723018.0, "step": 13229 }, { "epoch": 4.846386369881836, "grad_norm": 0.1618781851969186, "learning_rate": 4.087685564924374e-06, "loss": 0.3687, "num_tokens": 10120404391.0, "step": 13230 }, { "epoch": 4.8467527709077585, "grad_norm": 0.1562307627093357, "learning_rate": 4.087264846947081e-06, "loss": 0.3663, "num_tokens": 10121138719.0, "step": 13231 }, { "epoch": 4.847119171933682, "grad_norm": 0.1461397452839838, "learning_rate": 4.08684513825464e-06, "loss": 0.3511, "num_tokens": 10121925530.0, "step": 13232 }, { "epoch": 4.847485572959604, "grad_norm": 0.1459024008737179, "learning_rate": 4.086426438870706e-06, "loss": 0.3596, "num_tokens": 10122749555.0, "step": 13233 }, { "epoch": 4.847851973985527, "grad_norm": 0.1370827251017791, "learning_rate": 4.086008748818866e-06, "loss": 0.3541, "num_tokens": 10123642415.0, "step": 13234 }, { "epoch": 4.8482183750114505, "grad_norm": 0.16459000497589635, "learning_rate": 4.085592068122657e-06, "loss": 0.4137, "num_tokens": 10124343767.0, "step": 13235 }, { "epoch": 4.848584776037373, "grad_norm": 0.14128962953857893, "learning_rate": 4.085176396805556e-06, "loss": 0.3509, "num_tokens": 10125164557.0, "step": 13236 }, { "epoch": 4.848951177063296, "grad_norm": 0.16809387860972533, "learning_rate": 4.084761734890983e-06, "loss": 0.3883, "num_tokens": 10125774024.0, "step": 13237 }, { "epoch": 4.849317578089218, "grad_norm": 0.15829543658221698, "learning_rate": 4.084348082402303e-06, "loss": 0.3558, "num_tokens": 10126463193.0, "step": 13238 }, { "epoch": 4.849683979115142, "grad_norm": 0.14507984218154224, "learning_rate": 4.083935439362823e-06, "loss": 0.4031, "num_tokens": 10127264060.0, "step": 13239 }, { "epoch": 4.850050380141065, "grad_norm": 0.13833440563397187, "learning_rate": 4.083523805795792e-06, "loss": 0.3626, "num_tokens": 10128105627.0, "step": 13240 }, { "epoch": 4.850416781166987, "grad_norm": 0.16152123727818285, "learning_rate": 4.083113181724405e-06, "loss": 0.3983, "num_tokens": 10128860220.0, "step": 13241 }, { "epoch": 4.85078318219291, "grad_norm": 0.1457961323749586, "learning_rate": 4.082703567171797e-06, "loss": 0.3524, "num_tokens": 10129582131.0, "step": 13242 }, { "epoch": 4.851149583218833, "grad_norm": 0.14544935501922013, "learning_rate": 4.082294962161048e-06, "loss": 0.3512, "num_tokens": 10130373378.0, "step": 13243 }, { "epoch": 4.851515984244756, "grad_norm": 0.1489612315653439, "learning_rate": 4.081887366715181e-06, "loss": 0.3685, "num_tokens": 10131099293.0, "step": 13244 }, { "epoch": 4.851882385270679, "grad_norm": 0.15428279748719798, "learning_rate": 4.081480780857162e-06, "loss": 0.3798, "num_tokens": 10131801126.0, "step": 13245 }, { "epoch": 4.8522487862966015, "grad_norm": 0.15157237869250256, "learning_rate": 4.081075204609899e-06, "loss": 0.3547, "num_tokens": 10132511683.0, "step": 13246 }, { "epoch": 4.852615187322525, "grad_norm": 0.1481427816268409, "learning_rate": 4.080670637996245e-06, "loss": 0.3587, "num_tokens": 10133311325.0, "step": 13247 }, { "epoch": 4.852981588348447, "grad_norm": 0.17029124051618572, "learning_rate": 4.080267081038994e-06, "loss": 0.3589, "num_tokens": 10134025229.0, "step": 13248 }, { "epoch": 4.85334798937437, "grad_norm": 0.1563722261135131, "learning_rate": 4.079864533760885e-06, "loss": 0.3907, "num_tokens": 10134776555.0, "step": 13249 }, { "epoch": 4.8537143904002935, "grad_norm": 0.15688430327147823, "learning_rate": 4.0794629961846e-06, "loss": 0.3646, "num_tokens": 10135432080.0, "step": 13250 }, { "epoch": 4.854080791426216, "grad_norm": 0.14478690001491792, "learning_rate": 4.079062468332761e-06, "loss": 0.3602, "num_tokens": 10136192754.0, "step": 13251 }, { "epoch": 4.854447192452139, "grad_norm": 0.15209846802774474, "learning_rate": 4.078662950227938e-06, "loss": 0.3399, "num_tokens": 10136906507.0, "step": 13252 }, { "epoch": 4.854813593478061, "grad_norm": 0.15169199889424392, "learning_rate": 4.078264441892641e-06, "loss": 0.3685, "num_tokens": 10137625311.0, "step": 13253 }, { "epoch": 4.855179994503985, "grad_norm": 0.13613342764064845, "learning_rate": 4.077866943349322e-06, "loss": 0.3633, "num_tokens": 10138474510.0, "step": 13254 }, { "epoch": 4.855546395529908, "grad_norm": 0.1410845375599596, "learning_rate": 4.077470454620382e-06, "loss": 0.3681, "num_tokens": 10139268866.0, "step": 13255 }, { "epoch": 4.85591279655583, "grad_norm": 0.14395419954512095, "learning_rate": 4.077074975728158e-06, "loss": 0.3792, "num_tokens": 10140154948.0, "step": 13256 }, { "epoch": 4.856279197581753, "grad_norm": 0.14521305829121672, "learning_rate": 4.076680506694931e-06, "loss": 0.3427, "num_tokens": 10140852226.0, "step": 13257 }, { "epoch": 4.856645598607676, "grad_norm": 0.13758240854490236, "learning_rate": 4.0762870475429316e-06, "loss": 0.3686, "num_tokens": 10141693459.0, "step": 13258 }, { "epoch": 4.857011999633599, "grad_norm": 0.15169885592021376, "learning_rate": 4.075894598294327e-06, "loss": 0.3837, "num_tokens": 10142420672.0, "step": 13259 }, { "epoch": 4.857378400659522, "grad_norm": 0.17102344318584045, "learning_rate": 4.07550315897123e-06, "loss": 0.372, "num_tokens": 10143117226.0, "step": 13260 }, { "epoch": 4.8577448016854445, "grad_norm": 0.15829274027667997, "learning_rate": 4.075112729595695e-06, "loss": 0.3819, "num_tokens": 10143796465.0, "step": 13261 }, { "epoch": 4.858111202711368, "grad_norm": 0.1679014005491771, "learning_rate": 4.074723310189723e-06, "loss": 0.3683, "num_tokens": 10144485937.0, "step": 13262 }, { "epoch": 4.85847760373729, "grad_norm": 0.14287689007622478, "learning_rate": 4.074334900775251e-06, "loss": 0.3698, "num_tokens": 10145400610.0, "step": 13263 }, { "epoch": 4.858844004763213, "grad_norm": 0.18007347198259294, "learning_rate": 4.0739475013741695e-06, "loss": 0.3424, "num_tokens": 10145970014.0, "step": 13264 }, { "epoch": 4.8592104057891365, "grad_norm": 0.14831488586925887, "learning_rate": 4.073561112008304e-06, "loss": 0.3574, "num_tokens": 10146748193.0, "step": 13265 }, { "epoch": 4.859576806815059, "grad_norm": 0.15809766599022224, "learning_rate": 4.073175732699424e-06, "loss": 0.395, "num_tokens": 10147453174.0, "step": 13266 }, { "epoch": 4.859943207840982, "grad_norm": 0.1595326705017638, "learning_rate": 4.072791363469243e-06, "loss": 0.3715, "num_tokens": 10148112388.0, "step": 13267 }, { "epoch": 4.860309608866904, "grad_norm": 0.14384195612738448, "learning_rate": 4.0724080043394205e-06, "loss": 0.3657, "num_tokens": 10148872710.0, "step": 13268 }, { "epoch": 4.860676009892828, "grad_norm": 0.16081765928164884, "learning_rate": 4.072025655331558e-06, "loss": 0.3609, "num_tokens": 10149542777.0, "step": 13269 }, { "epoch": 4.861042410918751, "grad_norm": 0.1594621100614232, "learning_rate": 4.071644316467196e-06, "loss": 0.3791, "num_tokens": 10150313522.0, "step": 13270 }, { "epoch": 4.861408811944673, "grad_norm": 0.15352496142031444, "learning_rate": 4.0712639877678195e-06, "loss": 0.3902, "num_tokens": 10151054572.0, "step": 13271 }, { "epoch": 4.8617752129705964, "grad_norm": 0.14446953471892376, "learning_rate": 4.070884669254862e-06, "loss": 0.3616, "num_tokens": 10151831840.0, "step": 13272 }, { "epoch": 4.862141613996519, "grad_norm": 0.14690356696593815, "learning_rate": 4.070506360949694e-06, "loss": 0.3414, "num_tokens": 10152593926.0, "step": 13273 }, { "epoch": 4.862508015022442, "grad_norm": 0.15019952768135816, "learning_rate": 4.07012906287363e-06, "loss": 0.3633, "num_tokens": 10153348577.0, "step": 13274 }, { "epoch": 4.862874416048365, "grad_norm": 0.1454348059232644, "learning_rate": 4.069752775047929e-06, "loss": 0.3647, "num_tokens": 10154161238.0, "step": 13275 }, { "epoch": 4.863240817074288, "grad_norm": 0.1514163644968826, "learning_rate": 4.069377497493795e-06, "loss": 0.3511, "num_tokens": 10155018842.0, "step": 13276 }, { "epoch": 4.863607218100211, "grad_norm": 0.15008516895392582, "learning_rate": 4.069003230232372e-06, "loss": 0.3467, "num_tokens": 10155700252.0, "step": 13277 }, { "epoch": 4.863973619126133, "grad_norm": 0.15371063487035996, "learning_rate": 4.068629973284747e-06, "loss": 0.3552, "num_tokens": 10156403320.0, "step": 13278 }, { "epoch": 4.864340020152056, "grad_norm": 0.14794618180720095, "learning_rate": 4.06825772667195e-06, "loss": 0.385, "num_tokens": 10157225236.0, "step": 13279 }, { "epoch": 4.86470642117798, "grad_norm": 0.1469458129936479, "learning_rate": 4.067886490414958e-06, "loss": 0.388, "num_tokens": 10158008148.0, "step": 13280 }, { "epoch": 4.865072822203902, "grad_norm": 0.1487672813953233, "learning_rate": 4.067516264534685e-06, "loss": 0.3834, "num_tokens": 10158802810.0, "step": 13281 }, { "epoch": 4.865439223229825, "grad_norm": 0.14386001769088952, "learning_rate": 4.067147049051993e-06, "loss": 0.3395, "num_tokens": 10159527842.0, "step": 13282 }, { "epoch": 4.8658056242557475, "grad_norm": 0.15328801642873618, "learning_rate": 4.066778843987686e-06, "loss": 0.3717, "num_tokens": 10160250451.0, "step": 13283 }, { "epoch": 4.866172025281671, "grad_norm": 0.14291868626513912, "learning_rate": 4.066411649362508e-06, "loss": 0.3886, "num_tokens": 10161070838.0, "step": 13284 }, { "epoch": 4.866538426307594, "grad_norm": 0.13825640226352304, "learning_rate": 4.06604546519715e-06, "loss": 0.3342, "num_tokens": 10161948760.0, "step": 13285 }, { "epoch": 4.866904827333516, "grad_norm": 0.15838997846531763, "learning_rate": 4.065680291512245e-06, "loss": 0.3588, "num_tokens": 10162583795.0, "step": 13286 }, { "epoch": 4.8672712283594395, "grad_norm": 0.16587416951109382, "learning_rate": 4.0653161283283675e-06, "loss": 0.3738, "num_tokens": 10163251471.0, "step": 13287 }, { "epoch": 4.867637629385362, "grad_norm": 0.13302487165706572, "learning_rate": 4.064952975666036e-06, "loss": 0.3553, "num_tokens": 10164148662.0, "step": 13288 }, { "epoch": 4.868004030411285, "grad_norm": 0.16228072382366948, "learning_rate": 4.064590833545715e-06, "loss": 0.383, "num_tokens": 10164824896.0, "step": 13289 }, { "epoch": 4.868370431437208, "grad_norm": 0.15587673151538778, "learning_rate": 4.064229701987805e-06, "loss": 0.3696, "num_tokens": 10165557172.0, "step": 13290 }, { "epoch": 4.868736832463131, "grad_norm": 0.14834745899993038, "learning_rate": 4.0638695810126545e-06, "loss": 0.3489, "num_tokens": 10166296503.0, "step": 13291 }, { "epoch": 4.869103233489054, "grad_norm": 0.15942066818480666, "learning_rate": 4.063510470640555e-06, "loss": 0.3947, "num_tokens": 10166994029.0, "step": 13292 }, { "epoch": 4.869469634514976, "grad_norm": 0.15770441541106925, "learning_rate": 4.063152370891744e-06, "loss": 0.3631, "num_tokens": 10167745485.0, "step": 13293 }, { "epoch": 4.869836035540899, "grad_norm": 0.15645484877401253, "learning_rate": 4.062795281786393e-06, "loss": 0.3719, "num_tokens": 10168415926.0, "step": 13294 }, { "epoch": 4.870202436566823, "grad_norm": 0.13694470354605076, "learning_rate": 4.062439203344624e-06, "loss": 0.3522, "num_tokens": 10169277120.0, "step": 13295 }, { "epoch": 4.870568837592745, "grad_norm": 0.14975724922803071, "learning_rate": 4.062084135586503e-06, "loss": 0.3593, "num_tokens": 10170042379.0, "step": 13296 }, { "epoch": 4.870935238618668, "grad_norm": 0.1483262970699587, "learning_rate": 4.06173007853203e-06, "loss": 0.3674, "num_tokens": 10170888307.0, "step": 13297 }, { "epoch": 4.8713016396445905, "grad_norm": 0.15345272347386754, "learning_rate": 4.0613770322011605e-06, "loss": 0.3827, "num_tokens": 10171663524.0, "step": 13298 }, { "epoch": 4.871668040670514, "grad_norm": 0.1386882294688752, "learning_rate": 4.061024996613783e-06, "loss": 0.343, "num_tokens": 10172584303.0, "step": 13299 }, { "epoch": 4.872034441696437, "grad_norm": 0.13631715140659303, "learning_rate": 4.060673971789733e-06, "loss": 0.3456, "num_tokens": 10173408950.0, "step": 13300 }, { "epoch": 4.872400842722359, "grad_norm": 0.14197553589015646, "learning_rate": 4.060323957748791e-06, "loss": 0.3559, "num_tokens": 10174240904.0, "step": 13301 }, { "epoch": 4.8727672437482825, "grad_norm": 0.14921766261388578, "learning_rate": 4.059974954510675e-06, "loss": 0.3642, "num_tokens": 10175014784.0, "step": 13302 }, { "epoch": 4.873133644774206, "grad_norm": 0.14709945817645287, "learning_rate": 4.059626962095054e-06, "loss": 0.3777, "num_tokens": 10175786843.0, "step": 13303 }, { "epoch": 4.873500045800128, "grad_norm": 0.1372883969698972, "learning_rate": 4.05927998052153e-06, "loss": 0.347, "num_tokens": 10176588485.0, "step": 13304 }, { "epoch": 4.873866446826051, "grad_norm": 0.16180054684335285, "learning_rate": 4.058934009809659e-06, "loss": 0.3855, "num_tokens": 10177301714.0, "step": 13305 }, { "epoch": 4.874232847851974, "grad_norm": 0.14547263517120304, "learning_rate": 4.058589049978931e-06, "loss": 0.3646, "num_tokens": 10178105638.0, "step": 13306 }, { "epoch": 4.874599248877897, "grad_norm": 0.14723689307007232, "learning_rate": 4.058245101048782e-06, "loss": 0.3848, "num_tokens": 10178942513.0, "step": 13307 }, { "epoch": 4.87496564990382, "grad_norm": 0.15486576450379277, "learning_rate": 4.057902163038594e-06, "loss": 0.4106, "num_tokens": 10179728363.0, "step": 13308 }, { "epoch": 4.875332050929742, "grad_norm": 0.149559370855697, "learning_rate": 4.057560235967692e-06, "loss": 0.3798, "num_tokens": 10180504021.0, "step": 13309 }, { "epoch": 4.875698451955666, "grad_norm": 0.14426438975943115, "learning_rate": 4.057219319855335e-06, "loss": 0.3702, "num_tokens": 10181370715.0, "step": 13310 }, { "epoch": 4.876064852981588, "grad_norm": 0.1486094327249329, "learning_rate": 4.056879414720736e-06, "loss": 0.3502, "num_tokens": 10182103984.0, "step": 13311 }, { "epoch": 4.876431254007511, "grad_norm": 0.14633589754062548, "learning_rate": 4.056540520583046e-06, "loss": 0.3695, "num_tokens": 10182873012.0, "step": 13312 }, { "epoch": 4.876797655033434, "grad_norm": 0.1437745182917445, "learning_rate": 4.056202637461361e-06, "loss": 0.3562, "num_tokens": 10183700000.0, "step": 13313 }, { "epoch": 4.877164056059357, "grad_norm": 0.1421235002671873, "learning_rate": 4.055865765374716e-06, "loss": 0.3657, "num_tokens": 10184500267.0, "step": 13314 }, { "epoch": 4.87753045708528, "grad_norm": 0.1486038571887592, "learning_rate": 4.055529904342095e-06, "loss": 0.3713, "num_tokens": 10185325561.0, "step": 13315 }, { "epoch": 4.877896858111202, "grad_norm": 0.15699421016160206, "learning_rate": 4.055195054382421e-06, "loss": 0.3672, "num_tokens": 10186031735.0, "step": 13316 }, { "epoch": 4.8782632591371256, "grad_norm": 0.16064604140464134, "learning_rate": 4.054861215514561e-06, "loss": 0.3836, "num_tokens": 10186791824.0, "step": 13317 }, { "epoch": 4.878629660163049, "grad_norm": 0.14102706816779093, "learning_rate": 4.054528387757325e-06, "loss": 0.401, "num_tokens": 10187630349.0, "step": 13318 }, { "epoch": 4.878996061188971, "grad_norm": 0.13387502659502876, "learning_rate": 4.054196571129464e-06, "loss": 0.3709, "num_tokens": 10188525252.0, "step": 13319 }, { "epoch": 4.879362462214894, "grad_norm": 0.1450064974704821, "learning_rate": 4.053865765649676e-06, "loss": 0.3836, "num_tokens": 10189376140.0, "step": 13320 }, { "epoch": 4.879728863240818, "grad_norm": 0.14103446487382337, "learning_rate": 4.053535971336601e-06, "loss": 0.3699, "num_tokens": 10190220022.0, "step": 13321 }, { "epoch": 4.88009526426674, "grad_norm": 0.15579190820782718, "learning_rate": 4.05320718820882e-06, "loss": 0.3688, "num_tokens": 10190964128.0, "step": 13322 }, { "epoch": 4.880461665292663, "grad_norm": 0.15259335970353488, "learning_rate": 4.052879416284858e-06, "loss": 0.3679, "num_tokens": 10191745288.0, "step": 13323 }, { "epoch": 4.8808280663185855, "grad_norm": 0.14385312363072625, "learning_rate": 4.05255265558318e-06, "loss": 0.3722, "num_tokens": 10192511833.0, "step": 13324 }, { "epoch": 4.881194467344509, "grad_norm": 0.13548128278516086, "learning_rate": 4.052226906122203e-06, "loss": 0.3842, "num_tokens": 10193374987.0, "step": 13325 }, { "epoch": 4.881560868370432, "grad_norm": 0.16110339465614223, "learning_rate": 4.05190216792028e-06, "loss": 0.367, "num_tokens": 10194129370.0, "step": 13326 }, { "epoch": 4.881927269396354, "grad_norm": 0.13566742470619822, "learning_rate": 4.051578440995705e-06, "loss": 0.3642, "num_tokens": 10194982878.0, "step": 13327 }, { "epoch": 4.8822936704222775, "grad_norm": 0.15367733646127865, "learning_rate": 4.051255725366719e-06, "loss": 0.3777, "num_tokens": 10195690231.0, "step": 13328 }, { "epoch": 4.8826600714482, "grad_norm": 0.148449250115369, "learning_rate": 4.050934021051508e-06, "loss": 0.3939, "num_tokens": 10196499842.0, "step": 13329 }, { "epoch": 4.883026472474123, "grad_norm": 0.14919347128838872, "learning_rate": 4.050613328068198e-06, "loss": 0.3554, "num_tokens": 10197229228.0, "step": 13330 }, { "epoch": 4.883392873500046, "grad_norm": 0.15526077317408932, "learning_rate": 4.050293646434854e-06, "loss": 0.4045, "num_tokens": 10197946427.0, "step": 13331 }, { "epoch": 4.883759274525969, "grad_norm": 0.15755522090063795, "learning_rate": 4.049974976169495e-06, "loss": 0.3919, "num_tokens": 10198655662.0, "step": 13332 }, { "epoch": 4.884125675551892, "grad_norm": 0.1584157568308412, "learning_rate": 4.04965731729007e-06, "loss": 0.3715, "num_tokens": 10199399606.0, "step": 13333 }, { "epoch": 4.884492076577814, "grad_norm": 0.1531963106664073, "learning_rate": 4.04934066981448e-06, "loss": 0.3665, "num_tokens": 10200101428.0, "step": 13334 }, { "epoch": 4.884858477603737, "grad_norm": 0.1551047072410761, "learning_rate": 4.049025033760566e-06, "loss": 0.3549, "num_tokens": 10200802317.0, "step": 13335 }, { "epoch": 4.885224878629661, "grad_norm": 0.13126923688445297, "learning_rate": 4.048710409146113e-06, "loss": 0.3832, "num_tokens": 10201777980.0, "step": 13336 }, { "epoch": 4.885591279655583, "grad_norm": 0.16664865239818608, "learning_rate": 4.0483967959888495e-06, "loss": 0.4017, "num_tokens": 10202447206.0, "step": 13337 }, { "epoch": 4.885957680681506, "grad_norm": 0.13952181199202535, "learning_rate": 4.048084194306442e-06, "loss": 0.355, "num_tokens": 10203254248.0, "step": 13338 }, { "epoch": 4.8863240817074285, "grad_norm": 0.15370169944272483, "learning_rate": 4.047772604116508e-06, "loss": 0.3982, "num_tokens": 10204054516.0, "step": 13339 }, { "epoch": 4.886690482733352, "grad_norm": 0.15961994866946483, "learning_rate": 4.047462025436602e-06, "loss": 0.4023, "num_tokens": 10204752921.0, "step": 13340 }, { "epoch": 4.887056883759275, "grad_norm": 0.15055135171772868, "learning_rate": 4.047152458284224e-06, "loss": 0.3696, "num_tokens": 10205531437.0, "step": 13341 }, { "epoch": 4.887423284785197, "grad_norm": 0.1538855759356968, "learning_rate": 4.046843902676813e-06, "loss": 0.3888, "num_tokens": 10206288766.0, "step": 13342 }, { "epoch": 4.8877896858111205, "grad_norm": 0.15066102270824547, "learning_rate": 4.0465363586317605e-06, "loss": 0.3609, "num_tokens": 10206984833.0, "step": 13343 }, { "epoch": 4.888156086837043, "grad_norm": 0.1556166448736202, "learning_rate": 4.046229826166393e-06, "loss": 0.3959, "num_tokens": 10207776436.0, "step": 13344 }, { "epoch": 4.888522487862966, "grad_norm": 0.15971337282131226, "learning_rate": 4.045924305297978e-06, "loss": 0.3571, "num_tokens": 10208413686.0, "step": 13345 }, { "epoch": 4.888888888888889, "grad_norm": 0.1657936797460065, "learning_rate": 4.045619796043734e-06, "loss": 0.3742, "num_tokens": 10209131211.0, "step": 13346 }, { "epoch": 4.889255289914812, "grad_norm": 0.14337012022208465, "learning_rate": 4.045316298420817e-06, "loss": 0.3779, "num_tokens": 10210046708.0, "step": 13347 }, { "epoch": 4.889621690940735, "grad_norm": 0.13489804082006004, "learning_rate": 4.045013812446329e-06, "loss": 0.3463, "num_tokens": 10210877068.0, "step": 13348 }, { "epoch": 4.889988091966657, "grad_norm": 0.13822558987357644, "learning_rate": 4.044712338137309e-06, "loss": 0.3521, "num_tokens": 10211733445.0, "step": 13349 }, { "epoch": 4.89035449299258, "grad_norm": 0.15765740158872768, "learning_rate": 4.044411875510749e-06, "loss": 0.3647, "num_tokens": 10212436255.0, "step": 13350 }, { "epoch": 4.890720894018504, "grad_norm": 0.1504200884982121, "learning_rate": 4.044112424583575e-06, "loss": 0.3611, "num_tokens": 10213206384.0, "step": 13351 }, { "epoch": 4.891087295044426, "grad_norm": 0.14188758853193306, "learning_rate": 4.04381398537266e-06, "loss": 0.3899, "num_tokens": 10214083630.0, "step": 13352 }, { "epoch": 4.891453696070349, "grad_norm": 0.14846402373138137, "learning_rate": 4.043516557894821e-06, "loss": 0.3683, "num_tokens": 10214837049.0, "step": 13353 }, { "epoch": 4.8918200970962715, "grad_norm": 0.1563080492467865, "learning_rate": 4.043220142166815e-06, "loss": 0.3663, "num_tokens": 10215588283.0, "step": 13354 }, { "epoch": 4.892186498122195, "grad_norm": 0.1655513029347652, "learning_rate": 4.042924738205342e-06, "loss": 0.4003, "num_tokens": 10216211920.0, "step": 13355 }, { "epoch": 4.892552899148118, "grad_norm": 0.14538831333905516, "learning_rate": 4.042630346027051e-06, "loss": 0.3971, "num_tokens": 10217002958.0, "step": 13356 }, { "epoch": 4.89291930017404, "grad_norm": 0.14809603463268045, "learning_rate": 4.0423369656485236e-06, "loss": 0.3653, "num_tokens": 10217826115.0, "step": 13357 }, { "epoch": 4.8932857011999635, "grad_norm": 0.1764704032722088, "learning_rate": 4.0420445970862935e-06, "loss": 0.3583, "num_tokens": 10218593554.0, "step": 13358 }, { "epoch": 4.893652102225886, "grad_norm": 0.1417403332518916, "learning_rate": 4.041753240356835e-06, "loss": 0.3622, "num_tokens": 10219358013.0, "step": 13359 }, { "epoch": 4.894018503251809, "grad_norm": 0.14988251431612468, "learning_rate": 4.041462895476561e-06, "loss": 0.3709, "num_tokens": 10220113903.0, "step": 13360 }, { "epoch": 4.894384904277732, "grad_norm": 0.1451831560225741, "learning_rate": 4.0411735624618345e-06, "loss": 0.3626, "num_tokens": 10220950250.0, "step": 13361 }, { "epoch": 4.894751305303655, "grad_norm": 0.1571705239820058, "learning_rate": 4.040885241328955e-06, "loss": 0.3576, "num_tokens": 10221640599.0, "step": 13362 }, { "epoch": 4.895117706329578, "grad_norm": 0.16414152493389167, "learning_rate": 4.04059793209417e-06, "loss": 0.3727, "num_tokens": 10222343968.0, "step": 13363 }, { "epoch": 4.8954841073555, "grad_norm": 0.14009407623500997, "learning_rate": 4.040311634773667e-06, "loss": 0.3648, "num_tokens": 10223171585.0, "step": 13364 }, { "epoch": 4.895850508381423, "grad_norm": 0.15065093962080775, "learning_rate": 4.0400263493835755e-06, "loss": 0.348, "num_tokens": 10223891934.0, "step": 13365 }, { "epoch": 4.896216909407347, "grad_norm": 0.1529971826658391, "learning_rate": 4.039742075939973e-06, "loss": 0.3462, "num_tokens": 10224663354.0, "step": 13366 }, { "epoch": 4.896583310433269, "grad_norm": 0.16198095984430733, "learning_rate": 4.039458814458876e-06, "loss": 0.3553, "num_tokens": 10225298086.0, "step": 13367 }, { "epoch": 4.896949711459192, "grad_norm": 0.15416472226636008, "learning_rate": 4.039176564956242e-06, "loss": 0.3895, "num_tokens": 10226038503.0, "step": 13368 }, { "epoch": 4.897316112485115, "grad_norm": 0.1363515890252975, "learning_rate": 4.038895327447976e-06, "loss": 0.3641, "num_tokens": 10226897502.0, "step": 13369 }, { "epoch": 4.897682513511038, "grad_norm": 0.15195680290525965, "learning_rate": 4.0386151019499265e-06, "loss": 0.3882, "num_tokens": 10227657187.0, "step": 13370 }, { "epoch": 4.898048914536961, "grad_norm": 0.16811881017298602, "learning_rate": 4.038335888477877e-06, "loss": 0.3662, "num_tokens": 10228296480.0, "step": 13371 }, { "epoch": 4.898415315562883, "grad_norm": 0.1374141248791565, "learning_rate": 4.038057687047567e-06, "loss": 0.3599, "num_tokens": 10229141673.0, "step": 13372 }, { "epoch": 4.898781716588807, "grad_norm": 0.15351430482338652, "learning_rate": 4.0377804976746655e-06, "loss": 0.3816, "num_tokens": 10229872251.0, "step": 13373 }, { "epoch": 4.899148117614729, "grad_norm": 0.1383959901227204, "learning_rate": 4.037504320374792e-06, "loss": 0.3563, "num_tokens": 10230712003.0, "step": 13374 }, { "epoch": 4.899514518640652, "grad_norm": 0.15673650541448497, "learning_rate": 4.0372291551635105e-06, "loss": 0.3832, "num_tokens": 10231419596.0, "step": 13375 }, { "epoch": 4.899880919666575, "grad_norm": 0.1666069503032457, "learning_rate": 4.036955002056322e-06, "loss": 0.3895, "num_tokens": 10232174224.0, "step": 13376 }, { "epoch": 4.900247320692498, "grad_norm": 0.16683844270487458, "learning_rate": 4.0366818610686766e-06, "loss": 0.3824, "num_tokens": 10232846478.0, "step": 13377 }, { "epoch": 4.900613721718421, "grad_norm": 0.14987106747546067, "learning_rate": 4.0364097322159605e-06, "loss": 0.3687, "num_tokens": 10233596040.0, "step": 13378 }, { "epoch": 4.900980122744343, "grad_norm": 0.15076175832099728, "learning_rate": 4.036138615513508e-06, "loss": 0.3505, "num_tokens": 10234378489.0, "step": 13379 }, { "epoch": 4.9013465237702665, "grad_norm": 0.1471429576573794, "learning_rate": 4.035868510976595e-06, "loss": 0.3562, "num_tokens": 10235154010.0, "step": 13380 }, { "epoch": 4.90171292479619, "grad_norm": 0.16473700372969036, "learning_rate": 4.035599418620443e-06, "loss": 0.3798, "num_tokens": 10235838999.0, "step": 13381 }, { "epoch": 4.902079325822112, "grad_norm": 0.14386345297322398, "learning_rate": 4.035331338460209e-06, "loss": 0.3583, "num_tokens": 10236676819.0, "step": 13382 }, { "epoch": 4.902445726848035, "grad_norm": 0.15923279080030037, "learning_rate": 4.035064270511003e-06, "loss": 0.3676, "num_tokens": 10237387484.0, "step": 13383 }, { "epoch": 4.902812127873958, "grad_norm": 0.14797991971883814, "learning_rate": 4.034798214787871e-06, "loss": 0.374, "num_tokens": 10238122570.0, "step": 13384 }, { "epoch": 4.903178528899881, "grad_norm": 0.16496647703879525, "learning_rate": 4.034533171305801e-06, "loss": 0.3783, "num_tokens": 10238793130.0, "step": 13385 }, { "epoch": 4.903544929925804, "grad_norm": 0.16065693263133038, "learning_rate": 4.03426914007973e-06, "loss": 0.3737, "num_tokens": 10239470419.0, "step": 13386 }, { "epoch": 4.903911330951726, "grad_norm": 0.1557006707087157, "learning_rate": 4.0340061211245345e-06, "loss": 0.3536, "num_tokens": 10240237763.0, "step": 13387 }, { "epoch": 4.90427773197765, "grad_norm": 0.15847681866954685, "learning_rate": 4.033744114455033e-06, "loss": 0.3652, "num_tokens": 10240971764.0, "step": 13388 }, { "epoch": 4.904644133003572, "grad_norm": 0.15201823437554446, "learning_rate": 4.033483120085988e-06, "loss": 0.3402, "num_tokens": 10241673779.0, "step": 13389 }, { "epoch": 4.905010534029495, "grad_norm": 0.14027112113712564, "learning_rate": 4.033223138032105e-06, "loss": 0.3513, "num_tokens": 10242499981.0, "step": 13390 }, { "epoch": 4.905376935055418, "grad_norm": 0.1439415404718459, "learning_rate": 4.0329641683080335e-06, "loss": 0.3752, "num_tokens": 10243304113.0, "step": 13391 }, { "epoch": 4.905743336081341, "grad_norm": 0.15556348556585994, "learning_rate": 4.0327062109283666e-06, "loss": 0.3696, "num_tokens": 10243978262.0, "step": 13392 }, { "epoch": 4.906109737107264, "grad_norm": 0.13554187570577944, "learning_rate": 4.032449265907634e-06, "loss": 0.3667, "num_tokens": 10244863042.0, "step": 13393 }, { "epoch": 4.906476138133186, "grad_norm": 0.1448945372630646, "learning_rate": 4.032193333260319e-06, "loss": 0.3761, "num_tokens": 10245644465.0, "step": 13394 }, { "epoch": 4.9068425391591095, "grad_norm": 0.13869680521894778, "learning_rate": 4.0319384130008375e-06, "loss": 0.3573, "num_tokens": 10246493009.0, "step": 13395 }, { "epoch": 4.907208940185033, "grad_norm": 0.1546047574495482, "learning_rate": 4.031684505143554e-06, "loss": 0.3682, "num_tokens": 10247231526.0, "step": 13396 }, { "epoch": 4.907575341210955, "grad_norm": 0.15873913422380703, "learning_rate": 4.031431609702778e-06, "loss": 0.343, "num_tokens": 10247946721.0, "step": 13397 }, { "epoch": 4.907941742236878, "grad_norm": 0.15073404459881065, "learning_rate": 4.031179726692752e-06, "loss": 0.3674, "num_tokens": 10248707456.0, "step": 13398 }, { "epoch": 4.9083081432628015, "grad_norm": 0.15014795953386945, "learning_rate": 4.0309288561276744e-06, "loss": 0.3511, "num_tokens": 10249494457.0, "step": 13399 }, { "epoch": 4.908674544288724, "grad_norm": 0.16748035503929246, "learning_rate": 4.0306789980216794e-06, "loss": 0.351, "num_tokens": 10250137627.0, "step": 13400 }, { "epoch": 4.909040945314647, "grad_norm": 0.15421298346672163, "learning_rate": 4.030430152388844e-06, "loss": 0.3671, "num_tokens": 10250816564.0, "step": 13401 }, { "epoch": 4.909407346340569, "grad_norm": 0.15657940502844767, "learning_rate": 4.030182319243187e-06, "loss": 0.3977, "num_tokens": 10251602061.0, "step": 13402 }, { "epoch": 4.909773747366493, "grad_norm": 0.15977094078425833, "learning_rate": 4.029935498598677e-06, "loss": 0.3904, "num_tokens": 10252336615.0, "step": 13403 }, { "epoch": 4.910140148392416, "grad_norm": 0.14609623217006745, "learning_rate": 4.029689690469216e-06, "loss": 0.3682, "num_tokens": 10253134563.0, "step": 13404 }, { "epoch": 4.910506549418338, "grad_norm": 0.13968407719890727, "learning_rate": 4.02944489486866e-06, "loss": 0.3744, "num_tokens": 10253956711.0, "step": 13405 }, { "epoch": 4.910872950444261, "grad_norm": 0.14897180666646742, "learning_rate": 4.0292011118107975e-06, "loss": 0.3805, "num_tokens": 10254801413.0, "step": 13406 }, { "epoch": 4.911239351470184, "grad_norm": 0.13534069028646642, "learning_rate": 4.028958341309364e-06, "loss": 0.3566, "num_tokens": 10255712626.0, "step": 13407 }, { "epoch": 4.911605752496107, "grad_norm": 0.16637234245960095, "learning_rate": 4.028716583378042e-06, "loss": 0.3676, "num_tokens": 10256369510.0, "step": 13408 }, { "epoch": 4.91197215352203, "grad_norm": 0.1543672297683593, "learning_rate": 4.028475838030449e-06, "loss": 0.3604, "num_tokens": 10257050195.0, "step": 13409 }, { "epoch": 4.9123385545479525, "grad_norm": 0.15625578584696603, "learning_rate": 4.0282361052801534e-06, "loss": 0.3499, "num_tokens": 10257730419.0, "step": 13410 }, { "epoch": 4.912704955573876, "grad_norm": 0.1416776691134168, "learning_rate": 4.027997385140659e-06, "loss": 0.3833, "num_tokens": 10258590545.0, "step": 13411 }, { "epoch": 4.913071356599798, "grad_norm": 0.15935028815748647, "learning_rate": 4.0277596776254194e-06, "loss": 0.3531, "num_tokens": 10259251296.0, "step": 13412 }, { "epoch": 4.913437757625721, "grad_norm": 0.17137114251690974, "learning_rate": 4.027522982747828e-06, "loss": 0.3859, "num_tokens": 10259905068.0, "step": 13413 }, { "epoch": 4.9138041586516445, "grad_norm": 0.17187695185540672, "learning_rate": 4.027287300521221e-06, "loss": 0.3739, "num_tokens": 10260605381.0, "step": 13414 }, { "epoch": 4.914170559677567, "grad_norm": 0.13479716971808242, "learning_rate": 4.027052630958875e-06, "loss": 0.3594, "num_tokens": 10261490840.0, "step": 13415 }, { "epoch": 4.91453696070349, "grad_norm": 0.15807720582404572, "learning_rate": 4.026818974074016e-06, "loss": 0.3389, "num_tokens": 10262154030.0, "step": 13416 }, { "epoch": 4.914903361729413, "grad_norm": 0.13550654362888997, "learning_rate": 4.026586329879807e-06, "loss": 0.3438, "num_tokens": 10263032434.0, "step": 13417 }, { "epoch": 4.915269762755336, "grad_norm": 0.14747663155139484, "learning_rate": 4.026354698389359e-06, "loss": 0.3894, "num_tokens": 10263812734.0, "step": 13418 }, { "epoch": 4.915636163781259, "grad_norm": 0.1456532182825017, "learning_rate": 4.026124079615721e-06, "loss": 0.3324, "num_tokens": 10264567645.0, "step": 13419 }, { "epoch": 4.916002564807181, "grad_norm": 0.14888067681650946, "learning_rate": 4.025894473571885e-06, "loss": 0.3657, "num_tokens": 10265352334.0, "step": 13420 }, { "epoch": 4.9163689658331045, "grad_norm": 0.16176985147843148, "learning_rate": 4.025665880270793e-06, "loss": 0.3825, "num_tokens": 10266086133.0, "step": 13421 }, { "epoch": 4.916735366859028, "grad_norm": 0.1393737445009097, "learning_rate": 4.0254382997253225e-06, "loss": 0.3477, "num_tokens": 10266891976.0, "step": 13422 }, { "epoch": 4.91710176788495, "grad_norm": 0.14460078608920243, "learning_rate": 4.025211731948294e-06, "loss": 0.3685, "num_tokens": 10267722568.0, "step": 13423 }, { "epoch": 4.917468168910873, "grad_norm": 0.16019872507569854, "learning_rate": 4.024986176952478e-06, "loss": 0.353, "num_tokens": 10268382982.0, "step": 13424 }, { "epoch": 4.917834569936796, "grad_norm": 0.1476921996728286, "learning_rate": 4.02476163475058e-06, "loss": 0.3727, "num_tokens": 10269147011.0, "step": 13425 }, { "epoch": 4.918200970962719, "grad_norm": 0.14304282349396752, "learning_rate": 4.024538105355252e-06, "loss": 0.3513, "num_tokens": 10269942543.0, "step": 13426 }, { "epoch": 4.918567371988642, "grad_norm": 0.1486450238291635, "learning_rate": 4.0243155887790915e-06, "loss": 0.3717, "num_tokens": 10270765254.0, "step": 13427 }, { "epoch": 4.918933773014564, "grad_norm": 0.15348774904662973, "learning_rate": 4.024094085034633e-06, "loss": 0.399, "num_tokens": 10271546940.0, "step": 13428 }, { "epoch": 4.919300174040488, "grad_norm": 0.13889343902707693, "learning_rate": 4.0238735941343575e-06, "loss": 0.3684, "num_tokens": 10272403961.0, "step": 13429 }, { "epoch": 4.91966657506641, "grad_norm": 0.14902370479883909, "learning_rate": 4.02365411609069e-06, "loss": 0.3507, "num_tokens": 10273250631.0, "step": 13430 }, { "epoch": 4.920032976092333, "grad_norm": 0.15973858718349424, "learning_rate": 4.023435650915996e-06, "loss": 0.3591, "num_tokens": 10273966764.0, "step": 13431 }, { "epoch": 4.920399377118256, "grad_norm": 0.1464469736000494, "learning_rate": 4.023218198622582e-06, "loss": 0.3765, "num_tokens": 10274756327.0, "step": 13432 }, { "epoch": 4.920765778144179, "grad_norm": 0.15480003453509944, "learning_rate": 4.023001759222705e-06, "loss": 0.3806, "num_tokens": 10275543493.0, "step": 13433 }, { "epoch": 4.921132179170102, "grad_norm": 0.1412994036516125, "learning_rate": 4.022786332728559e-06, "loss": 0.3499, "num_tokens": 10276325722.0, "step": 13434 }, { "epoch": 4.921498580196024, "grad_norm": 0.14799085594779487, "learning_rate": 4.022571919152279e-06, "loss": 0.3509, "num_tokens": 10277057916.0, "step": 13435 }, { "epoch": 4.9218649812219475, "grad_norm": 0.13755759710286772, "learning_rate": 4.02235851850595e-06, "loss": 0.3507, "num_tokens": 10277903643.0, "step": 13436 }, { "epoch": 4.922231382247871, "grad_norm": 0.16373376075706322, "learning_rate": 4.022146130801594e-06, "loss": 0.3911, "num_tokens": 10278639200.0, "step": 13437 }, { "epoch": 4.922597783273793, "grad_norm": 0.14334208928904132, "learning_rate": 4.021934756051179e-06, "loss": 0.3548, "num_tokens": 10279411733.0, "step": 13438 }, { "epoch": 4.922964184299716, "grad_norm": 0.1467951689855946, "learning_rate": 4.021724394266611e-06, "loss": 0.3726, "num_tokens": 10280159194.0, "step": 13439 }, { "epoch": 4.923330585325639, "grad_norm": 0.14556924035228525, "learning_rate": 4.02151504545975e-06, "loss": 0.3363, "num_tokens": 10280965928.0, "step": 13440 }, { "epoch": 4.923696986351562, "grad_norm": 0.16471406011769454, "learning_rate": 4.021306709642384e-06, "loss": 0.3761, "num_tokens": 10281659152.0, "step": 13441 }, { "epoch": 4.924063387377485, "grad_norm": 0.15850245621256237, "learning_rate": 4.021099386826255e-06, "loss": 0.3533, "num_tokens": 10282347120.0, "step": 13442 }, { "epoch": 4.924429788403407, "grad_norm": 0.1621097843474418, "learning_rate": 4.020893077023046e-06, "loss": 0.3819, "num_tokens": 10283025945.0, "step": 13443 }, { "epoch": 4.924796189429331, "grad_norm": 0.14026596602303035, "learning_rate": 4.020687780244379e-06, "loss": 0.381, "num_tokens": 10283900389.0, "step": 13444 }, { "epoch": 4.925162590455253, "grad_norm": 0.14668534468165387, "learning_rate": 4.020483496501823e-06, "loss": 0.3456, "num_tokens": 10284645692.0, "step": 13445 }, { "epoch": 4.925528991481176, "grad_norm": 0.16016858716494647, "learning_rate": 4.020280225806887e-06, "loss": 0.3673, "num_tokens": 10285309597.0, "step": 13446 }, { "epoch": 4.925895392507099, "grad_norm": 0.1675298882300297, "learning_rate": 4.020077968171023e-06, "loss": 0.3596, "num_tokens": 10286001255.0, "step": 13447 }, { "epoch": 4.926261793533022, "grad_norm": 0.14708001536570375, "learning_rate": 4.019876723605632e-06, "loss": 0.3816, "num_tokens": 10286865081.0, "step": 13448 }, { "epoch": 4.926628194558945, "grad_norm": 0.14698705923343222, "learning_rate": 4.019676492122048e-06, "loss": 0.3632, "num_tokens": 10287657465.0, "step": 13449 }, { "epoch": 4.926994595584867, "grad_norm": 0.16014203746349132, "learning_rate": 4.019477273731554e-06, "loss": 0.3973, "num_tokens": 10288342835.0, "step": 13450 }, { "epoch": 4.9273609966107905, "grad_norm": 0.15046486740733853, "learning_rate": 4.0192790684453755e-06, "loss": 0.3569, "num_tokens": 10289084995.0, "step": 13451 }, { "epoch": 4.927727397636714, "grad_norm": 0.151035206996558, "learning_rate": 4.019081876274683e-06, "loss": 0.3797, "num_tokens": 10289832005.0, "step": 13452 }, { "epoch": 4.928093798662636, "grad_norm": 0.14563800934501087, "learning_rate": 4.018885697230582e-06, "loss": 0.3764, "num_tokens": 10290660320.0, "step": 13453 }, { "epoch": 4.928460199688559, "grad_norm": 0.15043937116958225, "learning_rate": 4.01869053132413e-06, "loss": 0.357, "num_tokens": 10291384717.0, "step": 13454 }, { "epoch": 4.928826600714482, "grad_norm": 0.15056984585166122, "learning_rate": 4.018496378566323e-06, "loss": 0.3545, "num_tokens": 10292144760.0, "step": 13455 }, { "epoch": 4.929193001740405, "grad_norm": 0.14940412808608625, "learning_rate": 4.018303238968098e-06, "loss": 0.3802, "num_tokens": 10292920024.0, "step": 13456 }, { "epoch": 4.929559402766328, "grad_norm": 0.13929974414737054, "learning_rate": 4.018111112540341e-06, "loss": 0.3752, "num_tokens": 10293806187.0, "step": 13457 }, { "epoch": 4.92992580379225, "grad_norm": 0.166077239456669, "learning_rate": 4.017919999293875e-06, "loss": 0.404, "num_tokens": 10294548152.0, "step": 13458 }, { "epoch": 4.930292204818174, "grad_norm": 0.14373765632441468, "learning_rate": 4.017729899239467e-06, "loss": 0.3398, "num_tokens": 10295311698.0, "step": 13459 }, { "epoch": 4.930658605844096, "grad_norm": 0.14486129027082845, "learning_rate": 4.0175408123878304e-06, "loss": 0.3692, "num_tokens": 10296129451.0, "step": 13460 }, { "epoch": 4.931025006870019, "grad_norm": 0.1505840779202991, "learning_rate": 4.017352738749619e-06, "loss": 0.3879, "num_tokens": 10296983067.0, "step": 13461 }, { "epoch": 4.931391407895942, "grad_norm": 0.1624982589621301, "learning_rate": 4.017165678335429e-06, "loss": 0.3525, "num_tokens": 10297694901.0, "step": 13462 }, { "epoch": 4.931757808921865, "grad_norm": 0.15399618542686905, "learning_rate": 4.016979631155801e-06, "loss": 0.3343, "num_tokens": 10298458659.0, "step": 13463 }, { "epoch": 4.932124209947788, "grad_norm": 0.16211221508526952, "learning_rate": 4.016794597221216e-06, "loss": 0.3789, "num_tokens": 10299175418.0, "step": 13464 }, { "epoch": 4.93249061097371, "grad_norm": 0.15206924524438353, "learning_rate": 4.016610576542101e-06, "loss": 0.3799, "num_tokens": 10299955324.0, "step": 13465 }, { "epoch": 4.932857011999634, "grad_norm": 0.14525030105842318, "learning_rate": 4.016427569128826e-06, "loss": 0.3431, "num_tokens": 10300700820.0, "step": 13466 }, { "epoch": 4.933223413025557, "grad_norm": 0.13733507758142335, "learning_rate": 4.0162455749916995e-06, "loss": 0.3636, "num_tokens": 10301551439.0, "step": 13467 }, { "epoch": 4.933589814051479, "grad_norm": 0.137771022158367, "learning_rate": 4.016064594140977e-06, "loss": 0.3429, "num_tokens": 10302380841.0, "step": 13468 }, { "epoch": 4.933956215077402, "grad_norm": 0.15342019365390402, "learning_rate": 4.0158846265868565e-06, "loss": 0.4204, "num_tokens": 10303195982.0, "step": 13469 }, { "epoch": 4.934322616103325, "grad_norm": 0.1580271842553295, "learning_rate": 4.015705672339478e-06, "loss": 0.3879, "num_tokens": 10303945129.0, "step": 13470 }, { "epoch": 4.934689017129248, "grad_norm": 0.14625002333391973, "learning_rate": 4.015527731408924e-06, "loss": 0.3523, "num_tokens": 10304742942.0, "step": 13471 }, { "epoch": 4.935055418155171, "grad_norm": 0.156285519758811, "learning_rate": 4.01535080380522e-06, "loss": 0.3336, "num_tokens": 10305449124.0, "step": 13472 }, { "epoch": 4.9354218191810935, "grad_norm": 0.1515692734220689, "learning_rate": 4.015174889538336e-06, "loss": 0.3605, "num_tokens": 10306184474.0, "step": 13473 }, { "epoch": 4.935788220207017, "grad_norm": 0.17176556891026346, "learning_rate": 4.014999988618186e-06, "loss": 0.3779, "num_tokens": 10306836499.0, "step": 13474 }, { "epoch": 4.936154621232939, "grad_norm": 0.14849621730243046, "learning_rate": 4.014826101054619e-06, "loss": 0.3941, "num_tokens": 10307623943.0, "step": 13475 }, { "epoch": 4.936521022258862, "grad_norm": 0.14148356164527803, "learning_rate": 4.014653226857436e-06, "loss": 0.3582, "num_tokens": 10308470449.0, "step": 13476 }, { "epoch": 4.9368874232847855, "grad_norm": 0.1375594606866084, "learning_rate": 4.01448136603638e-06, "loss": 0.3752, "num_tokens": 10309351737.0, "step": 13477 }, { "epoch": 4.937253824310708, "grad_norm": 0.14827890757567216, "learning_rate": 4.014310518601128e-06, "loss": 0.3545, "num_tokens": 10310083338.0, "step": 13478 }, { "epoch": 4.937620225336631, "grad_norm": 0.16558807081248428, "learning_rate": 4.014140684561313e-06, "loss": 0.3979, "num_tokens": 10310782150.0, "step": 13479 }, { "epoch": 4.937986626362553, "grad_norm": 0.14448698763276852, "learning_rate": 4.013971863926502e-06, "loss": 0.347, "num_tokens": 10311589587.0, "step": 13480 }, { "epoch": 4.938353027388477, "grad_norm": 0.15695142381514135, "learning_rate": 4.013804056706204e-06, "loss": 0.3615, "num_tokens": 10312336972.0, "step": 13481 }, { "epoch": 4.9387194284144, "grad_norm": 0.1459488462206229, "learning_rate": 4.0136372629098776e-06, "loss": 0.3901, "num_tokens": 10313173926.0, "step": 13482 }, { "epoch": 4.939085829440322, "grad_norm": 0.16259295966083032, "learning_rate": 4.01347148254692e-06, "loss": 0.3821, "num_tokens": 10313923896.0, "step": 13483 }, { "epoch": 4.939452230466245, "grad_norm": 0.15496474888401796, "learning_rate": 4.013306715626671e-06, "loss": 0.3632, "num_tokens": 10314648547.0, "step": 13484 }, { "epoch": 4.939818631492168, "grad_norm": 0.14036686009629953, "learning_rate": 4.013142962158415e-06, "loss": 0.3281, "num_tokens": 10315517462.0, "step": 13485 }, { "epoch": 4.940185032518091, "grad_norm": 0.15138873225961907, "learning_rate": 4.012980222151379e-06, "loss": 0.357, "num_tokens": 10316247793.0, "step": 13486 }, { "epoch": 4.940551433544014, "grad_norm": 0.16017379937742954, "learning_rate": 4.012818495614732e-06, "loss": 0.3824, "num_tokens": 10317015799.0, "step": 13487 }, { "epoch": 4.9409178345699365, "grad_norm": 0.1566447048057729, "learning_rate": 4.012657782557586e-06, "loss": 0.3689, "num_tokens": 10317687687.0, "step": 13488 }, { "epoch": 4.94128423559586, "grad_norm": 0.1724644722486816, "learning_rate": 4.012498082988996e-06, "loss": 0.3812, "num_tokens": 10318345246.0, "step": 13489 }, { "epoch": 4.941650636621782, "grad_norm": 0.1498520445046911, "learning_rate": 4.012339396917962e-06, "loss": 0.3745, "num_tokens": 10319143412.0, "step": 13490 }, { "epoch": 4.942017037647705, "grad_norm": 0.14193666213438602, "learning_rate": 4.012181724353424e-06, "loss": 0.3769, "num_tokens": 10319988046.0, "step": 13491 }, { "epoch": 4.9423834386736285, "grad_norm": 0.1442326693151306, "learning_rate": 4.012025065304265e-06, "loss": 0.3598, "num_tokens": 10320768521.0, "step": 13492 }, { "epoch": 4.942749839699551, "grad_norm": 0.1554748535785724, "learning_rate": 4.011869419779315e-06, "loss": 0.3898, "num_tokens": 10321482480.0, "step": 13493 }, { "epoch": 4.943116240725474, "grad_norm": 0.13746105015808116, "learning_rate": 4.01171478778734e-06, "loss": 0.3754, "num_tokens": 10322399976.0, "step": 13494 }, { "epoch": 4.943482641751397, "grad_norm": 0.1537924059828936, "learning_rate": 4.0115611693370555e-06, "loss": 0.3733, "num_tokens": 10323193119.0, "step": 13495 }, { "epoch": 4.94384904277732, "grad_norm": 0.13901725625581543, "learning_rate": 4.011408564437115e-06, "loss": 0.369, "num_tokens": 10324063022.0, "step": 13496 }, { "epoch": 4.944215443803243, "grad_norm": 0.140062928741535, "learning_rate": 4.0112569730961185e-06, "loss": 0.379, "num_tokens": 10324934572.0, "step": 13497 }, { "epoch": 4.944581844829165, "grad_norm": 0.16885532630753144, "learning_rate": 4.011106395322607e-06, "loss": 0.3791, "num_tokens": 10325601642.0, "step": 13498 }, { "epoch": 4.944948245855088, "grad_norm": 0.1435075361067455, "learning_rate": 4.010956831125062e-06, "loss": 0.3483, "num_tokens": 10326411395.0, "step": 13499 }, { "epoch": 4.945314646881012, "grad_norm": 0.14727506980946212, "learning_rate": 4.0108082805119155e-06, "loss": 0.3821, "num_tokens": 10327211441.0, "step": 13500 }, { "epoch": 4.945681047906934, "grad_norm": 0.13826861136585292, "learning_rate": 4.010660743491535e-06, "loss": 0.3469, "num_tokens": 10328047968.0, "step": 13501 }, { "epoch": 4.946047448932857, "grad_norm": 0.1295889863213494, "learning_rate": 4.010514220072233e-06, "loss": 0.3778, "num_tokens": 10328982215.0, "step": 13502 }, { "epoch": 4.9464138499587795, "grad_norm": 0.15335276164448336, "learning_rate": 4.010368710262266e-06, "loss": 0.4006, "num_tokens": 10329744369.0, "step": 13503 }, { "epoch": 4.946780250984703, "grad_norm": 0.14510337688952452, "learning_rate": 4.010224214069833e-06, "loss": 0.3716, "num_tokens": 10330539368.0, "step": 13504 }, { "epoch": 4.947146652010626, "grad_norm": 0.14077699173763844, "learning_rate": 4.0100807315030745e-06, "loss": 0.38, "num_tokens": 10331371380.0, "step": 13505 }, { "epoch": 4.947513053036548, "grad_norm": 0.15011452611368398, "learning_rate": 4.009938262570074e-06, "loss": 0.3581, "num_tokens": 10332119573.0, "step": 13506 }, { "epoch": 4.9478794540624715, "grad_norm": 0.137425031800556, "learning_rate": 4.009796807278864e-06, "loss": 0.3799, "num_tokens": 10332962518.0, "step": 13507 }, { "epoch": 4.948245855088394, "grad_norm": 0.15543156523964813, "learning_rate": 4.0096563656374075e-06, "loss": 0.342, "num_tokens": 10333675243.0, "step": 13508 }, { "epoch": 4.948612256114317, "grad_norm": 0.14756461411930594, "learning_rate": 4.0095169376536225e-06, "loss": 0.3651, "num_tokens": 10334420342.0, "step": 13509 }, { "epoch": 4.94897865714024, "grad_norm": 0.1540017813718766, "learning_rate": 4.0093785233353634e-06, "loss": 0.3903, "num_tokens": 10335201734.0, "step": 13510 }, { "epoch": 4.949345058166163, "grad_norm": 0.1590041277153315, "learning_rate": 4.00924112269043e-06, "loss": 0.3671, "num_tokens": 10335878176.0, "step": 13511 }, { "epoch": 4.949711459192086, "grad_norm": 0.15028792923872575, "learning_rate": 4.009104735726563e-06, "loss": 0.403, "num_tokens": 10336661298.0, "step": 13512 }, { "epoch": 4.950077860218009, "grad_norm": 0.15843171206686207, "learning_rate": 4.00896936245145e-06, "loss": 0.3922, "num_tokens": 10337341287.0, "step": 13513 }, { "epoch": 4.950444261243931, "grad_norm": 0.14634653534465064, "learning_rate": 4.008835002872713e-06, "loss": 0.3648, "num_tokens": 10338075571.0, "step": 13514 }, { "epoch": 4.950810662269855, "grad_norm": 0.15459527297098197, "learning_rate": 4.008701656997926e-06, "loss": 0.3649, "num_tokens": 10338825368.0, "step": 13515 }, { "epoch": 4.951177063295777, "grad_norm": 0.14974024250066614, "learning_rate": 4.008569324834603e-06, "loss": 0.3588, "num_tokens": 10339578714.0, "step": 13516 }, { "epoch": 4.9515434643217, "grad_norm": 0.14742749121096976, "learning_rate": 4.008438006390198e-06, "loss": 0.3767, "num_tokens": 10340339400.0, "step": 13517 }, { "epoch": 4.9519098653476235, "grad_norm": 0.1445052608361271, "learning_rate": 4.0083077016721114e-06, "loss": 0.3435, "num_tokens": 10341122637.0, "step": 13518 }, { "epoch": 4.952276266373546, "grad_norm": 0.1601236666324542, "learning_rate": 4.008178410687687e-06, "loss": 0.3812, "num_tokens": 10341820917.0, "step": 13519 }, { "epoch": 4.952642667399469, "grad_norm": 0.1479075514426573, "learning_rate": 4.008050133444205e-06, "loss": 0.3784, "num_tokens": 10342614151.0, "step": 13520 }, { "epoch": 4.953009068425391, "grad_norm": 0.13874025723031383, "learning_rate": 4.007922869948895e-06, "loss": 0.3569, "num_tokens": 10343428605.0, "step": 13521 }, { "epoch": 4.953375469451315, "grad_norm": 0.16198996021230933, "learning_rate": 4.007796620208931e-06, "loss": 0.3554, "num_tokens": 10344150970.0, "step": 13522 }, { "epoch": 4.953741870477238, "grad_norm": 0.1507932943938449, "learning_rate": 4.0076713842314206e-06, "loss": 0.3735, "num_tokens": 10344907236.0, "step": 13523 }, { "epoch": 4.95410827150316, "grad_norm": 0.15548349195237438, "learning_rate": 4.007547162023426e-06, "loss": 0.3829, "num_tokens": 10345621702.0, "step": 13524 }, { "epoch": 4.954474672529083, "grad_norm": 0.16555035127785653, "learning_rate": 4.007423953591941e-06, "loss": 0.3698, "num_tokens": 10346289878.0, "step": 13525 }, { "epoch": 4.954841073555006, "grad_norm": 0.1449030819969528, "learning_rate": 4.007301758943913e-06, "loss": 0.3594, "num_tokens": 10347132049.0, "step": 13526 }, { "epoch": 4.955207474580929, "grad_norm": 0.1508413514071187, "learning_rate": 4.007180578086223e-06, "loss": 0.4012, "num_tokens": 10347954704.0, "step": 13527 }, { "epoch": 4.955573875606852, "grad_norm": 0.16190071560145128, "learning_rate": 4.0070604110257005e-06, "loss": 0.3841, "num_tokens": 10348677837.0, "step": 13528 }, { "epoch": 4.9559402766327745, "grad_norm": 0.1483104300896384, "learning_rate": 4.006941257769117e-06, "loss": 0.3739, "num_tokens": 10349513205.0, "step": 13529 }, { "epoch": 4.956306677658698, "grad_norm": 0.1580951888977358, "learning_rate": 4.006823118323185e-06, "loss": 0.3987, "num_tokens": 10350247238.0, "step": 13530 }, { "epoch": 4.95667307868462, "grad_norm": 0.14292859435503044, "learning_rate": 4.006705992694561e-06, "loss": 0.3631, "num_tokens": 10351043142.0, "step": 13531 }, { "epoch": 4.957039479710543, "grad_norm": 0.14835400918070415, "learning_rate": 4.006589880889844e-06, "loss": 0.3757, "num_tokens": 10351820493.0, "step": 13532 }, { "epoch": 4.9574058807364665, "grad_norm": 0.1629024858661554, "learning_rate": 4.006474782915579e-06, "loss": 0.3909, "num_tokens": 10352498464.0, "step": 13533 }, { "epoch": 4.957772281762389, "grad_norm": 0.14557907199317438, "learning_rate": 4.006360698778245e-06, "loss": 0.3744, "num_tokens": 10353266385.0, "step": 13534 }, { "epoch": 4.958138682788312, "grad_norm": 0.14595964226690183, "learning_rate": 4.006247628484278e-06, "loss": 0.3823, "num_tokens": 10354098569.0, "step": 13535 }, { "epoch": 4.958505083814234, "grad_norm": 0.14018701612441786, "learning_rate": 4.006135572040042e-06, "loss": 0.3553, "num_tokens": 10354897262.0, "step": 13536 }, { "epoch": 4.958871484840158, "grad_norm": 0.15343739691947714, "learning_rate": 4.0060245294518554e-06, "loss": 0.3418, "num_tokens": 10355663003.0, "step": 13537 }, { "epoch": 4.959237885866081, "grad_norm": 0.16079019732164065, "learning_rate": 4.005914500725973e-06, "loss": 0.3741, "num_tokens": 10356339396.0, "step": 13538 }, { "epoch": 4.959604286892003, "grad_norm": 0.16339478531462087, "learning_rate": 4.005805485868596e-06, "loss": 0.36, "num_tokens": 10356977700.0, "step": 13539 }, { "epoch": 4.959970687917926, "grad_norm": 0.1525525539814712, "learning_rate": 4.005697484885863e-06, "loss": 0.3392, "num_tokens": 10357677819.0, "step": 13540 }, { "epoch": 4.960337088943849, "grad_norm": 0.17159130478734316, "learning_rate": 4.005590497783864e-06, "loss": 0.3626, "num_tokens": 10358267474.0, "step": 13541 }, { "epoch": 4.960703489969772, "grad_norm": 0.15636166036394974, "learning_rate": 4.00548452456862e-06, "loss": 0.3779, "num_tokens": 10358995014.0, "step": 13542 }, { "epoch": 4.961069890995695, "grad_norm": 0.15224071529384717, "learning_rate": 4.005379565246111e-06, "loss": 0.3823, "num_tokens": 10359781047.0, "step": 13543 }, { "epoch": 4.9614362920216175, "grad_norm": 0.1421543847008601, "learning_rate": 4.005275619822245e-06, "loss": 0.3431, "num_tokens": 10360597511.0, "step": 13544 }, { "epoch": 4.961802693047541, "grad_norm": 0.17424547245756594, "learning_rate": 4.005172688302881e-06, "loss": 0.3911, "num_tokens": 10361269154.0, "step": 13545 }, { "epoch": 4.962169094073463, "grad_norm": 0.1543937750349004, "learning_rate": 4.005070770693816e-06, "loss": 0.3509, "num_tokens": 10361974853.0, "step": 13546 }, { "epoch": 4.962535495099386, "grad_norm": 0.15147261684957372, "learning_rate": 4.004969867000798e-06, "loss": 0.3588, "num_tokens": 10362696429.0, "step": 13547 }, { "epoch": 4.9629018961253095, "grad_norm": 0.14982879718886088, "learning_rate": 4.004869977229507e-06, "loss": 0.3563, "num_tokens": 10363433055.0, "step": 13548 }, { "epoch": 4.963268297151232, "grad_norm": 0.15970099305287555, "learning_rate": 4.004771101385573e-06, "loss": 0.3544, "num_tokens": 10364090608.0, "step": 13549 }, { "epoch": 4.963634698177155, "grad_norm": 0.1386501931063616, "learning_rate": 4.0046732394745654e-06, "loss": 0.3754, "num_tokens": 10364998430.0, "step": 13550 }, { "epoch": 4.964001099203077, "grad_norm": 0.1385288627610876, "learning_rate": 4.004576391502002e-06, "loss": 0.3822, "num_tokens": 10365912348.0, "step": 13551 }, { "epoch": 4.964367500229001, "grad_norm": 0.15414755918524373, "learning_rate": 4.004480557473337e-06, "loss": 0.3763, "num_tokens": 10366660420.0, "step": 13552 }, { "epoch": 4.964733901254924, "grad_norm": 0.15023678746913616, "learning_rate": 4.004385737393971e-06, "loss": 0.3607, "num_tokens": 10367437666.0, "step": 13553 }, { "epoch": 4.965100302280846, "grad_norm": 0.1360481814604551, "learning_rate": 4.004291931269248e-06, "loss": 0.3442, "num_tokens": 10368245015.0, "step": 13554 }, { "epoch": 4.965466703306769, "grad_norm": 0.15379302288791166, "learning_rate": 4.004199139104449e-06, "loss": 0.3866, "num_tokens": 10369026614.0, "step": 13555 }, { "epoch": 4.965833104332692, "grad_norm": 0.1407758748059928, "learning_rate": 4.004107360904807e-06, "loss": 0.3696, "num_tokens": 10369823093.0, "step": 13556 }, { "epoch": 4.966199505358615, "grad_norm": 0.15528731814323257, "learning_rate": 4.004016596675491e-06, "loss": 0.3671, "num_tokens": 10370532256.0, "step": 13557 }, { "epoch": 4.966565906384538, "grad_norm": 0.14809921157700343, "learning_rate": 4.003926846421614e-06, "loss": 0.3396, "num_tokens": 10371351844.0, "step": 13558 }, { "epoch": 4.9669323074104605, "grad_norm": 0.1530038009231034, "learning_rate": 4.003838110148237e-06, "loss": 0.3471, "num_tokens": 10372063960.0, "step": 13559 }, { "epoch": 4.967298708436384, "grad_norm": 0.1596242893353804, "learning_rate": 4.003750387860355e-06, "loss": 0.3654, "num_tokens": 10372753331.0, "step": 13560 }, { "epoch": 4.967665109462306, "grad_norm": 0.13948136674008454, "learning_rate": 4.003663679562913e-06, "loss": 0.3522, "num_tokens": 10373604105.0, "step": 13561 }, { "epoch": 4.968031510488229, "grad_norm": 0.14806439630692306, "learning_rate": 4.0035779852607965e-06, "loss": 0.3812, "num_tokens": 10374404544.0, "step": 13562 }, { "epoch": 4.9683979115141526, "grad_norm": 0.14960849355730124, "learning_rate": 4.0034933049588355e-06, "loss": 0.3892, "num_tokens": 10375198817.0, "step": 13563 }, { "epoch": 4.968764312540075, "grad_norm": 0.1474703321319798, "learning_rate": 4.003409638661798e-06, "loss": 0.3573, "num_tokens": 10375999681.0, "step": 13564 }, { "epoch": 4.969130713565998, "grad_norm": 0.125085367333937, "learning_rate": 4.003326986374399e-06, "loss": 0.3776, "num_tokens": 10377089792.0, "step": 13565 }, { "epoch": 4.9694971145919205, "grad_norm": 0.1522213579647339, "learning_rate": 4.003245348101298e-06, "loss": 0.3693, "num_tokens": 10377863227.0, "step": 13566 }, { "epoch": 4.969863515617844, "grad_norm": 0.1308065266884276, "learning_rate": 4.003164723847092e-06, "loss": 0.369, "num_tokens": 10378749663.0, "step": 13567 }, { "epoch": 4.970229916643767, "grad_norm": 0.13947132133602905, "learning_rate": 4.003085113616325e-06, "loss": 0.3724, "num_tokens": 10379599987.0, "step": 13568 }, { "epoch": 4.970596317669689, "grad_norm": 0.15798891106870025, "learning_rate": 4.003006517413483e-06, "loss": 0.3786, "num_tokens": 10380312887.0, "step": 13569 }, { "epoch": 4.9709627186956125, "grad_norm": 0.14438986103888624, "learning_rate": 4.002928935242993e-06, "loss": 0.3406, "num_tokens": 10381079239.0, "step": 13570 }, { "epoch": 4.971329119721535, "grad_norm": 0.15291412045985664, "learning_rate": 4.002852367109228e-06, "loss": 0.4182, "num_tokens": 10381833696.0, "step": 13571 }, { "epoch": 4.971695520747458, "grad_norm": 0.16573164226254364, "learning_rate": 4.0027768130165e-06, "loss": 0.3728, "num_tokens": 10382530740.0, "step": 13572 }, { "epoch": 4.972061921773381, "grad_norm": 0.1363188530823271, "learning_rate": 4.002702272969069e-06, "loss": 0.3657, "num_tokens": 10383374687.0, "step": 13573 }, { "epoch": 4.972428322799304, "grad_norm": 0.15329795642367697, "learning_rate": 4.002628746971131e-06, "loss": 0.3816, "num_tokens": 10384121615.0, "step": 13574 }, { "epoch": 4.972794723825227, "grad_norm": 0.1677322015152921, "learning_rate": 4.002556235026834e-06, "loss": 0.346, "num_tokens": 10384730177.0, "step": 13575 }, { "epoch": 4.973161124851149, "grad_norm": 0.16271071994291766, "learning_rate": 4.002484737140259e-06, "loss": 0.3761, "num_tokens": 10385460631.0, "step": 13576 }, { "epoch": 4.973527525877072, "grad_norm": 0.15620938087835493, "learning_rate": 4.002414253315438e-06, "loss": 0.361, "num_tokens": 10386248376.0, "step": 13577 }, { "epoch": 4.973893926902996, "grad_norm": 0.16596424766611834, "learning_rate": 4.002344783556339e-06, "loss": 0.4013, "num_tokens": 10386925034.0, "step": 13578 }, { "epoch": 4.974260327928918, "grad_norm": 0.14754311002406514, "learning_rate": 4.00227632786688e-06, "loss": 0.3563, "num_tokens": 10387712729.0, "step": 13579 }, { "epoch": 4.974626728954841, "grad_norm": 0.14469782007961296, "learning_rate": 4.002208886250914e-06, "loss": 0.3594, "num_tokens": 10388482633.0, "step": 13580 }, { "epoch": 4.9749931299807635, "grad_norm": 0.16708251719694323, "learning_rate": 4.0021424587122444e-06, "loss": 0.4095, "num_tokens": 10389138283.0, "step": 13581 }, { "epoch": 4.975359531006687, "grad_norm": 0.15629418046578805, "learning_rate": 4.002077045254612e-06, "loss": 0.4068, "num_tokens": 10389925479.0, "step": 13582 }, { "epoch": 4.97572593203261, "grad_norm": 0.14175910187268792, "learning_rate": 4.002012645881703e-06, "loss": 0.3805, "num_tokens": 10390770733.0, "step": 13583 }, { "epoch": 4.976092333058532, "grad_norm": 0.14325316082333586, "learning_rate": 4.0019492605971465e-06, "loss": 0.3685, "num_tokens": 10391588808.0, "step": 13584 }, { "epoch": 4.9764587340844555, "grad_norm": 0.14566795873313526, "learning_rate": 4.001886889404514e-06, "loss": 0.3798, "num_tokens": 10392368415.0, "step": 13585 }, { "epoch": 4.976825135110378, "grad_norm": 0.1444318918321822, "learning_rate": 4.0018255323073165e-06, "loss": 0.3798, "num_tokens": 10393162363.0, "step": 13586 }, { "epoch": 4.977191536136301, "grad_norm": 0.167006863427289, "learning_rate": 4.0017651893090165e-06, "loss": 0.4114, "num_tokens": 10393835847.0, "step": 13587 }, { "epoch": 4.977557937162224, "grad_norm": 0.1611713845810827, "learning_rate": 4.001705860413013e-06, "loss": 0.3552, "num_tokens": 10394510218.0, "step": 13588 }, { "epoch": 4.977924338188147, "grad_norm": 0.1467476806920575, "learning_rate": 4.001647545622644e-06, "loss": 0.3499, "num_tokens": 10395228637.0, "step": 13589 }, { "epoch": 4.97829073921407, "grad_norm": 0.1397975759414876, "learning_rate": 4.001590244941201e-06, "loss": 0.3676, "num_tokens": 10396085818.0, "step": 13590 }, { "epoch": 4.978657140239992, "grad_norm": 0.144811378662826, "learning_rate": 4.001533958371909e-06, "loss": 0.3579, "num_tokens": 10396918817.0, "step": 13591 }, { "epoch": 4.979023541265915, "grad_norm": 0.14968917025880077, "learning_rate": 4.001478685917939e-06, "loss": 0.378, "num_tokens": 10397691606.0, "step": 13592 }, { "epoch": 4.979389942291839, "grad_norm": 0.16484071362463473, "learning_rate": 4.001424427582409e-06, "loss": 0.3769, "num_tokens": 10398421543.0, "step": 13593 }, { "epoch": 4.979756343317761, "grad_norm": 0.15099728431685489, "learning_rate": 4.001371183368373e-06, "loss": 0.3977, "num_tokens": 10399212023.0, "step": 13594 }, { "epoch": 4.980122744343684, "grad_norm": 0.14291663664626364, "learning_rate": 4.001318953278833e-06, "loss": 0.3578, "num_tokens": 10399999197.0, "step": 13595 }, { "epoch": 4.980489145369607, "grad_norm": 0.14062721144252668, "learning_rate": 4.0012677373167305e-06, "loss": 0.3822, "num_tokens": 10400865101.0, "step": 13596 }, { "epoch": 4.98085554639553, "grad_norm": 0.13996832900785525, "learning_rate": 4.001217535484951e-06, "loss": 0.3553, "num_tokens": 10401706037.0, "step": 13597 }, { "epoch": 4.981221947421453, "grad_norm": 0.15377260864094092, "learning_rate": 4.001168347786325e-06, "loss": 0.3564, "num_tokens": 10402406503.0, "step": 13598 }, { "epoch": 4.981588348447375, "grad_norm": 0.1529639549082795, "learning_rate": 4.001120174223622e-06, "loss": 0.3727, "num_tokens": 10403158800.0, "step": 13599 }, { "epoch": 4.9819547494732985, "grad_norm": 0.17731080693204246, "learning_rate": 4.001073014799556e-06, "loss": 0.3584, "num_tokens": 10403723235.0, "step": 13600 }, { "epoch": 4.982321150499222, "grad_norm": 0.1511347881109657, "learning_rate": 4.001026869516784e-06, "loss": 0.3882, "num_tokens": 10404488545.0, "step": 13601 }, { "epoch": 4.982687551525144, "grad_norm": 0.13814653241281227, "learning_rate": 4.0009817383779105e-06, "loss": 0.3566, "num_tokens": 10405341765.0, "step": 13602 }, { "epoch": 4.983053952551067, "grad_norm": 0.15442928991774643, "learning_rate": 4.000937621385474e-06, "loss": 0.3854, "num_tokens": 10406024409.0, "step": 13603 }, { "epoch": 4.98342035357699, "grad_norm": 0.15598029631865404, "learning_rate": 4.000894518541961e-06, "loss": 0.358, "num_tokens": 10406743589.0, "step": 13604 }, { "epoch": 4.983786754602913, "grad_norm": 0.1710216117654934, "learning_rate": 4.000852429849801e-06, "loss": 0.3569, "num_tokens": 10407347596.0, "step": 13605 }, { "epoch": 4.984153155628836, "grad_norm": 0.1458523391287696, "learning_rate": 4.000811355311365e-06, "loss": 0.3735, "num_tokens": 10408194164.0, "step": 13606 }, { "epoch": 4.984519556654758, "grad_norm": 0.13478482161269378, "learning_rate": 4.000771294928968e-06, "loss": 0.3713, "num_tokens": 10409072986.0, "step": 13607 }, { "epoch": 4.984885957680682, "grad_norm": 0.16309451998052638, "learning_rate": 4.000732248704864e-06, "loss": 0.3642, "num_tokens": 10409763009.0, "step": 13608 }, { "epoch": 4.985252358706604, "grad_norm": 0.1510916113623079, "learning_rate": 4.000694216641257e-06, "loss": 0.3689, "num_tokens": 10410488315.0, "step": 13609 }, { "epoch": 4.985618759732527, "grad_norm": 0.14094683271621863, "learning_rate": 4.000657198740289e-06, "loss": 0.3761, "num_tokens": 10411363659.0, "step": 13610 }, { "epoch": 4.98598516075845, "grad_norm": 0.1527781084548757, "learning_rate": 4.000621195004044e-06, "loss": 0.3446, "num_tokens": 10412128593.0, "step": 13611 }, { "epoch": 4.986351561784373, "grad_norm": 0.14284292337512108, "learning_rate": 4.000586205434553e-06, "loss": 0.3596, "num_tokens": 10412949039.0, "step": 13612 }, { "epoch": 4.986717962810296, "grad_norm": 0.157945056623945, "learning_rate": 4.000552230033784e-06, "loss": 0.3427, "num_tokens": 10413622386.0, "step": 13613 }, { "epoch": 4.987084363836219, "grad_norm": 0.15322712330731827, "learning_rate": 4.0005192688036565e-06, "loss": 0.359, "num_tokens": 10414428671.0, "step": 13614 }, { "epoch": 4.987450764862142, "grad_norm": 0.13691616659383637, "learning_rate": 4.0004873217460236e-06, "loss": 0.3868, "num_tokens": 10415323780.0, "step": 13615 }, { "epoch": 4.987817165888065, "grad_norm": 0.16784425955372473, "learning_rate": 4.000456388862684e-06, "loss": 0.373, "num_tokens": 10415974622.0, "step": 13616 }, { "epoch": 4.988183566913987, "grad_norm": 0.15723090141428003, "learning_rate": 4.000426470155387e-06, "loss": 0.3849, "num_tokens": 10416742140.0, "step": 13617 }, { "epoch": 4.98854996793991, "grad_norm": 0.1584260590116122, "learning_rate": 4.000397565625814e-06, "loss": 0.4039, "num_tokens": 10417443392.0, "step": 13618 }, { "epoch": 4.988916368965834, "grad_norm": 0.13350753303495302, "learning_rate": 4.0003696752755916e-06, "loss": 0.3875, "num_tokens": 10418343964.0, "step": 13619 }, { "epoch": 4.989282769991756, "grad_norm": 0.15084194541441276, "learning_rate": 4.000342799106294e-06, "loss": 0.3981, "num_tokens": 10419146338.0, "step": 13620 }, { "epoch": 4.989649171017679, "grad_norm": 0.1592655259047261, "learning_rate": 4.000316937119437e-06, "loss": 0.3959, "num_tokens": 10419861285.0, "step": 13621 }, { "epoch": 4.9900155720436015, "grad_norm": 0.15953700198280893, "learning_rate": 4.0002920893164744e-06, "loss": 0.3254, "num_tokens": 10420528504.0, "step": 13622 }, { "epoch": 4.990381973069525, "grad_norm": 0.14927885497765603, "learning_rate": 4.000268255698808e-06, "loss": 0.3969, "num_tokens": 10421285117.0, "step": 13623 }, { "epoch": 4.990748374095448, "grad_norm": 0.14680372243322315, "learning_rate": 4.000245436267782e-06, "loss": 0.3446, "num_tokens": 10422006253.0, "step": 13624 }, { "epoch": 4.99111477512137, "grad_norm": 0.1451964307090196, "learning_rate": 4.000223631024681e-06, "loss": 0.3335, "num_tokens": 10422747117.0, "step": 13625 }, { "epoch": 4.9914811761472935, "grad_norm": 0.12919662740051888, "learning_rate": 4.000202839970733e-06, "loss": 0.3454, "num_tokens": 10423660532.0, "step": 13626 }, { "epoch": 4.991847577173216, "grad_norm": 0.15197158140361047, "learning_rate": 4.000183063107108e-06, "loss": 0.3601, "num_tokens": 10424454540.0, "step": 13627 }, { "epoch": 4.992213978199139, "grad_norm": 0.13701770481263456, "learning_rate": 4.000164300434924e-06, "loss": 0.3787, "num_tokens": 10425290023.0, "step": 13628 }, { "epoch": 4.992580379225062, "grad_norm": 0.1455853300057831, "learning_rate": 4.0001465519552355e-06, "loss": 0.3607, "num_tokens": 10426041884.0, "step": 13629 }, { "epoch": 4.992946780250985, "grad_norm": 0.15031199395744116, "learning_rate": 4.000129817669043e-06, "loss": 0.3937, "num_tokens": 10426785139.0, "step": 13630 }, { "epoch": 4.993313181276908, "grad_norm": 0.14724546437749755, "learning_rate": 4.000114097577291e-06, "loss": 0.3628, "num_tokens": 10427569524.0, "step": 13631 }, { "epoch": 4.99367958230283, "grad_norm": 0.13533482582662074, "learning_rate": 4.000099391680862e-06, "loss": 0.3695, "num_tokens": 10428475960.0, "step": 13632 }, { "epoch": 4.994045983328753, "grad_norm": 0.15609395360527983, "learning_rate": 4.000085699980586e-06, "loss": 0.3865, "num_tokens": 10429274551.0, "step": 13633 }, { "epoch": 4.994412384354677, "grad_norm": 0.1487747534241506, "learning_rate": 4.000073022477238e-06, "loss": 0.3746, "num_tokens": 10430035619.0, "step": 13634 }, { "epoch": 4.994778785380599, "grad_norm": 0.15070706737771908, "learning_rate": 4.0000613591715286e-06, "loss": 0.3615, "num_tokens": 10430752855.0, "step": 13635 }, { "epoch": 4.995145186406522, "grad_norm": 0.15332314970425293, "learning_rate": 4.000050710064115e-06, "loss": 0.3646, "num_tokens": 10431425688.0, "step": 13636 }, { "epoch": 4.9955115874324445, "grad_norm": 0.137947828118942, "learning_rate": 4.000041075155597e-06, "loss": 0.356, "num_tokens": 10432244291.0, "step": 13637 }, { "epoch": 4.995877988458368, "grad_norm": 0.16013263672235303, "learning_rate": 4.0000324544465185e-06, "loss": 0.3555, "num_tokens": 10432858916.0, "step": 13638 }, { "epoch": 4.996244389484291, "grad_norm": 0.1440085244866003, "learning_rate": 4.000024847937366e-06, "loss": 0.3914, "num_tokens": 10433714599.0, "step": 13639 }, { "epoch": 4.996610790510213, "grad_norm": 0.1581530673245975, "learning_rate": 4.0000182556285676e-06, "loss": 0.3953, "num_tokens": 10434449733.0, "step": 13640 }, { "epoch": 4.9969771915361365, "grad_norm": 0.1551626337111884, "learning_rate": 4.000012677520494e-06, "loss": 0.3557, "num_tokens": 10435177509.0, "step": 13641 }, { "epoch": 4.997343592562059, "grad_norm": 0.15067031781593607, "learning_rate": 4.000008113613459e-06, "loss": 0.3881, "num_tokens": 10435933339.0, "step": 13642 }, { "epoch": 4.997709993587982, "grad_norm": 0.14703387458708184, "learning_rate": 4.00000456390772e-06, "loss": 0.3416, "num_tokens": 10436659522.0, "step": 13643 }, { "epoch": 4.998076394613905, "grad_norm": 0.14054976395074648, "learning_rate": 4.00000202840348e-06, "loss": 0.3675, "num_tokens": 10437551379.0, "step": 13644 }, { "epoch": 4.998442795639828, "grad_norm": 0.152972303467466, "learning_rate": 4.000000507100878e-06, "loss": 0.3842, "num_tokens": 10438305002.0, "step": 13645 } ], "logging_steps": 1, "max_steps": 13645, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.803940296155432e+20, "train_batch_size": 2, "trial_name": null, "trial_params": null }