{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998222538215429, "eval_steps": 500, "global_step": 45000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00022218272307145397, "grad_norm": 3.3125, "learning_rate": 1.9996000000000003e-05, "loss": 1.2837, "step": 10 }, { "epoch": 0.00044436544614290794, "grad_norm": 3.84375, "learning_rate": 1.9991555555555558e-05, "loss": 0.6861, "step": 20 }, { "epoch": 0.0006665481692143619, "grad_norm": 2.609375, "learning_rate": 1.9987111111111113e-05, "loss": 0.6204, "step": 30 }, { "epoch": 0.0008887308922858159, "grad_norm": 2.359375, "learning_rate": 1.9982666666666668e-05, "loss": 0.5587, "step": 40 }, { "epoch": 0.0011109136153572697, "grad_norm": 2.859375, "learning_rate": 1.9978222222222222e-05, "loss": 0.5842, "step": 50 }, { "epoch": 0.0013330963384287238, "grad_norm": 2.484375, "learning_rate": 1.9973777777777777e-05, "loss": 0.6044, "step": 60 }, { "epoch": 0.0015552790615001777, "grad_norm": 2.15625, "learning_rate": 1.9969333333333335e-05, "loss": 0.5807, "step": 70 }, { "epoch": 0.0017774617845716318, "grad_norm": 2.625, "learning_rate": 1.996488888888889e-05, "loss": 0.5589, "step": 80 }, { "epoch": 0.0019996445076430856, "grad_norm": 2.4375, "learning_rate": 1.9960444444444445e-05, "loss": 0.5105, "step": 90 }, { "epoch": 0.0022218272307145395, "grad_norm": 1.8828125, "learning_rate": 1.9956000000000003e-05, "loss": 0.5402, "step": 100 }, { "epoch": 0.002444009953785994, "grad_norm": 2.65625, "learning_rate": 1.9951555555555558e-05, "loss": 0.5461, "step": 110 }, { "epoch": 0.0026661926768574476, "grad_norm": 2.15625, "learning_rate": 1.9947111111111113e-05, "loss": 0.5681, "step": 120 }, { "epoch": 0.0028883753999289015, "grad_norm": 2.609375, "learning_rate": 1.9942666666666668e-05, "loss": 0.5401, "step": 130 }, { "epoch": 0.0031105581230003554, "grad_norm": 2.5, "learning_rate": 1.9938222222222223e-05, "loss": 0.525, "step": 140 }, { "epoch": 0.0033327408460718097, "grad_norm": 2.1875, "learning_rate": 1.9933777777777778e-05, "loss": 0.5138, "step": 150 }, { "epoch": 0.0035549235691432635, "grad_norm": 2.328125, "learning_rate": 1.9929333333333332e-05, "loss": 0.5688, "step": 160 }, { "epoch": 0.0037771062922147174, "grad_norm": 1.9765625, "learning_rate": 1.992488888888889e-05, "loss": 0.5616, "step": 170 }, { "epoch": 0.003999289015286171, "grad_norm": 2.34375, "learning_rate": 1.9920444444444446e-05, "loss": 0.5798, "step": 180 }, { "epoch": 0.004221471738357625, "grad_norm": 2.6875, "learning_rate": 1.9916e-05, "loss": 0.5198, "step": 190 }, { "epoch": 0.004443654461429079, "grad_norm": 2.296875, "learning_rate": 1.991155555555556e-05, "loss": 0.5195, "step": 200 }, { "epoch": 0.004665837184500533, "grad_norm": 2.140625, "learning_rate": 1.9907111111111113e-05, "loss": 0.5163, "step": 210 }, { "epoch": 0.004888019907571988, "grad_norm": 2.171875, "learning_rate": 1.990266666666667e-05, "loss": 0.5526, "step": 220 }, { "epoch": 0.0051102026306434414, "grad_norm": 2.375, "learning_rate": 1.9898222222222223e-05, "loss": 0.5381, "step": 230 }, { "epoch": 0.005332385353714895, "grad_norm": 1.8046875, "learning_rate": 1.9893777777777778e-05, "loss": 0.4959, "step": 240 }, { "epoch": 0.005554568076786349, "grad_norm": 2.40625, "learning_rate": 1.9889333333333336e-05, "loss": 0.5625, "step": 250 }, { "epoch": 0.005776750799857803, "grad_norm": 2.109375, "learning_rate": 1.988488888888889e-05, "loss": 0.5574, "step": 260 }, { "epoch": 0.005998933522929257, "grad_norm": 1.90625, "learning_rate": 1.9880444444444446e-05, "loss": 0.5417, "step": 270 }, { "epoch": 0.006221116246000711, "grad_norm": 2.171875, "learning_rate": 1.9876e-05, "loss": 0.5873, "step": 280 }, { "epoch": 0.006443298969072165, "grad_norm": 2.75, "learning_rate": 1.9871555555555556e-05, "loss": 0.5572, "step": 290 }, { "epoch": 0.006665481692143619, "grad_norm": 1.7421875, "learning_rate": 1.9867111111111114e-05, "loss": 0.4952, "step": 300 }, { "epoch": 0.006887664415215073, "grad_norm": 2.40625, "learning_rate": 1.986266666666667e-05, "loss": 0.5766, "step": 310 }, { "epoch": 0.007109847138286527, "grad_norm": 2.03125, "learning_rate": 1.9858222222222224e-05, "loss": 0.5868, "step": 320 }, { "epoch": 0.007332029861357981, "grad_norm": 2.671875, "learning_rate": 1.985377777777778e-05, "loss": 0.531, "step": 330 }, { "epoch": 0.007554212584429435, "grad_norm": 2.109375, "learning_rate": 1.9849333333333337e-05, "loss": 0.5477, "step": 340 }, { "epoch": 0.007776395307500889, "grad_norm": 2.703125, "learning_rate": 1.984488888888889e-05, "loss": 0.5688, "step": 350 }, { "epoch": 0.007998578030572343, "grad_norm": 2.15625, "learning_rate": 1.9840444444444446e-05, "loss": 0.5066, "step": 360 }, { "epoch": 0.008220760753643797, "grad_norm": 2.59375, "learning_rate": 1.9836e-05, "loss": 0.5124, "step": 370 }, { "epoch": 0.00844294347671525, "grad_norm": 2.125, "learning_rate": 1.9831555555555556e-05, "loss": 0.5109, "step": 380 }, { "epoch": 0.008665126199786705, "grad_norm": 2.59375, "learning_rate": 1.982711111111111e-05, "loss": 0.5672, "step": 390 }, { "epoch": 0.008887308922858158, "grad_norm": 2.203125, "learning_rate": 1.982266666666667e-05, "loss": 0.5503, "step": 400 }, { "epoch": 0.009109491645929613, "grad_norm": 1.9609375, "learning_rate": 1.9818222222222224e-05, "loss": 0.5321, "step": 410 }, { "epoch": 0.009331674369001066, "grad_norm": 2.515625, "learning_rate": 1.981377777777778e-05, "loss": 0.4862, "step": 420 }, { "epoch": 0.00955385709207252, "grad_norm": 2.4375, "learning_rate": 1.9809333333333337e-05, "loss": 0.4908, "step": 430 }, { "epoch": 0.009776039815143975, "grad_norm": 2.46875, "learning_rate": 1.9804888888888892e-05, "loss": 0.5749, "step": 440 }, { "epoch": 0.009998222538215428, "grad_norm": 2.78125, "learning_rate": 1.9800444444444447e-05, "loss": 0.532, "step": 450 }, { "epoch": 0.010220405261286883, "grad_norm": 1.7421875, "learning_rate": 1.9796e-05, "loss": 0.5432, "step": 460 }, { "epoch": 0.010442587984358336, "grad_norm": 2.25, "learning_rate": 1.9791555555555556e-05, "loss": 0.5124, "step": 470 }, { "epoch": 0.01066477070742979, "grad_norm": 2.375, "learning_rate": 1.978711111111111e-05, "loss": 0.5398, "step": 480 }, { "epoch": 0.010886953430501244, "grad_norm": 2.453125, "learning_rate": 1.978266666666667e-05, "loss": 0.5623, "step": 490 }, { "epoch": 0.011109136153572698, "grad_norm": 2.28125, "learning_rate": 1.9778222222222224e-05, "loss": 0.4947, "step": 500 }, { "epoch": 0.011331318876644151, "grad_norm": 1.9453125, "learning_rate": 1.977377777777778e-05, "loss": 0.5006, "step": 510 }, { "epoch": 0.011553501599715606, "grad_norm": 2.3125, "learning_rate": 1.9769333333333337e-05, "loss": 0.5097, "step": 520 }, { "epoch": 0.01177568432278706, "grad_norm": 2.0625, "learning_rate": 1.9764888888888892e-05, "loss": 0.4702, "step": 530 }, { "epoch": 0.011997867045858514, "grad_norm": 1.9609375, "learning_rate": 1.9760444444444447e-05, "loss": 0.5285, "step": 540 }, { "epoch": 0.012220049768929968, "grad_norm": 1.8125, "learning_rate": 1.9756000000000002e-05, "loss": 0.522, "step": 550 }, { "epoch": 0.012442232492001421, "grad_norm": 2.046875, "learning_rate": 1.9751555555555557e-05, "loss": 0.4878, "step": 560 }, { "epoch": 0.012664415215072876, "grad_norm": 2.21875, "learning_rate": 1.974711111111111e-05, "loss": 0.5499, "step": 570 }, { "epoch": 0.01288659793814433, "grad_norm": 1.984375, "learning_rate": 1.9742666666666666e-05, "loss": 0.487, "step": 580 }, { "epoch": 0.013108780661215784, "grad_norm": 2.15625, "learning_rate": 1.9738222222222225e-05, "loss": 0.5365, "step": 590 }, { "epoch": 0.013330963384287239, "grad_norm": 2.1875, "learning_rate": 1.973377777777778e-05, "loss": 0.5492, "step": 600 }, { "epoch": 0.013553146107358692, "grad_norm": 2.21875, "learning_rate": 1.9729333333333334e-05, "loss": 0.4887, "step": 610 }, { "epoch": 0.013775328830430146, "grad_norm": 2.328125, "learning_rate": 1.9724888888888893e-05, "loss": 0.5365, "step": 620 }, { "epoch": 0.0139975115535016, "grad_norm": 2.078125, "learning_rate": 1.9720444444444447e-05, "loss": 0.5356, "step": 630 }, { "epoch": 0.014219694276573054, "grad_norm": 1.9296875, "learning_rate": 1.9716000000000002e-05, "loss": 0.5229, "step": 640 }, { "epoch": 0.014441876999644507, "grad_norm": 2.03125, "learning_rate": 1.9711555555555557e-05, "loss": 0.4801, "step": 650 }, { "epoch": 0.014664059722715962, "grad_norm": 2.046875, "learning_rate": 1.9707111111111112e-05, "loss": 0.5292, "step": 660 }, { "epoch": 0.014886242445787415, "grad_norm": 2.21875, "learning_rate": 1.9702666666666667e-05, "loss": 0.4828, "step": 670 }, { "epoch": 0.01510842516885887, "grad_norm": 2.0625, "learning_rate": 1.9698222222222222e-05, "loss": 0.5338, "step": 680 }, { "epoch": 0.015330607891930324, "grad_norm": 2.140625, "learning_rate": 1.969377777777778e-05, "loss": 0.5348, "step": 690 }, { "epoch": 0.015552790615001777, "grad_norm": 2.109375, "learning_rate": 1.9689333333333335e-05, "loss": 0.5324, "step": 700 }, { "epoch": 0.01577497333807323, "grad_norm": 1.8359375, "learning_rate": 1.968488888888889e-05, "loss": 0.4862, "step": 710 }, { "epoch": 0.015997156061144685, "grad_norm": 1.875, "learning_rate": 1.9680444444444448e-05, "loss": 0.4729, "step": 720 }, { "epoch": 0.01621933878421614, "grad_norm": 2.203125, "learning_rate": 1.9676000000000003e-05, "loss": 0.5072, "step": 730 }, { "epoch": 0.016441521507287594, "grad_norm": 1.75, "learning_rate": 1.9671555555555558e-05, "loss": 0.4851, "step": 740 }, { "epoch": 0.016663704230359046, "grad_norm": 1.734375, "learning_rate": 1.9667111111111112e-05, "loss": 0.5163, "step": 750 }, { "epoch": 0.0168858869534305, "grad_norm": 1.9609375, "learning_rate": 1.9662666666666667e-05, "loss": 0.5307, "step": 760 }, { "epoch": 0.017108069676501955, "grad_norm": 2.0, "learning_rate": 1.9658222222222222e-05, "loss": 0.4971, "step": 770 }, { "epoch": 0.01733025239957341, "grad_norm": 1.8671875, "learning_rate": 1.9653777777777777e-05, "loss": 0.5307, "step": 780 }, { "epoch": 0.017552435122644865, "grad_norm": 2.65625, "learning_rate": 1.9649333333333335e-05, "loss": 0.5158, "step": 790 }, { "epoch": 0.017774617845716316, "grad_norm": 2.359375, "learning_rate": 1.964488888888889e-05, "loss": 0.5216, "step": 800 }, { "epoch": 0.01799680056878777, "grad_norm": 2.109375, "learning_rate": 1.9640444444444445e-05, "loss": 0.544, "step": 810 }, { "epoch": 0.018218983291859225, "grad_norm": 1.8203125, "learning_rate": 1.9636000000000003e-05, "loss": 0.5232, "step": 820 }, { "epoch": 0.01844116601493068, "grad_norm": 2.15625, "learning_rate": 1.9631555555555558e-05, "loss": 0.481, "step": 830 }, { "epoch": 0.01866334873800213, "grad_norm": 2.171875, "learning_rate": 1.9627111111111113e-05, "loss": 0.4943, "step": 840 }, { "epoch": 0.018885531461073586, "grad_norm": 1.9296875, "learning_rate": 1.9622666666666668e-05, "loss": 0.5153, "step": 850 }, { "epoch": 0.01910771418414504, "grad_norm": 1.84375, "learning_rate": 1.9618222222222222e-05, "loss": 0.4575, "step": 860 }, { "epoch": 0.019329896907216496, "grad_norm": 1.7890625, "learning_rate": 1.961377777777778e-05, "loss": 0.5078, "step": 870 }, { "epoch": 0.01955207963028795, "grad_norm": 1.890625, "learning_rate": 1.9609333333333336e-05, "loss": 0.4857, "step": 880 }, { "epoch": 0.0197742623533594, "grad_norm": 1.7890625, "learning_rate": 1.960488888888889e-05, "loss": 0.5011, "step": 890 }, { "epoch": 0.019996445076430856, "grad_norm": 1.6796875, "learning_rate": 1.9600444444444445e-05, "loss": 0.5439, "step": 900 }, { "epoch": 0.02021862779950231, "grad_norm": 2.0625, "learning_rate": 1.9596e-05, "loss": 0.4918, "step": 910 }, { "epoch": 0.020440810522573766, "grad_norm": 1.859375, "learning_rate": 1.959155555555556e-05, "loss": 0.4805, "step": 920 }, { "epoch": 0.020662993245645217, "grad_norm": 2.046875, "learning_rate": 1.9587111111111113e-05, "loss": 0.4735, "step": 930 }, { "epoch": 0.02088517596871667, "grad_norm": 1.546875, "learning_rate": 1.9582666666666668e-05, "loss": 0.4953, "step": 940 }, { "epoch": 0.021107358691788126, "grad_norm": 1.7734375, "learning_rate": 1.9578222222222223e-05, "loss": 0.4638, "step": 950 }, { "epoch": 0.02132954141485958, "grad_norm": 2.3125, "learning_rate": 1.957377777777778e-05, "loss": 0.5258, "step": 960 }, { "epoch": 0.021551724137931036, "grad_norm": 2.3125, "learning_rate": 1.9569333333333336e-05, "loss": 0.4854, "step": 970 }, { "epoch": 0.021773906861002487, "grad_norm": 2.421875, "learning_rate": 1.956488888888889e-05, "loss": 0.5109, "step": 980 }, { "epoch": 0.021996089584073942, "grad_norm": 1.921875, "learning_rate": 1.9560444444444446e-05, "loss": 0.5116, "step": 990 }, { "epoch": 0.022218272307145397, "grad_norm": 2.34375, "learning_rate": 1.9556e-05, "loss": 0.5149, "step": 1000 }, { "epoch": 0.02244045503021685, "grad_norm": 1.703125, "learning_rate": 1.9551555555555555e-05, "loss": 0.5, "step": 1010 }, { "epoch": 0.022662637753288303, "grad_norm": 2.1875, "learning_rate": 1.9547111111111114e-05, "loss": 0.4927, "step": 1020 }, { "epoch": 0.022884820476359757, "grad_norm": 2.0625, "learning_rate": 1.954266666666667e-05, "loss": 0.5126, "step": 1030 }, { "epoch": 0.023107003199431212, "grad_norm": 2.046875, "learning_rate": 1.9538222222222223e-05, "loss": 0.5177, "step": 1040 }, { "epoch": 0.023329185922502667, "grad_norm": 2.015625, "learning_rate": 1.953377777777778e-05, "loss": 0.4775, "step": 1050 }, { "epoch": 0.02355136864557412, "grad_norm": 2.125, "learning_rate": 1.9529333333333336e-05, "loss": 0.5069, "step": 1060 }, { "epoch": 0.023773551368645573, "grad_norm": 1.96875, "learning_rate": 1.952488888888889e-05, "loss": 0.4852, "step": 1070 }, { "epoch": 0.023995734091717028, "grad_norm": 2.234375, "learning_rate": 1.9520444444444446e-05, "loss": 0.4755, "step": 1080 }, { "epoch": 0.024217916814788482, "grad_norm": 2.125, "learning_rate": 1.9516e-05, "loss": 0.4473, "step": 1090 }, { "epoch": 0.024440099537859937, "grad_norm": 2.203125, "learning_rate": 1.9511555555555556e-05, "loss": 0.5012, "step": 1100 }, { "epoch": 0.024662282260931388, "grad_norm": 2.171875, "learning_rate": 1.950711111111111e-05, "loss": 0.4845, "step": 1110 }, { "epoch": 0.024884464984002843, "grad_norm": 1.640625, "learning_rate": 1.950266666666667e-05, "loss": 0.4897, "step": 1120 }, { "epoch": 0.025106647707074298, "grad_norm": 1.7578125, "learning_rate": 1.9498222222222224e-05, "loss": 0.4571, "step": 1130 }, { "epoch": 0.025328830430145752, "grad_norm": 2.1875, "learning_rate": 1.9493777777777782e-05, "loss": 0.4878, "step": 1140 }, { "epoch": 0.025551013153217207, "grad_norm": 2.078125, "learning_rate": 1.9489333333333337e-05, "loss": 0.5114, "step": 1150 }, { "epoch": 0.02577319587628866, "grad_norm": 2.203125, "learning_rate": 1.948488888888889e-05, "loss": 0.5107, "step": 1160 }, { "epoch": 0.025995378599360113, "grad_norm": 2.078125, "learning_rate": 1.9480444444444446e-05, "loss": 0.5008, "step": 1170 }, { "epoch": 0.026217561322431568, "grad_norm": 2.328125, "learning_rate": 1.9476e-05, "loss": 0.5021, "step": 1180 }, { "epoch": 0.026439744045503023, "grad_norm": 2.0625, "learning_rate": 1.9471555555555556e-05, "loss": 0.538, "step": 1190 }, { "epoch": 0.026661926768574477, "grad_norm": 1.625, "learning_rate": 1.946711111111111e-05, "loss": 0.4946, "step": 1200 }, { "epoch": 0.02688410949164593, "grad_norm": 2.296875, "learning_rate": 1.946266666666667e-05, "loss": 0.4855, "step": 1210 }, { "epoch": 0.027106292214717383, "grad_norm": 2.171875, "learning_rate": 1.9458222222222224e-05, "loss": 0.5346, "step": 1220 }, { "epoch": 0.027328474937788838, "grad_norm": 1.7109375, "learning_rate": 1.945377777777778e-05, "loss": 0.5336, "step": 1230 }, { "epoch": 0.027550657660860293, "grad_norm": 2.03125, "learning_rate": 1.9449333333333337e-05, "loss": 0.5163, "step": 1240 }, { "epoch": 0.027772840383931744, "grad_norm": 1.875, "learning_rate": 1.9444888888888892e-05, "loss": 0.483, "step": 1250 }, { "epoch": 0.0279950231070032, "grad_norm": 2.203125, "learning_rate": 1.9440444444444447e-05, "loss": 0.5556, "step": 1260 }, { "epoch": 0.028217205830074654, "grad_norm": 2.46875, "learning_rate": 1.9436e-05, "loss": 0.5281, "step": 1270 }, { "epoch": 0.028439388553146108, "grad_norm": 2.265625, "learning_rate": 1.9431555555555556e-05, "loss": 0.4806, "step": 1280 }, { "epoch": 0.028661571276217563, "grad_norm": 1.7265625, "learning_rate": 1.942711111111111e-05, "loss": 0.5233, "step": 1290 }, { "epoch": 0.028883753999289014, "grad_norm": 1.8828125, "learning_rate": 1.9422666666666666e-05, "loss": 0.5057, "step": 1300 }, { "epoch": 0.02910593672236047, "grad_norm": 1.7890625, "learning_rate": 1.9418222222222224e-05, "loss": 0.4829, "step": 1310 }, { "epoch": 0.029328119445431924, "grad_norm": 2.046875, "learning_rate": 1.941377777777778e-05, "loss": 0.4969, "step": 1320 }, { "epoch": 0.02955030216850338, "grad_norm": 2.09375, "learning_rate": 1.9409333333333334e-05, "loss": 0.5034, "step": 1330 }, { "epoch": 0.02977248489157483, "grad_norm": 1.8515625, "learning_rate": 1.9404888888888892e-05, "loss": 0.5165, "step": 1340 }, { "epoch": 0.029994667614646284, "grad_norm": 1.953125, "learning_rate": 1.9400444444444447e-05, "loss": 0.4964, "step": 1350 }, { "epoch": 0.03021685033771774, "grad_norm": 1.8125, "learning_rate": 1.9396000000000002e-05, "loss": 0.4331, "step": 1360 }, { "epoch": 0.030439033060789194, "grad_norm": 1.8203125, "learning_rate": 1.9391555555555557e-05, "loss": 0.4651, "step": 1370 }, { "epoch": 0.03066121578386065, "grad_norm": 2.015625, "learning_rate": 1.938711111111111e-05, "loss": 0.48, "step": 1380 }, { "epoch": 0.0308833985069321, "grad_norm": 1.9609375, "learning_rate": 1.9382666666666667e-05, "loss": 0.4874, "step": 1390 }, { "epoch": 0.031105581230003555, "grad_norm": 1.734375, "learning_rate": 1.937822222222222e-05, "loss": 0.4431, "step": 1400 }, { "epoch": 0.03132776395307501, "grad_norm": 1.96875, "learning_rate": 1.937377777777778e-05, "loss": 0.5207, "step": 1410 }, { "epoch": 0.03154994667614646, "grad_norm": 1.984375, "learning_rate": 1.9369333333333334e-05, "loss": 0.5071, "step": 1420 }, { "epoch": 0.03177212939921792, "grad_norm": 2.046875, "learning_rate": 1.936488888888889e-05, "loss": 0.5093, "step": 1430 }, { "epoch": 0.03199431212228937, "grad_norm": 1.59375, "learning_rate": 1.9360444444444448e-05, "loss": 0.4823, "step": 1440 }, { "epoch": 0.03221649484536082, "grad_norm": 1.5546875, "learning_rate": 1.9356000000000002e-05, "loss": 0.4658, "step": 1450 }, { "epoch": 0.03243867756843228, "grad_norm": 1.78125, "learning_rate": 1.9351555555555557e-05, "loss": 0.5124, "step": 1460 }, { "epoch": 0.03266086029150373, "grad_norm": 2.046875, "learning_rate": 1.9347111111111112e-05, "loss": 0.513, "step": 1470 }, { "epoch": 0.03288304301457519, "grad_norm": 1.9921875, "learning_rate": 1.9342666666666667e-05, "loss": 0.4456, "step": 1480 }, { "epoch": 0.03310522573764664, "grad_norm": 1.8984375, "learning_rate": 1.9338222222222225e-05, "loss": 0.5171, "step": 1490 }, { "epoch": 0.03332740846071809, "grad_norm": 1.875, "learning_rate": 1.933377777777778e-05, "loss": 0.4782, "step": 1500 }, { "epoch": 0.03354959118378955, "grad_norm": 1.7265625, "learning_rate": 1.9329333333333335e-05, "loss": 0.4689, "step": 1510 }, { "epoch": 0.033771773906861, "grad_norm": 1.96875, "learning_rate": 1.932488888888889e-05, "loss": 0.506, "step": 1520 }, { "epoch": 0.03399395662993246, "grad_norm": 2.421875, "learning_rate": 1.9320444444444445e-05, "loss": 0.5159, "step": 1530 }, { "epoch": 0.03421613935300391, "grad_norm": 2.0625, "learning_rate": 1.9316000000000003e-05, "loss": 0.504, "step": 1540 }, { "epoch": 0.03443832207607536, "grad_norm": 1.5546875, "learning_rate": 1.9311555555555558e-05, "loss": 0.5043, "step": 1550 }, { "epoch": 0.03466050479914682, "grad_norm": 1.84375, "learning_rate": 1.9307111111111112e-05, "loss": 0.4947, "step": 1560 }, { "epoch": 0.03488268752221827, "grad_norm": 2.3125, "learning_rate": 1.9302666666666667e-05, "loss": 0.4833, "step": 1570 }, { "epoch": 0.03510487024528973, "grad_norm": 1.8046875, "learning_rate": 1.9298222222222226e-05, "loss": 0.4666, "step": 1580 }, { "epoch": 0.03532705296836118, "grad_norm": 2.078125, "learning_rate": 1.929377777777778e-05, "loss": 0.4763, "step": 1590 }, { "epoch": 0.03554923569143263, "grad_norm": 2.234375, "learning_rate": 1.9289333333333335e-05, "loss": 0.519, "step": 1600 }, { "epoch": 0.03577141841450409, "grad_norm": 1.8046875, "learning_rate": 1.928488888888889e-05, "loss": 0.5078, "step": 1610 }, { "epoch": 0.03599360113757554, "grad_norm": 1.9921875, "learning_rate": 1.9280444444444445e-05, "loss": 0.5186, "step": 1620 }, { "epoch": 0.036215783860647, "grad_norm": 1.6875, "learning_rate": 1.9276e-05, "loss": 0.4755, "step": 1630 }, { "epoch": 0.03643796658371845, "grad_norm": 1.8828125, "learning_rate": 1.9271555555555558e-05, "loss": 0.4768, "step": 1640 }, { "epoch": 0.0366601493067899, "grad_norm": 1.859375, "learning_rate": 1.9267111111111113e-05, "loss": 0.5004, "step": 1650 }, { "epoch": 0.03688233202986136, "grad_norm": 2.40625, "learning_rate": 1.9262666666666668e-05, "loss": 0.5269, "step": 1660 }, { "epoch": 0.03710451475293281, "grad_norm": 1.7890625, "learning_rate": 1.9258222222222226e-05, "loss": 0.4504, "step": 1670 }, { "epoch": 0.03732669747600426, "grad_norm": 2.265625, "learning_rate": 1.925377777777778e-05, "loss": 0.5188, "step": 1680 }, { "epoch": 0.03754888019907572, "grad_norm": 2.140625, "learning_rate": 1.9249333333333336e-05, "loss": 0.4665, "step": 1690 }, { "epoch": 0.03777106292214717, "grad_norm": 1.890625, "learning_rate": 1.924488888888889e-05, "loss": 0.5029, "step": 1700 }, { "epoch": 0.03799324564521863, "grad_norm": 1.9609375, "learning_rate": 1.9240444444444445e-05, "loss": 0.5019, "step": 1710 }, { "epoch": 0.03821542836829008, "grad_norm": 2.0625, "learning_rate": 1.9236e-05, "loss": 0.4716, "step": 1720 }, { "epoch": 0.03843761109136153, "grad_norm": 1.8515625, "learning_rate": 1.9231555555555555e-05, "loss": 0.4626, "step": 1730 }, { "epoch": 0.03865979381443299, "grad_norm": 1.8828125, "learning_rate": 1.9227111111111113e-05, "loss": 0.4727, "step": 1740 }, { "epoch": 0.03888197653750444, "grad_norm": 2.0, "learning_rate": 1.9222666666666668e-05, "loss": 0.5317, "step": 1750 }, { "epoch": 0.0391041592605759, "grad_norm": 1.734375, "learning_rate": 1.9218222222222226e-05, "loss": 0.4401, "step": 1760 }, { "epoch": 0.03932634198364735, "grad_norm": 1.6875, "learning_rate": 1.921377777777778e-05, "loss": 0.4829, "step": 1770 }, { "epoch": 0.0395485247067188, "grad_norm": 2.109375, "learning_rate": 1.9209333333333336e-05, "loss": 0.4393, "step": 1780 }, { "epoch": 0.03977070742979026, "grad_norm": 1.84375, "learning_rate": 1.920488888888889e-05, "loss": 0.4402, "step": 1790 }, { "epoch": 0.03999289015286171, "grad_norm": 2.25, "learning_rate": 1.9200444444444446e-05, "loss": 0.491, "step": 1800 }, { "epoch": 0.04021507287593317, "grad_norm": 2.203125, "learning_rate": 1.9196e-05, "loss": 0.487, "step": 1810 }, { "epoch": 0.04043725559900462, "grad_norm": 1.6171875, "learning_rate": 1.9191555555555555e-05, "loss": 0.5038, "step": 1820 }, { "epoch": 0.04065943832207607, "grad_norm": 2.125, "learning_rate": 1.9187111111111114e-05, "loss": 0.4536, "step": 1830 }, { "epoch": 0.04088162104514753, "grad_norm": 1.6875, "learning_rate": 1.918266666666667e-05, "loss": 0.5185, "step": 1840 }, { "epoch": 0.04110380376821898, "grad_norm": 1.8828125, "learning_rate": 1.9178222222222223e-05, "loss": 0.4818, "step": 1850 }, { "epoch": 0.041325986491290434, "grad_norm": 1.9921875, "learning_rate": 1.917377777777778e-05, "loss": 0.4608, "step": 1860 }, { "epoch": 0.04154816921436189, "grad_norm": 1.7734375, "learning_rate": 1.9169333333333336e-05, "loss": 0.4942, "step": 1870 }, { "epoch": 0.04177035193743334, "grad_norm": 1.734375, "learning_rate": 1.916488888888889e-05, "loss": 0.4829, "step": 1880 }, { "epoch": 0.0419925346605048, "grad_norm": 2.015625, "learning_rate": 1.9160444444444446e-05, "loss": 0.4504, "step": 1890 }, { "epoch": 0.04221471738357625, "grad_norm": 1.9765625, "learning_rate": 1.9156e-05, "loss": 0.491, "step": 1900 }, { "epoch": 0.042436900106647704, "grad_norm": 2.34375, "learning_rate": 1.9151555555555556e-05, "loss": 0.486, "step": 1910 }, { "epoch": 0.04265908282971916, "grad_norm": 2.4375, "learning_rate": 1.914711111111111e-05, "loss": 0.4814, "step": 1920 }, { "epoch": 0.042881265552790614, "grad_norm": 1.9453125, "learning_rate": 1.914266666666667e-05, "loss": 0.4853, "step": 1930 }, { "epoch": 0.04310344827586207, "grad_norm": 1.84375, "learning_rate": 1.9138222222222224e-05, "loss": 0.5006, "step": 1940 }, { "epoch": 0.04332563099893352, "grad_norm": 1.8046875, "learning_rate": 1.913377777777778e-05, "loss": 0.5142, "step": 1950 }, { "epoch": 0.043547813722004974, "grad_norm": 1.8984375, "learning_rate": 1.9129333333333337e-05, "loss": 0.4764, "step": 1960 }, { "epoch": 0.04376999644507643, "grad_norm": 1.703125, "learning_rate": 1.912488888888889e-05, "loss": 0.4343, "step": 1970 }, { "epoch": 0.043992179168147884, "grad_norm": 2.34375, "learning_rate": 1.9120444444444446e-05, "loss": 0.5007, "step": 1980 }, { "epoch": 0.04421436189121934, "grad_norm": 2.3125, "learning_rate": 1.9116e-05, "loss": 0.5271, "step": 1990 }, { "epoch": 0.04443654461429079, "grad_norm": 1.9375, "learning_rate": 1.9111555555555556e-05, "loss": 0.4994, "step": 2000 }, { "epoch": 0.044658727337362245, "grad_norm": 1.828125, "learning_rate": 1.910711111111111e-05, "loss": 0.4418, "step": 2010 }, { "epoch": 0.0448809100604337, "grad_norm": 1.7578125, "learning_rate": 1.9102666666666666e-05, "loss": 0.4824, "step": 2020 }, { "epoch": 0.045103092783505154, "grad_norm": 1.9921875, "learning_rate": 1.9098222222222224e-05, "loss": 0.4577, "step": 2030 }, { "epoch": 0.045325275506576605, "grad_norm": 2.0625, "learning_rate": 1.909377777777778e-05, "loss": 0.4742, "step": 2040 }, { "epoch": 0.04554745822964806, "grad_norm": 1.9921875, "learning_rate": 1.9089333333333334e-05, "loss": 0.4691, "step": 2050 }, { "epoch": 0.045769640952719515, "grad_norm": 2.109375, "learning_rate": 1.9084888888888892e-05, "loss": 0.4703, "step": 2060 }, { "epoch": 0.04599182367579097, "grad_norm": 1.8125, "learning_rate": 1.9080444444444447e-05, "loss": 0.4721, "step": 2070 }, { "epoch": 0.046214006398862424, "grad_norm": 1.8515625, "learning_rate": 1.9076e-05, "loss": 0.4702, "step": 2080 }, { "epoch": 0.046436189121933875, "grad_norm": 1.6953125, "learning_rate": 1.9071555555555557e-05, "loss": 0.4956, "step": 2090 }, { "epoch": 0.046658371845005334, "grad_norm": 2.109375, "learning_rate": 1.906711111111111e-05, "loss": 0.4889, "step": 2100 }, { "epoch": 0.046880554568076785, "grad_norm": 2.046875, "learning_rate": 1.906266666666667e-05, "loss": 0.4642, "step": 2110 }, { "epoch": 0.04710273729114824, "grad_norm": 1.78125, "learning_rate": 1.9058222222222224e-05, "loss": 0.4673, "step": 2120 }, { "epoch": 0.047324920014219694, "grad_norm": 1.8125, "learning_rate": 1.905377777777778e-05, "loss": 0.478, "step": 2130 }, { "epoch": 0.047547102737291146, "grad_norm": 2.109375, "learning_rate": 1.9049333333333334e-05, "loss": 0.4863, "step": 2140 }, { "epoch": 0.047769285460362604, "grad_norm": 1.9453125, "learning_rate": 1.904488888888889e-05, "loss": 0.4782, "step": 2150 }, { "epoch": 0.047991468183434055, "grad_norm": 1.6328125, "learning_rate": 1.9040444444444447e-05, "loss": 0.4336, "step": 2160 }, { "epoch": 0.04821365090650551, "grad_norm": 1.9140625, "learning_rate": 1.9036000000000002e-05, "loss": 0.4448, "step": 2170 }, { "epoch": 0.048435833629576965, "grad_norm": 1.6796875, "learning_rate": 1.9031555555555557e-05, "loss": 0.4361, "step": 2180 }, { "epoch": 0.048658016352648416, "grad_norm": 2.078125, "learning_rate": 1.9027111111111112e-05, "loss": 0.4494, "step": 2190 }, { "epoch": 0.048880199075719874, "grad_norm": 1.75, "learning_rate": 1.902266666666667e-05, "loss": 0.4557, "step": 2200 }, { "epoch": 0.049102381798791325, "grad_norm": 1.75, "learning_rate": 1.9018222222222225e-05, "loss": 0.4584, "step": 2210 }, { "epoch": 0.049324564521862777, "grad_norm": 2.265625, "learning_rate": 1.901377777777778e-05, "loss": 0.5059, "step": 2220 }, { "epoch": 0.049546747244934235, "grad_norm": 2.359375, "learning_rate": 1.9009333333333335e-05, "loss": 0.4931, "step": 2230 }, { "epoch": 0.049768929968005686, "grad_norm": 1.7734375, "learning_rate": 1.900488888888889e-05, "loss": 0.4557, "step": 2240 }, { "epoch": 0.049991112691077144, "grad_norm": 1.9453125, "learning_rate": 1.9000444444444444e-05, "loss": 0.4672, "step": 2250 }, { "epoch": 0.050213295414148595, "grad_norm": 1.71875, "learning_rate": 1.8996000000000002e-05, "loss": 0.4516, "step": 2260 }, { "epoch": 0.05043547813722005, "grad_norm": 1.9453125, "learning_rate": 1.8991555555555557e-05, "loss": 0.4555, "step": 2270 }, { "epoch": 0.050657660860291505, "grad_norm": 2.1875, "learning_rate": 1.8987111111111112e-05, "loss": 0.453, "step": 2280 }, { "epoch": 0.050879843583362956, "grad_norm": 1.90625, "learning_rate": 1.898266666666667e-05, "loss": 0.4844, "step": 2290 }, { "epoch": 0.051102026306434414, "grad_norm": 2.296875, "learning_rate": 1.8978222222222225e-05, "loss": 0.4848, "step": 2300 }, { "epoch": 0.051324209029505866, "grad_norm": 1.9765625, "learning_rate": 1.897377777777778e-05, "loss": 0.4426, "step": 2310 }, { "epoch": 0.05154639175257732, "grad_norm": 1.8984375, "learning_rate": 1.8969333333333335e-05, "loss": 0.4427, "step": 2320 }, { "epoch": 0.051768574475648775, "grad_norm": 1.96875, "learning_rate": 1.896488888888889e-05, "loss": 0.4774, "step": 2330 }, { "epoch": 0.051990757198720226, "grad_norm": 1.875, "learning_rate": 1.8960444444444445e-05, "loss": 0.4683, "step": 2340 }, { "epoch": 0.052212939921791685, "grad_norm": 2.0625, "learning_rate": 1.8956e-05, "loss": 0.4915, "step": 2350 }, { "epoch": 0.052435122644863136, "grad_norm": 2.078125, "learning_rate": 1.8951555555555558e-05, "loss": 0.4971, "step": 2360 }, { "epoch": 0.05265730536793459, "grad_norm": 1.7890625, "learning_rate": 1.8947111111111113e-05, "loss": 0.4544, "step": 2370 }, { "epoch": 0.052879488091006045, "grad_norm": 2.109375, "learning_rate": 1.894266666666667e-05, "loss": 0.4575, "step": 2380 }, { "epoch": 0.053101670814077497, "grad_norm": 1.78125, "learning_rate": 1.8938222222222226e-05, "loss": 0.5068, "step": 2390 }, { "epoch": 0.053323853537148955, "grad_norm": 1.671875, "learning_rate": 1.893377777777778e-05, "loss": 0.4863, "step": 2400 }, { "epoch": 0.053546036260220406, "grad_norm": 1.828125, "learning_rate": 1.8929333333333335e-05, "loss": 0.4527, "step": 2410 }, { "epoch": 0.05376821898329186, "grad_norm": 1.7421875, "learning_rate": 1.892488888888889e-05, "loss": 0.5141, "step": 2420 }, { "epoch": 0.053990401706363315, "grad_norm": 2.0625, "learning_rate": 1.8920444444444445e-05, "loss": 0.478, "step": 2430 }, { "epoch": 0.05421258442943477, "grad_norm": 2.125, "learning_rate": 1.8916e-05, "loss": 0.4193, "step": 2440 }, { "epoch": 0.05443476715250622, "grad_norm": 2.125, "learning_rate": 1.8911555555555555e-05, "loss": 0.4617, "step": 2450 }, { "epoch": 0.054656949875577676, "grad_norm": 1.890625, "learning_rate": 1.8907111111111113e-05, "loss": 0.4434, "step": 2460 }, { "epoch": 0.05487913259864913, "grad_norm": 1.8125, "learning_rate": 1.8902666666666668e-05, "loss": 0.456, "step": 2470 }, { "epoch": 0.055101315321720586, "grad_norm": 1.640625, "learning_rate": 1.8898222222222226e-05, "loss": 0.4581, "step": 2480 }, { "epoch": 0.05532349804479204, "grad_norm": 2.5625, "learning_rate": 1.889377777777778e-05, "loss": 0.5004, "step": 2490 }, { "epoch": 0.05554568076786349, "grad_norm": 1.6875, "learning_rate": 1.8889333333333336e-05, "loss": 0.4618, "step": 2500 }, { "epoch": 0.055767863490934946, "grad_norm": 2.09375, "learning_rate": 1.888488888888889e-05, "loss": 0.4335, "step": 2510 }, { "epoch": 0.0559900462140064, "grad_norm": 2.25, "learning_rate": 1.8880444444444445e-05, "loss": 0.4709, "step": 2520 }, { "epoch": 0.056212228937077856, "grad_norm": 1.4609375, "learning_rate": 1.8876e-05, "loss": 0.4395, "step": 2530 }, { "epoch": 0.05643441166014931, "grad_norm": 2.015625, "learning_rate": 1.8871555555555555e-05, "loss": 0.4741, "step": 2540 }, { "epoch": 0.05665659438322076, "grad_norm": 1.5703125, "learning_rate": 1.8867111111111113e-05, "loss": 0.4438, "step": 2550 }, { "epoch": 0.056878777106292217, "grad_norm": 1.7265625, "learning_rate": 1.8862666666666668e-05, "loss": 0.444, "step": 2560 }, { "epoch": 0.05710095982936367, "grad_norm": 1.8125, "learning_rate": 1.8858222222222223e-05, "loss": 0.4554, "step": 2570 }, { "epoch": 0.057323142552435126, "grad_norm": 1.8984375, "learning_rate": 1.885377777777778e-05, "loss": 0.507, "step": 2580 }, { "epoch": 0.05754532527550658, "grad_norm": 2.078125, "learning_rate": 1.8849333333333336e-05, "loss": 0.4842, "step": 2590 }, { "epoch": 0.05776750799857803, "grad_norm": 1.9765625, "learning_rate": 1.884488888888889e-05, "loss": 0.4443, "step": 2600 }, { "epoch": 0.05798969072164949, "grad_norm": 1.8984375, "learning_rate": 1.8840444444444446e-05, "loss": 0.4685, "step": 2610 }, { "epoch": 0.05821187344472094, "grad_norm": 1.9140625, "learning_rate": 1.8836e-05, "loss": 0.4668, "step": 2620 }, { "epoch": 0.05843405616779239, "grad_norm": 2.15625, "learning_rate": 1.8831555555555555e-05, "loss": 0.4316, "step": 2630 }, { "epoch": 0.05865623889086385, "grad_norm": 1.9609375, "learning_rate": 1.882711111111111e-05, "loss": 0.4482, "step": 2640 }, { "epoch": 0.0588784216139353, "grad_norm": 1.8046875, "learning_rate": 1.882266666666667e-05, "loss": 0.4789, "step": 2650 }, { "epoch": 0.05910060433700676, "grad_norm": 1.484375, "learning_rate": 1.8818222222222223e-05, "loss": 0.4504, "step": 2660 }, { "epoch": 0.05932278706007821, "grad_norm": 2.09375, "learning_rate": 1.8813777777777778e-05, "loss": 0.486, "step": 2670 }, { "epoch": 0.05954496978314966, "grad_norm": 1.7578125, "learning_rate": 1.8809333333333336e-05, "loss": 0.4454, "step": 2680 }, { "epoch": 0.05976715250622112, "grad_norm": 2.03125, "learning_rate": 1.880488888888889e-05, "loss": 0.4169, "step": 2690 }, { "epoch": 0.05998933522929257, "grad_norm": 1.875, "learning_rate": 1.8800444444444446e-05, "loss": 0.4754, "step": 2700 }, { "epoch": 0.06021151795236403, "grad_norm": 2.25, "learning_rate": 1.8796e-05, "loss": 0.5018, "step": 2710 }, { "epoch": 0.06043370067543548, "grad_norm": 1.8515625, "learning_rate": 1.8791555555555556e-05, "loss": 0.4931, "step": 2720 }, { "epoch": 0.06065588339850693, "grad_norm": 1.9140625, "learning_rate": 1.8787111111111114e-05, "loss": 0.4725, "step": 2730 }, { "epoch": 0.06087806612157839, "grad_norm": 1.6640625, "learning_rate": 1.878266666666667e-05, "loss": 0.4276, "step": 2740 }, { "epoch": 0.06110024884464984, "grad_norm": 1.7890625, "learning_rate": 1.8778222222222224e-05, "loss": 0.4817, "step": 2750 }, { "epoch": 0.0613224315677213, "grad_norm": 1.9375, "learning_rate": 1.877377777777778e-05, "loss": 0.4638, "step": 2760 }, { "epoch": 0.06154461429079275, "grad_norm": 1.8046875, "learning_rate": 1.8769333333333333e-05, "loss": 0.4529, "step": 2770 }, { "epoch": 0.0617667970138642, "grad_norm": 1.9609375, "learning_rate": 1.876488888888889e-05, "loss": 0.4779, "step": 2780 }, { "epoch": 0.06198897973693566, "grad_norm": 1.90625, "learning_rate": 1.8760444444444447e-05, "loss": 0.4788, "step": 2790 }, { "epoch": 0.06221116246000711, "grad_norm": 1.953125, "learning_rate": 1.8756e-05, "loss": 0.4513, "step": 2800 }, { "epoch": 0.06243334518307856, "grad_norm": 1.6875, "learning_rate": 1.8751555555555556e-05, "loss": 0.4661, "step": 2810 }, { "epoch": 0.06265552790615002, "grad_norm": 1.9296875, "learning_rate": 1.8747111111111114e-05, "loss": 0.4529, "step": 2820 }, { "epoch": 0.06287771062922147, "grad_norm": 1.9765625, "learning_rate": 1.874266666666667e-05, "loss": 0.4556, "step": 2830 }, { "epoch": 0.06309989335229292, "grad_norm": 2.171875, "learning_rate": 1.8738222222222224e-05, "loss": 0.4645, "step": 2840 }, { "epoch": 0.06332207607536439, "grad_norm": 1.921875, "learning_rate": 1.873377777777778e-05, "loss": 0.3935, "step": 2850 }, { "epoch": 0.06354425879843584, "grad_norm": 1.7890625, "learning_rate": 1.8729333333333334e-05, "loss": 0.4634, "step": 2860 }, { "epoch": 0.06376644152150729, "grad_norm": 2.078125, "learning_rate": 1.872488888888889e-05, "loss": 0.4722, "step": 2870 }, { "epoch": 0.06398862424457874, "grad_norm": 1.90625, "learning_rate": 1.8720444444444447e-05, "loss": 0.4674, "step": 2880 }, { "epoch": 0.06421080696765019, "grad_norm": 2.015625, "learning_rate": 1.8716000000000002e-05, "loss": 0.4621, "step": 2890 }, { "epoch": 0.06443298969072164, "grad_norm": 2.15625, "learning_rate": 1.8711555555555557e-05, "loss": 0.4584, "step": 2900 }, { "epoch": 0.06465517241379311, "grad_norm": 1.859375, "learning_rate": 1.8707111111111115e-05, "loss": 0.4808, "step": 2910 }, { "epoch": 0.06487735513686456, "grad_norm": 2.203125, "learning_rate": 1.870266666666667e-05, "loss": 0.4729, "step": 2920 }, { "epoch": 0.06509953785993601, "grad_norm": 1.484375, "learning_rate": 1.8698222222222225e-05, "loss": 0.4282, "step": 2930 }, { "epoch": 0.06532172058300746, "grad_norm": 2.078125, "learning_rate": 1.869377777777778e-05, "loss": 0.5072, "step": 2940 }, { "epoch": 0.06554390330607891, "grad_norm": 1.8515625, "learning_rate": 1.8689333333333334e-05, "loss": 0.4563, "step": 2950 }, { "epoch": 0.06576608602915038, "grad_norm": 1.7578125, "learning_rate": 1.868488888888889e-05, "loss": 0.4412, "step": 2960 }, { "epoch": 0.06598826875222183, "grad_norm": 1.890625, "learning_rate": 1.8680444444444444e-05, "loss": 0.4591, "step": 2970 }, { "epoch": 0.06621045147529328, "grad_norm": 2.03125, "learning_rate": 1.8676000000000002e-05, "loss": 0.4204, "step": 2980 }, { "epoch": 0.06643263419836473, "grad_norm": 1.7578125, "learning_rate": 1.8671555555555557e-05, "loss": 0.4324, "step": 2990 }, { "epoch": 0.06665481692143618, "grad_norm": 1.9921875, "learning_rate": 1.8667111111111115e-05, "loss": 0.434, "step": 3000 }, { "epoch": 0.06687699964450765, "grad_norm": 2.359375, "learning_rate": 1.866266666666667e-05, "loss": 0.4756, "step": 3010 }, { "epoch": 0.0670991823675791, "grad_norm": 2.1875, "learning_rate": 1.8658222222222225e-05, "loss": 0.4765, "step": 3020 }, { "epoch": 0.06732136509065055, "grad_norm": 2.1875, "learning_rate": 1.865377777777778e-05, "loss": 0.4121, "step": 3030 }, { "epoch": 0.067543547813722, "grad_norm": 1.6640625, "learning_rate": 1.8649333333333335e-05, "loss": 0.4742, "step": 3040 }, { "epoch": 0.06776573053679345, "grad_norm": 1.8671875, "learning_rate": 1.864488888888889e-05, "loss": 0.4649, "step": 3050 }, { "epoch": 0.06798791325986492, "grad_norm": 1.96875, "learning_rate": 1.8640444444444444e-05, "loss": 0.472, "step": 3060 }, { "epoch": 0.06821009598293637, "grad_norm": 1.9765625, "learning_rate": 1.8636e-05, "loss": 0.4169, "step": 3070 }, { "epoch": 0.06843227870600782, "grad_norm": 2.125, "learning_rate": 1.8631555555555557e-05, "loss": 0.4786, "step": 3080 }, { "epoch": 0.06865446142907927, "grad_norm": 2.15625, "learning_rate": 1.8627111111111112e-05, "loss": 0.4465, "step": 3090 }, { "epoch": 0.06887664415215072, "grad_norm": 1.984375, "learning_rate": 1.862266666666667e-05, "loss": 0.4672, "step": 3100 }, { "epoch": 0.06909882687522219, "grad_norm": 2.375, "learning_rate": 1.8618222222222225e-05, "loss": 0.4604, "step": 3110 }, { "epoch": 0.06932100959829364, "grad_norm": 1.890625, "learning_rate": 1.861377777777778e-05, "loss": 0.4389, "step": 3120 }, { "epoch": 0.06954319232136509, "grad_norm": 2.171875, "learning_rate": 1.8609333333333335e-05, "loss": 0.4301, "step": 3130 }, { "epoch": 0.06976537504443654, "grad_norm": 1.9296875, "learning_rate": 1.860488888888889e-05, "loss": 0.4703, "step": 3140 }, { "epoch": 0.069987557767508, "grad_norm": 1.9921875, "learning_rate": 1.8600444444444445e-05, "loss": 0.4542, "step": 3150 }, { "epoch": 0.07020974049057946, "grad_norm": 1.9921875, "learning_rate": 1.8596e-05, "loss": 0.4301, "step": 3160 }, { "epoch": 0.07043192321365091, "grad_norm": 2.390625, "learning_rate": 1.8591555555555554e-05, "loss": 0.453, "step": 3170 }, { "epoch": 0.07065410593672236, "grad_norm": 1.609375, "learning_rate": 1.8587111111111113e-05, "loss": 0.4156, "step": 3180 }, { "epoch": 0.07087628865979381, "grad_norm": 2.0625, "learning_rate": 1.8582666666666667e-05, "loss": 0.4333, "step": 3190 }, { "epoch": 0.07109847138286526, "grad_norm": 1.625, "learning_rate": 1.8578222222222226e-05, "loss": 0.4398, "step": 3200 }, { "epoch": 0.07132065410593673, "grad_norm": 1.9609375, "learning_rate": 1.857377777777778e-05, "loss": 0.4683, "step": 3210 }, { "epoch": 0.07154283682900818, "grad_norm": 1.6875, "learning_rate": 1.8569333333333335e-05, "loss": 0.4475, "step": 3220 }, { "epoch": 0.07176501955207963, "grad_norm": 2.15625, "learning_rate": 1.856488888888889e-05, "loss": 0.4453, "step": 3230 }, { "epoch": 0.07198720227515108, "grad_norm": 1.796875, "learning_rate": 1.8560444444444445e-05, "loss": 0.4922, "step": 3240 }, { "epoch": 0.07220938499822253, "grad_norm": 1.9296875, "learning_rate": 1.8556e-05, "loss": 0.492, "step": 3250 }, { "epoch": 0.072431567721294, "grad_norm": 1.7890625, "learning_rate": 1.8551555555555555e-05, "loss": 0.4376, "step": 3260 }, { "epoch": 0.07265375044436545, "grad_norm": 2.4375, "learning_rate": 1.8547111111111113e-05, "loss": 0.4818, "step": 3270 }, { "epoch": 0.0728759331674369, "grad_norm": 1.921875, "learning_rate": 1.8542666666666668e-05, "loss": 0.4779, "step": 3280 }, { "epoch": 0.07309811589050835, "grad_norm": 1.546875, "learning_rate": 1.8538222222222223e-05, "loss": 0.4235, "step": 3290 }, { "epoch": 0.0733202986135798, "grad_norm": 1.890625, "learning_rate": 1.853377777777778e-05, "loss": 0.4584, "step": 3300 }, { "epoch": 0.07354248133665126, "grad_norm": 2.03125, "learning_rate": 1.8529333333333336e-05, "loss": 0.4687, "step": 3310 }, { "epoch": 0.07376466405972272, "grad_norm": 1.9140625, "learning_rate": 1.852488888888889e-05, "loss": 0.4481, "step": 3320 }, { "epoch": 0.07398684678279417, "grad_norm": 2.015625, "learning_rate": 1.8520444444444445e-05, "loss": 0.4901, "step": 3330 }, { "epoch": 0.07420902950586562, "grad_norm": 1.4296875, "learning_rate": 1.8516e-05, "loss": 0.4399, "step": 3340 }, { "epoch": 0.07443121222893707, "grad_norm": 2.203125, "learning_rate": 1.851155555555556e-05, "loss": 0.4367, "step": 3350 }, { "epoch": 0.07465339495200853, "grad_norm": 2.28125, "learning_rate": 1.8507111111111113e-05, "loss": 0.4724, "step": 3360 }, { "epoch": 0.07487557767507999, "grad_norm": 1.8515625, "learning_rate": 1.8502666666666668e-05, "loss": 0.4534, "step": 3370 }, { "epoch": 0.07509776039815144, "grad_norm": 2.25, "learning_rate": 1.8498222222222223e-05, "loss": 0.4649, "step": 3380 }, { "epoch": 0.0753199431212229, "grad_norm": 1.703125, "learning_rate": 1.8493777777777778e-05, "loss": 0.4539, "step": 3390 }, { "epoch": 0.07554212584429434, "grad_norm": 1.75, "learning_rate": 1.8489333333333336e-05, "loss": 0.4484, "step": 3400 }, { "epoch": 0.0757643085673658, "grad_norm": 1.9765625, "learning_rate": 1.848488888888889e-05, "loss": 0.4544, "step": 3410 }, { "epoch": 0.07598649129043726, "grad_norm": 1.8671875, "learning_rate": 1.8480444444444446e-05, "loss": 0.4431, "step": 3420 }, { "epoch": 0.07620867401350871, "grad_norm": 2.1875, "learning_rate": 1.8476e-05, "loss": 0.437, "step": 3430 }, { "epoch": 0.07643085673658016, "grad_norm": 2.0625, "learning_rate": 1.847155555555556e-05, "loss": 0.458, "step": 3440 }, { "epoch": 0.07665303945965161, "grad_norm": 2.09375, "learning_rate": 1.8467111111111114e-05, "loss": 0.4512, "step": 3450 }, { "epoch": 0.07687522218272307, "grad_norm": 2.375, "learning_rate": 1.846266666666667e-05, "loss": 0.435, "step": 3460 }, { "epoch": 0.07709740490579453, "grad_norm": 2.203125, "learning_rate": 1.8458222222222223e-05, "loss": 0.4778, "step": 3470 }, { "epoch": 0.07731958762886598, "grad_norm": 1.9375, "learning_rate": 1.8453777777777778e-05, "loss": 0.446, "step": 3480 }, { "epoch": 0.07754177035193743, "grad_norm": 1.859375, "learning_rate": 1.8449333333333333e-05, "loss": 0.4624, "step": 3490 }, { "epoch": 0.07776395307500888, "grad_norm": 1.921875, "learning_rate": 1.844488888888889e-05, "loss": 0.4458, "step": 3500 }, { "epoch": 0.07798613579808034, "grad_norm": 1.7734375, "learning_rate": 1.8440444444444446e-05, "loss": 0.4879, "step": 3510 }, { "epoch": 0.0782083185211518, "grad_norm": 1.8046875, "learning_rate": 1.8436e-05, "loss": 0.4646, "step": 3520 }, { "epoch": 0.07843050124422325, "grad_norm": 1.6015625, "learning_rate": 1.843155555555556e-05, "loss": 0.4816, "step": 3530 }, { "epoch": 0.0786526839672947, "grad_norm": 1.8359375, "learning_rate": 1.8427111111111114e-05, "loss": 0.4507, "step": 3540 }, { "epoch": 0.07887486669036615, "grad_norm": 1.9765625, "learning_rate": 1.842266666666667e-05, "loss": 0.425, "step": 3550 }, { "epoch": 0.0790970494134376, "grad_norm": 2.203125, "learning_rate": 1.8418222222222224e-05, "loss": 0.4619, "step": 3560 }, { "epoch": 0.07931923213650907, "grad_norm": 1.8828125, "learning_rate": 1.841377777777778e-05, "loss": 0.4677, "step": 3570 }, { "epoch": 0.07954141485958052, "grad_norm": 2.15625, "learning_rate": 1.8409333333333333e-05, "loss": 0.4423, "step": 3580 }, { "epoch": 0.07976359758265197, "grad_norm": 1.59375, "learning_rate": 1.840488888888889e-05, "loss": 0.4354, "step": 3590 }, { "epoch": 0.07998578030572343, "grad_norm": 1.8984375, "learning_rate": 1.8400444444444447e-05, "loss": 0.4159, "step": 3600 }, { "epoch": 0.08020796302879488, "grad_norm": 2.15625, "learning_rate": 1.8396e-05, "loss": 0.4579, "step": 3610 }, { "epoch": 0.08043014575186634, "grad_norm": 1.9765625, "learning_rate": 1.839155555555556e-05, "loss": 0.4623, "step": 3620 }, { "epoch": 0.08065232847493779, "grad_norm": 2.03125, "learning_rate": 1.8387111111111114e-05, "loss": 0.4846, "step": 3630 }, { "epoch": 0.08087451119800924, "grad_norm": 1.5703125, "learning_rate": 1.838266666666667e-05, "loss": 0.4815, "step": 3640 }, { "epoch": 0.0810966939210807, "grad_norm": 1.8828125, "learning_rate": 1.8378222222222224e-05, "loss": 0.4374, "step": 3650 }, { "epoch": 0.08131887664415215, "grad_norm": 1.5703125, "learning_rate": 1.837377777777778e-05, "loss": 0.454, "step": 3660 }, { "epoch": 0.0815410593672236, "grad_norm": 1.75, "learning_rate": 1.8369333333333334e-05, "loss": 0.4526, "step": 3670 }, { "epoch": 0.08176324209029506, "grad_norm": 1.71875, "learning_rate": 1.836488888888889e-05, "loss": 0.4432, "step": 3680 }, { "epoch": 0.08198542481336651, "grad_norm": 1.7109375, "learning_rate": 1.8360444444444444e-05, "loss": 0.4295, "step": 3690 }, { "epoch": 0.08220760753643797, "grad_norm": 1.9140625, "learning_rate": 1.8356000000000002e-05, "loss": 0.4415, "step": 3700 }, { "epoch": 0.08242979025950942, "grad_norm": 1.765625, "learning_rate": 1.8351555555555557e-05, "loss": 0.43, "step": 3710 }, { "epoch": 0.08265197298258087, "grad_norm": 1.8515625, "learning_rate": 1.8347111111111115e-05, "loss": 0.4624, "step": 3720 }, { "epoch": 0.08287415570565233, "grad_norm": 1.828125, "learning_rate": 1.834266666666667e-05, "loss": 0.4405, "step": 3730 }, { "epoch": 0.08309633842872378, "grad_norm": 2.234375, "learning_rate": 1.8338222222222225e-05, "loss": 0.4621, "step": 3740 }, { "epoch": 0.08331852115179524, "grad_norm": 2.03125, "learning_rate": 1.833377777777778e-05, "loss": 0.4183, "step": 3750 }, { "epoch": 0.08354070387486669, "grad_norm": 1.921875, "learning_rate": 1.8329333333333334e-05, "loss": 0.49, "step": 3760 }, { "epoch": 0.08376288659793814, "grad_norm": 2.125, "learning_rate": 1.832488888888889e-05, "loss": 0.4697, "step": 3770 }, { "epoch": 0.0839850693210096, "grad_norm": 1.671875, "learning_rate": 1.8320444444444444e-05, "loss": 0.4161, "step": 3780 }, { "epoch": 0.08420725204408105, "grad_norm": 1.9765625, "learning_rate": 1.8316e-05, "loss": 0.4223, "step": 3790 }, { "epoch": 0.0844294347671525, "grad_norm": 1.9453125, "learning_rate": 1.8311555555555557e-05, "loss": 0.4626, "step": 3800 }, { "epoch": 0.08465161749022396, "grad_norm": 1.96875, "learning_rate": 1.8307111111111112e-05, "loss": 0.4782, "step": 3810 }, { "epoch": 0.08487380021329541, "grad_norm": 1.8984375, "learning_rate": 1.830266666666667e-05, "loss": 0.4219, "step": 3820 }, { "epoch": 0.08509598293636687, "grad_norm": 2.09375, "learning_rate": 1.8298222222222225e-05, "loss": 0.4611, "step": 3830 }, { "epoch": 0.08531816565943832, "grad_norm": 2.09375, "learning_rate": 1.829377777777778e-05, "loss": 0.4372, "step": 3840 }, { "epoch": 0.08554034838250978, "grad_norm": 1.859375, "learning_rate": 1.8289333333333335e-05, "loss": 0.4556, "step": 3850 }, { "epoch": 0.08576253110558123, "grad_norm": 1.9921875, "learning_rate": 1.828488888888889e-05, "loss": 0.4458, "step": 3860 }, { "epoch": 0.08598471382865268, "grad_norm": 2.03125, "learning_rate": 1.8280444444444444e-05, "loss": 0.4566, "step": 3870 }, { "epoch": 0.08620689655172414, "grad_norm": 1.578125, "learning_rate": 1.8276e-05, "loss": 0.4049, "step": 3880 }, { "epoch": 0.0864290792747956, "grad_norm": 1.9296875, "learning_rate": 1.8271555555555557e-05, "loss": 0.4401, "step": 3890 }, { "epoch": 0.08665126199786705, "grad_norm": 1.9140625, "learning_rate": 1.8267111111111112e-05, "loss": 0.4426, "step": 3900 }, { "epoch": 0.0868734447209385, "grad_norm": 1.6875, "learning_rate": 1.8262666666666667e-05, "loss": 0.4529, "step": 3910 }, { "epoch": 0.08709562744400995, "grad_norm": 1.7578125, "learning_rate": 1.8258222222222225e-05, "loss": 0.4444, "step": 3920 }, { "epoch": 0.08731781016708141, "grad_norm": 2.0625, "learning_rate": 1.825377777777778e-05, "loss": 0.4486, "step": 3930 }, { "epoch": 0.08753999289015287, "grad_norm": 2.046875, "learning_rate": 1.8249333333333335e-05, "loss": 0.4299, "step": 3940 }, { "epoch": 0.08776217561322432, "grad_norm": 1.5859375, "learning_rate": 1.824488888888889e-05, "loss": 0.4385, "step": 3950 }, { "epoch": 0.08798435833629577, "grad_norm": 1.9140625, "learning_rate": 1.8240444444444445e-05, "loss": 0.4493, "step": 3960 }, { "epoch": 0.08820654105936722, "grad_norm": 1.71875, "learning_rate": 1.8236000000000003e-05, "loss": 0.455, "step": 3970 }, { "epoch": 0.08842872378243868, "grad_norm": 2.1875, "learning_rate": 1.8231555555555558e-05, "loss": 0.4461, "step": 3980 }, { "epoch": 0.08865090650551014, "grad_norm": 1.6640625, "learning_rate": 1.8227111111111113e-05, "loss": 0.4066, "step": 3990 }, { "epoch": 0.08887308922858159, "grad_norm": 1.5703125, "learning_rate": 1.8222666666666667e-05, "loss": 0.4344, "step": 4000 }, { "epoch": 0.08909527195165304, "grad_norm": 1.8515625, "learning_rate": 1.8218222222222222e-05, "loss": 0.4603, "step": 4010 }, { "epoch": 0.08931745467472449, "grad_norm": 1.8125, "learning_rate": 1.821377777777778e-05, "loss": 0.4158, "step": 4020 }, { "epoch": 0.08953963739779595, "grad_norm": 1.7421875, "learning_rate": 1.8209333333333335e-05, "loss": 0.4181, "step": 4030 }, { "epoch": 0.0897618201208674, "grad_norm": 2.03125, "learning_rate": 1.820488888888889e-05, "loss": 0.4191, "step": 4040 }, { "epoch": 0.08998400284393886, "grad_norm": 2.015625, "learning_rate": 1.8200444444444445e-05, "loss": 0.4887, "step": 4050 }, { "epoch": 0.09020618556701031, "grad_norm": 1.859375, "learning_rate": 1.8196000000000003e-05, "loss": 0.4436, "step": 4060 }, { "epoch": 0.09042836829008176, "grad_norm": 1.9453125, "learning_rate": 1.8191555555555558e-05, "loss": 0.4669, "step": 4070 }, { "epoch": 0.09065055101315321, "grad_norm": 1.859375, "learning_rate": 1.8187111111111113e-05, "loss": 0.4529, "step": 4080 }, { "epoch": 0.09087273373622468, "grad_norm": 2.015625, "learning_rate": 1.8182666666666668e-05, "loss": 0.4579, "step": 4090 }, { "epoch": 0.09109491645929613, "grad_norm": 1.828125, "learning_rate": 1.8178222222222223e-05, "loss": 0.4856, "step": 4100 }, { "epoch": 0.09131709918236758, "grad_norm": 2.140625, "learning_rate": 1.8173777777777778e-05, "loss": 0.4385, "step": 4110 }, { "epoch": 0.09153928190543903, "grad_norm": 1.7890625, "learning_rate": 1.8169333333333336e-05, "loss": 0.4163, "step": 4120 }, { "epoch": 0.09176146462851048, "grad_norm": 1.8125, "learning_rate": 1.816488888888889e-05, "loss": 0.4889, "step": 4130 }, { "epoch": 0.09198364735158195, "grad_norm": 2.1875, "learning_rate": 1.8160444444444445e-05, "loss": 0.4412, "step": 4140 }, { "epoch": 0.0922058300746534, "grad_norm": 2.109375, "learning_rate": 1.8156000000000004e-05, "loss": 0.5022, "step": 4150 }, { "epoch": 0.09242801279772485, "grad_norm": 1.9453125, "learning_rate": 1.815155555555556e-05, "loss": 0.4063, "step": 4160 }, { "epoch": 0.0926501955207963, "grad_norm": 2.09375, "learning_rate": 1.8147111111111113e-05, "loss": 0.4474, "step": 4170 }, { "epoch": 0.09287237824386775, "grad_norm": 1.3515625, "learning_rate": 1.8142666666666668e-05, "loss": 0.4276, "step": 4180 }, { "epoch": 0.09309456096693922, "grad_norm": 1.875, "learning_rate": 1.8138222222222223e-05, "loss": 0.4464, "step": 4190 }, { "epoch": 0.09331674369001067, "grad_norm": 2.328125, "learning_rate": 1.8133777777777778e-05, "loss": 0.4756, "step": 4200 }, { "epoch": 0.09353892641308212, "grad_norm": 2.0625, "learning_rate": 1.8129333333333333e-05, "loss": 0.4513, "step": 4210 }, { "epoch": 0.09376110913615357, "grad_norm": 2.125, "learning_rate": 1.812488888888889e-05, "loss": 0.4588, "step": 4220 }, { "epoch": 0.09398329185922502, "grad_norm": 2.171875, "learning_rate": 1.8120444444444446e-05, "loss": 0.424, "step": 4230 }, { "epoch": 0.09420547458229649, "grad_norm": 2.078125, "learning_rate": 1.8116000000000004e-05, "loss": 0.4253, "step": 4240 }, { "epoch": 0.09442765730536794, "grad_norm": 1.703125, "learning_rate": 1.811155555555556e-05, "loss": 0.4561, "step": 4250 }, { "epoch": 0.09464984002843939, "grad_norm": 1.8203125, "learning_rate": 1.8107111111111114e-05, "loss": 0.3994, "step": 4260 }, { "epoch": 0.09487202275151084, "grad_norm": 1.8046875, "learning_rate": 1.810266666666667e-05, "loss": 0.4645, "step": 4270 }, { "epoch": 0.09509420547458229, "grad_norm": 2.109375, "learning_rate": 1.8098222222222223e-05, "loss": 0.4633, "step": 4280 }, { "epoch": 0.09531638819765376, "grad_norm": 1.921875, "learning_rate": 1.809377777777778e-05, "loss": 0.4848, "step": 4290 }, { "epoch": 0.09553857092072521, "grad_norm": 2.15625, "learning_rate": 1.8089333333333333e-05, "loss": 0.4327, "step": 4300 }, { "epoch": 0.09576075364379666, "grad_norm": 1.953125, "learning_rate": 1.8084888888888888e-05, "loss": 0.4712, "step": 4310 }, { "epoch": 0.09598293636686811, "grad_norm": 1.9375, "learning_rate": 1.8080444444444446e-05, "loss": 0.4267, "step": 4320 }, { "epoch": 0.09620511908993956, "grad_norm": 1.4453125, "learning_rate": 1.8076e-05, "loss": 0.4325, "step": 4330 }, { "epoch": 0.09642730181301103, "grad_norm": 1.9140625, "learning_rate": 1.807155555555556e-05, "loss": 0.4381, "step": 4340 }, { "epoch": 0.09664948453608248, "grad_norm": 1.9140625, "learning_rate": 1.8067111111111114e-05, "loss": 0.4514, "step": 4350 }, { "epoch": 0.09687166725915393, "grad_norm": 1.9921875, "learning_rate": 1.806266666666667e-05, "loss": 0.4969, "step": 4360 }, { "epoch": 0.09709384998222538, "grad_norm": 1.84375, "learning_rate": 1.8058222222222224e-05, "loss": 0.4552, "step": 4370 }, { "epoch": 0.09731603270529683, "grad_norm": 2.0625, "learning_rate": 1.805377777777778e-05, "loss": 0.432, "step": 4380 }, { "epoch": 0.0975382154283683, "grad_norm": 1.984375, "learning_rate": 1.8049333333333334e-05, "loss": 0.4501, "step": 4390 }, { "epoch": 0.09776039815143975, "grad_norm": 1.8515625, "learning_rate": 1.804488888888889e-05, "loss": 0.4332, "step": 4400 }, { "epoch": 0.0979825808745112, "grad_norm": 1.703125, "learning_rate": 1.8040444444444443e-05, "loss": 0.4507, "step": 4410 }, { "epoch": 0.09820476359758265, "grad_norm": 2.015625, "learning_rate": 1.8036e-05, "loss": 0.4568, "step": 4420 }, { "epoch": 0.0984269463206541, "grad_norm": 1.796875, "learning_rate": 1.8031555555555556e-05, "loss": 0.4763, "step": 4430 }, { "epoch": 0.09864912904372555, "grad_norm": 1.9453125, "learning_rate": 1.8027111111111115e-05, "loss": 0.4373, "step": 4440 }, { "epoch": 0.09887131176679702, "grad_norm": 2.109375, "learning_rate": 1.802266666666667e-05, "loss": 0.4492, "step": 4450 }, { "epoch": 0.09909349448986847, "grad_norm": 2.171875, "learning_rate": 1.8018222222222224e-05, "loss": 0.454, "step": 4460 }, { "epoch": 0.09931567721293992, "grad_norm": 1.8203125, "learning_rate": 1.801377777777778e-05, "loss": 0.4747, "step": 4470 }, { "epoch": 0.09953785993601137, "grad_norm": 1.8359375, "learning_rate": 1.8009333333333334e-05, "loss": 0.4355, "step": 4480 }, { "epoch": 0.09976004265908282, "grad_norm": 2.15625, "learning_rate": 1.800488888888889e-05, "loss": 0.4867, "step": 4490 }, { "epoch": 0.09998222538215429, "grad_norm": 2.234375, "learning_rate": 1.8000444444444444e-05, "loss": 0.4415, "step": 4500 }, { "epoch": 0.10020440810522574, "grad_norm": 1.9375, "learning_rate": 1.7996000000000002e-05, "loss": 0.4903, "step": 4510 }, { "epoch": 0.10042659082829719, "grad_norm": 2.15625, "learning_rate": 1.7991555555555557e-05, "loss": 0.4308, "step": 4520 }, { "epoch": 0.10064877355136864, "grad_norm": 1.65625, "learning_rate": 1.798711111111111e-05, "loss": 0.5011, "step": 4530 }, { "epoch": 0.1008709562744401, "grad_norm": 2.03125, "learning_rate": 1.798266666666667e-05, "loss": 0.4614, "step": 4540 }, { "epoch": 0.10109313899751156, "grad_norm": 1.78125, "learning_rate": 1.7978222222222225e-05, "loss": 0.4547, "step": 4550 }, { "epoch": 0.10131532172058301, "grad_norm": 1.8984375, "learning_rate": 1.797377777777778e-05, "loss": 0.4505, "step": 4560 }, { "epoch": 0.10153750444365446, "grad_norm": 1.5625, "learning_rate": 1.7969333333333334e-05, "loss": 0.4285, "step": 4570 }, { "epoch": 0.10175968716672591, "grad_norm": 1.7421875, "learning_rate": 1.796488888888889e-05, "loss": 0.4422, "step": 4580 }, { "epoch": 0.10198186988979736, "grad_norm": 1.7578125, "learning_rate": 1.7960444444444447e-05, "loss": 0.4242, "step": 4590 }, { "epoch": 0.10220405261286883, "grad_norm": 1.734375, "learning_rate": 1.7956000000000002e-05, "loss": 0.4183, "step": 4600 }, { "epoch": 0.10242623533594028, "grad_norm": 1.8828125, "learning_rate": 1.7951555555555557e-05, "loss": 0.4375, "step": 4610 }, { "epoch": 0.10264841805901173, "grad_norm": 1.9296875, "learning_rate": 1.7947111111111112e-05, "loss": 0.4561, "step": 4620 }, { "epoch": 0.10287060078208318, "grad_norm": 1.5234375, "learning_rate": 1.7942666666666667e-05, "loss": 0.4326, "step": 4630 }, { "epoch": 0.10309278350515463, "grad_norm": 2.0625, "learning_rate": 1.7938222222222225e-05, "loss": 0.4414, "step": 4640 }, { "epoch": 0.1033149662282261, "grad_norm": 1.9609375, "learning_rate": 1.793377777777778e-05, "loss": 0.4279, "step": 4650 }, { "epoch": 0.10353714895129755, "grad_norm": 1.859375, "learning_rate": 1.7929333333333335e-05, "loss": 0.4223, "step": 4660 }, { "epoch": 0.103759331674369, "grad_norm": 2.078125, "learning_rate": 1.792488888888889e-05, "loss": 0.4071, "step": 4670 }, { "epoch": 0.10398151439744045, "grad_norm": 1.8359375, "learning_rate": 1.7920444444444448e-05, "loss": 0.4339, "step": 4680 }, { "epoch": 0.1042036971205119, "grad_norm": 1.8984375, "learning_rate": 1.7916000000000003e-05, "loss": 0.4988, "step": 4690 }, { "epoch": 0.10442587984358337, "grad_norm": 1.7421875, "learning_rate": 1.7911555555555557e-05, "loss": 0.4143, "step": 4700 }, { "epoch": 0.10464806256665482, "grad_norm": 1.4375, "learning_rate": 1.7907111111111112e-05, "loss": 0.4351, "step": 4710 }, { "epoch": 0.10487024528972627, "grad_norm": 1.9609375, "learning_rate": 1.7902666666666667e-05, "loss": 0.4467, "step": 4720 }, { "epoch": 0.10509242801279772, "grad_norm": 2.015625, "learning_rate": 1.7898222222222222e-05, "loss": 0.446, "step": 4730 }, { "epoch": 0.10531461073586917, "grad_norm": 1.8125, "learning_rate": 1.789377777777778e-05, "loss": 0.4593, "step": 4740 }, { "epoch": 0.10553679345894064, "grad_norm": 1.859375, "learning_rate": 1.7889333333333335e-05, "loss": 0.4329, "step": 4750 }, { "epoch": 0.10575897618201209, "grad_norm": 1.6328125, "learning_rate": 1.788488888888889e-05, "loss": 0.389, "step": 4760 }, { "epoch": 0.10598115890508354, "grad_norm": 1.71875, "learning_rate": 1.7880444444444448e-05, "loss": 0.4639, "step": 4770 }, { "epoch": 0.10620334162815499, "grad_norm": 1.9375, "learning_rate": 1.7876000000000003e-05, "loss": 0.4327, "step": 4780 }, { "epoch": 0.10642552435122644, "grad_norm": 1.9375, "learning_rate": 1.7871555555555558e-05, "loss": 0.4199, "step": 4790 }, { "epoch": 0.10664770707429791, "grad_norm": 1.9765625, "learning_rate": 1.7867111111111113e-05, "loss": 0.4632, "step": 4800 }, { "epoch": 0.10686988979736936, "grad_norm": 2.09375, "learning_rate": 1.7862666666666668e-05, "loss": 0.449, "step": 4810 }, { "epoch": 0.10709207252044081, "grad_norm": 2.046875, "learning_rate": 1.7858222222222222e-05, "loss": 0.4737, "step": 4820 }, { "epoch": 0.10731425524351226, "grad_norm": 1.5859375, "learning_rate": 1.7853777777777777e-05, "loss": 0.4336, "step": 4830 }, { "epoch": 0.10753643796658371, "grad_norm": 1.9765625, "learning_rate": 1.7849333333333335e-05, "loss": 0.4554, "step": 4840 }, { "epoch": 0.10775862068965517, "grad_norm": 1.5703125, "learning_rate": 1.784488888888889e-05, "loss": 0.3901, "step": 4850 }, { "epoch": 0.10798080341272663, "grad_norm": 1.6953125, "learning_rate": 1.784044444444445e-05, "loss": 0.4239, "step": 4860 }, { "epoch": 0.10820298613579808, "grad_norm": 1.8203125, "learning_rate": 1.7836000000000003e-05, "loss": 0.4552, "step": 4870 }, { "epoch": 0.10842516885886953, "grad_norm": 1.828125, "learning_rate": 1.7831555555555558e-05, "loss": 0.4197, "step": 4880 }, { "epoch": 0.10864735158194098, "grad_norm": 1.7421875, "learning_rate": 1.7827111111111113e-05, "loss": 0.404, "step": 4890 }, { "epoch": 0.10886953430501244, "grad_norm": 2.015625, "learning_rate": 1.7822666666666668e-05, "loss": 0.4529, "step": 4900 }, { "epoch": 0.1090917170280839, "grad_norm": 1.671875, "learning_rate": 1.7818222222222223e-05, "loss": 0.4506, "step": 4910 }, { "epoch": 0.10931389975115535, "grad_norm": 1.953125, "learning_rate": 1.7813777777777778e-05, "loss": 0.4431, "step": 4920 }, { "epoch": 0.1095360824742268, "grad_norm": 2.28125, "learning_rate": 1.7809333333333332e-05, "loss": 0.4971, "step": 4930 }, { "epoch": 0.10975826519729825, "grad_norm": 1.5703125, "learning_rate": 1.780488888888889e-05, "loss": 0.4553, "step": 4940 }, { "epoch": 0.1099804479203697, "grad_norm": 1.703125, "learning_rate": 1.7800444444444446e-05, "loss": 0.4072, "step": 4950 }, { "epoch": 0.11020263064344117, "grad_norm": 1.90625, "learning_rate": 1.7796000000000004e-05, "loss": 0.4075, "step": 4960 }, { "epoch": 0.11042481336651262, "grad_norm": 2.0625, "learning_rate": 1.779155555555556e-05, "loss": 0.4849, "step": 4970 }, { "epoch": 0.11064699608958407, "grad_norm": 1.8203125, "learning_rate": 1.7787111111111113e-05, "loss": 0.4143, "step": 4980 }, { "epoch": 0.11086917881265552, "grad_norm": 2.0625, "learning_rate": 1.7782666666666668e-05, "loss": 0.4471, "step": 4990 }, { "epoch": 0.11109136153572698, "grad_norm": 1.890625, "learning_rate": 1.7778222222222223e-05, "loss": 0.4606, "step": 5000 }, { "epoch": 0.11131354425879844, "grad_norm": 1.7734375, "learning_rate": 1.7773777777777778e-05, "loss": 0.4429, "step": 5010 }, { "epoch": 0.11153572698186989, "grad_norm": 1.7890625, "learning_rate": 1.7769333333333333e-05, "loss": 0.4411, "step": 5020 }, { "epoch": 0.11175790970494134, "grad_norm": 1.8203125, "learning_rate": 1.7764888888888888e-05, "loss": 0.4839, "step": 5030 }, { "epoch": 0.1119800924280128, "grad_norm": 2.09375, "learning_rate": 1.7760444444444446e-05, "loss": 0.4667, "step": 5040 }, { "epoch": 0.11220227515108425, "grad_norm": 1.7734375, "learning_rate": 1.7756e-05, "loss": 0.4371, "step": 5050 }, { "epoch": 0.11242445787415571, "grad_norm": 1.8125, "learning_rate": 1.775155555555556e-05, "loss": 0.435, "step": 5060 }, { "epoch": 0.11264664059722716, "grad_norm": 1.7265625, "learning_rate": 1.7747111111111114e-05, "loss": 0.426, "step": 5070 }, { "epoch": 0.11286882332029861, "grad_norm": 2.03125, "learning_rate": 1.774266666666667e-05, "loss": 0.4797, "step": 5080 }, { "epoch": 0.11309100604337007, "grad_norm": 1.734375, "learning_rate": 1.7738222222222224e-05, "loss": 0.4882, "step": 5090 }, { "epoch": 0.11331318876644152, "grad_norm": 1.7265625, "learning_rate": 1.773377777777778e-05, "loss": 0.4936, "step": 5100 }, { "epoch": 0.11353537148951298, "grad_norm": 1.765625, "learning_rate": 1.7729333333333333e-05, "loss": 0.4728, "step": 5110 }, { "epoch": 0.11375755421258443, "grad_norm": 1.6640625, "learning_rate": 1.7724888888888888e-05, "loss": 0.4162, "step": 5120 }, { "epoch": 0.11397973693565588, "grad_norm": 1.5234375, "learning_rate": 1.7720444444444446e-05, "loss": 0.4128, "step": 5130 }, { "epoch": 0.11420191965872734, "grad_norm": 1.8359375, "learning_rate": 1.7716e-05, "loss": 0.4388, "step": 5140 }, { "epoch": 0.11442410238179879, "grad_norm": 2.015625, "learning_rate": 1.7711555555555556e-05, "loss": 0.4442, "step": 5150 }, { "epoch": 0.11464628510487025, "grad_norm": 2.140625, "learning_rate": 1.7707111111111114e-05, "loss": 0.4308, "step": 5160 }, { "epoch": 0.1148684678279417, "grad_norm": 1.625, "learning_rate": 1.770266666666667e-05, "loss": 0.4208, "step": 5170 }, { "epoch": 0.11509065055101315, "grad_norm": 1.84375, "learning_rate": 1.7698222222222224e-05, "loss": 0.4284, "step": 5180 }, { "epoch": 0.1153128332740846, "grad_norm": 1.859375, "learning_rate": 1.769377777777778e-05, "loss": 0.4512, "step": 5190 }, { "epoch": 0.11553501599715606, "grad_norm": 1.96875, "learning_rate": 1.7689333333333334e-05, "loss": 0.464, "step": 5200 }, { "epoch": 0.11575719872022751, "grad_norm": 2.109375, "learning_rate": 1.7684888888888892e-05, "loss": 0.4381, "step": 5210 }, { "epoch": 0.11597938144329897, "grad_norm": 1.7734375, "learning_rate": 1.7680444444444447e-05, "loss": 0.4139, "step": 5220 }, { "epoch": 0.11620156416637042, "grad_norm": 1.5234375, "learning_rate": 1.7676e-05, "loss": 0.4377, "step": 5230 }, { "epoch": 0.11642374688944188, "grad_norm": 2.0, "learning_rate": 1.7671555555555556e-05, "loss": 0.4682, "step": 5240 }, { "epoch": 0.11664592961251333, "grad_norm": 1.890625, "learning_rate": 1.766711111111111e-05, "loss": 0.4179, "step": 5250 }, { "epoch": 0.11686811233558478, "grad_norm": 1.859375, "learning_rate": 1.766266666666667e-05, "loss": 0.438, "step": 5260 }, { "epoch": 0.11709029505865624, "grad_norm": 2.03125, "learning_rate": 1.7658222222222224e-05, "loss": 0.4374, "step": 5270 }, { "epoch": 0.1173124777817277, "grad_norm": 1.9765625, "learning_rate": 1.765377777777778e-05, "loss": 0.4344, "step": 5280 }, { "epoch": 0.11753466050479915, "grad_norm": 1.9921875, "learning_rate": 1.7649333333333334e-05, "loss": 0.4616, "step": 5290 }, { "epoch": 0.1177568432278706, "grad_norm": 1.71875, "learning_rate": 1.7644888888888892e-05, "loss": 0.3837, "step": 5300 }, { "epoch": 0.11797902595094205, "grad_norm": 1.9453125, "learning_rate": 1.7640444444444447e-05, "loss": 0.4316, "step": 5310 }, { "epoch": 0.11820120867401351, "grad_norm": 1.9921875, "learning_rate": 1.7636000000000002e-05, "loss": 0.4124, "step": 5320 }, { "epoch": 0.11842339139708496, "grad_norm": 2.015625, "learning_rate": 1.7631555555555557e-05, "loss": 0.4376, "step": 5330 }, { "epoch": 0.11864557412015642, "grad_norm": 2.015625, "learning_rate": 1.762711111111111e-05, "loss": 0.437, "step": 5340 }, { "epoch": 0.11886775684322787, "grad_norm": 1.765625, "learning_rate": 1.7622666666666666e-05, "loss": 0.4459, "step": 5350 }, { "epoch": 0.11908993956629932, "grad_norm": 2.3125, "learning_rate": 1.7618222222222225e-05, "loss": 0.4244, "step": 5360 }, { "epoch": 0.11931212228937078, "grad_norm": 2.5, "learning_rate": 1.761377777777778e-05, "loss": 0.4318, "step": 5370 }, { "epoch": 0.11953430501244224, "grad_norm": 2.0, "learning_rate": 1.7609333333333334e-05, "loss": 0.4228, "step": 5380 }, { "epoch": 0.11975648773551369, "grad_norm": 1.9921875, "learning_rate": 1.7604888888888893e-05, "loss": 0.4659, "step": 5390 }, { "epoch": 0.11997867045858514, "grad_norm": 2.296875, "learning_rate": 1.7600444444444447e-05, "loss": 0.4646, "step": 5400 }, { "epoch": 0.12020085318165659, "grad_norm": 1.7109375, "learning_rate": 1.7596000000000002e-05, "loss": 0.405, "step": 5410 }, { "epoch": 0.12042303590472805, "grad_norm": 2.5625, "learning_rate": 1.7591555555555557e-05, "loss": 0.4746, "step": 5420 }, { "epoch": 0.1206452186277995, "grad_norm": 2.0, "learning_rate": 1.7587111111111112e-05, "loss": 0.4529, "step": 5430 }, { "epoch": 0.12086740135087096, "grad_norm": 1.6953125, "learning_rate": 1.7582666666666667e-05, "loss": 0.4221, "step": 5440 }, { "epoch": 0.12108958407394241, "grad_norm": 1.734375, "learning_rate": 1.757822222222222e-05, "loss": 0.4198, "step": 5450 }, { "epoch": 0.12131176679701386, "grad_norm": 1.84375, "learning_rate": 1.757377777777778e-05, "loss": 0.4673, "step": 5460 }, { "epoch": 0.12153394952008532, "grad_norm": 1.8125, "learning_rate": 1.7569333333333335e-05, "loss": 0.4656, "step": 5470 }, { "epoch": 0.12175613224315678, "grad_norm": 1.7421875, "learning_rate": 1.7564888888888893e-05, "loss": 0.4143, "step": 5480 }, { "epoch": 0.12197831496622823, "grad_norm": 1.765625, "learning_rate": 1.7560444444444448e-05, "loss": 0.4781, "step": 5490 }, { "epoch": 0.12220049768929968, "grad_norm": 1.5625, "learning_rate": 1.7556000000000003e-05, "loss": 0.4364, "step": 5500 }, { "epoch": 0.12242268041237113, "grad_norm": 1.8125, "learning_rate": 1.7551555555555558e-05, "loss": 0.4422, "step": 5510 }, { "epoch": 0.1226448631354426, "grad_norm": 1.359375, "learning_rate": 1.7547111111111112e-05, "loss": 0.4161, "step": 5520 }, { "epoch": 0.12286704585851405, "grad_norm": 1.7578125, "learning_rate": 1.7542666666666667e-05, "loss": 0.4024, "step": 5530 }, { "epoch": 0.1230892285815855, "grad_norm": 2.125, "learning_rate": 1.7538222222222222e-05, "loss": 0.4651, "step": 5540 }, { "epoch": 0.12331141130465695, "grad_norm": 1.859375, "learning_rate": 1.7533777777777777e-05, "loss": 0.4519, "step": 5550 }, { "epoch": 0.1235335940277284, "grad_norm": 1.9765625, "learning_rate": 1.7529333333333335e-05, "loss": 0.4236, "step": 5560 }, { "epoch": 0.12375577675079986, "grad_norm": 2.0625, "learning_rate": 1.752488888888889e-05, "loss": 0.4643, "step": 5570 }, { "epoch": 0.12397795947387132, "grad_norm": 1.8359375, "learning_rate": 1.7520444444444448e-05, "loss": 0.3911, "step": 5580 }, { "epoch": 0.12420014219694277, "grad_norm": 1.84375, "learning_rate": 1.7516000000000003e-05, "loss": 0.4421, "step": 5590 }, { "epoch": 0.12442232492001422, "grad_norm": 1.9921875, "learning_rate": 1.7511555555555558e-05, "loss": 0.4339, "step": 5600 }, { "epoch": 0.12464450764308567, "grad_norm": 2.328125, "learning_rate": 1.7507111111111113e-05, "loss": 0.4642, "step": 5610 }, { "epoch": 0.12486669036615712, "grad_norm": 1.71875, "learning_rate": 1.7502666666666668e-05, "loss": 0.4276, "step": 5620 }, { "epoch": 0.12508887308922859, "grad_norm": 2.328125, "learning_rate": 1.7498222222222222e-05, "loss": 0.405, "step": 5630 }, { "epoch": 0.12531105581230004, "grad_norm": 1.90625, "learning_rate": 1.7493777777777777e-05, "loss": 0.4389, "step": 5640 }, { "epoch": 0.1255332385353715, "grad_norm": 1.9140625, "learning_rate": 1.7489333333333332e-05, "loss": 0.4203, "step": 5650 }, { "epoch": 0.12575542125844294, "grad_norm": 1.890625, "learning_rate": 1.748488888888889e-05, "loss": 0.4125, "step": 5660 }, { "epoch": 0.1259776039815144, "grad_norm": 1.546875, "learning_rate": 1.7480444444444445e-05, "loss": 0.4202, "step": 5670 }, { "epoch": 0.12619978670458584, "grad_norm": 1.421875, "learning_rate": 1.7476000000000003e-05, "loss": 0.4097, "step": 5680 }, { "epoch": 0.1264219694276573, "grad_norm": 2.265625, "learning_rate": 1.7471555555555558e-05, "loss": 0.4353, "step": 5690 }, { "epoch": 0.12664415215072877, "grad_norm": 1.859375, "learning_rate": 1.7467111111111113e-05, "loss": 0.4267, "step": 5700 }, { "epoch": 0.12686633487380022, "grad_norm": 2.171875, "learning_rate": 1.7462666666666668e-05, "loss": 0.4148, "step": 5710 }, { "epoch": 0.12708851759687168, "grad_norm": 1.765625, "learning_rate": 1.7458222222222223e-05, "loss": 0.4694, "step": 5720 }, { "epoch": 0.12731070031994313, "grad_norm": 1.5390625, "learning_rate": 1.7453777777777778e-05, "loss": 0.4575, "step": 5730 }, { "epoch": 0.12753288304301458, "grad_norm": 1.9296875, "learning_rate": 1.7449333333333333e-05, "loss": 0.4206, "step": 5740 }, { "epoch": 0.12775506576608603, "grad_norm": 2.171875, "learning_rate": 1.744488888888889e-05, "loss": 0.4359, "step": 5750 }, { "epoch": 0.12797724848915748, "grad_norm": 2.25, "learning_rate": 1.7440444444444446e-05, "loss": 0.4633, "step": 5760 }, { "epoch": 0.12819943121222893, "grad_norm": 1.9375, "learning_rate": 1.7436e-05, "loss": 0.4273, "step": 5770 }, { "epoch": 0.12842161393530038, "grad_norm": 2.03125, "learning_rate": 1.743155555555556e-05, "loss": 0.4296, "step": 5780 }, { "epoch": 0.12864379665837183, "grad_norm": 1.453125, "learning_rate": 1.7427111111111114e-05, "loss": 0.4253, "step": 5790 }, { "epoch": 0.12886597938144329, "grad_norm": 2.046875, "learning_rate": 1.742266666666667e-05, "loss": 0.4561, "step": 5800 }, { "epoch": 0.12908816210451476, "grad_norm": 1.6953125, "learning_rate": 1.7418222222222223e-05, "loss": 0.4183, "step": 5810 }, { "epoch": 0.12931034482758622, "grad_norm": 1.8046875, "learning_rate": 1.7413777777777778e-05, "loss": 0.4209, "step": 5820 }, { "epoch": 0.12953252755065767, "grad_norm": 1.9296875, "learning_rate": 1.7409333333333336e-05, "loss": 0.4257, "step": 5830 }, { "epoch": 0.12975471027372912, "grad_norm": 1.8515625, "learning_rate": 1.740488888888889e-05, "loss": 0.4621, "step": 5840 }, { "epoch": 0.12997689299680057, "grad_norm": 2.03125, "learning_rate": 1.7400444444444446e-05, "loss": 0.4583, "step": 5850 }, { "epoch": 0.13019907571987202, "grad_norm": 2.078125, "learning_rate": 1.7396e-05, "loss": 0.4717, "step": 5860 }, { "epoch": 0.13042125844294347, "grad_norm": 1.890625, "learning_rate": 1.7391555555555556e-05, "loss": 0.405, "step": 5870 }, { "epoch": 0.13064344116601492, "grad_norm": 1.8515625, "learning_rate": 1.7387111111111114e-05, "loss": 0.4456, "step": 5880 }, { "epoch": 0.13086562388908637, "grad_norm": 1.765625, "learning_rate": 1.738266666666667e-05, "loss": 0.4651, "step": 5890 }, { "epoch": 0.13108780661215783, "grad_norm": 1.5703125, "learning_rate": 1.7378222222222224e-05, "loss": 0.3938, "step": 5900 }, { "epoch": 0.1313099893352293, "grad_norm": 1.8046875, "learning_rate": 1.737377777777778e-05, "loss": 0.4213, "step": 5910 }, { "epoch": 0.13153217205830076, "grad_norm": 1.6875, "learning_rate": 1.7369333333333337e-05, "loss": 0.438, "step": 5920 }, { "epoch": 0.1317543547813722, "grad_norm": 1.890625, "learning_rate": 1.736488888888889e-05, "loss": 0.4435, "step": 5930 }, { "epoch": 0.13197653750444366, "grad_norm": 1.828125, "learning_rate": 1.7360444444444446e-05, "loss": 0.4096, "step": 5940 }, { "epoch": 0.1321987202275151, "grad_norm": 1.75, "learning_rate": 1.7356e-05, "loss": 0.4139, "step": 5950 }, { "epoch": 0.13242090295058656, "grad_norm": 1.953125, "learning_rate": 1.7351555555555556e-05, "loss": 0.3856, "step": 5960 }, { "epoch": 0.132643085673658, "grad_norm": 1.625, "learning_rate": 1.734711111111111e-05, "loss": 0.4187, "step": 5970 }, { "epoch": 0.13286526839672946, "grad_norm": 1.9453125, "learning_rate": 1.734266666666667e-05, "loss": 0.4594, "step": 5980 }, { "epoch": 0.13308745111980091, "grad_norm": 2.03125, "learning_rate": 1.7338222222222224e-05, "loss": 0.4173, "step": 5990 }, { "epoch": 0.13330963384287237, "grad_norm": 1.7578125, "learning_rate": 1.733377777777778e-05, "loss": 0.4522, "step": 6000 }, { "epoch": 0.13353181656594384, "grad_norm": 1.8125, "learning_rate": 1.7329333333333337e-05, "loss": 0.421, "step": 6010 }, { "epoch": 0.1337539992890153, "grad_norm": 2.328125, "learning_rate": 1.7324888888888892e-05, "loss": 0.4665, "step": 6020 }, { "epoch": 0.13397618201208675, "grad_norm": 1.890625, "learning_rate": 1.7320444444444447e-05, "loss": 0.4417, "step": 6030 }, { "epoch": 0.1341983647351582, "grad_norm": 1.7734375, "learning_rate": 1.7316e-05, "loss": 0.3872, "step": 6040 }, { "epoch": 0.13442054745822965, "grad_norm": 1.921875, "learning_rate": 1.7311555555555556e-05, "loss": 0.4432, "step": 6050 }, { "epoch": 0.1346427301813011, "grad_norm": 2.0625, "learning_rate": 1.730711111111111e-05, "loss": 0.4623, "step": 6060 }, { "epoch": 0.13486491290437255, "grad_norm": 2.109375, "learning_rate": 1.7302666666666666e-05, "loss": 0.4505, "step": 6070 }, { "epoch": 0.135087095627444, "grad_norm": 1.8203125, "learning_rate": 1.7298222222222224e-05, "loss": 0.4451, "step": 6080 }, { "epoch": 0.13530927835051546, "grad_norm": 1.6796875, "learning_rate": 1.729377777777778e-05, "loss": 0.4378, "step": 6090 }, { "epoch": 0.1355314610735869, "grad_norm": 2.015625, "learning_rate": 1.7289333333333337e-05, "loss": 0.4491, "step": 6100 }, { "epoch": 0.13575364379665839, "grad_norm": 2.03125, "learning_rate": 1.7284888888888892e-05, "loss": 0.421, "step": 6110 }, { "epoch": 0.13597582651972984, "grad_norm": 2.09375, "learning_rate": 1.7280444444444447e-05, "loss": 0.4395, "step": 6120 }, { "epoch": 0.1361980092428013, "grad_norm": 1.640625, "learning_rate": 1.7276000000000002e-05, "loss": 0.4214, "step": 6130 }, { "epoch": 0.13642019196587274, "grad_norm": 1.6796875, "learning_rate": 1.7271555555555557e-05, "loss": 0.4264, "step": 6140 }, { "epoch": 0.1366423746889442, "grad_norm": 1.546875, "learning_rate": 1.726711111111111e-05, "loss": 0.3855, "step": 6150 }, { "epoch": 0.13686455741201564, "grad_norm": 1.8203125, "learning_rate": 1.7262666666666667e-05, "loss": 0.4683, "step": 6160 }, { "epoch": 0.1370867401350871, "grad_norm": 1.9296875, "learning_rate": 1.725822222222222e-05, "loss": 0.4176, "step": 6170 }, { "epoch": 0.13730892285815854, "grad_norm": 1.9453125, "learning_rate": 1.725377777777778e-05, "loss": 0.4196, "step": 6180 }, { "epoch": 0.13753110558123, "grad_norm": 1.765625, "learning_rate": 1.7249333333333334e-05, "loss": 0.4262, "step": 6190 }, { "epoch": 0.13775328830430145, "grad_norm": 2.265625, "learning_rate": 1.7244888888888893e-05, "loss": 0.4148, "step": 6200 }, { "epoch": 0.1379754710273729, "grad_norm": 1.7734375, "learning_rate": 1.7240444444444448e-05, "loss": 0.398, "step": 6210 }, { "epoch": 0.13819765375044438, "grad_norm": 1.90625, "learning_rate": 1.7236000000000002e-05, "loss": 0.425, "step": 6220 }, { "epoch": 0.13841983647351583, "grad_norm": 1.78125, "learning_rate": 1.7231555555555557e-05, "loss": 0.4279, "step": 6230 }, { "epoch": 0.13864201919658728, "grad_norm": 1.921875, "learning_rate": 1.7227111111111112e-05, "loss": 0.4503, "step": 6240 }, { "epoch": 0.13886420191965873, "grad_norm": 1.890625, "learning_rate": 1.7222666666666667e-05, "loss": 0.445, "step": 6250 }, { "epoch": 0.13908638464273018, "grad_norm": 1.8125, "learning_rate": 1.7218222222222222e-05, "loss": 0.4387, "step": 6260 }, { "epoch": 0.13930856736580163, "grad_norm": 1.9140625, "learning_rate": 1.7213777777777777e-05, "loss": 0.4213, "step": 6270 }, { "epoch": 0.13953075008887308, "grad_norm": 1.7265625, "learning_rate": 1.7209333333333335e-05, "loss": 0.4127, "step": 6280 }, { "epoch": 0.13975293281194454, "grad_norm": 1.9140625, "learning_rate": 1.720488888888889e-05, "loss": 0.4115, "step": 6290 }, { "epoch": 0.139975115535016, "grad_norm": 1.96875, "learning_rate": 1.7200444444444448e-05, "loss": 0.4444, "step": 6300 }, { "epoch": 0.14019729825808744, "grad_norm": 1.953125, "learning_rate": 1.7196000000000003e-05, "loss": 0.4442, "step": 6310 }, { "epoch": 0.14041948098115892, "grad_norm": 1.90625, "learning_rate": 1.7191555555555558e-05, "loss": 0.4216, "step": 6320 }, { "epoch": 0.14064166370423037, "grad_norm": 2.078125, "learning_rate": 1.7187111111111112e-05, "loss": 0.4539, "step": 6330 }, { "epoch": 0.14086384642730182, "grad_norm": 1.890625, "learning_rate": 1.7182666666666667e-05, "loss": 0.4116, "step": 6340 }, { "epoch": 0.14108602915037327, "grad_norm": 2.234375, "learning_rate": 1.7178222222222222e-05, "loss": 0.4251, "step": 6350 }, { "epoch": 0.14130821187344472, "grad_norm": 1.6171875, "learning_rate": 1.7173777777777777e-05, "loss": 0.4355, "step": 6360 }, { "epoch": 0.14153039459651617, "grad_norm": 2.0, "learning_rate": 1.7169333333333335e-05, "loss": 0.409, "step": 6370 }, { "epoch": 0.14175257731958762, "grad_norm": 1.859375, "learning_rate": 1.716488888888889e-05, "loss": 0.4058, "step": 6380 }, { "epoch": 0.14197476004265908, "grad_norm": 1.703125, "learning_rate": 1.7160444444444445e-05, "loss": 0.4102, "step": 6390 }, { "epoch": 0.14219694276573053, "grad_norm": 2.203125, "learning_rate": 1.7156000000000003e-05, "loss": 0.4403, "step": 6400 }, { "epoch": 0.14241912548880198, "grad_norm": 1.8046875, "learning_rate": 1.7151555555555558e-05, "loss": 0.4555, "step": 6410 }, { "epoch": 0.14264130821187346, "grad_norm": 1.7734375, "learning_rate": 1.7147111111111113e-05, "loss": 0.4371, "step": 6420 }, { "epoch": 0.1428634909349449, "grad_norm": 1.8359375, "learning_rate": 1.7142666666666668e-05, "loss": 0.4506, "step": 6430 }, { "epoch": 0.14308567365801636, "grad_norm": 1.640625, "learning_rate": 1.7138222222222222e-05, "loss": 0.402, "step": 6440 }, { "epoch": 0.1433078563810878, "grad_norm": 1.9765625, "learning_rate": 1.713377777777778e-05, "loss": 0.4712, "step": 6450 }, { "epoch": 0.14353003910415926, "grad_norm": 1.90625, "learning_rate": 1.7129333333333336e-05, "loss": 0.4108, "step": 6460 }, { "epoch": 0.14375222182723071, "grad_norm": 1.890625, "learning_rate": 1.712488888888889e-05, "loss": 0.4138, "step": 6470 }, { "epoch": 0.14397440455030217, "grad_norm": 2.046875, "learning_rate": 1.7120444444444445e-05, "loss": 0.4218, "step": 6480 }, { "epoch": 0.14419658727337362, "grad_norm": 1.78125, "learning_rate": 1.7116e-05, "loss": 0.3997, "step": 6490 }, { "epoch": 0.14441876999644507, "grad_norm": 2.171875, "learning_rate": 1.711155555555556e-05, "loss": 0.3893, "step": 6500 }, { "epoch": 0.14464095271951652, "grad_norm": 2.015625, "learning_rate": 1.7107111111111113e-05, "loss": 0.453, "step": 6510 }, { "epoch": 0.144863135442588, "grad_norm": 1.5390625, "learning_rate": 1.7102666666666668e-05, "loss": 0.4047, "step": 6520 }, { "epoch": 0.14508531816565945, "grad_norm": 1.9609375, "learning_rate": 1.7098222222222223e-05, "loss": 0.3939, "step": 6530 }, { "epoch": 0.1453075008887309, "grad_norm": 1.9921875, "learning_rate": 1.709377777777778e-05, "loss": 0.4508, "step": 6540 }, { "epoch": 0.14552968361180235, "grad_norm": 1.5625, "learning_rate": 1.7089333333333336e-05, "loss": 0.429, "step": 6550 }, { "epoch": 0.1457518663348738, "grad_norm": 2.078125, "learning_rate": 1.708488888888889e-05, "loss": 0.4291, "step": 6560 }, { "epoch": 0.14597404905794525, "grad_norm": 2.109375, "learning_rate": 1.7080444444444446e-05, "loss": 0.4572, "step": 6570 }, { "epoch": 0.1461962317810167, "grad_norm": 1.90625, "learning_rate": 1.7076e-05, "loss": 0.4417, "step": 6580 }, { "epoch": 0.14641841450408816, "grad_norm": 1.984375, "learning_rate": 1.7071555555555555e-05, "loss": 0.4619, "step": 6590 }, { "epoch": 0.1466405972271596, "grad_norm": 2.234375, "learning_rate": 1.7067111111111114e-05, "loss": 0.4344, "step": 6600 }, { "epoch": 0.14686277995023106, "grad_norm": 1.71875, "learning_rate": 1.706266666666667e-05, "loss": 0.4255, "step": 6610 }, { "epoch": 0.1470849626733025, "grad_norm": 1.953125, "learning_rate": 1.7058222222222223e-05, "loss": 0.4519, "step": 6620 }, { "epoch": 0.147307145396374, "grad_norm": 2.125, "learning_rate": 1.705377777777778e-05, "loss": 0.4239, "step": 6630 }, { "epoch": 0.14752932811944544, "grad_norm": 2.09375, "learning_rate": 1.7049333333333336e-05, "loss": 0.4371, "step": 6640 }, { "epoch": 0.1477515108425169, "grad_norm": 1.9375, "learning_rate": 1.704488888888889e-05, "loss": 0.4028, "step": 6650 }, { "epoch": 0.14797369356558834, "grad_norm": 2.296875, "learning_rate": 1.7040444444444446e-05, "loss": 0.4049, "step": 6660 }, { "epoch": 0.1481958762886598, "grad_norm": 1.6875, "learning_rate": 1.7036e-05, "loss": 0.4177, "step": 6670 }, { "epoch": 0.14841805901173125, "grad_norm": 2.046875, "learning_rate": 1.7031555555555556e-05, "loss": 0.4601, "step": 6680 }, { "epoch": 0.1486402417348027, "grad_norm": 1.6875, "learning_rate": 1.702711111111111e-05, "loss": 0.4334, "step": 6690 }, { "epoch": 0.14886242445787415, "grad_norm": 2.0625, "learning_rate": 1.702266666666667e-05, "loss": 0.4312, "step": 6700 }, { "epoch": 0.1490846071809456, "grad_norm": 1.859375, "learning_rate": 1.7018222222222224e-05, "loss": 0.4537, "step": 6710 }, { "epoch": 0.14930678990401705, "grad_norm": 1.6953125, "learning_rate": 1.701377777777778e-05, "loss": 0.3942, "step": 6720 }, { "epoch": 0.14952897262708853, "grad_norm": 2.109375, "learning_rate": 1.7009333333333337e-05, "loss": 0.4384, "step": 6730 }, { "epoch": 0.14975115535015998, "grad_norm": 1.96875, "learning_rate": 1.700488888888889e-05, "loss": 0.4478, "step": 6740 }, { "epoch": 0.14997333807323143, "grad_norm": 1.640625, "learning_rate": 1.7000444444444446e-05, "loss": 0.4289, "step": 6750 }, { "epoch": 0.15019552079630288, "grad_norm": 2.125, "learning_rate": 1.6996e-05, "loss": 0.4442, "step": 6760 }, { "epoch": 0.15041770351937434, "grad_norm": 2.0625, "learning_rate": 1.6991555555555556e-05, "loss": 0.4368, "step": 6770 }, { "epoch": 0.1506398862424458, "grad_norm": 2.03125, "learning_rate": 1.698711111111111e-05, "loss": 0.447, "step": 6780 }, { "epoch": 0.15086206896551724, "grad_norm": 1.625, "learning_rate": 1.6982666666666666e-05, "loss": 0.4368, "step": 6790 }, { "epoch": 0.1510842516885887, "grad_norm": 2.0625, "learning_rate": 1.6978222222222224e-05, "loss": 0.4489, "step": 6800 }, { "epoch": 0.15130643441166014, "grad_norm": 2.171875, "learning_rate": 1.697377777777778e-05, "loss": 0.4006, "step": 6810 }, { "epoch": 0.1515286171347316, "grad_norm": 1.8515625, "learning_rate": 1.6969333333333337e-05, "loss": 0.4404, "step": 6820 }, { "epoch": 0.15175079985780307, "grad_norm": 2.015625, "learning_rate": 1.6964888888888892e-05, "loss": 0.4283, "step": 6830 }, { "epoch": 0.15197298258087452, "grad_norm": 1.84375, "learning_rate": 1.6960444444444447e-05, "loss": 0.4502, "step": 6840 }, { "epoch": 0.15219516530394597, "grad_norm": 1.703125, "learning_rate": 1.6956e-05, "loss": 0.3995, "step": 6850 }, { "epoch": 0.15241734802701742, "grad_norm": 2.0, "learning_rate": 1.6951555555555556e-05, "loss": 0.4811, "step": 6860 }, { "epoch": 0.15263953075008888, "grad_norm": 1.9609375, "learning_rate": 1.694711111111111e-05, "loss": 0.4137, "step": 6870 }, { "epoch": 0.15286171347316033, "grad_norm": 1.6875, "learning_rate": 1.6942666666666666e-05, "loss": 0.4289, "step": 6880 }, { "epoch": 0.15308389619623178, "grad_norm": 1.4921875, "learning_rate": 1.693822222222222e-05, "loss": 0.4144, "step": 6890 }, { "epoch": 0.15330607891930323, "grad_norm": 2.328125, "learning_rate": 1.693377777777778e-05, "loss": 0.4346, "step": 6900 }, { "epoch": 0.15352826164237468, "grad_norm": 1.6796875, "learning_rate": 1.6929333333333334e-05, "loss": 0.3889, "step": 6910 }, { "epoch": 0.15375044436544613, "grad_norm": 2.140625, "learning_rate": 1.6924888888888892e-05, "loss": 0.4404, "step": 6920 }, { "epoch": 0.1539726270885176, "grad_norm": 2.0625, "learning_rate": 1.6920444444444447e-05, "loss": 0.4582, "step": 6930 }, { "epoch": 0.15419480981158906, "grad_norm": 1.9609375, "learning_rate": 1.6916000000000002e-05, "loss": 0.4138, "step": 6940 }, { "epoch": 0.1544169925346605, "grad_norm": 2.21875, "learning_rate": 1.6911555555555557e-05, "loss": 0.4371, "step": 6950 }, { "epoch": 0.15463917525773196, "grad_norm": 1.8671875, "learning_rate": 1.6907111111111112e-05, "loss": 0.4283, "step": 6960 }, { "epoch": 0.15486135798080342, "grad_norm": 2.234375, "learning_rate": 1.6902666666666667e-05, "loss": 0.4166, "step": 6970 }, { "epoch": 0.15508354070387487, "grad_norm": 1.9765625, "learning_rate": 1.689822222222222e-05, "loss": 0.4245, "step": 6980 }, { "epoch": 0.15530572342694632, "grad_norm": 1.90625, "learning_rate": 1.689377777777778e-05, "loss": 0.4454, "step": 6990 }, { "epoch": 0.15552790615001777, "grad_norm": 1.8671875, "learning_rate": 1.6889333333333334e-05, "loss": 0.4286, "step": 7000 }, { "epoch": 0.15575008887308922, "grad_norm": 2.171875, "learning_rate": 1.688488888888889e-05, "loss": 0.429, "step": 7010 }, { "epoch": 0.15597227159616067, "grad_norm": 2.109375, "learning_rate": 1.6880444444444448e-05, "loss": 0.4676, "step": 7020 }, { "epoch": 0.15619445431923212, "grad_norm": 2.125, "learning_rate": 1.6876000000000002e-05, "loss": 0.4476, "step": 7030 }, { "epoch": 0.1564166370423036, "grad_norm": 2.03125, "learning_rate": 1.6871555555555557e-05, "loss": 0.4274, "step": 7040 }, { "epoch": 0.15663881976537505, "grad_norm": 2.046875, "learning_rate": 1.6867111111111112e-05, "loss": 0.45, "step": 7050 }, { "epoch": 0.1568610024884465, "grad_norm": 1.78125, "learning_rate": 1.6862666666666667e-05, "loss": 0.3895, "step": 7060 }, { "epoch": 0.15708318521151796, "grad_norm": 1.859375, "learning_rate": 1.6858222222222225e-05, "loss": 0.4266, "step": 7070 }, { "epoch": 0.1573053679345894, "grad_norm": 1.9765625, "learning_rate": 1.685377777777778e-05, "loss": 0.41, "step": 7080 }, { "epoch": 0.15752755065766086, "grad_norm": 1.5859375, "learning_rate": 1.6849333333333335e-05, "loss": 0.4325, "step": 7090 }, { "epoch": 0.1577497333807323, "grad_norm": 1.75, "learning_rate": 1.684488888888889e-05, "loss": 0.4437, "step": 7100 }, { "epoch": 0.15797191610380376, "grad_norm": 1.9609375, "learning_rate": 1.6840444444444445e-05, "loss": 0.4044, "step": 7110 }, { "epoch": 0.1581940988268752, "grad_norm": 2.0, "learning_rate": 1.6836000000000003e-05, "loss": 0.4015, "step": 7120 }, { "epoch": 0.15841628154994666, "grad_norm": 1.8984375, "learning_rate": 1.6831555555555558e-05, "loss": 0.4318, "step": 7130 }, { "epoch": 0.15863846427301814, "grad_norm": 1.90625, "learning_rate": 1.6827111111111112e-05, "loss": 0.4308, "step": 7140 }, { "epoch": 0.1588606469960896, "grad_norm": 2.140625, "learning_rate": 1.6822666666666667e-05, "loss": 0.4395, "step": 7150 }, { "epoch": 0.15908282971916105, "grad_norm": 2.328125, "learning_rate": 1.6818222222222226e-05, "loss": 0.4215, "step": 7160 }, { "epoch": 0.1593050124422325, "grad_norm": 2.125, "learning_rate": 1.681377777777778e-05, "loss": 0.4215, "step": 7170 }, { "epoch": 0.15952719516530395, "grad_norm": 2.015625, "learning_rate": 1.6809333333333335e-05, "loss": 0.4018, "step": 7180 }, { "epoch": 0.1597493778883754, "grad_norm": 1.5859375, "learning_rate": 1.680488888888889e-05, "loss": 0.4546, "step": 7190 }, { "epoch": 0.15997156061144685, "grad_norm": 1.875, "learning_rate": 1.6800444444444445e-05, "loss": 0.4275, "step": 7200 }, { "epoch": 0.1601937433345183, "grad_norm": 1.9453125, "learning_rate": 1.6796e-05, "loss": 0.4418, "step": 7210 }, { "epoch": 0.16041592605758975, "grad_norm": 1.859375, "learning_rate": 1.6791555555555558e-05, "loss": 0.3983, "step": 7220 }, { "epoch": 0.1606381087806612, "grad_norm": 1.875, "learning_rate": 1.6787111111111113e-05, "loss": 0.4474, "step": 7230 }, { "epoch": 0.16086029150373268, "grad_norm": 1.71875, "learning_rate": 1.6782666666666668e-05, "loss": 0.4463, "step": 7240 }, { "epoch": 0.16108247422680413, "grad_norm": 1.9296875, "learning_rate": 1.6778222222222226e-05, "loss": 0.4474, "step": 7250 }, { "epoch": 0.16130465694987559, "grad_norm": 2.265625, "learning_rate": 1.677377777777778e-05, "loss": 0.4243, "step": 7260 }, { "epoch": 0.16152683967294704, "grad_norm": 2.046875, "learning_rate": 1.6769333333333336e-05, "loss": 0.4301, "step": 7270 }, { "epoch": 0.1617490223960185, "grad_norm": 1.8515625, "learning_rate": 1.676488888888889e-05, "loss": 0.4086, "step": 7280 }, { "epoch": 0.16197120511908994, "grad_norm": 1.6171875, "learning_rate": 1.6760444444444445e-05, "loss": 0.4076, "step": 7290 }, { "epoch": 0.1621933878421614, "grad_norm": 1.9765625, "learning_rate": 1.6756e-05, "loss": 0.4533, "step": 7300 }, { "epoch": 0.16241557056523284, "grad_norm": 2.078125, "learning_rate": 1.6751555555555555e-05, "loss": 0.3845, "step": 7310 }, { "epoch": 0.1626377532883043, "grad_norm": 1.9921875, "learning_rate": 1.6747111111111113e-05, "loss": 0.3745, "step": 7320 }, { "epoch": 0.16285993601137574, "grad_norm": 1.875, "learning_rate": 1.6742666666666668e-05, "loss": 0.3866, "step": 7330 }, { "epoch": 0.1630821187344472, "grad_norm": 2.046875, "learning_rate": 1.6738222222222223e-05, "loss": 0.447, "step": 7340 }, { "epoch": 0.16330430145751867, "grad_norm": 2.28125, "learning_rate": 1.673377777777778e-05, "loss": 0.4418, "step": 7350 }, { "epoch": 0.16352648418059013, "grad_norm": 2.0625, "learning_rate": 1.6729333333333336e-05, "loss": 0.3611, "step": 7360 }, { "epoch": 0.16374866690366158, "grad_norm": 1.828125, "learning_rate": 1.672488888888889e-05, "loss": 0.4063, "step": 7370 }, { "epoch": 0.16397084962673303, "grad_norm": 2.28125, "learning_rate": 1.6720444444444446e-05, "loss": 0.4245, "step": 7380 }, { "epoch": 0.16419303234980448, "grad_norm": 1.9296875, "learning_rate": 1.6716e-05, "loss": 0.434, "step": 7390 }, { "epoch": 0.16441521507287593, "grad_norm": 2.234375, "learning_rate": 1.6711555555555555e-05, "loss": 0.3997, "step": 7400 }, { "epoch": 0.16463739779594738, "grad_norm": 2.1875, "learning_rate": 1.670711111111111e-05, "loss": 0.4215, "step": 7410 }, { "epoch": 0.16485958051901883, "grad_norm": 2.203125, "learning_rate": 1.670266666666667e-05, "loss": 0.4563, "step": 7420 }, { "epoch": 0.16508176324209028, "grad_norm": 2.421875, "learning_rate": 1.6698222222222223e-05, "loss": 0.4351, "step": 7430 }, { "epoch": 0.16530394596516174, "grad_norm": 1.828125, "learning_rate": 1.6693777777777778e-05, "loss": 0.4374, "step": 7440 }, { "epoch": 0.16552612868823322, "grad_norm": 2.03125, "learning_rate": 1.6689333333333336e-05, "loss": 0.4122, "step": 7450 }, { "epoch": 0.16574831141130467, "grad_norm": 2.3125, "learning_rate": 1.668488888888889e-05, "loss": 0.4547, "step": 7460 }, { "epoch": 0.16597049413437612, "grad_norm": 2.171875, "learning_rate": 1.6680444444444446e-05, "loss": 0.4414, "step": 7470 }, { "epoch": 0.16619267685744757, "grad_norm": 1.71875, "learning_rate": 1.6676e-05, "loss": 0.4345, "step": 7480 }, { "epoch": 0.16641485958051902, "grad_norm": 2.03125, "learning_rate": 1.6671555555555556e-05, "loss": 0.4033, "step": 7490 }, { "epoch": 0.16663704230359047, "grad_norm": 1.9921875, "learning_rate": 1.666711111111111e-05, "loss": 0.4159, "step": 7500 }, { "epoch": 0.16685922502666192, "grad_norm": 2.75, "learning_rate": 1.6662666666666665e-05, "loss": 0.4305, "step": 7510 }, { "epoch": 0.16708140774973337, "grad_norm": 2.328125, "learning_rate": 1.6658222222222224e-05, "loss": 0.4756, "step": 7520 }, { "epoch": 0.16730359047280483, "grad_norm": 1.8046875, "learning_rate": 1.665377777777778e-05, "loss": 0.4448, "step": 7530 }, { "epoch": 0.16752577319587628, "grad_norm": 1.7421875, "learning_rate": 1.6649333333333337e-05, "loss": 0.3996, "step": 7540 }, { "epoch": 0.16774795591894776, "grad_norm": 1.8515625, "learning_rate": 1.664488888888889e-05, "loss": 0.3974, "step": 7550 }, { "epoch": 0.1679701386420192, "grad_norm": 1.6875, "learning_rate": 1.6640444444444446e-05, "loss": 0.4326, "step": 7560 }, { "epoch": 0.16819232136509066, "grad_norm": 1.5859375, "learning_rate": 1.6636e-05, "loss": 0.4018, "step": 7570 }, { "epoch": 0.1684145040881621, "grad_norm": 2.234375, "learning_rate": 1.6631555555555556e-05, "loss": 0.4749, "step": 7580 }, { "epoch": 0.16863668681123356, "grad_norm": 1.65625, "learning_rate": 1.662711111111111e-05, "loss": 0.4123, "step": 7590 }, { "epoch": 0.168858869534305, "grad_norm": 1.78125, "learning_rate": 1.6622666666666666e-05, "loss": 0.4242, "step": 7600 }, { "epoch": 0.16908105225737646, "grad_norm": 1.6328125, "learning_rate": 1.6618222222222224e-05, "loss": 0.4025, "step": 7610 }, { "epoch": 0.16930323498044791, "grad_norm": 2.296875, "learning_rate": 1.661377777777778e-05, "loss": 0.3951, "step": 7620 }, { "epoch": 0.16952541770351937, "grad_norm": 1.6796875, "learning_rate": 1.6609333333333334e-05, "loss": 0.4199, "step": 7630 }, { "epoch": 0.16974760042659082, "grad_norm": 1.65625, "learning_rate": 1.6604888888888892e-05, "loss": 0.4041, "step": 7640 }, { "epoch": 0.1699697831496623, "grad_norm": 1.859375, "learning_rate": 1.6600444444444447e-05, "loss": 0.4304, "step": 7650 }, { "epoch": 0.17019196587273375, "grad_norm": 1.9140625, "learning_rate": 1.6596000000000002e-05, "loss": 0.4008, "step": 7660 }, { "epoch": 0.1704141485958052, "grad_norm": 1.8515625, "learning_rate": 1.6591555555555557e-05, "loss": 0.3829, "step": 7670 }, { "epoch": 0.17063633131887665, "grad_norm": 2.09375, "learning_rate": 1.658711111111111e-05, "loss": 0.4511, "step": 7680 }, { "epoch": 0.1708585140419481, "grad_norm": 1.71875, "learning_rate": 1.658266666666667e-05, "loss": 0.4274, "step": 7690 }, { "epoch": 0.17108069676501955, "grad_norm": 1.84375, "learning_rate": 1.6578222222222224e-05, "loss": 0.426, "step": 7700 }, { "epoch": 0.171302879488091, "grad_norm": 2.046875, "learning_rate": 1.657377777777778e-05, "loss": 0.4066, "step": 7710 }, { "epoch": 0.17152506221116245, "grad_norm": 1.703125, "learning_rate": 1.6569333333333334e-05, "loss": 0.412, "step": 7720 }, { "epoch": 0.1717472449342339, "grad_norm": 2.1875, "learning_rate": 1.656488888888889e-05, "loss": 0.4298, "step": 7730 }, { "epoch": 0.17196942765730536, "grad_norm": 1.9921875, "learning_rate": 1.6560444444444447e-05, "loss": 0.4233, "step": 7740 }, { "epoch": 0.1721916103803768, "grad_norm": 1.8046875, "learning_rate": 1.6556000000000002e-05, "loss": 0.4258, "step": 7750 }, { "epoch": 0.1724137931034483, "grad_norm": 2.15625, "learning_rate": 1.6551555555555557e-05, "loss": 0.4082, "step": 7760 }, { "epoch": 0.17263597582651974, "grad_norm": 1.5, "learning_rate": 1.6547111111111112e-05, "loss": 0.4062, "step": 7770 }, { "epoch": 0.1728581585495912, "grad_norm": 1.65625, "learning_rate": 1.654266666666667e-05, "loss": 0.397, "step": 7780 }, { "epoch": 0.17308034127266264, "grad_norm": 2.375, "learning_rate": 1.6538222222222225e-05, "loss": 0.4277, "step": 7790 }, { "epoch": 0.1733025239957341, "grad_norm": 1.7109375, "learning_rate": 1.653377777777778e-05, "loss": 0.4284, "step": 7800 }, { "epoch": 0.17352470671880554, "grad_norm": 1.8515625, "learning_rate": 1.6529333333333335e-05, "loss": 0.4554, "step": 7810 }, { "epoch": 0.173746889441877, "grad_norm": 2.046875, "learning_rate": 1.652488888888889e-05, "loss": 0.4404, "step": 7820 }, { "epoch": 0.17396907216494845, "grad_norm": 2.21875, "learning_rate": 1.6520444444444444e-05, "loss": 0.4119, "step": 7830 }, { "epoch": 0.1741912548880199, "grad_norm": 2.359375, "learning_rate": 1.6516000000000002e-05, "loss": 0.4272, "step": 7840 }, { "epoch": 0.17441343761109135, "grad_norm": 2.515625, "learning_rate": 1.6511555555555557e-05, "loss": 0.4163, "step": 7850 }, { "epoch": 0.17463562033416283, "grad_norm": 1.75, "learning_rate": 1.6507111111111112e-05, "loss": 0.4186, "step": 7860 }, { "epoch": 0.17485780305723428, "grad_norm": 2.015625, "learning_rate": 1.650266666666667e-05, "loss": 0.438, "step": 7870 }, { "epoch": 0.17507998578030573, "grad_norm": 1.765625, "learning_rate": 1.6498222222222225e-05, "loss": 0.4119, "step": 7880 }, { "epoch": 0.17530216850337718, "grad_norm": 2.109375, "learning_rate": 1.649377777777778e-05, "loss": 0.4031, "step": 7890 }, { "epoch": 0.17552435122644863, "grad_norm": 2.140625, "learning_rate": 1.6489333333333335e-05, "loss": 0.4007, "step": 7900 }, { "epoch": 0.17574653394952008, "grad_norm": 1.8984375, "learning_rate": 1.648488888888889e-05, "loss": 0.4203, "step": 7910 }, { "epoch": 0.17596871667259154, "grad_norm": 1.84375, "learning_rate": 1.6480444444444445e-05, "loss": 0.4436, "step": 7920 }, { "epoch": 0.176190899395663, "grad_norm": 2.078125, "learning_rate": 1.6476e-05, "loss": 0.415, "step": 7930 }, { "epoch": 0.17641308211873444, "grad_norm": 2.515625, "learning_rate": 1.6471555555555558e-05, "loss": 0.4047, "step": 7940 }, { "epoch": 0.1766352648418059, "grad_norm": 2.125, "learning_rate": 1.6467111111111113e-05, "loss": 0.4092, "step": 7950 }, { "epoch": 0.17685744756487737, "grad_norm": 1.90625, "learning_rate": 1.6462666666666667e-05, "loss": 0.3885, "step": 7960 }, { "epoch": 0.17707963028794882, "grad_norm": 1.734375, "learning_rate": 1.6458222222222226e-05, "loss": 0.424, "step": 7970 }, { "epoch": 0.17730181301102027, "grad_norm": 1.5703125, "learning_rate": 1.645377777777778e-05, "loss": 0.3977, "step": 7980 }, { "epoch": 0.17752399573409172, "grad_norm": 2.078125, "learning_rate": 1.6449333333333335e-05, "loss": 0.417, "step": 7990 }, { "epoch": 0.17774617845716317, "grad_norm": 1.578125, "learning_rate": 1.644488888888889e-05, "loss": 0.3975, "step": 8000 }, { "epoch": 0.17796836118023462, "grad_norm": 1.890625, "learning_rate": 1.6440444444444445e-05, "loss": 0.3919, "step": 8010 }, { "epoch": 0.17819054390330608, "grad_norm": 2.046875, "learning_rate": 1.6436e-05, "loss": 0.4242, "step": 8020 }, { "epoch": 0.17841272662637753, "grad_norm": 2.1875, "learning_rate": 1.6431555555555555e-05, "loss": 0.4207, "step": 8030 }, { "epoch": 0.17863490934944898, "grad_norm": 1.859375, "learning_rate": 1.6427111111111113e-05, "loss": 0.3976, "step": 8040 }, { "epoch": 0.17885709207252043, "grad_norm": 1.8046875, "learning_rate": 1.6422666666666668e-05, "loss": 0.4321, "step": 8050 }, { "epoch": 0.1790792747955919, "grad_norm": 1.7578125, "learning_rate": 1.6418222222222223e-05, "loss": 0.4196, "step": 8060 }, { "epoch": 0.17930145751866336, "grad_norm": 1.59375, "learning_rate": 1.641377777777778e-05, "loss": 0.4491, "step": 8070 }, { "epoch": 0.1795236402417348, "grad_norm": 1.953125, "learning_rate": 1.6409333333333336e-05, "loss": 0.4271, "step": 8080 }, { "epoch": 0.17974582296480626, "grad_norm": 2.390625, "learning_rate": 1.640488888888889e-05, "loss": 0.4652, "step": 8090 }, { "epoch": 0.1799680056878777, "grad_norm": 1.9140625, "learning_rate": 1.6400444444444445e-05, "loss": 0.422, "step": 8100 }, { "epoch": 0.18019018841094916, "grad_norm": 2.125, "learning_rate": 1.6396e-05, "loss": 0.3766, "step": 8110 }, { "epoch": 0.18041237113402062, "grad_norm": 2.09375, "learning_rate": 1.6391555555555555e-05, "loss": 0.3981, "step": 8120 }, { "epoch": 0.18063455385709207, "grad_norm": 1.796875, "learning_rate": 1.638711111111111e-05, "loss": 0.4288, "step": 8130 }, { "epoch": 0.18085673658016352, "grad_norm": 2.0625, "learning_rate": 1.6382666666666668e-05, "loss": 0.4185, "step": 8140 }, { "epoch": 0.18107891930323497, "grad_norm": 2.046875, "learning_rate": 1.6378222222222223e-05, "loss": 0.4175, "step": 8150 }, { "epoch": 0.18130110202630642, "grad_norm": 2.015625, "learning_rate": 1.637377777777778e-05, "loss": 0.4535, "step": 8160 }, { "epoch": 0.1815232847493779, "grad_norm": 1.3984375, "learning_rate": 1.6369333333333336e-05, "loss": 0.4269, "step": 8170 }, { "epoch": 0.18174546747244935, "grad_norm": 1.8984375, "learning_rate": 1.636488888888889e-05, "loss": 0.4164, "step": 8180 }, { "epoch": 0.1819676501955208, "grad_norm": 1.8671875, "learning_rate": 1.6360444444444446e-05, "loss": 0.3888, "step": 8190 }, { "epoch": 0.18218983291859225, "grad_norm": 1.984375, "learning_rate": 1.6356e-05, "loss": 0.4377, "step": 8200 }, { "epoch": 0.1824120156416637, "grad_norm": 1.984375, "learning_rate": 1.6351555555555555e-05, "loss": 0.4383, "step": 8210 }, { "epoch": 0.18263419836473516, "grad_norm": 2.375, "learning_rate": 1.634711111111111e-05, "loss": 0.461, "step": 8220 }, { "epoch": 0.1828563810878066, "grad_norm": 2.3125, "learning_rate": 1.634266666666667e-05, "loss": 0.4201, "step": 8230 }, { "epoch": 0.18307856381087806, "grad_norm": 1.6953125, "learning_rate": 1.6338222222222223e-05, "loss": 0.4292, "step": 8240 }, { "epoch": 0.1833007465339495, "grad_norm": 2.328125, "learning_rate": 1.6333777777777778e-05, "loss": 0.4403, "step": 8250 }, { "epoch": 0.18352292925702096, "grad_norm": 1.8203125, "learning_rate": 1.6329333333333336e-05, "loss": 0.3969, "step": 8260 }, { "epoch": 0.18374511198009244, "grad_norm": 2.28125, "learning_rate": 1.632488888888889e-05, "loss": 0.4344, "step": 8270 }, { "epoch": 0.1839672947031639, "grad_norm": 1.7734375, "learning_rate": 1.6320444444444446e-05, "loss": 0.3917, "step": 8280 }, { "epoch": 0.18418947742623534, "grad_norm": 2.078125, "learning_rate": 1.6316e-05, "loss": 0.4092, "step": 8290 }, { "epoch": 0.1844116601493068, "grad_norm": 1.4765625, "learning_rate": 1.6311555555555556e-05, "loss": 0.4259, "step": 8300 }, { "epoch": 0.18463384287237825, "grad_norm": 2.25, "learning_rate": 1.630711111111111e-05, "loss": 0.4341, "step": 8310 }, { "epoch": 0.1848560255954497, "grad_norm": 1.703125, "learning_rate": 1.630266666666667e-05, "loss": 0.4374, "step": 8320 }, { "epoch": 0.18507820831852115, "grad_norm": 1.921875, "learning_rate": 1.6298222222222224e-05, "loss": 0.4293, "step": 8330 }, { "epoch": 0.1853003910415926, "grad_norm": 1.8828125, "learning_rate": 1.629377777777778e-05, "loss": 0.4398, "step": 8340 }, { "epoch": 0.18552257376466405, "grad_norm": 1.8671875, "learning_rate": 1.6289333333333333e-05, "loss": 0.4505, "step": 8350 }, { "epoch": 0.1857447564877355, "grad_norm": 1.703125, "learning_rate": 1.628488888888889e-05, "loss": 0.3886, "step": 8360 }, { "epoch": 0.18596693921080698, "grad_norm": 1.640625, "learning_rate": 1.6280444444444447e-05, "loss": 0.3695, "step": 8370 }, { "epoch": 0.18618912193387843, "grad_norm": 1.6171875, "learning_rate": 1.6276e-05, "loss": 0.4525, "step": 8380 }, { "epoch": 0.18641130465694988, "grad_norm": 2.296875, "learning_rate": 1.6271555555555556e-05, "loss": 0.4285, "step": 8390 }, { "epoch": 0.18663348738002133, "grad_norm": 2.140625, "learning_rate": 1.6267111111111114e-05, "loss": 0.4404, "step": 8400 }, { "epoch": 0.18685567010309279, "grad_norm": 1.84375, "learning_rate": 1.626266666666667e-05, "loss": 0.405, "step": 8410 }, { "epoch": 0.18707785282616424, "grad_norm": 2.203125, "learning_rate": 1.6258222222222224e-05, "loss": 0.4259, "step": 8420 }, { "epoch": 0.1873000355492357, "grad_norm": 1.8515625, "learning_rate": 1.625377777777778e-05, "loss": 0.4658, "step": 8430 }, { "epoch": 0.18752221827230714, "grad_norm": 2.15625, "learning_rate": 1.6249333333333334e-05, "loss": 0.4148, "step": 8440 }, { "epoch": 0.1877444009953786, "grad_norm": 2.109375, "learning_rate": 1.624488888888889e-05, "loss": 0.4423, "step": 8450 }, { "epoch": 0.18796658371845004, "grad_norm": 1.6171875, "learning_rate": 1.6240444444444447e-05, "loss": 0.4113, "step": 8460 }, { "epoch": 0.18818876644152152, "grad_norm": 2.0625, "learning_rate": 1.6236000000000002e-05, "loss": 0.4338, "step": 8470 }, { "epoch": 0.18841094916459297, "grad_norm": 2.296875, "learning_rate": 1.6231555555555557e-05, "loss": 0.4146, "step": 8480 }, { "epoch": 0.18863313188766442, "grad_norm": 1.5234375, "learning_rate": 1.6227111111111115e-05, "loss": 0.399, "step": 8490 }, { "epoch": 0.18885531461073587, "grad_norm": 1.96875, "learning_rate": 1.622266666666667e-05, "loss": 0.4089, "step": 8500 }, { "epoch": 0.18907749733380733, "grad_norm": 2.0, "learning_rate": 1.6218222222222225e-05, "loss": 0.421, "step": 8510 }, { "epoch": 0.18929968005687878, "grad_norm": 1.9609375, "learning_rate": 1.621377777777778e-05, "loss": 0.4608, "step": 8520 }, { "epoch": 0.18952186277995023, "grad_norm": 1.9296875, "learning_rate": 1.6209333333333334e-05, "loss": 0.4154, "step": 8530 }, { "epoch": 0.18974404550302168, "grad_norm": 2.0, "learning_rate": 1.620488888888889e-05, "loss": 0.394, "step": 8540 }, { "epoch": 0.18996622822609313, "grad_norm": 2.046875, "learning_rate": 1.6200444444444444e-05, "loss": 0.4051, "step": 8550 }, { "epoch": 0.19018841094916458, "grad_norm": 1.875, "learning_rate": 1.6196000000000002e-05, "loss": 0.3784, "step": 8560 }, { "epoch": 0.19041059367223603, "grad_norm": 1.9453125, "learning_rate": 1.6191555555555557e-05, "loss": 0.4235, "step": 8570 }, { "epoch": 0.1906327763953075, "grad_norm": 1.71875, "learning_rate": 1.6187111111111112e-05, "loss": 0.4422, "step": 8580 }, { "epoch": 0.19085495911837896, "grad_norm": 1.4453125, "learning_rate": 1.618266666666667e-05, "loss": 0.387, "step": 8590 }, { "epoch": 0.19107714184145042, "grad_norm": 1.859375, "learning_rate": 1.6178222222222225e-05, "loss": 0.4141, "step": 8600 }, { "epoch": 0.19129932456452187, "grad_norm": 1.7734375, "learning_rate": 1.617377777777778e-05, "loss": 0.4182, "step": 8610 }, { "epoch": 0.19152150728759332, "grad_norm": 1.8203125, "learning_rate": 1.6169333333333335e-05, "loss": 0.3894, "step": 8620 }, { "epoch": 0.19174369001066477, "grad_norm": 1.875, "learning_rate": 1.616488888888889e-05, "loss": 0.4464, "step": 8630 }, { "epoch": 0.19196587273373622, "grad_norm": 2.015625, "learning_rate": 1.6160444444444444e-05, "loss": 0.4745, "step": 8640 }, { "epoch": 0.19218805545680767, "grad_norm": 2.203125, "learning_rate": 1.6156e-05, "loss": 0.4128, "step": 8650 }, { "epoch": 0.19241023817987912, "grad_norm": 1.953125, "learning_rate": 1.6151555555555557e-05, "loss": 0.4236, "step": 8660 }, { "epoch": 0.19263242090295057, "grad_norm": 2.3125, "learning_rate": 1.6147111111111112e-05, "loss": 0.4418, "step": 8670 }, { "epoch": 0.19285460362602205, "grad_norm": 1.9921875, "learning_rate": 1.6142666666666667e-05, "loss": 0.4106, "step": 8680 }, { "epoch": 0.1930767863490935, "grad_norm": 1.7734375, "learning_rate": 1.6138222222222225e-05, "loss": 0.4154, "step": 8690 }, { "epoch": 0.19329896907216496, "grad_norm": 2.296875, "learning_rate": 1.613377777777778e-05, "loss": 0.4608, "step": 8700 }, { "epoch": 0.1935211517952364, "grad_norm": 2.015625, "learning_rate": 1.6129333333333335e-05, "loss": 0.4428, "step": 8710 }, { "epoch": 0.19374333451830786, "grad_norm": 1.6875, "learning_rate": 1.612488888888889e-05, "loss": 0.4212, "step": 8720 }, { "epoch": 0.1939655172413793, "grad_norm": 1.65625, "learning_rate": 1.6120444444444445e-05, "loss": 0.4195, "step": 8730 }, { "epoch": 0.19418769996445076, "grad_norm": 1.9609375, "learning_rate": 1.6116e-05, "loss": 0.4108, "step": 8740 }, { "epoch": 0.1944098826875222, "grad_norm": 1.53125, "learning_rate": 1.6111555555555554e-05, "loss": 0.3959, "step": 8750 }, { "epoch": 0.19463206541059366, "grad_norm": 1.7578125, "learning_rate": 1.6107111111111113e-05, "loss": 0.4463, "step": 8760 }, { "epoch": 0.19485424813366511, "grad_norm": 2.03125, "learning_rate": 1.6102666666666667e-05, "loss": 0.4036, "step": 8770 }, { "epoch": 0.1950764308567366, "grad_norm": 2.0, "learning_rate": 1.6098222222222222e-05, "loss": 0.3891, "step": 8780 }, { "epoch": 0.19529861357980804, "grad_norm": 2.265625, "learning_rate": 1.609377777777778e-05, "loss": 0.5055, "step": 8790 }, { "epoch": 0.1955207963028795, "grad_norm": 1.9453125, "learning_rate": 1.6089333333333335e-05, "loss": 0.4405, "step": 8800 }, { "epoch": 0.19574297902595095, "grad_norm": 1.96875, "learning_rate": 1.608488888888889e-05, "loss": 0.4027, "step": 8810 }, { "epoch": 0.1959651617490224, "grad_norm": 1.8828125, "learning_rate": 1.6080444444444445e-05, "loss": 0.4373, "step": 8820 }, { "epoch": 0.19618734447209385, "grad_norm": 1.9765625, "learning_rate": 1.6076e-05, "loss": 0.4376, "step": 8830 }, { "epoch": 0.1964095271951653, "grad_norm": 2.171875, "learning_rate": 1.6071555555555555e-05, "loss": 0.4551, "step": 8840 }, { "epoch": 0.19663170991823675, "grad_norm": 1.765625, "learning_rate": 1.6067111111111113e-05, "loss": 0.4178, "step": 8850 }, { "epoch": 0.1968538926413082, "grad_norm": 1.984375, "learning_rate": 1.6062666666666668e-05, "loss": 0.403, "step": 8860 }, { "epoch": 0.19707607536437965, "grad_norm": 2.265625, "learning_rate": 1.6058222222222223e-05, "loss": 0.4423, "step": 8870 }, { "epoch": 0.1972982580874511, "grad_norm": 1.84375, "learning_rate": 1.605377777777778e-05, "loss": 0.4375, "step": 8880 }, { "epoch": 0.19752044081052259, "grad_norm": 2.046875, "learning_rate": 1.6049333333333336e-05, "loss": 0.4115, "step": 8890 }, { "epoch": 0.19774262353359404, "grad_norm": 1.8203125, "learning_rate": 1.604488888888889e-05, "loss": 0.4264, "step": 8900 }, { "epoch": 0.1979648062566655, "grad_norm": 1.671875, "learning_rate": 1.6040444444444445e-05, "loss": 0.4018, "step": 8910 }, { "epoch": 0.19818698897973694, "grad_norm": 1.8359375, "learning_rate": 1.6036e-05, "loss": 0.4006, "step": 8920 }, { "epoch": 0.1984091717028084, "grad_norm": 2.1875, "learning_rate": 1.6031555555555555e-05, "loss": 0.4268, "step": 8930 }, { "epoch": 0.19863135442587984, "grad_norm": 2.03125, "learning_rate": 1.6027111111111113e-05, "loss": 0.4188, "step": 8940 }, { "epoch": 0.1988535371489513, "grad_norm": 2.015625, "learning_rate": 1.6022666666666668e-05, "loss": 0.401, "step": 8950 }, { "epoch": 0.19907571987202274, "grad_norm": 1.953125, "learning_rate": 1.6018222222222223e-05, "loss": 0.4369, "step": 8960 }, { "epoch": 0.1992979025950942, "grad_norm": 2.09375, "learning_rate": 1.6013777777777778e-05, "loss": 0.4732, "step": 8970 }, { "epoch": 0.19952008531816565, "grad_norm": 1.8984375, "learning_rate": 1.6009333333333336e-05, "loss": 0.3873, "step": 8980 }, { "epoch": 0.19974226804123713, "grad_norm": 1.671875, "learning_rate": 1.600488888888889e-05, "loss": 0.4255, "step": 8990 }, { "epoch": 0.19996445076430858, "grad_norm": 2.21875, "learning_rate": 1.6000444444444446e-05, "loss": 0.4358, "step": 9000 }, { "epoch": 0.20018663348738003, "grad_norm": 1.9296875, "learning_rate": 1.5996e-05, "loss": 0.4104, "step": 9010 }, { "epoch": 0.20040881621045148, "grad_norm": 2.21875, "learning_rate": 1.599155555555556e-05, "loss": 0.4249, "step": 9020 }, { "epoch": 0.20063099893352293, "grad_norm": 2.1875, "learning_rate": 1.5987111111111114e-05, "loss": 0.4467, "step": 9030 }, { "epoch": 0.20085318165659438, "grad_norm": 1.8828125, "learning_rate": 1.598266666666667e-05, "loss": 0.4227, "step": 9040 }, { "epoch": 0.20107536437966583, "grad_norm": 1.5, "learning_rate": 1.5978222222222223e-05, "loss": 0.3909, "step": 9050 }, { "epoch": 0.20129754710273728, "grad_norm": 2.046875, "learning_rate": 1.5973777777777778e-05, "loss": 0.4254, "step": 9060 }, { "epoch": 0.20151972982580874, "grad_norm": 1.953125, "learning_rate": 1.5969333333333333e-05, "loss": 0.4576, "step": 9070 }, { "epoch": 0.2017419125488802, "grad_norm": 1.9296875, "learning_rate": 1.596488888888889e-05, "loss": 0.4059, "step": 9080 }, { "epoch": 0.20196409527195167, "grad_norm": 2.015625, "learning_rate": 1.5960444444444446e-05, "loss": 0.4446, "step": 9090 }, { "epoch": 0.20218627799502312, "grad_norm": 1.9921875, "learning_rate": 1.5956e-05, "loss": 0.4466, "step": 9100 }, { "epoch": 0.20240846071809457, "grad_norm": 1.7734375, "learning_rate": 1.595155555555556e-05, "loss": 0.4282, "step": 9110 }, { "epoch": 0.20263064344116602, "grad_norm": 2.09375, "learning_rate": 1.5947111111111114e-05, "loss": 0.4246, "step": 9120 }, { "epoch": 0.20285282616423747, "grad_norm": 2.78125, "learning_rate": 1.594266666666667e-05, "loss": 0.4582, "step": 9130 }, { "epoch": 0.20307500888730892, "grad_norm": 2.21875, "learning_rate": 1.5938222222222224e-05, "loss": 0.4167, "step": 9140 }, { "epoch": 0.20329719161038037, "grad_norm": 2.234375, "learning_rate": 1.593377777777778e-05, "loss": 0.4345, "step": 9150 }, { "epoch": 0.20351937433345182, "grad_norm": 1.9609375, "learning_rate": 1.5929333333333334e-05, "loss": 0.4211, "step": 9160 }, { "epoch": 0.20374155705652328, "grad_norm": 2.28125, "learning_rate": 1.592488888888889e-05, "loss": 0.4329, "step": 9170 }, { "epoch": 0.20396373977959473, "grad_norm": 2.3125, "learning_rate": 1.5920444444444447e-05, "loss": 0.4123, "step": 9180 }, { "epoch": 0.2041859225026662, "grad_norm": 1.53125, "learning_rate": 1.5916e-05, "loss": 0.4131, "step": 9190 }, { "epoch": 0.20440810522573766, "grad_norm": 1.8046875, "learning_rate": 1.5911555555555556e-05, "loss": 0.3962, "step": 9200 }, { "epoch": 0.2046302879488091, "grad_norm": 1.6484375, "learning_rate": 1.5907111111111115e-05, "loss": 0.4045, "step": 9210 }, { "epoch": 0.20485247067188056, "grad_norm": 2.203125, "learning_rate": 1.590266666666667e-05, "loss": 0.4396, "step": 9220 }, { "epoch": 0.205074653394952, "grad_norm": 2.09375, "learning_rate": 1.5898222222222224e-05, "loss": 0.4224, "step": 9230 }, { "epoch": 0.20529683611802346, "grad_norm": 2.03125, "learning_rate": 1.589377777777778e-05, "loss": 0.4518, "step": 9240 }, { "epoch": 0.2055190188410949, "grad_norm": 2.234375, "learning_rate": 1.5889333333333334e-05, "loss": 0.4198, "step": 9250 }, { "epoch": 0.20574120156416636, "grad_norm": 2.125, "learning_rate": 1.588488888888889e-05, "loss": 0.4272, "step": 9260 }, { "epoch": 0.20596338428723782, "grad_norm": 1.875, "learning_rate": 1.5880444444444444e-05, "loss": 0.4243, "step": 9270 }, { "epoch": 0.20618556701030927, "grad_norm": 2.0625, "learning_rate": 1.5876000000000002e-05, "loss": 0.3997, "step": 9280 }, { "epoch": 0.20640774973338072, "grad_norm": 1.953125, "learning_rate": 1.5871555555555557e-05, "loss": 0.4025, "step": 9290 }, { "epoch": 0.2066299324564522, "grad_norm": 1.84375, "learning_rate": 1.586711111111111e-05, "loss": 0.3916, "step": 9300 }, { "epoch": 0.20685211517952365, "grad_norm": 1.8671875, "learning_rate": 1.586266666666667e-05, "loss": 0.4201, "step": 9310 }, { "epoch": 0.2070742979025951, "grad_norm": 1.9609375, "learning_rate": 1.5858222222222225e-05, "loss": 0.4181, "step": 9320 }, { "epoch": 0.20729648062566655, "grad_norm": 2.21875, "learning_rate": 1.585377777777778e-05, "loss": 0.4092, "step": 9330 }, { "epoch": 0.207518663348738, "grad_norm": 2.078125, "learning_rate": 1.5849333333333334e-05, "loss": 0.4069, "step": 9340 }, { "epoch": 0.20774084607180945, "grad_norm": 1.7109375, "learning_rate": 1.584488888888889e-05, "loss": 0.3788, "step": 9350 }, { "epoch": 0.2079630287948809, "grad_norm": 1.765625, "learning_rate": 1.5840444444444444e-05, "loss": 0.3968, "step": 9360 }, { "epoch": 0.20818521151795236, "grad_norm": 1.875, "learning_rate": 1.5836e-05, "loss": 0.4224, "step": 9370 }, { "epoch": 0.2084073942410238, "grad_norm": 1.6640625, "learning_rate": 1.5831555555555557e-05, "loss": 0.4123, "step": 9380 }, { "epoch": 0.20862957696409526, "grad_norm": 1.71875, "learning_rate": 1.5827111111111112e-05, "loss": 0.4285, "step": 9390 }, { "epoch": 0.20885175968716674, "grad_norm": 1.8828125, "learning_rate": 1.5822666666666667e-05, "loss": 0.4053, "step": 9400 }, { "epoch": 0.2090739424102382, "grad_norm": 1.7890625, "learning_rate": 1.5818222222222225e-05, "loss": 0.3969, "step": 9410 }, { "epoch": 0.20929612513330964, "grad_norm": 1.8046875, "learning_rate": 1.581377777777778e-05, "loss": 0.4062, "step": 9420 }, { "epoch": 0.2095183078563811, "grad_norm": 2.03125, "learning_rate": 1.5809333333333335e-05, "loss": 0.4248, "step": 9430 }, { "epoch": 0.20974049057945254, "grad_norm": 1.8125, "learning_rate": 1.580488888888889e-05, "loss": 0.4129, "step": 9440 }, { "epoch": 0.209962673302524, "grad_norm": 1.7578125, "learning_rate": 1.5800444444444444e-05, "loss": 0.4518, "step": 9450 }, { "epoch": 0.21018485602559545, "grad_norm": 2.109375, "learning_rate": 1.5796e-05, "loss": 0.4386, "step": 9460 }, { "epoch": 0.2104070387486669, "grad_norm": 2.15625, "learning_rate": 1.5791555555555557e-05, "loss": 0.42, "step": 9470 }, { "epoch": 0.21062922147173835, "grad_norm": 1.9140625, "learning_rate": 1.5787111111111112e-05, "loss": 0.4053, "step": 9480 }, { "epoch": 0.2108514041948098, "grad_norm": 2.078125, "learning_rate": 1.5782666666666667e-05, "loss": 0.424, "step": 9490 }, { "epoch": 0.21107358691788128, "grad_norm": 2.234375, "learning_rate": 1.5778222222222225e-05, "loss": 0.4544, "step": 9500 }, { "epoch": 0.21129576964095273, "grad_norm": 1.7265625, "learning_rate": 1.577377777777778e-05, "loss": 0.4027, "step": 9510 }, { "epoch": 0.21151795236402418, "grad_norm": 2.40625, "learning_rate": 1.5769333333333335e-05, "loss": 0.4137, "step": 9520 }, { "epoch": 0.21174013508709563, "grad_norm": 1.8671875, "learning_rate": 1.576488888888889e-05, "loss": 0.4224, "step": 9530 }, { "epoch": 0.21196231781016708, "grad_norm": 1.9453125, "learning_rate": 1.5760444444444445e-05, "loss": 0.4379, "step": 9540 }, { "epoch": 0.21218450053323853, "grad_norm": 1.625, "learning_rate": 1.5756e-05, "loss": 0.4296, "step": 9550 }, { "epoch": 0.21240668325630999, "grad_norm": 2.203125, "learning_rate": 1.5751555555555558e-05, "loss": 0.4358, "step": 9560 }, { "epoch": 0.21262886597938144, "grad_norm": 2.25, "learning_rate": 1.5747111111111113e-05, "loss": 0.4208, "step": 9570 }, { "epoch": 0.2128510487024529, "grad_norm": 1.9296875, "learning_rate": 1.5742666666666668e-05, "loss": 0.4214, "step": 9580 }, { "epoch": 0.21307323142552434, "grad_norm": 1.6171875, "learning_rate": 1.5738222222222222e-05, "loss": 0.3849, "step": 9590 }, { "epoch": 0.21329541414859582, "grad_norm": 1.9921875, "learning_rate": 1.573377777777778e-05, "loss": 0.4297, "step": 9600 }, { "epoch": 0.21351759687166727, "grad_norm": 2.046875, "learning_rate": 1.5729333333333335e-05, "loss": 0.4194, "step": 9610 }, { "epoch": 0.21373977959473872, "grad_norm": 1.984375, "learning_rate": 1.572488888888889e-05, "loss": 0.3898, "step": 9620 }, { "epoch": 0.21396196231781017, "grad_norm": 2.046875, "learning_rate": 1.5720444444444445e-05, "loss": 0.4226, "step": 9630 }, { "epoch": 0.21418414504088162, "grad_norm": 2.265625, "learning_rate": 1.5716000000000003e-05, "loss": 0.4438, "step": 9640 }, { "epoch": 0.21440632776395308, "grad_norm": 2.03125, "learning_rate": 1.5711555555555558e-05, "loss": 0.4744, "step": 9650 }, { "epoch": 0.21462851048702453, "grad_norm": 2.296875, "learning_rate": 1.5707111111111113e-05, "loss": 0.4753, "step": 9660 }, { "epoch": 0.21485069321009598, "grad_norm": 2.25, "learning_rate": 1.5702666666666668e-05, "loss": 0.4282, "step": 9670 }, { "epoch": 0.21507287593316743, "grad_norm": 1.8203125, "learning_rate": 1.5698222222222223e-05, "loss": 0.4125, "step": 9680 }, { "epoch": 0.21529505865623888, "grad_norm": 1.8515625, "learning_rate": 1.5693777777777778e-05, "loss": 0.3632, "step": 9690 }, { "epoch": 0.21551724137931033, "grad_norm": 1.9609375, "learning_rate": 1.5689333333333336e-05, "loss": 0.4184, "step": 9700 }, { "epoch": 0.2157394241023818, "grad_norm": 1.9140625, "learning_rate": 1.568488888888889e-05, "loss": 0.431, "step": 9710 }, { "epoch": 0.21596160682545326, "grad_norm": 2.015625, "learning_rate": 1.5680444444444445e-05, "loss": 0.4466, "step": 9720 }, { "epoch": 0.2161837895485247, "grad_norm": 2.09375, "learning_rate": 1.5676000000000004e-05, "loss": 0.4611, "step": 9730 }, { "epoch": 0.21640597227159616, "grad_norm": 1.8203125, "learning_rate": 1.567155555555556e-05, "loss": 0.4283, "step": 9740 }, { "epoch": 0.21662815499466762, "grad_norm": 1.828125, "learning_rate": 1.5667111111111113e-05, "loss": 0.4084, "step": 9750 }, { "epoch": 0.21685033771773907, "grad_norm": 2.359375, "learning_rate": 1.5662666666666668e-05, "loss": 0.4268, "step": 9760 }, { "epoch": 0.21707252044081052, "grad_norm": 2.046875, "learning_rate": 1.5658222222222223e-05, "loss": 0.3887, "step": 9770 }, { "epoch": 0.21729470316388197, "grad_norm": 2.0625, "learning_rate": 1.5653777777777778e-05, "loss": 0.3958, "step": 9780 }, { "epoch": 0.21751688588695342, "grad_norm": 2.078125, "learning_rate": 1.5649333333333333e-05, "loss": 0.4138, "step": 9790 }, { "epoch": 0.21773906861002487, "grad_norm": 1.328125, "learning_rate": 1.564488888888889e-05, "loss": 0.4339, "step": 9800 }, { "epoch": 0.21796125133309635, "grad_norm": 2.046875, "learning_rate": 1.5640444444444446e-05, "loss": 0.414, "step": 9810 }, { "epoch": 0.2181834340561678, "grad_norm": 1.7890625, "learning_rate": 1.5636e-05, "loss": 0.4075, "step": 9820 }, { "epoch": 0.21840561677923925, "grad_norm": 1.6875, "learning_rate": 1.563155555555556e-05, "loss": 0.39, "step": 9830 }, { "epoch": 0.2186277995023107, "grad_norm": 2.03125, "learning_rate": 1.5627111111111114e-05, "loss": 0.4215, "step": 9840 }, { "epoch": 0.21884998222538216, "grad_norm": 2.171875, "learning_rate": 1.562266666666667e-05, "loss": 0.4007, "step": 9850 }, { "epoch": 0.2190721649484536, "grad_norm": 2.015625, "learning_rate": 1.5618222222222223e-05, "loss": 0.391, "step": 9860 }, { "epoch": 0.21929434767152506, "grad_norm": 1.9453125, "learning_rate": 1.561377777777778e-05, "loss": 0.4381, "step": 9870 }, { "epoch": 0.2195165303945965, "grad_norm": 2.03125, "learning_rate": 1.5609333333333333e-05, "loss": 0.4479, "step": 9880 }, { "epoch": 0.21973871311766796, "grad_norm": 2.515625, "learning_rate": 1.5604888888888888e-05, "loss": 0.4425, "step": 9890 }, { "epoch": 0.2199608958407394, "grad_norm": 2.03125, "learning_rate": 1.5600444444444446e-05, "loss": 0.4015, "step": 9900 }, { "epoch": 0.2201830785638109, "grad_norm": 1.921875, "learning_rate": 1.5596e-05, "loss": 0.4212, "step": 9910 }, { "epoch": 0.22040526128688234, "grad_norm": 1.609375, "learning_rate": 1.5591555555555556e-05, "loss": 0.4013, "step": 9920 }, { "epoch": 0.2206274440099538, "grad_norm": 2.1875, "learning_rate": 1.5587111111111114e-05, "loss": 0.4078, "step": 9930 }, { "epoch": 0.22084962673302524, "grad_norm": 2.40625, "learning_rate": 1.558266666666667e-05, "loss": 0.4728, "step": 9940 }, { "epoch": 0.2210718094560967, "grad_norm": 2.234375, "learning_rate": 1.5578222222222224e-05, "loss": 0.4154, "step": 9950 }, { "epoch": 0.22129399217916815, "grad_norm": 1.859375, "learning_rate": 1.557377777777778e-05, "loss": 0.3855, "step": 9960 }, { "epoch": 0.2215161749022396, "grad_norm": 2.453125, "learning_rate": 1.5569333333333334e-05, "loss": 0.4627, "step": 9970 }, { "epoch": 0.22173835762531105, "grad_norm": 2.140625, "learning_rate": 1.556488888888889e-05, "loss": 0.4345, "step": 9980 }, { "epoch": 0.2219605403483825, "grad_norm": 1.7578125, "learning_rate": 1.5560444444444443e-05, "loss": 0.4055, "step": 9990 }, { "epoch": 0.22218272307145395, "grad_norm": 2.015625, "learning_rate": 1.5556e-05, "loss": 0.4263, "step": 10000 }, { "epoch": 0.22240490579452543, "grad_norm": 1.59375, "learning_rate": 1.5551555555555556e-05, "loss": 0.3832, "step": 10010 }, { "epoch": 0.22262708851759688, "grad_norm": 1.859375, "learning_rate": 1.554711111111111e-05, "loss": 0.3835, "step": 10020 }, { "epoch": 0.22284927124066833, "grad_norm": 1.8359375, "learning_rate": 1.554266666666667e-05, "loss": 0.3895, "step": 10030 }, { "epoch": 0.22307145396373979, "grad_norm": 2.0, "learning_rate": 1.5538222222222224e-05, "loss": 0.4213, "step": 10040 }, { "epoch": 0.22329363668681124, "grad_norm": 1.921875, "learning_rate": 1.553377777777778e-05, "loss": 0.3773, "step": 10050 }, { "epoch": 0.2235158194098827, "grad_norm": 1.7890625, "learning_rate": 1.5529333333333334e-05, "loss": 0.3892, "step": 10060 }, { "epoch": 0.22373800213295414, "grad_norm": 2.078125, "learning_rate": 1.552488888888889e-05, "loss": 0.4141, "step": 10070 }, { "epoch": 0.2239601848560256, "grad_norm": 1.78125, "learning_rate": 1.5520444444444444e-05, "loss": 0.3856, "step": 10080 }, { "epoch": 0.22418236757909704, "grad_norm": 1.9765625, "learning_rate": 1.5516000000000002e-05, "loss": 0.4099, "step": 10090 }, { "epoch": 0.2244045503021685, "grad_norm": 1.7421875, "learning_rate": 1.5511555555555557e-05, "loss": 0.4171, "step": 10100 }, { "epoch": 0.22462673302523994, "grad_norm": 2.234375, "learning_rate": 1.550711111111111e-05, "loss": 0.4606, "step": 10110 }, { "epoch": 0.22484891574831142, "grad_norm": 1.71875, "learning_rate": 1.5502666666666666e-05, "loss": 0.4292, "step": 10120 }, { "epoch": 0.22507109847138287, "grad_norm": 2.0625, "learning_rate": 1.5498222222222225e-05, "loss": 0.3862, "step": 10130 }, { "epoch": 0.22529328119445433, "grad_norm": 1.984375, "learning_rate": 1.549377777777778e-05, "loss": 0.4293, "step": 10140 }, { "epoch": 0.22551546391752578, "grad_norm": 1.9609375, "learning_rate": 1.5489333333333334e-05, "loss": 0.4031, "step": 10150 }, { "epoch": 0.22573764664059723, "grad_norm": 1.8125, "learning_rate": 1.548488888888889e-05, "loss": 0.4126, "step": 10160 }, { "epoch": 0.22595982936366868, "grad_norm": 2.359375, "learning_rate": 1.5480444444444444e-05, "loss": 0.4381, "step": 10170 }, { "epoch": 0.22618201208674013, "grad_norm": 2.171875, "learning_rate": 1.5476000000000002e-05, "loss": 0.4138, "step": 10180 }, { "epoch": 0.22640419480981158, "grad_norm": 1.671875, "learning_rate": 1.5471555555555557e-05, "loss": 0.4036, "step": 10190 }, { "epoch": 0.22662637753288303, "grad_norm": 2.15625, "learning_rate": 1.5467111111111112e-05, "loss": 0.4509, "step": 10200 }, { "epoch": 0.22684856025595448, "grad_norm": 1.75, "learning_rate": 1.5462666666666667e-05, "loss": 0.4349, "step": 10210 }, { "epoch": 0.22707074297902596, "grad_norm": 2.125, "learning_rate": 1.5458222222222225e-05, "loss": 0.4387, "step": 10220 }, { "epoch": 0.22729292570209741, "grad_norm": 1.8125, "learning_rate": 1.545377777777778e-05, "loss": 0.3939, "step": 10230 }, { "epoch": 0.22751510842516887, "grad_norm": 2.203125, "learning_rate": 1.5449333333333335e-05, "loss": 0.3793, "step": 10240 }, { "epoch": 0.22773729114824032, "grad_norm": 2.1875, "learning_rate": 1.544488888888889e-05, "loss": 0.4314, "step": 10250 }, { "epoch": 0.22795947387131177, "grad_norm": 2.15625, "learning_rate": 1.5440444444444448e-05, "loss": 0.4209, "step": 10260 }, { "epoch": 0.22818165659438322, "grad_norm": 1.96875, "learning_rate": 1.5436000000000003e-05, "loss": 0.4622, "step": 10270 }, { "epoch": 0.22840383931745467, "grad_norm": 1.703125, "learning_rate": 1.5431555555555557e-05, "loss": 0.4193, "step": 10280 }, { "epoch": 0.22862602204052612, "grad_norm": 2.140625, "learning_rate": 1.5427111111111112e-05, "loss": 0.4271, "step": 10290 }, { "epoch": 0.22884820476359757, "grad_norm": 1.7578125, "learning_rate": 1.5422666666666667e-05, "loss": 0.4034, "step": 10300 }, { "epoch": 0.22907038748666902, "grad_norm": 1.9140625, "learning_rate": 1.5418222222222222e-05, "loss": 0.4247, "step": 10310 }, { "epoch": 0.2292925702097405, "grad_norm": 1.9296875, "learning_rate": 1.541377777777778e-05, "loss": 0.4069, "step": 10320 }, { "epoch": 0.22951475293281196, "grad_norm": 2.0, "learning_rate": 1.5409333333333335e-05, "loss": 0.4488, "step": 10330 }, { "epoch": 0.2297369356558834, "grad_norm": 2.234375, "learning_rate": 1.540488888888889e-05, "loss": 0.4234, "step": 10340 }, { "epoch": 0.22995911837895486, "grad_norm": 1.875, "learning_rate": 1.5400444444444448e-05, "loss": 0.4164, "step": 10350 }, { "epoch": 0.2301813011020263, "grad_norm": 2.03125, "learning_rate": 1.5396000000000003e-05, "loss": 0.4142, "step": 10360 }, { "epoch": 0.23040348382509776, "grad_norm": 2.125, "learning_rate": 1.5391555555555558e-05, "loss": 0.401, "step": 10370 }, { "epoch": 0.2306256665481692, "grad_norm": 2.09375, "learning_rate": 1.5387111111111113e-05, "loss": 0.3975, "step": 10380 }, { "epoch": 0.23084784927124066, "grad_norm": 2.53125, "learning_rate": 1.5382666666666668e-05, "loss": 0.4395, "step": 10390 }, { "epoch": 0.23107003199431211, "grad_norm": 2.359375, "learning_rate": 1.5378222222222222e-05, "loss": 0.4033, "step": 10400 }, { "epoch": 0.23129221471738357, "grad_norm": 2.1875, "learning_rate": 1.5373777777777777e-05, "loss": 0.4159, "step": 10410 }, { "epoch": 0.23151439744045502, "grad_norm": 2.5625, "learning_rate": 1.5369333333333335e-05, "loss": 0.3769, "step": 10420 }, { "epoch": 0.2317365801635265, "grad_norm": 2.109375, "learning_rate": 1.536488888888889e-05, "loss": 0.3959, "step": 10430 }, { "epoch": 0.23195876288659795, "grad_norm": 2.078125, "learning_rate": 1.5360444444444445e-05, "loss": 0.3965, "step": 10440 }, { "epoch": 0.2321809456096694, "grad_norm": 1.984375, "learning_rate": 1.5356000000000003e-05, "loss": 0.4088, "step": 10450 }, { "epoch": 0.23240312833274085, "grad_norm": 1.796875, "learning_rate": 1.5351555555555558e-05, "loss": 0.3845, "step": 10460 }, { "epoch": 0.2326253110558123, "grad_norm": 2.203125, "learning_rate": 1.5347111111111113e-05, "loss": 0.4088, "step": 10470 }, { "epoch": 0.23284749377888375, "grad_norm": 2.125, "learning_rate": 1.5342666666666668e-05, "loss": 0.4099, "step": 10480 }, { "epoch": 0.2330696765019552, "grad_norm": 1.90625, "learning_rate": 1.5338222222222223e-05, "loss": 0.4516, "step": 10490 }, { "epoch": 0.23329185922502665, "grad_norm": 2.234375, "learning_rate": 1.5333777777777778e-05, "loss": 0.4197, "step": 10500 }, { "epoch": 0.2335140419480981, "grad_norm": 1.5234375, "learning_rate": 1.5329333333333332e-05, "loss": 0.4455, "step": 10510 }, { "epoch": 0.23373622467116956, "grad_norm": 2.296875, "learning_rate": 1.532488888888889e-05, "loss": 0.391, "step": 10520 }, { "epoch": 0.23395840739424104, "grad_norm": 1.875, "learning_rate": 1.5320444444444446e-05, "loss": 0.4127, "step": 10530 }, { "epoch": 0.2341805901173125, "grad_norm": 2.203125, "learning_rate": 1.5316e-05, "loss": 0.4628, "step": 10540 }, { "epoch": 0.23440277284038394, "grad_norm": 1.765625, "learning_rate": 1.531155555555556e-05, "loss": 0.414, "step": 10550 }, { "epoch": 0.2346249555634554, "grad_norm": 2.09375, "learning_rate": 1.5307111111111113e-05, "loss": 0.4388, "step": 10560 }, { "epoch": 0.23484713828652684, "grad_norm": 1.984375, "learning_rate": 1.530266666666667e-05, "loss": 0.4559, "step": 10570 }, { "epoch": 0.2350693210095983, "grad_norm": 1.8359375, "learning_rate": 1.5298222222222223e-05, "loss": 0.429, "step": 10580 }, { "epoch": 0.23529150373266974, "grad_norm": 1.4765625, "learning_rate": 1.5293777777777778e-05, "loss": 0.4091, "step": 10590 }, { "epoch": 0.2355136864557412, "grad_norm": 1.6640625, "learning_rate": 1.5289333333333333e-05, "loss": 0.3757, "step": 10600 }, { "epoch": 0.23573586917881265, "grad_norm": 2.265625, "learning_rate": 1.5284888888888888e-05, "loss": 0.4064, "step": 10610 }, { "epoch": 0.2359580519018841, "grad_norm": 2.234375, "learning_rate": 1.5280444444444446e-05, "loss": 0.4397, "step": 10620 }, { "epoch": 0.23618023462495558, "grad_norm": 2.0, "learning_rate": 1.5276e-05, "loss": 0.4588, "step": 10630 }, { "epoch": 0.23640241734802703, "grad_norm": 1.9921875, "learning_rate": 1.5271555555555556e-05, "loss": 0.4108, "step": 10640 }, { "epoch": 0.23662460007109848, "grad_norm": 2.09375, "learning_rate": 1.5267111111111114e-05, "loss": 0.4755, "step": 10650 }, { "epoch": 0.23684678279416993, "grad_norm": 1.9296875, "learning_rate": 1.526266666666667e-05, "loss": 0.4257, "step": 10660 }, { "epoch": 0.23706896551724138, "grad_norm": 1.9140625, "learning_rate": 1.5258222222222224e-05, "loss": 0.403, "step": 10670 }, { "epoch": 0.23729114824031283, "grad_norm": 2.09375, "learning_rate": 1.5253777777777778e-05, "loss": 0.4352, "step": 10680 }, { "epoch": 0.23751333096338428, "grad_norm": 2.296875, "learning_rate": 1.5249333333333333e-05, "loss": 0.4014, "step": 10690 }, { "epoch": 0.23773551368645574, "grad_norm": 2.15625, "learning_rate": 1.524488888888889e-05, "loss": 0.4451, "step": 10700 }, { "epoch": 0.2379576964095272, "grad_norm": 1.765625, "learning_rate": 1.5240444444444446e-05, "loss": 0.4506, "step": 10710 }, { "epoch": 0.23817987913259864, "grad_norm": 2.796875, "learning_rate": 1.5236000000000001e-05, "loss": 0.4418, "step": 10720 }, { "epoch": 0.23840206185567012, "grad_norm": 1.7421875, "learning_rate": 1.5231555555555558e-05, "loss": 0.3964, "step": 10730 }, { "epoch": 0.23862424457874157, "grad_norm": 2.234375, "learning_rate": 1.5227111111111113e-05, "loss": 0.429, "step": 10740 }, { "epoch": 0.23884642730181302, "grad_norm": 1.8125, "learning_rate": 1.5222666666666667e-05, "loss": 0.3924, "step": 10750 }, { "epoch": 0.23906861002488447, "grad_norm": 1.7578125, "learning_rate": 1.5218222222222224e-05, "loss": 0.4009, "step": 10760 }, { "epoch": 0.23929079274795592, "grad_norm": 2.125, "learning_rate": 1.5213777777777779e-05, "loss": 0.411, "step": 10770 }, { "epoch": 0.23951297547102737, "grad_norm": 1.9296875, "learning_rate": 1.5209333333333334e-05, "loss": 0.4171, "step": 10780 }, { "epoch": 0.23973515819409882, "grad_norm": 1.96875, "learning_rate": 1.5204888888888888e-05, "loss": 0.4003, "step": 10790 }, { "epoch": 0.23995734091717028, "grad_norm": 1.765625, "learning_rate": 1.5200444444444447e-05, "loss": 0.3866, "step": 10800 }, { "epoch": 0.24017952364024173, "grad_norm": 2.03125, "learning_rate": 1.5196000000000002e-05, "loss": 0.4146, "step": 10810 }, { "epoch": 0.24040170636331318, "grad_norm": 1.75, "learning_rate": 1.5191555555555556e-05, "loss": 0.3649, "step": 10820 }, { "epoch": 0.24062388908638463, "grad_norm": 2.015625, "learning_rate": 1.5187111111111113e-05, "loss": 0.4661, "step": 10830 }, { "epoch": 0.2408460718094561, "grad_norm": 2.1875, "learning_rate": 1.5182666666666668e-05, "loss": 0.4078, "step": 10840 }, { "epoch": 0.24106825453252756, "grad_norm": 2.609375, "learning_rate": 1.5178222222222223e-05, "loss": 0.401, "step": 10850 }, { "epoch": 0.241290437255599, "grad_norm": 2.03125, "learning_rate": 1.517377777777778e-05, "loss": 0.4201, "step": 10860 }, { "epoch": 0.24151261997867046, "grad_norm": 1.890625, "learning_rate": 1.5169333333333334e-05, "loss": 0.3816, "step": 10870 }, { "epoch": 0.2417348027017419, "grad_norm": 2.03125, "learning_rate": 1.516488888888889e-05, "loss": 0.4488, "step": 10880 }, { "epoch": 0.24195698542481336, "grad_norm": 2.25, "learning_rate": 1.5160444444444447e-05, "loss": 0.3732, "step": 10890 }, { "epoch": 0.24217916814788482, "grad_norm": 2.15625, "learning_rate": 1.5156000000000002e-05, "loss": 0.4348, "step": 10900 }, { "epoch": 0.24240135087095627, "grad_norm": 2.109375, "learning_rate": 1.5151555555555557e-05, "loss": 0.4499, "step": 10910 }, { "epoch": 0.24262353359402772, "grad_norm": 1.828125, "learning_rate": 1.5147111111111112e-05, "loss": 0.4267, "step": 10920 }, { "epoch": 0.24284571631709917, "grad_norm": 1.9140625, "learning_rate": 1.5142666666666668e-05, "loss": 0.4251, "step": 10930 }, { "epoch": 0.24306789904017065, "grad_norm": 2.109375, "learning_rate": 1.5138222222222223e-05, "loss": 0.3651, "step": 10940 }, { "epoch": 0.2432900817632421, "grad_norm": 1.8515625, "learning_rate": 1.5133777777777778e-05, "loss": 0.3934, "step": 10950 }, { "epoch": 0.24351226448631355, "grad_norm": 1.6484375, "learning_rate": 1.5129333333333334e-05, "loss": 0.3921, "step": 10960 }, { "epoch": 0.243734447209385, "grad_norm": 1.9375, "learning_rate": 1.5124888888888891e-05, "loss": 0.4099, "step": 10970 }, { "epoch": 0.24395662993245645, "grad_norm": 2.234375, "learning_rate": 1.5120444444444446e-05, "loss": 0.4271, "step": 10980 }, { "epoch": 0.2441788126555279, "grad_norm": 1.65625, "learning_rate": 1.5116000000000002e-05, "loss": 0.4147, "step": 10990 }, { "epoch": 0.24440099537859936, "grad_norm": 1.890625, "learning_rate": 1.5111555555555557e-05, "loss": 0.411, "step": 11000 }, { "epoch": 0.2446231781016708, "grad_norm": 2.15625, "learning_rate": 1.5107111111111112e-05, "loss": 0.3897, "step": 11010 }, { "epoch": 0.24484536082474226, "grad_norm": 2.09375, "learning_rate": 1.5102666666666667e-05, "loss": 0.4195, "step": 11020 }, { "epoch": 0.2450675435478137, "grad_norm": 2.046875, "learning_rate": 1.5098222222222223e-05, "loss": 0.3904, "step": 11030 }, { "epoch": 0.2452897262708852, "grad_norm": 2.15625, "learning_rate": 1.5093777777777778e-05, "loss": 0.3475, "step": 11040 }, { "epoch": 0.24551190899395664, "grad_norm": 1.953125, "learning_rate": 1.5089333333333333e-05, "loss": 0.4163, "step": 11050 }, { "epoch": 0.2457340917170281, "grad_norm": 1.984375, "learning_rate": 1.5084888888888891e-05, "loss": 0.4046, "step": 11060 }, { "epoch": 0.24595627444009954, "grad_norm": 2.046875, "learning_rate": 1.5080444444444446e-05, "loss": 0.4096, "step": 11070 }, { "epoch": 0.246178457163171, "grad_norm": 2.34375, "learning_rate": 1.5076000000000001e-05, "loss": 0.4577, "step": 11080 }, { "epoch": 0.24640063988624245, "grad_norm": 1.6953125, "learning_rate": 1.5071555555555558e-05, "loss": 0.3678, "step": 11090 }, { "epoch": 0.2466228226093139, "grad_norm": 2.484375, "learning_rate": 1.5067111111111112e-05, "loss": 0.4472, "step": 11100 }, { "epoch": 0.24684500533238535, "grad_norm": 2.125, "learning_rate": 1.5062666666666667e-05, "loss": 0.4231, "step": 11110 }, { "epoch": 0.2470671880554568, "grad_norm": 2.125, "learning_rate": 1.5058222222222224e-05, "loss": 0.3996, "step": 11120 }, { "epoch": 0.24728937077852825, "grad_norm": 2.34375, "learning_rate": 1.5053777777777779e-05, "loss": 0.4105, "step": 11130 }, { "epoch": 0.24751155350159973, "grad_norm": 1.765625, "learning_rate": 1.5049333333333333e-05, "loss": 0.4256, "step": 11140 }, { "epoch": 0.24773373622467118, "grad_norm": 1.6015625, "learning_rate": 1.5044888888888892e-05, "loss": 0.4418, "step": 11150 }, { "epoch": 0.24795591894774263, "grad_norm": 1.546875, "learning_rate": 1.5040444444444447e-05, "loss": 0.4127, "step": 11160 }, { "epoch": 0.24817810167081408, "grad_norm": 2.125, "learning_rate": 1.5036000000000001e-05, "loss": 0.4138, "step": 11170 }, { "epoch": 0.24840028439388553, "grad_norm": 2.25, "learning_rate": 1.5031555555555556e-05, "loss": 0.4248, "step": 11180 }, { "epoch": 0.24862246711695699, "grad_norm": 1.90625, "learning_rate": 1.5027111111111113e-05, "loss": 0.3865, "step": 11190 }, { "epoch": 0.24884464984002844, "grad_norm": 1.515625, "learning_rate": 1.5022666666666668e-05, "loss": 0.4174, "step": 11200 }, { "epoch": 0.2490668325630999, "grad_norm": 2.015625, "learning_rate": 1.5018222222222222e-05, "loss": 0.393, "step": 11210 }, { "epoch": 0.24928901528617134, "grad_norm": 1.8828125, "learning_rate": 1.5013777777777779e-05, "loss": 0.4152, "step": 11220 }, { "epoch": 0.2495111980092428, "grad_norm": 2.203125, "learning_rate": 1.5009333333333334e-05, "loss": 0.4029, "step": 11230 }, { "epoch": 0.24973338073231424, "grad_norm": 2.296875, "learning_rate": 1.500488888888889e-05, "loss": 0.3629, "step": 11240 }, { "epoch": 0.24995556345538572, "grad_norm": 2.03125, "learning_rate": 1.5000444444444447e-05, "loss": 0.3671, "step": 11250 }, { "epoch": 0.25017774617845717, "grad_norm": 1.9453125, "learning_rate": 1.4996000000000002e-05, "loss": 0.413, "step": 11260 }, { "epoch": 0.2503999289015286, "grad_norm": 1.8203125, "learning_rate": 1.4991555555555557e-05, "loss": 0.404, "step": 11270 }, { "epoch": 0.2506221116246001, "grad_norm": 2.03125, "learning_rate": 1.4987111111111111e-05, "loss": 0.4044, "step": 11280 }, { "epoch": 0.2508442943476715, "grad_norm": 2.078125, "learning_rate": 1.4982666666666668e-05, "loss": 0.4293, "step": 11290 }, { "epoch": 0.251066477070743, "grad_norm": 1.84375, "learning_rate": 1.4978222222222223e-05, "loss": 0.3781, "step": 11300 }, { "epoch": 0.25128865979381443, "grad_norm": 2.140625, "learning_rate": 1.4973777777777778e-05, "loss": 0.458, "step": 11310 }, { "epoch": 0.2515108425168859, "grad_norm": 1.6484375, "learning_rate": 1.4969333333333334e-05, "loss": 0.4059, "step": 11320 }, { "epoch": 0.25173302523995733, "grad_norm": 2.21875, "learning_rate": 1.496488888888889e-05, "loss": 0.3664, "step": 11330 }, { "epoch": 0.2519552079630288, "grad_norm": 2.296875, "learning_rate": 1.4960444444444446e-05, "loss": 0.4389, "step": 11340 }, { "epoch": 0.25217739068610023, "grad_norm": 2.625, "learning_rate": 1.4956000000000002e-05, "loss": 0.4298, "step": 11350 }, { "epoch": 0.2523995734091717, "grad_norm": 1.8359375, "learning_rate": 1.4951555555555557e-05, "loss": 0.4423, "step": 11360 }, { "epoch": 0.25262175613224314, "grad_norm": 2.0625, "learning_rate": 1.4947111111111112e-05, "loss": 0.4159, "step": 11370 }, { "epoch": 0.2528439388553146, "grad_norm": 1.7734375, "learning_rate": 1.4942666666666668e-05, "loss": 0.3997, "step": 11380 }, { "epoch": 0.25306612157838604, "grad_norm": 2.0, "learning_rate": 1.4938222222222223e-05, "loss": 0.3869, "step": 11390 }, { "epoch": 0.25328830430145755, "grad_norm": 2.09375, "learning_rate": 1.4933777777777778e-05, "loss": 0.4579, "step": 11400 }, { "epoch": 0.253510487024529, "grad_norm": 2.34375, "learning_rate": 1.4929333333333333e-05, "loss": 0.4292, "step": 11410 }, { "epoch": 0.25373266974760045, "grad_norm": 1.6875, "learning_rate": 1.4924888888888891e-05, "loss": 0.4276, "step": 11420 }, { "epoch": 0.2539548524706719, "grad_norm": 2.109375, "learning_rate": 1.4920444444444446e-05, "loss": 0.3931, "step": 11430 }, { "epoch": 0.25417703519374335, "grad_norm": 2.078125, "learning_rate": 1.4916e-05, "loss": 0.4296, "step": 11440 }, { "epoch": 0.2543992179168148, "grad_norm": 1.5546875, "learning_rate": 1.4911555555555557e-05, "loss": 0.3877, "step": 11450 }, { "epoch": 0.25462140063988625, "grad_norm": 1.7109375, "learning_rate": 1.4907111111111112e-05, "loss": 0.4044, "step": 11460 }, { "epoch": 0.2548435833629577, "grad_norm": 2.15625, "learning_rate": 1.4902666666666667e-05, "loss": 0.4242, "step": 11470 }, { "epoch": 0.25506576608602916, "grad_norm": 2.109375, "learning_rate": 1.4898222222222224e-05, "loss": 0.3976, "step": 11480 }, { "epoch": 0.2552879488091006, "grad_norm": 1.921875, "learning_rate": 1.4893777777777778e-05, "loss": 0.4272, "step": 11490 }, { "epoch": 0.25551013153217206, "grad_norm": 1.84375, "learning_rate": 1.4889333333333335e-05, "loss": 0.4176, "step": 11500 }, { "epoch": 0.2557323142552435, "grad_norm": 1.9375, "learning_rate": 1.4884888888888892e-05, "loss": 0.4055, "step": 11510 }, { "epoch": 0.25595449697831496, "grad_norm": 2.734375, "learning_rate": 1.4880444444444446e-05, "loss": 0.4689, "step": 11520 }, { "epoch": 0.2561766797013864, "grad_norm": 2.328125, "learning_rate": 1.4876000000000001e-05, "loss": 0.4248, "step": 11530 }, { "epoch": 0.25639886242445786, "grad_norm": 1.765625, "learning_rate": 1.4871555555555556e-05, "loss": 0.3976, "step": 11540 }, { "epoch": 0.2566210451475293, "grad_norm": 2.078125, "learning_rate": 1.4867111111111113e-05, "loss": 0.4339, "step": 11550 }, { "epoch": 0.25684322787060077, "grad_norm": 2.375, "learning_rate": 1.4862666666666667e-05, "loss": 0.3969, "step": 11560 }, { "epoch": 0.2570654105936722, "grad_norm": 2.234375, "learning_rate": 1.4858222222222222e-05, "loss": 0.4235, "step": 11570 }, { "epoch": 0.25728759331674367, "grad_norm": 1.9375, "learning_rate": 1.4853777777777779e-05, "loss": 0.4033, "step": 11580 }, { "epoch": 0.2575097760398151, "grad_norm": 2.0625, "learning_rate": 1.4849333333333335e-05, "loss": 0.3886, "step": 11590 }, { "epoch": 0.25773195876288657, "grad_norm": 2.265625, "learning_rate": 1.484488888888889e-05, "loss": 0.3896, "step": 11600 }, { "epoch": 0.2579541414859581, "grad_norm": 1.9453125, "learning_rate": 1.4840444444444447e-05, "loss": 0.3802, "step": 11610 }, { "epoch": 0.25817632420902953, "grad_norm": 2.03125, "learning_rate": 1.4836000000000002e-05, "loss": 0.4334, "step": 11620 }, { "epoch": 0.258398506932101, "grad_norm": 2.0, "learning_rate": 1.4831555555555556e-05, "loss": 0.4029, "step": 11630 }, { "epoch": 0.25862068965517243, "grad_norm": 2.59375, "learning_rate": 1.4827111111111111e-05, "loss": 0.3693, "step": 11640 }, { "epoch": 0.2588428723782439, "grad_norm": 2.328125, "learning_rate": 1.4822666666666668e-05, "loss": 0.4093, "step": 11650 }, { "epoch": 0.25906505510131533, "grad_norm": 2.578125, "learning_rate": 1.4818222222222223e-05, "loss": 0.4496, "step": 11660 }, { "epoch": 0.2592872378243868, "grad_norm": 2.234375, "learning_rate": 1.4813777777777778e-05, "loss": 0.405, "step": 11670 }, { "epoch": 0.25950942054745824, "grad_norm": 2.03125, "learning_rate": 1.4809333333333336e-05, "loss": 0.3803, "step": 11680 }, { "epoch": 0.2597316032705297, "grad_norm": 1.9375, "learning_rate": 1.480488888888889e-05, "loss": 0.3986, "step": 11690 }, { "epoch": 0.25995378599360114, "grad_norm": 1.71875, "learning_rate": 1.4800444444444445e-05, "loss": 0.4121, "step": 11700 }, { "epoch": 0.2601759687166726, "grad_norm": 1.7265625, "learning_rate": 1.4796000000000002e-05, "loss": 0.3735, "step": 11710 }, { "epoch": 0.26039815143974404, "grad_norm": 2.203125, "learning_rate": 1.4791555555555557e-05, "loss": 0.396, "step": 11720 }, { "epoch": 0.2606203341628155, "grad_norm": 2.15625, "learning_rate": 1.4787111111111112e-05, "loss": 0.3862, "step": 11730 }, { "epoch": 0.26084251688588694, "grad_norm": 2.25, "learning_rate": 1.4782666666666668e-05, "loss": 0.3994, "step": 11740 }, { "epoch": 0.2610646996089584, "grad_norm": 2.4375, "learning_rate": 1.4778222222222223e-05, "loss": 0.3949, "step": 11750 }, { "epoch": 0.26128688233202985, "grad_norm": 1.8359375, "learning_rate": 1.4773777777777778e-05, "loss": 0.4026, "step": 11760 }, { "epoch": 0.2615090650551013, "grad_norm": 2.171875, "learning_rate": 1.4769333333333336e-05, "loss": 0.4095, "step": 11770 }, { "epoch": 0.26173124777817275, "grad_norm": 2.28125, "learning_rate": 1.4764888888888891e-05, "loss": 0.4203, "step": 11780 }, { "epoch": 0.2619534305012442, "grad_norm": 1.90625, "learning_rate": 1.4760444444444446e-05, "loss": 0.3837, "step": 11790 }, { "epoch": 0.26217561322431565, "grad_norm": 2.25, "learning_rate": 1.4756e-05, "loss": 0.4313, "step": 11800 }, { "epoch": 0.26239779594738716, "grad_norm": 2.015625, "learning_rate": 1.4751555555555557e-05, "loss": 0.4183, "step": 11810 }, { "epoch": 0.2626199786704586, "grad_norm": 1.953125, "learning_rate": 1.4747111111111112e-05, "loss": 0.4251, "step": 11820 }, { "epoch": 0.26284216139353006, "grad_norm": 2.3125, "learning_rate": 1.4742666666666667e-05, "loss": 0.4186, "step": 11830 }, { "epoch": 0.2630643441166015, "grad_norm": 2.125, "learning_rate": 1.4738222222222223e-05, "loss": 0.3983, "step": 11840 }, { "epoch": 0.26328652683967296, "grad_norm": 1.8359375, "learning_rate": 1.4733777777777778e-05, "loss": 0.4149, "step": 11850 }, { "epoch": 0.2635087095627444, "grad_norm": 1.859375, "learning_rate": 1.4729333333333335e-05, "loss": 0.3967, "step": 11860 }, { "epoch": 0.26373089228581587, "grad_norm": 1.8984375, "learning_rate": 1.4724888888888891e-05, "loss": 0.4256, "step": 11870 }, { "epoch": 0.2639530750088873, "grad_norm": 2.09375, "learning_rate": 1.4720444444444446e-05, "loss": 0.4262, "step": 11880 }, { "epoch": 0.26417525773195877, "grad_norm": 2.109375, "learning_rate": 1.4716000000000001e-05, "loss": 0.4139, "step": 11890 }, { "epoch": 0.2643974404550302, "grad_norm": 1.546875, "learning_rate": 1.4711555555555556e-05, "loss": 0.4439, "step": 11900 }, { "epoch": 0.26461962317810167, "grad_norm": 1.9296875, "learning_rate": 1.4707111111111112e-05, "loss": 0.4166, "step": 11910 }, { "epoch": 0.2648418059011731, "grad_norm": 1.9921875, "learning_rate": 1.4702666666666667e-05, "loss": 0.4344, "step": 11920 }, { "epoch": 0.2650639886242446, "grad_norm": 1.765625, "learning_rate": 1.4698222222222222e-05, "loss": 0.3895, "step": 11930 }, { "epoch": 0.265286171347316, "grad_norm": 2.296875, "learning_rate": 1.4693777777777779e-05, "loss": 0.4079, "step": 11940 }, { "epoch": 0.2655083540703875, "grad_norm": 1.7421875, "learning_rate": 1.4689333333333335e-05, "loss": 0.4007, "step": 11950 }, { "epoch": 0.2657305367934589, "grad_norm": 1.9296875, "learning_rate": 1.468488888888889e-05, "loss": 0.404, "step": 11960 }, { "epoch": 0.2659527195165304, "grad_norm": 1.875, "learning_rate": 1.4680444444444447e-05, "loss": 0.3996, "step": 11970 }, { "epoch": 0.26617490223960183, "grad_norm": 1.7890625, "learning_rate": 1.4676000000000001e-05, "loss": 0.4015, "step": 11980 }, { "epoch": 0.2663970849626733, "grad_norm": 1.9296875, "learning_rate": 1.4671555555555556e-05, "loss": 0.4102, "step": 11990 }, { "epoch": 0.26661926768574473, "grad_norm": 2.03125, "learning_rate": 1.4667111111111111e-05, "loss": 0.398, "step": 12000 }, { "epoch": 0.2668414504088162, "grad_norm": 1.921875, "learning_rate": 1.4662666666666668e-05, "loss": 0.4066, "step": 12010 }, { "epoch": 0.2670636331318877, "grad_norm": 2.28125, "learning_rate": 1.4658222222222223e-05, "loss": 0.4213, "step": 12020 }, { "epoch": 0.26728581585495914, "grad_norm": 2.15625, "learning_rate": 1.4653777777777777e-05, "loss": 0.398, "step": 12030 }, { "epoch": 0.2675079985780306, "grad_norm": 1.6796875, "learning_rate": 1.4649333333333336e-05, "loss": 0.3696, "step": 12040 }, { "epoch": 0.26773018130110204, "grad_norm": 1.8984375, "learning_rate": 1.464488888888889e-05, "loss": 0.3524, "step": 12050 }, { "epoch": 0.2679523640241735, "grad_norm": 1.8515625, "learning_rate": 1.4640444444444445e-05, "loss": 0.4166, "step": 12060 }, { "epoch": 0.26817454674724495, "grad_norm": 2.109375, "learning_rate": 1.4636000000000002e-05, "loss": 0.4267, "step": 12070 }, { "epoch": 0.2683967294703164, "grad_norm": 1.953125, "learning_rate": 1.4631555555555557e-05, "loss": 0.4347, "step": 12080 }, { "epoch": 0.26861891219338785, "grad_norm": 2.21875, "learning_rate": 1.4627111111111112e-05, "loss": 0.436, "step": 12090 }, { "epoch": 0.2688410949164593, "grad_norm": 1.7109375, "learning_rate": 1.4622666666666668e-05, "loss": 0.3948, "step": 12100 }, { "epoch": 0.26906327763953075, "grad_norm": 2.296875, "learning_rate": 1.4618222222222223e-05, "loss": 0.4133, "step": 12110 }, { "epoch": 0.2692854603626022, "grad_norm": 2.015625, "learning_rate": 1.461377777777778e-05, "loss": 0.3911, "step": 12120 }, { "epoch": 0.26950764308567365, "grad_norm": 2.0625, "learning_rate": 1.4609333333333336e-05, "loss": 0.4272, "step": 12130 }, { "epoch": 0.2697298258087451, "grad_norm": 2.578125, "learning_rate": 1.460488888888889e-05, "loss": 0.4426, "step": 12140 }, { "epoch": 0.26995200853181656, "grad_norm": 1.859375, "learning_rate": 1.4600444444444446e-05, "loss": 0.4244, "step": 12150 }, { "epoch": 0.270174191254888, "grad_norm": 2.171875, "learning_rate": 1.4596e-05, "loss": 0.4009, "step": 12160 }, { "epoch": 0.27039637397795946, "grad_norm": 1.9296875, "learning_rate": 1.4591555555555557e-05, "loss": 0.386, "step": 12170 }, { "epoch": 0.2706185567010309, "grad_norm": 1.953125, "learning_rate": 1.4587111111111112e-05, "loss": 0.4325, "step": 12180 }, { "epoch": 0.27084073942410236, "grad_norm": 1.71875, "learning_rate": 1.4582666666666667e-05, "loss": 0.3962, "step": 12190 }, { "epoch": 0.2710629221471738, "grad_norm": 2.265625, "learning_rate": 1.4578222222222223e-05, "loss": 0.4439, "step": 12200 }, { "epoch": 0.27128510487024526, "grad_norm": 1.9453125, "learning_rate": 1.457377777777778e-05, "loss": 0.4283, "step": 12210 }, { "epoch": 0.27150728759331677, "grad_norm": 2.109375, "learning_rate": 1.4569333333333335e-05, "loss": 0.4495, "step": 12220 }, { "epoch": 0.2717294703163882, "grad_norm": 1.90625, "learning_rate": 1.4564888888888891e-05, "loss": 0.3652, "step": 12230 }, { "epoch": 0.2719516530394597, "grad_norm": 1.859375, "learning_rate": 1.4560444444444446e-05, "loss": 0.3978, "step": 12240 }, { "epoch": 0.2721738357625311, "grad_norm": 1.6953125, "learning_rate": 1.4556000000000001e-05, "loss": 0.4179, "step": 12250 }, { "epoch": 0.2723960184856026, "grad_norm": 2.265625, "learning_rate": 1.4551555555555556e-05, "loss": 0.4119, "step": 12260 }, { "epoch": 0.272618201208674, "grad_norm": 2.234375, "learning_rate": 1.4547111111111112e-05, "loss": 0.414, "step": 12270 }, { "epoch": 0.2728403839317455, "grad_norm": 1.734375, "learning_rate": 1.4542666666666667e-05, "loss": 0.4351, "step": 12280 }, { "epoch": 0.27306256665481693, "grad_norm": 1.9921875, "learning_rate": 1.4538222222222222e-05, "loss": 0.3993, "step": 12290 }, { "epoch": 0.2732847493778884, "grad_norm": 2.09375, "learning_rate": 1.453377777777778e-05, "loss": 0.4078, "step": 12300 }, { "epoch": 0.27350693210095983, "grad_norm": 2.015625, "learning_rate": 1.4529333333333335e-05, "loss": 0.4145, "step": 12310 }, { "epoch": 0.2737291148240313, "grad_norm": 2.359375, "learning_rate": 1.452488888888889e-05, "loss": 0.4601, "step": 12320 }, { "epoch": 0.27395129754710273, "grad_norm": 2.3125, "learning_rate": 1.4520444444444446e-05, "loss": 0.4044, "step": 12330 }, { "epoch": 0.2741734802701742, "grad_norm": 2.4375, "learning_rate": 1.4516000000000001e-05, "loss": 0.4171, "step": 12340 }, { "epoch": 0.27439566299324564, "grad_norm": 2.046875, "learning_rate": 1.4511555555555556e-05, "loss": 0.4093, "step": 12350 }, { "epoch": 0.2746178457163171, "grad_norm": 1.7890625, "learning_rate": 1.4507111111111111e-05, "loss": 0.3676, "step": 12360 }, { "epoch": 0.27484002843938854, "grad_norm": 2.359375, "learning_rate": 1.4502666666666668e-05, "loss": 0.4007, "step": 12370 }, { "epoch": 0.27506221116246, "grad_norm": 1.9296875, "learning_rate": 1.4498222222222222e-05, "loss": 0.3973, "step": 12380 }, { "epoch": 0.27528439388553144, "grad_norm": 1.890625, "learning_rate": 1.449377777777778e-05, "loss": 0.3983, "step": 12390 }, { "epoch": 0.2755065766086029, "grad_norm": 2.09375, "learning_rate": 1.4489333333333335e-05, "loss": 0.4332, "step": 12400 }, { "epoch": 0.27572875933167434, "grad_norm": 2.59375, "learning_rate": 1.448488888888889e-05, "loss": 0.4434, "step": 12410 }, { "epoch": 0.2759509420547458, "grad_norm": 1.859375, "learning_rate": 1.4480444444444445e-05, "loss": 0.4142, "step": 12420 }, { "epoch": 0.2761731247778173, "grad_norm": 2.078125, "learning_rate": 1.4476000000000002e-05, "loss": 0.4278, "step": 12430 }, { "epoch": 0.27639530750088875, "grad_norm": 2.15625, "learning_rate": 1.4471555555555557e-05, "loss": 0.4008, "step": 12440 }, { "epoch": 0.2766174902239602, "grad_norm": 1.8984375, "learning_rate": 1.4467111111111111e-05, "loss": 0.3918, "step": 12450 }, { "epoch": 0.27683967294703166, "grad_norm": 2.109375, "learning_rate": 1.4462666666666668e-05, "loss": 0.3931, "step": 12460 }, { "epoch": 0.2770618556701031, "grad_norm": 2.421875, "learning_rate": 1.4458222222222223e-05, "loss": 0.4337, "step": 12470 }, { "epoch": 0.27728403839317456, "grad_norm": 2.0625, "learning_rate": 1.445377777777778e-05, "loss": 0.4374, "step": 12480 }, { "epoch": 0.277506221116246, "grad_norm": 2.015625, "learning_rate": 1.4449333333333336e-05, "loss": 0.4279, "step": 12490 }, { "epoch": 0.27772840383931746, "grad_norm": 1.9921875, "learning_rate": 1.444488888888889e-05, "loss": 0.4163, "step": 12500 }, { "epoch": 0.2779505865623889, "grad_norm": 2.203125, "learning_rate": 1.4440444444444446e-05, "loss": 0.4316, "step": 12510 }, { "epoch": 0.27817276928546036, "grad_norm": 2.078125, "learning_rate": 1.4436e-05, "loss": 0.4183, "step": 12520 }, { "epoch": 0.2783949520085318, "grad_norm": 2.390625, "learning_rate": 1.4431555555555557e-05, "loss": 0.4326, "step": 12530 }, { "epoch": 0.27861713473160327, "grad_norm": 1.9609375, "learning_rate": 1.4427111111111112e-05, "loss": 0.4192, "step": 12540 }, { "epoch": 0.2788393174546747, "grad_norm": 2.21875, "learning_rate": 1.4422666666666667e-05, "loss": 0.4446, "step": 12550 }, { "epoch": 0.27906150017774617, "grad_norm": 2.265625, "learning_rate": 1.4418222222222223e-05, "loss": 0.3878, "step": 12560 }, { "epoch": 0.2792836829008176, "grad_norm": 1.984375, "learning_rate": 1.441377777777778e-05, "loss": 0.4499, "step": 12570 }, { "epoch": 0.27950586562388907, "grad_norm": 1.984375, "learning_rate": 1.4409333333333335e-05, "loss": 0.4303, "step": 12580 }, { "epoch": 0.2797280483469605, "grad_norm": 2.25, "learning_rate": 1.4404888888888891e-05, "loss": 0.3757, "step": 12590 }, { "epoch": 0.279950231070032, "grad_norm": 2.046875, "learning_rate": 1.4400444444444446e-05, "loss": 0.3978, "step": 12600 }, { "epoch": 0.2801724137931034, "grad_norm": 2.046875, "learning_rate": 1.4396e-05, "loss": 0.4275, "step": 12610 }, { "epoch": 0.2803945965161749, "grad_norm": 1.78125, "learning_rate": 1.4391555555555556e-05, "loss": 0.407, "step": 12620 }, { "epoch": 0.2806167792392464, "grad_norm": 2.21875, "learning_rate": 1.4387111111111112e-05, "loss": 0.4344, "step": 12630 }, { "epoch": 0.28083896196231783, "grad_norm": 1.9375, "learning_rate": 1.4382666666666667e-05, "loss": 0.3926, "step": 12640 }, { "epoch": 0.2810611446853893, "grad_norm": 2.015625, "learning_rate": 1.4378222222222222e-05, "loss": 0.4002, "step": 12650 }, { "epoch": 0.28128332740846074, "grad_norm": 2.078125, "learning_rate": 1.437377777777778e-05, "loss": 0.3673, "step": 12660 }, { "epoch": 0.2815055101315322, "grad_norm": 2.765625, "learning_rate": 1.4369333333333335e-05, "loss": 0.3803, "step": 12670 }, { "epoch": 0.28172769285460364, "grad_norm": 2.03125, "learning_rate": 1.436488888888889e-05, "loss": 0.4249, "step": 12680 }, { "epoch": 0.2819498755776751, "grad_norm": 2.296875, "learning_rate": 1.4360444444444446e-05, "loss": 0.4255, "step": 12690 }, { "epoch": 0.28217205830074654, "grad_norm": 2.515625, "learning_rate": 1.4356000000000001e-05, "loss": 0.4691, "step": 12700 }, { "epoch": 0.282394241023818, "grad_norm": 1.96875, "learning_rate": 1.4351555555555556e-05, "loss": 0.4481, "step": 12710 }, { "epoch": 0.28261642374688944, "grad_norm": 2.171875, "learning_rate": 1.434711111111111e-05, "loss": 0.3862, "step": 12720 }, { "epoch": 0.2828386064699609, "grad_norm": 1.8515625, "learning_rate": 1.4342666666666667e-05, "loss": 0.404, "step": 12730 }, { "epoch": 0.28306078919303235, "grad_norm": 2.265625, "learning_rate": 1.4338222222222224e-05, "loss": 0.3762, "step": 12740 }, { "epoch": 0.2832829719161038, "grad_norm": 2.109375, "learning_rate": 1.433377777777778e-05, "loss": 0.4001, "step": 12750 }, { "epoch": 0.28350515463917525, "grad_norm": 2.34375, "learning_rate": 1.4329333333333335e-05, "loss": 0.4061, "step": 12760 }, { "epoch": 0.2837273373622467, "grad_norm": 1.71875, "learning_rate": 1.432488888888889e-05, "loss": 0.416, "step": 12770 }, { "epoch": 0.28394952008531815, "grad_norm": 1.75, "learning_rate": 1.4320444444444445e-05, "loss": 0.3675, "step": 12780 }, { "epoch": 0.2841717028083896, "grad_norm": 2.625, "learning_rate": 1.4316000000000002e-05, "loss": 0.3726, "step": 12790 }, { "epoch": 0.28439388553146105, "grad_norm": 1.8984375, "learning_rate": 1.4311555555555556e-05, "loss": 0.4244, "step": 12800 }, { "epoch": 0.2846160682545325, "grad_norm": 2.03125, "learning_rate": 1.4307111111111111e-05, "loss": 0.4231, "step": 12810 }, { "epoch": 0.28483825097760396, "grad_norm": 1.8984375, "learning_rate": 1.4302666666666668e-05, "loss": 0.4255, "step": 12820 }, { "epoch": 0.2850604337006754, "grad_norm": 2.453125, "learning_rate": 1.4298222222222224e-05, "loss": 0.4275, "step": 12830 }, { "epoch": 0.2852826164237469, "grad_norm": 2.34375, "learning_rate": 1.4293777777777779e-05, "loss": 0.4344, "step": 12840 }, { "epoch": 0.28550479914681837, "grad_norm": 2.671875, "learning_rate": 1.4289333333333336e-05, "loss": 0.4564, "step": 12850 }, { "epoch": 0.2857269818698898, "grad_norm": 1.96875, "learning_rate": 1.428488888888889e-05, "loss": 0.3974, "step": 12860 }, { "epoch": 0.28594916459296127, "grad_norm": 2.09375, "learning_rate": 1.4280444444444445e-05, "loss": 0.3708, "step": 12870 }, { "epoch": 0.2861713473160327, "grad_norm": 2.15625, "learning_rate": 1.4276e-05, "loss": 0.4, "step": 12880 }, { "epoch": 0.28639353003910417, "grad_norm": 2.09375, "learning_rate": 1.4271555555555557e-05, "loss": 0.3767, "step": 12890 }, { "epoch": 0.2866157127621756, "grad_norm": 2.015625, "learning_rate": 1.4267111111111112e-05, "loss": 0.4141, "step": 12900 }, { "epoch": 0.2868378954852471, "grad_norm": 1.7421875, "learning_rate": 1.4262666666666666e-05, "loss": 0.4175, "step": 12910 }, { "epoch": 0.2870600782083185, "grad_norm": 1.953125, "learning_rate": 1.4258222222222225e-05, "loss": 0.4612, "step": 12920 }, { "epoch": 0.28728226093139, "grad_norm": 2.5, "learning_rate": 1.425377777777778e-05, "loss": 0.4161, "step": 12930 }, { "epoch": 0.28750444365446143, "grad_norm": 1.6875, "learning_rate": 1.4249333333333334e-05, "loss": 0.4, "step": 12940 }, { "epoch": 0.2877266263775329, "grad_norm": 2.21875, "learning_rate": 1.4244888888888891e-05, "loss": 0.4594, "step": 12950 }, { "epoch": 0.28794880910060433, "grad_norm": 1.9921875, "learning_rate": 1.4240444444444446e-05, "loss": 0.4308, "step": 12960 }, { "epoch": 0.2881709918236758, "grad_norm": 2.109375, "learning_rate": 1.4236e-05, "loss": 0.4383, "step": 12970 }, { "epoch": 0.28839317454674723, "grad_norm": 2.078125, "learning_rate": 1.4231555555555555e-05, "loss": 0.4272, "step": 12980 }, { "epoch": 0.2886153572698187, "grad_norm": 1.8828125, "learning_rate": 1.4227111111111112e-05, "loss": 0.4004, "step": 12990 }, { "epoch": 0.28883753999289014, "grad_norm": 2.0625, "learning_rate": 1.4222666666666667e-05, "loss": 0.4038, "step": 13000 }, { "epoch": 0.2890597227159616, "grad_norm": 1.7890625, "learning_rate": 1.4218222222222225e-05, "loss": 0.3879, "step": 13010 }, { "epoch": 0.28928190543903304, "grad_norm": 2.046875, "learning_rate": 1.421377777777778e-05, "loss": 0.3951, "step": 13020 }, { "epoch": 0.2895040881621045, "grad_norm": 2.09375, "learning_rate": 1.4209333333333335e-05, "loss": 0.3893, "step": 13030 }, { "epoch": 0.289726270885176, "grad_norm": 2.09375, "learning_rate": 1.420488888888889e-05, "loss": 0.4287, "step": 13040 }, { "epoch": 0.28994845360824745, "grad_norm": 1.8828125, "learning_rate": 1.4200444444444446e-05, "loss": 0.3676, "step": 13050 }, { "epoch": 0.2901706363313189, "grad_norm": 2.46875, "learning_rate": 1.4196000000000001e-05, "loss": 0.4039, "step": 13060 }, { "epoch": 0.29039281905439035, "grad_norm": 2.28125, "learning_rate": 1.4191555555555556e-05, "loss": 0.4102, "step": 13070 }, { "epoch": 0.2906150017774618, "grad_norm": 2.359375, "learning_rate": 1.4187111111111112e-05, "loss": 0.4129, "step": 13080 }, { "epoch": 0.29083718450053325, "grad_norm": 2.0, "learning_rate": 1.4182666666666667e-05, "loss": 0.424, "step": 13090 }, { "epoch": 0.2910593672236047, "grad_norm": 1.8203125, "learning_rate": 1.4178222222222224e-05, "loss": 0.3931, "step": 13100 }, { "epoch": 0.29128154994667615, "grad_norm": 1.9921875, "learning_rate": 1.417377777777778e-05, "loss": 0.4318, "step": 13110 }, { "epoch": 0.2915037326697476, "grad_norm": 1.8828125, "learning_rate": 1.4169333333333335e-05, "loss": 0.3904, "step": 13120 }, { "epoch": 0.29172591539281906, "grad_norm": 2.34375, "learning_rate": 1.416488888888889e-05, "loss": 0.3825, "step": 13130 }, { "epoch": 0.2919480981158905, "grad_norm": 2.296875, "learning_rate": 1.4160444444444445e-05, "loss": 0.3881, "step": 13140 }, { "epoch": 0.29217028083896196, "grad_norm": 2.34375, "learning_rate": 1.4156000000000001e-05, "loss": 0.3714, "step": 13150 }, { "epoch": 0.2923924635620334, "grad_norm": 2.109375, "learning_rate": 1.4151555555555556e-05, "loss": 0.413, "step": 13160 }, { "epoch": 0.29261464628510486, "grad_norm": 2.25, "learning_rate": 1.4147111111111111e-05, "loss": 0.4085, "step": 13170 }, { "epoch": 0.2928368290081763, "grad_norm": 2.03125, "learning_rate": 1.4142666666666668e-05, "loss": 0.4194, "step": 13180 }, { "epoch": 0.29305901173124776, "grad_norm": 2.125, "learning_rate": 1.4138222222222224e-05, "loss": 0.3961, "step": 13190 }, { "epoch": 0.2932811944543192, "grad_norm": 2.09375, "learning_rate": 1.4133777777777779e-05, "loss": 0.413, "step": 13200 }, { "epoch": 0.29350337717739067, "grad_norm": 2.234375, "learning_rate": 1.4129333333333335e-05, "loss": 0.414, "step": 13210 }, { "epoch": 0.2937255599004621, "grad_norm": 2.0, "learning_rate": 1.412488888888889e-05, "loss": 0.4066, "step": 13220 }, { "epoch": 0.29394774262353357, "grad_norm": 2.140625, "learning_rate": 1.4120444444444445e-05, "loss": 0.4498, "step": 13230 }, { "epoch": 0.294169925346605, "grad_norm": 1.7578125, "learning_rate": 1.4116e-05, "loss": 0.3566, "step": 13240 }, { "epoch": 0.29439210806967653, "grad_norm": 1.9140625, "learning_rate": 1.4111555555555557e-05, "loss": 0.3573, "step": 13250 }, { "epoch": 0.294614290792748, "grad_norm": 1.8984375, "learning_rate": 1.4107111111111111e-05, "loss": 0.3703, "step": 13260 }, { "epoch": 0.29483647351581943, "grad_norm": 2.40625, "learning_rate": 1.4102666666666666e-05, "loss": 0.3922, "step": 13270 }, { "epoch": 0.2950586562388909, "grad_norm": 2.375, "learning_rate": 1.4098222222222224e-05, "loss": 0.3918, "step": 13280 }, { "epoch": 0.29528083896196233, "grad_norm": 2.15625, "learning_rate": 1.409377777777778e-05, "loss": 0.4299, "step": 13290 }, { "epoch": 0.2955030216850338, "grad_norm": 2.21875, "learning_rate": 1.4089333333333334e-05, "loss": 0.4128, "step": 13300 }, { "epoch": 0.29572520440810524, "grad_norm": 2.0625, "learning_rate": 1.408488888888889e-05, "loss": 0.4024, "step": 13310 }, { "epoch": 0.2959473871311767, "grad_norm": 2.046875, "learning_rate": 1.4080444444444446e-05, "loss": 0.3516, "step": 13320 }, { "epoch": 0.29616956985424814, "grad_norm": 2.140625, "learning_rate": 1.4076e-05, "loss": 0.4223, "step": 13330 }, { "epoch": 0.2963917525773196, "grad_norm": 2.265625, "learning_rate": 1.4071555555555555e-05, "loss": 0.4104, "step": 13340 }, { "epoch": 0.29661393530039104, "grad_norm": 1.8984375, "learning_rate": 1.4067111111111112e-05, "loss": 0.4217, "step": 13350 }, { "epoch": 0.2968361180234625, "grad_norm": 2.359375, "learning_rate": 1.4062666666666668e-05, "loss": 0.4201, "step": 13360 }, { "epoch": 0.29705830074653394, "grad_norm": 1.84375, "learning_rate": 1.4058222222222225e-05, "loss": 0.3873, "step": 13370 }, { "epoch": 0.2972804834696054, "grad_norm": 1.9140625, "learning_rate": 1.405377777777778e-05, "loss": 0.4223, "step": 13380 }, { "epoch": 0.29750266619267685, "grad_norm": 2.0, "learning_rate": 1.4049333333333335e-05, "loss": 0.3799, "step": 13390 }, { "epoch": 0.2977248489157483, "grad_norm": 2.09375, "learning_rate": 1.404488888888889e-05, "loss": 0.4534, "step": 13400 }, { "epoch": 0.29794703163881975, "grad_norm": 1.9609375, "learning_rate": 1.4040444444444446e-05, "loss": 0.3999, "step": 13410 }, { "epoch": 0.2981692143618912, "grad_norm": 2.046875, "learning_rate": 1.4036e-05, "loss": 0.4292, "step": 13420 }, { "epoch": 0.29839139708496265, "grad_norm": 1.890625, "learning_rate": 1.4031555555555556e-05, "loss": 0.3904, "step": 13430 }, { "epoch": 0.2986135798080341, "grad_norm": 2.390625, "learning_rate": 1.4027111111111112e-05, "loss": 0.4645, "step": 13440 }, { "epoch": 0.2988357625311056, "grad_norm": 2.484375, "learning_rate": 1.4022666666666669e-05, "loss": 0.4084, "step": 13450 }, { "epoch": 0.29905794525417706, "grad_norm": 2.125, "learning_rate": 1.4018222222222224e-05, "loss": 0.437, "step": 13460 }, { "epoch": 0.2992801279772485, "grad_norm": 2.203125, "learning_rate": 1.401377777777778e-05, "loss": 0.3942, "step": 13470 }, { "epoch": 0.29950231070031996, "grad_norm": 1.953125, "learning_rate": 1.4009333333333335e-05, "loss": 0.397, "step": 13480 }, { "epoch": 0.2997244934233914, "grad_norm": 2.3125, "learning_rate": 1.400488888888889e-05, "loss": 0.4051, "step": 13490 }, { "epoch": 0.29994667614646287, "grad_norm": 1.8125, "learning_rate": 1.4000444444444445e-05, "loss": 0.369, "step": 13500 }, { "epoch": 0.3001688588695343, "grad_norm": 2.375, "learning_rate": 1.3996000000000001e-05, "loss": 0.3987, "step": 13510 }, { "epoch": 0.30039104159260577, "grad_norm": 2.34375, "learning_rate": 1.3991555555555556e-05, "loss": 0.4054, "step": 13520 }, { "epoch": 0.3006132243156772, "grad_norm": 2.140625, "learning_rate": 1.3987111111111111e-05, "loss": 0.3922, "step": 13530 }, { "epoch": 0.30083540703874867, "grad_norm": 1.828125, "learning_rate": 1.3982666666666669e-05, "loss": 0.3975, "step": 13540 }, { "epoch": 0.3010575897618201, "grad_norm": 1.9765625, "learning_rate": 1.3978222222222224e-05, "loss": 0.3955, "step": 13550 }, { "epoch": 0.3012797724848916, "grad_norm": 1.984375, "learning_rate": 1.3973777777777779e-05, "loss": 0.4114, "step": 13560 }, { "epoch": 0.301501955207963, "grad_norm": 2.109375, "learning_rate": 1.3969333333333335e-05, "loss": 0.4023, "step": 13570 }, { "epoch": 0.3017241379310345, "grad_norm": 2.1875, "learning_rate": 1.396488888888889e-05, "loss": 0.3926, "step": 13580 }, { "epoch": 0.3019463206541059, "grad_norm": 2.296875, "learning_rate": 1.3960444444444445e-05, "loss": 0.4103, "step": 13590 }, { "epoch": 0.3021685033771774, "grad_norm": 1.953125, "learning_rate": 1.3956e-05, "loss": 0.4331, "step": 13600 }, { "epoch": 0.30239068610024883, "grad_norm": 2.28125, "learning_rate": 1.3951555555555556e-05, "loss": 0.3934, "step": 13610 }, { "epoch": 0.3026128688233203, "grad_norm": 2.328125, "learning_rate": 1.3947111111111111e-05, "loss": 0.4256, "step": 13620 }, { "epoch": 0.30283505154639173, "grad_norm": 2.0, "learning_rate": 1.394266666666667e-05, "loss": 0.4013, "step": 13630 }, { "epoch": 0.3030572342694632, "grad_norm": 2.25, "learning_rate": 1.3938222222222224e-05, "loss": 0.4024, "step": 13640 }, { "epoch": 0.30327941699253463, "grad_norm": 1.953125, "learning_rate": 1.393377777777778e-05, "loss": 0.3839, "step": 13650 }, { "epoch": 0.30350159971560614, "grad_norm": 2.015625, "learning_rate": 1.3929333333333334e-05, "loss": 0.3905, "step": 13660 }, { "epoch": 0.3037237824386776, "grad_norm": 2.140625, "learning_rate": 1.392488888888889e-05, "loss": 0.3926, "step": 13670 }, { "epoch": 0.30394596516174904, "grad_norm": 1.921875, "learning_rate": 1.3920444444444445e-05, "loss": 0.3708, "step": 13680 }, { "epoch": 0.3041681478848205, "grad_norm": 2.046875, "learning_rate": 1.3916e-05, "loss": 0.3952, "step": 13690 }, { "epoch": 0.30439033060789195, "grad_norm": 2.296875, "learning_rate": 1.3911555555555555e-05, "loss": 0.428, "step": 13700 }, { "epoch": 0.3046125133309634, "grad_norm": 1.78125, "learning_rate": 1.3907111111111112e-05, "loss": 0.3858, "step": 13710 }, { "epoch": 0.30483469605403485, "grad_norm": 2.46875, "learning_rate": 1.3902666666666668e-05, "loss": 0.4229, "step": 13720 }, { "epoch": 0.3050568787771063, "grad_norm": 2.3125, "learning_rate": 1.3898222222222225e-05, "loss": 0.4229, "step": 13730 }, { "epoch": 0.30527906150017775, "grad_norm": 2.078125, "learning_rate": 1.389377777777778e-05, "loss": 0.3765, "step": 13740 }, { "epoch": 0.3055012442232492, "grad_norm": 2.234375, "learning_rate": 1.3889333333333334e-05, "loss": 0.4365, "step": 13750 }, { "epoch": 0.30572342694632065, "grad_norm": 2.171875, "learning_rate": 1.388488888888889e-05, "loss": 0.4774, "step": 13760 }, { "epoch": 0.3059456096693921, "grad_norm": 2.03125, "learning_rate": 1.3880444444444446e-05, "loss": 0.4109, "step": 13770 }, { "epoch": 0.30616779239246356, "grad_norm": 2.171875, "learning_rate": 1.3876e-05, "loss": 0.4125, "step": 13780 }, { "epoch": 0.306389975115535, "grad_norm": 1.90625, "learning_rate": 1.3871555555555555e-05, "loss": 0.3687, "step": 13790 }, { "epoch": 0.30661215783860646, "grad_norm": 2.171875, "learning_rate": 1.3867111111111112e-05, "loss": 0.4284, "step": 13800 }, { "epoch": 0.3068343405616779, "grad_norm": 2.34375, "learning_rate": 1.3862666666666669e-05, "loss": 0.3744, "step": 13810 }, { "epoch": 0.30705652328474936, "grad_norm": 1.828125, "learning_rate": 1.3858222222222223e-05, "loss": 0.4065, "step": 13820 }, { "epoch": 0.3072787060078208, "grad_norm": 1.9453125, "learning_rate": 1.385377777777778e-05, "loss": 0.4315, "step": 13830 }, { "epoch": 0.30750088873089226, "grad_norm": 2.140625, "learning_rate": 1.3849333333333335e-05, "loss": 0.3864, "step": 13840 }, { "epoch": 0.3077230714539637, "grad_norm": 2.109375, "learning_rate": 1.384488888888889e-05, "loss": 0.3963, "step": 13850 }, { "epoch": 0.3079452541770352, "grad_norm": 2.1875, "learning_rate": 1.3840444444444444e-05, "loss": 0.3705, "step": 13860 }, { "epoch": 0.3081674369001067, "grad_norm": 2.0, "learning_rate": 1.3836000000000001e-05, "loss": 0.3835, "step": 13870 }, { "epoch": 0.3083896196231781, "grad_norm": 2.625, "learning_rate": 1.3831555555555556e-05, "loss": 0.441, "step": 13880 }, { "epoch": 0.3086118023462496, "grad_norm": 2.03125, "learning_rate": 1.382711111111111e-05, "loss": 0.4058, "step": 13890 }, { "epoch": 0.308833985069321, "grad_norm": 2.390625, "learning_rate": 1.3822666666666669e-05, "loss": 0.4046, "step": 13900 }, { "epoch": 0.3090561677923925, "grad_norm": 2.515625, "learning_rate": 1.3818222222222224e-05, "loss": 0.4499, "step": 13910 }, { "epoch": 0.30927835051546393, "grad_norm": 2.359375, "learning_rate": 1.3813777777777779e-05, "loss": 0.4132, "step": 13920 }, { "epoch": 0.3095005332385354, "grad_norm": 2.078125, "learning_rate": 1.3809333333333335e-05, "loss": 0.3895, "step": 13930 }, { "epoch": 0.30972271596160683, "grad_norm": 2.0, "learning_rate": 1.380488888888889e-05, "loss": 0.417, "step": 13940 }, { "epoch": 0.3099448986846783, "grad_norm": 2.078125, "learning_rate": 1.3800444444444445e-05, "loss": 0.3978, "step": 13950 }, { "epoch": 0.31016708140774973, "grad_norm": 2.0625, "learning_rate": 1.3796e-05, "loss": 0.4105, "step": 13960 }, { "epoch": 0.3103892641308212, "grad_norm": 2.171875, "learning_rate": 1.3791555555555556e-05, "loss": 0.3786, "step": 13970 }, { "epoch": 0.31061144685389264, "grad_norm": 1.984375, "learning_rate": 1.3787111111111113e-05, "loss": 0.4309, "step": 13980 }, { "epoch": 0.3108336295769641, "grad_norm": 2.3125, "learning_rate": 1.378266666666667e-05, "loss": 0.3947, "step": 13990 }, { "epoch": 0.31105581230003554, "grad_norm": 2.078125, "learning_rate": 1.3778222222222224e-05, "loss": 0.3722, "step": 14000 }, { "epoch": 0.311277995023107, "grad_norm": 2.5625, "learning_rate": 1.3773777777777779e-05, "loss": 0.4129, "step": 14010 }, { "epoch": 0.31150017774617844, "grad_norm": 1.875, "learning_rate": 1.3769333333333334e-05, "loss": 0.4232, "step": 14020 }, { "epoch": 0.3117223604692499, "grad_norm": 2.0, "learning_rate": 1.376488888888889e-05, "loss": 0.4003, "step": 14030 }, { "epoch": 0.31194454319232134, "grad_norm": 2.546875, "learning_rate": 1.3760444444444445e-05, "loss": 0.4271, "step": 14040 }, { "epoch": 0.3121667259153928, "grad_norm": 2.390625, "learning_rate": 1.3756e-05, "loss": 0.4188, "step": 14050 }, { "epoch": 0.31238890863846425, "grad_norm": 2.0, "learning_rate": 1.3751555555555555e-05, "loss": 0.3805, "step": 14060 }, { "epoch": 0.31261109136153575, "grad_norm": 2.03125, "learning_rate": 1.3747111111111113e-05, "loss": 0.4182, "step": 14070 }, { "epoch": 0.3128332740846072, "grad_norm": 2.03125, "learning_rate": 1.3742666666666668e-05, "loss": 0.4018, "step": 14080 }, { "epoch": 0.31305545680767866, "grad_norm": 2.0625, "learning_rate": 1.3738222222222225e-05, "loss": 0.4084, "step": 14090 }, { "epoch": 0.3132776395307501, "grad_norm": 2.390625, "learning_rate": 1.373377777777778e-05, "loss": 0.3705, "step": 14100 }, { "epoch": 0.31349982225382156, "grad_norm": 2.296875, "learning_rate": 1.3729333333333334e-05, "loss": 0.413, "step": 14110 }, { "epoch": 0.313722004976893, "grad_norm": 2.609375, "learning_rate": 1.3724888888888889e-05, "loss": 0.4258, "step": 14120 }, { "epoch": 0.31394418769996446, "grad_norm": 2.046875, "learning_rate": 1.3720444444444446e-05, "loss": 0.4217, "step": 14130 }, { "epoch": 0.3141663704230359, "grad_norm": 2.109375, "learning_rate": 1.3716e-05, "loss": 0.428, "step": 14140 }, { "epoch": 0.31438855314610736, "grad_norm": 1.9453125, "learning_rate": 1.3711555555555555e-05, "loss": 0.3744, "step": 14150 }, { "epoch": 0.3146107358691788, "grad_norm": 2.203125, "learning_rate": 1.3707111111111114e-05, "loss": 0.4357, "step": 14160 }, { "epoch": 0.31483291859225027, "grad_norm": 1.84375, "learning_rate": 1.3702666666666668e-05, "loss": 0.4138, "step": 14170 }, { "epoch": 0.3150551013153217, "grad_norm": 2.171875, "learning_rate": 1.3698222222222223e-05, "loss": 0.3804, "step": 14180 }, { "epoch": 0.31527728403839317, "grad_norm": 2.09375, "learning_rate": 1.369377777777778e-05, "loss": 0.4022, "step": 14190 }, { "epoch": 0.3154994667614646, "grad_norm": 2.15625, "learning_rate": 1.3689333333333335e-05, "loss": 0.4232, "step": 14200 }, { "epoch": 0.31572164948453607, "grad_norm": 2.15625, "learning_rate": 1.368488888888889e-05, "loss": 0.4258, "step": 14210 }, { "epoch": 0.3159438322076075, "grad_norm": 2.34375, "learning_rate": 1.3680444444444444e-05, "loss": 0.4339, "step": 14220 }, { "epoch": 0.316166014930679, "grad_norm": 1.90625, "learning_rate": 1.3676000000000001e-05, "loss": 0.3538, "step": 14230 }, { "epoch": 0.3163881976537504, "grad_norm": 2.453125, "learning_rate": 1.3671555555555556e-05, "loss": 0.4367, "step": 14240 }, { "epoch": 0.3166103803768219, "grad_norm": 2.03125, "learning_rate": 1.3667111111111114e-05, "loss": 0.3788, "step": 14250 }, { "epoch": 0.3168325630998933, "grad_norm": 2.1875, "learning_rate": 1.3662666666666669e-05, "loss": 0.4369, "step": 14260 }, { "epoch": 0.3170547458229648, "grad_norm": 2.296875, "learning_rate": 1.3658222222222224e-05, "loss": 0.4107, "step": 14270 }, { "epoch": 0.3172769285460363, "grad_norm": 2.375, "learning_rate": 1.3653777777777778e-05, "loss": 0.4378, "step": 14280 }, { "epoch": 0.31749911126910774, "grad_norm": 1.8515625, "learning_rate": 1.3649333333333335e-05, "loss": 0.3985, "step": 14290 }, { "epoch": 0.3177212939921792, "grad_norm": 2.421875, "learning_rate": 1.364488888888889e-05, "loss": 0.3981, "step": 14300 }, { "epoch": 0.31794347671525064, "grad_norm": 2.0, "learning_rate": 1.3640444444444445e-05, "loss": 0.3764, "step": 14310 }, { "epoch": 0.3181656594383221, "grad_norm": 2.046875, "learning_rate": 1.3636e-05, "loss": 0.3738, "step": 14320 }, { "epoch": 0.31838784216139354, "grad_norm": 1.6875, "learning_rate": 1.3631555555555556e-05, "loss": 0.4342, "step": 14330 }, { "epoch": 0.318610024884465, "grad_norm": 1.9140625, "learning_rate": 1.3627111111111113e-05, "loss": 0.4101, "step": 14340 }, { "epoch": 0.31883220760753644, "grad_norm": 2.34375, "learning_rate": 1.362266666666667e-05, "loss": 0.3919, "step": 14350 }, { "epoch": 0.3190543903306079, "grad_norm": 2.34375, "learning_rate": 1.3618222222222224e-05, "loss": 0.3694, "step": 14360 }, { "epoch": 0.31927657305367935, "grad_norm": 2.15625, "learning_rate": 1.3613777777777779e-05, "loss": 0.4038, "step": 14370 }, { "epoch": 0.3194987557767508, "grad_norm": 2.015625, "learning_rate": 1.3609333333333334e-05, "loss": 0.4014, "step": 14380 }, { "epoch": 0.31972093849982225, "grad_norm": 2.234375, "learning_rate": 1.360488888888889e-05, "loss": 0.377, "step": 14390 }, { "epoch": 0.3199431212228937, "grad_norm": 2.078125, "learning_rate": 1.3600444444444445e-05, "loss": 0.4202, "step": 14400 }, { "epoch": 0.32016530394596515, "grad_norm": 2.140625, "learning_rate": 1.3596e-05, "loss": 0.3957, "step": 14410 }, { "epoch": 0.3203874866690366, "grad_norm": 2.0, "learning_rate": 1.3591555555555555e-05, "loss": 0.4077, "step": 14420 }, { "epoch": 0.32060966939210805, "grad_norm": 2.46875, "learning_rate": 1.3587111111111113e-05, "loss": 0.4165, "step": 14430 }, { "epoch": 0.3208318521151795, "grad_norm": 2.5, "learning_rate": 1.3582666666666668e-05, "loss": 0.4065, "step": 14440 }, { "epoch": 0.32105403483825096, "grad_norm": 2.09375, "learning_rate": 1.3578222222222224e-05, "loss": 0.3884, "step": 14450 }, { "epoch": 0.3212762175613224, "grad_norm": 2.0625, "learning_rate": 1.357377777777778e-05, "loss": 0.403, "step": 14460 }, { "epoch": 0.32149840028439386, "grad_norm": 2.625, "learning_rate": 1.3569333333333334e-05, "loss": 0.4437, "step": 14470 }, { "epoch": 0.32172058300746537, "grad_norm": 1.859375, "learning_rate": 1.3564888888888889e-05, "loss": 0.3638, "step": 14480 }, { "epoch": 0.3219427657305368, "grad_norm": 2.09375, "learning_rate": 1.3560444444444445e-05, "loss": 0.4478, "step": 14490 }, { "epoch": 0.32216494845360827, "grad_norm": 2.265625, "learning_rate": 1.3556e-05, "loss": 0.3968, "step": 14500 }, { "epoch": 0.3223871311766797, "grad_norm": 2.859375, "learning_rate": 1.3551555555555555e-05, "loss": 0.4, "step": 14510 }, { "epoch": 0.32260931389975117, "grad_norm": 1.90625, "learning_rate": 1.3547111111111113e-05, "loss": 0.4057, "step": 14520 }, { "epoch": 0.3228314966228226, "grad_norm": 2.03125, "learning_rate": 1.3542666666666668e-05, "loss": 0.3609, "step": 14530 }, { "epoch": 0.3230536793458941, "grad_norm": 1.9765625, "learning_rate": 1.3538222222222223e-05, "loss": 0.4431, "step": 14540 }, { "epoch": 0.3232758620689655, "grad_norm": 2.25, "learning_rate": 1.353377777777778e-05, "loss": 0.3886, "step": 14550 }, { "epoch": 0.323498044792037, "grad_norm": 2.0625, "learning_rate": 1.3529333333333334e-05, "loss": 0.3583, "step": 14560 }, { "epoch": 0.3237202275151084, "grad_norm": 2.03125, "learning_rate": 1.352488888888889e-05, "loss": 0.3723, "step": 14570 }, { "epoch": 0.3239424102381799, "grad_norm": 1.921875, "learning_rate": 1.3520444444444444e-05, "loss": 0.4102, "step": 14580 }, { "epoch": 0.32416459296125133, "grad_norm": 1.9609375, "learning_rate": 1.3516e-05, "loss": 0.4085, "step": 14590 }, { "epoch": 0.3243867756843228, "grad_norm": 2.75, "learning_rate": 1.3511555555555557e-05, "loss": 0.3869, "step": 14600 }, { "epoch": 0.32460895840739423, "grad_norm": 2.359375, "learning_rate": 1.3507111111111114e-05, "loss": 0.4148, "step": 14610 }, { "epoch": 0.3248311411304657, "grad_norm": 2.03125, "learning_rate": 1.3502666666666669e-05, "loss": 0.3853, "step": 14620 }, { "epoch": 0.32505332385353713, "grad_norm": 1.8984375, "learning_rate": 1.3498222222222223e-05, "loss": 0.3862, "step": 14630 }, { "epoch": 0.3252755065766086, "grad_norm": 2.140625, "learning_rate": 1.3493777777777778e-05, "loss": 0.3866, "step": 14640 }, { "epoch": 0.32549768929968004, "grad_norm": 2.234375, "learning_rate": 1.3489333333333335e-05, "loss": 0.3989, "step": 14650 }, { "epoch": 0.3257198720227515, "grad_norm": 1.890625, "learning_rate": 1.348488888888889e-05, "loss": 0.3554, "step": 14660 }, { "epoch": 0.32594205474582294, "grad_norm": 1.828125, "learning_rate": 1.3480444444444445e-05, "loss": 0.3863, "step": 14670 }, { "epoch": 0.3261642374688944, "grad_norm": 2.015625, "learning_rate": 1.3476e-05, "loss": 0.3912, "step": 14680 }, { "epoch": 0.3263864201919659, "grad_norm": 1.8671875, "learning_rate": 1.3471555555555558e-05, "loss": 0.4403, "step": 14690 }, { "epoch": 0.32660860291503735, "grad_norm": 2.453125, "learning_rate": 1.3467111111111112e-05, "loss": 0.4324, "step": 14700 }, { "epoch": 0.3268307856381088, "grad_norm": 2.09375, "learning_rate": 1.3462666666666669e-05, "loss": 0.3766, "step": 14710 }, { "epoch": 0.32705296836118025, "grad_norm": 1.890625, "learning_rate": 1.3458222222222224e-05, "loss": 0.4056, "step": 14720 }, { "epoch": 0.3272751510842517, "grad_norm": 2.125, "learning_rate": 1.3453777777777779e-05, "loss": 0.3994, "step": 14730 }, { "epoch": 0.32749733380732315, "grad_norm": 2.015625, "learning_rate": 1.3449333333333334e-05, "loss": 0.4055, "step": 14740 }, { "epoch": 0.3277195165303946, "grad_norm": 2.046875, "learning_rate": 1.344488888888889e-05, "loss": 0.4287, "step": 14750 }, { "epoch": 0.32794169925346606, "grad_norm": 1.765625, "learning_rate": 1.3440444444444445e-05, "loss": 0.3927, "step": 14760 }, { "epoch": 0.3281638819765375, "grad_norm": 1.984375, "learning_rate": 1.3436e-05, "loss": 0.418, "step": 14770 }, { "epoch": 0.32838606469960896, "grad_norm": 2.25, "learning_rate": 1.3431555555555558e-05, "loss": 0.379, "step": 14780 }, { "epoch": 0.3286082474226804, "grad_norm": 2.09375, "learning_rate": 1.3427111111111113e-05, "loss": 0.4045, "step": 14790 }, { "epoch": 0.32883043014575186, "grad_norm": 2.109375, "learning_rate": 1.3422666666666668e-05, "loss": 0.4052, "step": 14800 }, { "epoch": 0.3290526128688233, "grad_norm": 2.46875, "learning_rate": 1.3418222222222224e-05, "loss": 0.4304, "step": 14810 }, { "epoch": 0.32927479559189476, "grad_norm": 2.0625, "learning_rate": 1.3413777777777779e-05, "loss": 0.4112, "step": 14820 }, { "epoch": 0.3294969783149662, "grad_norm": 2.359375, "learning_rate": 1.3409333333333334e-05, "loss": 0.3764, "step": 14830 }, { "epoch": 0.32971916103803767, "grad_norm": 2.125, "learning_rate": 1.3404888888888889e-05, "loss": 0.3632, "step": 14840 }, { "epoch": 0.3299413437611091, "grad_norm": 2.328125, "learning_rate": 1.3400444444444445e-05, "loss": 0.4074, "step": 14850 }, { "epoch": 0.33016352648418057, "grad_norm": 2.28125, "learning_rate": 1.3396e-05, "loss": 0.3798, "step": 14860 }, { "epoch": 0.330385709207252, "grad_norm": 2.546875, "learning_rate": 1.3391555555555558e-05, "loss": 0.4275, "step": 14870 }, { "epoch": 0.33060789193032347, "grad_norm": 2.734375, "learning_rate": 1.3387111111111113e-05, "loss": 0.3696, "step": 14880 }, { "epoch": 0.330830074653395, "grad_norm": 1.9765625, "learning_rate": 1.3382666666666668e-05, "loss": 0.3988, "step": 14890 }, { "epoch": 0.33105225737646643, "grad_norm": 2.0, "learning_rate": 1.3378222222222223e-05, "loss": 0.3687, "step": 14900 }, { "epoch": 0.3312744400995379, "grad_norm": 1.96875, "learning_rate": 1.337377777777778e-05, "loss": 0.3799, "step": 14910 }, { "epoch": 0.33149662282260933, "grad_norm": 2.25, "learning_rate": 1.3369333333333334e-05, "loss": 0.3866, "step": 14920 }, { "epoch": 0.3317188055456808, "grad_norm": 2.015625, "learning_rate": 1.3364888888888889e-05, "loss": 0.3815, "step": 14930 }, { "epoch": 0.33194098826875224, "grad_norm": 2.46875, "learning_rate": 1.3360444444444444e-05, "loss": 0.3885, "step": 14940 }, { "epoch": 0.3321631709918237, "grad_norm": 2.0625, "learning_rate": 1.3356e-05, "loss": 0.3973, "step": 14950 }, { "epoch": 0.33238535371489514, "grad_norm": 1.921875, "learning_rate": 1.3351555555555557e-05, "loss": 0.3729, "step": 14960 }, { "epoch": 0.3326075364379666, "grad_norm": 2.421875, "learning_rate": 1.3347111111111114e-05, "loss": 0.3958, "step": 14970 }, { "epoch": 0.33282971916103804, "grad_norm": 2.6875, "learning_rate": 1.3342666666666668e-05, "loss": 0.4206, "step": 14980 }, { "epoch": 0.3330519018841095, "grad_norm": 1.984375, "learning_rate": 1.3338222222222223e-05, "loss": 0.411, "step": 14990 }, { "epoch": 0.33327408460718094, "grad_norm": 2.1875, "learning_rate": 1.3333777777777778e-05, "loss": 0.3738, "step": 15000 }, { "epoch": 0.3334962673302524, "grad_norm": 1.7109375, "learning_rate": 1.3329333333333335e-05, "loss": 0.3963, "step": 15010 }, { "epoch": 0.33371845005332385, "grad_norm": 2.3125, "learning_rate": 1.332488888888889e-05, "loss": 0.4623, "step": 15020 }, { "epoch": 0.3339406327763953, "grad_norm": 1.96875, "learning_rate": 1.3320444444444444e-05, "loss": 0.4067, "step": 15030 }, { "epoch": 0.33416281549946675, "grad_norm": 1.9921875, "learning_rate": 1.3316e-05, "loss": 0.3753, "step": 15040 }, { "epoch": 0.3343849982225382, "grad_norm": 2.078125, "learning_rate": 1.3311555555555557e-05, "loss": 0.4527, "step": 15050 }, { "epoch": 0.33460718094560965, "grad_norm": 2.15625, "learning_rate": 1.3307111111111112e-05, "loss": 0.3978, "step": 15060 }, { "epoch": 0.3348293636686811, "grad_norm": 1.7109375, "learning_rate": 1.3302666666666669e-05, "loss": 0.3915, "step": 15070 }, { "epoch": 0.33505154639175255, "grad_norm": 2.34375, "learning_rate": 1.3298222222222224e-05, "loss": 0.4272, "step": 15080 }, { "epoch": 0.335273729114824, "grad_norm": 2.265625, "learning_rate": 1.3293777777777779e-05, "loss": 0.3927, "step": 15090 }, { "epoch": 0.3354959118378955, "grad_norm": 1.96875, "learning_rate": 1.3289333333333333e-05, "loss": 0.4217, "step": 15100 }, { "epoch": 0.33571809456096696, "grad_norm": 2.25, "learning_rate": 1.328488888888889e-05, "loss": 0.4208, "step": 15110 }, { "epoch": 0.3359402772840384, "grad_norm": 2.140625, "learning_rate": 1.3280444444444445e-05, "loss": 0.3905, "step": 15120 }, { "epoch": 0.33616246000710986, "grad_norm": 2.25, "learning_rate": 1.3276e-05, "loss": 0.4056, "step": 15130 }, { "epoch": 0.3363846427301813, "grad_norm": 2.078125, "learning_rate": 1.3271555555555558e-05, "loss": 0.4163, "step": 15140 }, { "epoch": 0.33660682545325277, "grad_norm": 2.21875, "learning_rate": 1.3267111111111113e-05, "loss": 0.4131, "step": 15150 }, { "epoch": 0.3368290081763242, "grad_norm": 1.921875, "learning_rate": 1.3262666666666668e-05, "loss": 0.3995, "step": 15160 }, { "epoch": 0.33705119089939567, "grad_norm": 2.265625, "learning_rate": 1.3258222222222224e-05, "loss": 0.4, "step": 15170 }, { "epoch": 0.3372733736224671, "grad_norm": 2.46875, "learning_rate": 1.3253777777777779e-05, "loss": 0.4479, "step": 15180 }, { "epoch": 0.33749555634553857, "grad_norm": 2.171875, "learning_rate": 1.3249333333333334e-05, "loss": 0.4247, "step": 15190 }, { "epoch": 0.33771773906861, "grad_norm": 2.21875, "learning_rate": 1.3244888888888889e-05, "loss": 0.4119, "step": 15200 }, { "epoch": 0.3379399217916815, "grad_norm": 2.125, "learning_rate": 1.3240444444444445e-05, "loss": 0.4206, "step": 15210 }, { "epoch": 0.3381621045147529, "grad_norm": 2.015625, "learning_rate": 1.3236000000000002e-05, "loss": 0.3911, "step": 15220 }, { "epoch": 0.3383842872378244, "grad_norm": 2.015625, "learning_rate": 1.3231555555555558e-05, "loss": 0.3802, "step": 15230 }, { "epoch": 0.33860646996089583, "grad_norm": 2.546875, "learning_rate": 1.3227111111111113e-05, "loss": 0.3835, "step": 15240 }, { "epoch": 0.3388286526839673, "grad_norm": 2.203125, "learning_rate": 1.3222666666666668e-05, "loss": 0.4121, "step": 15250 }, { "epoch": 0.33905083540703873, "grad_norm": 2.203125, "learning_rate": 1.3218222222222223e-05, "loss": 0.4217, "step": 15260 }, { "epoch": 0.3392730181301102, "grad_norm": 2.171875, "learning_rate": 1.321377777777778e-05, "loss": 0.3714, "step": 15270 }, { "epoch": 0.33949520085318163, "grad_norm": 2.15625, "learning_rate": 1.3209333333333334e-05, "loss": 0.4171, "step": 15280 }, { "epoch": 0.3397173835762531, "grad_norm": 2.03125, "learning_rate": 1.3204888888888889e-05, "loss": 0.4203, "step": 15290 }, { "epoch": 0.3399395662993246, "grad_norm": 2.21875, "learning_rate": 1.3200444444444444e-05, "loss": 0.4552, "step": 15300 }, { "epoch": 0.34016174902239604, "grad_norm": 2.34375, "learning_rate": 1.3196000000000002e-05, "loss": 0.3636, "step": 15310 }, { "epoch": 0.3403839317454675, "grad_norm": 2.140625, "learning_rate": 1.3191555555555557e-05, "loss": 0.414, "step": 15320 }, { "epoch": 0.34060611446853895, "grad_norm": 2.265625, "learning_rate": 1.3187111111111113e-05, "loss": 0.3958, "step": 15330 }, { "epoch": 0.3408282971916104, "grad_norm": 2.78125, "learning_rate": 1.3182666666666668e-05, "loss": 0.3564, "step": 15340 }, { "epoch": 0.34105047991468185, "grad_norm": 2.0625, "learning_rate": 1.3178222222222223e-05, "loss": 0.3744, "step": 15350 }, { "epoch": 0.3412726626377533, "grad_norm": 1.890625, "learning_rate": 1.3173777777777778e-05, "loss": 0.4155, "step": 15360 }, { "epoch": 0.34149484536082475, "grad_norm": 2.1875, "learning_rate": 1.3169333333333335e-05, "loss": 0.4235, "step": 15370 }, { "epoch": 0.3417170280838962, "grad_norm": 2.46875, "learning_rate": 1.316488888888889e-05, "loss": 0.4195, "step": 15380 }, { "epoch": 0.34193921080696765, "grad_norm": 2.234375, "learning_rate": 1.3160444444444444e-05, "loss": 0.3463, "step": 15390 }, { "epoch": 0.3421613935300391, "grad_norm": 2.375, "learning_rate": 1.3156000000000002e-05, "loss": 0.3943, "step": 15400 }, { "epoch": 0.34238357625311056, "grad_norm": 2.03125, "learning_rate": 1.3151555555555557e-05, "loss": 0.405, "step": 15410 }, { "epoch": 0.342605758976182, "grad_norm": 1.96875, "learning_rate": 1.3147111111111112e-05, "loss": 0.3886, "step": 15420 }, { "epoch": 0.34282794169925346, "grad_norm": 2.21875, "learning_rate": 1.3142666666666669e-05, "loss": 0.4052, "step": 15430 }, { "epoch": 0.3430501244223249, "grad_norm": 2.28125, "learning_rate": 1.3138222222222224e-05, "loss": 0.4238, "step": 15440 }, { "epoch": 0.34327230714539636, "grad_norm": 2.203125, "learning_rate": 1.3133777777777778e-05, "loss": 0.3883, "step": 15450 }, { "epoch": 0.3434944898684678, "grad_norm": 2.109375, "learning_rate": 1.3129333333333333e-05, "loss": 0.3901, "step": 15460 }, { "epoch": 0.34371667259153926, "grad_norm": 2.203125, "learning_rate": 1.312488888888889e-05, "loss": 0.3939, "step": 15470 }, { "epoch": 0.3439388553146107, "grad_norm": 2.1875, "learning_rate": 1.3120444444444445e-05, "loss": 0.3887, "step": 15480 }, { "epoch": 0.34416103803768217, "grad_norm": 1.9375, "learning_rate": 1.3116000000000003e-05, "loss": 0.3727, "step": 15490 }, { "epoch": 0.3443832207607536, "grad_norm": 2.15625, "learning_rate": 1.3111555555555558e-05, "loss": 0.3968, "step": 15500 }, { "epoch": 0.3446054034838251, "grad_norm": 2.53125, "learning_rate": 1.3107111111111113e-05, "loss": 0.4136, "step": 15510 }, { "epoch": 0.3448275862068966, "grad_norm": 2.1875, "learning_rate": 1.3102666666666667e-05, "loss": 0.3942, "step": 15520 }, { "epoch": 0.345049768929968, "grad_norm": 2.15625, "learning_rate": 1.3098222222222224e-05, "loss": 0.4188, "step": 15530 }, { "epoch": 0.3452719516530395, "grad_norm": 2.296875, "learning_rate": 1.3093777777777779e-05, "loss": 0.3917, "step": 15540 }, { "epoch": 0.34549413437611093, "grad_norm": 2.1875, "learning_rate": 1.3089333333333334e-05, "loss": 0.4118, "step": 15550 }, { "epoch": 0.3457163170991824, "grad_norm": 2.34375, "learning_rate": 1.3084888888888888e-05, "loss": 0.4058, "step": 15560 }, { "epoch": 0.34593849982225383, "grad_norm": 2.015625, "learning_rate": 1.3080444444444445e-05, "loss": 0.3819, "step": 15570 }, { "epoch": 0.3461606825453253, "grad_norm": 2.4375, "learning_rate": 1.3076000000000002e-05, "loss": 0.4499, "step": 15580 }, { "epoch": 0.34638286526839673, "grad_norm": 2.28125, "learning_rate": 1.3071555555555558e-05, "loss": 0.3809, "step": 15590 }, { "epoch": 0.3466050479914682, "grad_norm": 1.8671875, "learning_rate": 1.3067111111111113e-05, "loss": 0.3994, "step": 15600 }, { "epoch": 0.34682723071453964, "grad_norm": 2.109375, "learning_rate": 1.3062666666666668e-05, "loss": 0.4276, "step": 15610 }, { "epoch": 0.3470494134376111, "grad_norm": 2.21875, "learning_rate": 1.3058222222222223e-05, "loss": 0.358, "step": 15620 }, { "epoch": 0.34727159616068254, "grad_norm": 2.171875, "learning_rate": 1.3053777777777779e-05, "loss": 0.4068, "step": 15630 }, { "epoch": 0.347493778883754, "grad_norm": 2.078125, "learning_rate": 1.3049333333333334e-05, "loss": 0.4042, "step": 15640 }, { "epoch": 0.34771596160682544, "grad_norm": 2.125, "learning_rate": 1.3044888888888889e-05, "loss": 0.3948, "step": 15650 }, { "epoch": 0.3479381443298969, "grad_norm": 2.3125, "learning_rate": 1.3040444444444444e-05, "loss": 0.4206, "step": 15660 }, { "epoch": 0.34816032705296834, "grad_norm": 1.90625, "learning_rate": 1.3036000000000002e-05, "loss": 0.3708, "step": 15670 }, { "epoch": 0.3483825097760398, "grad_norm": 2.578125, "learning_rate": 1.3031555555555557e-05, "loss": 0.3844, "step": 15680 }, { "epoch": 0.34860469249911125, "grad_norm": 2.65625, "learning_rate": 1.3027111111111113e-05, "loss": 0.4132, "step": 15690 }, { "epoch": 0.3488268752221827, "grad_norm": 1.921875, "learning_rate": 1.3022666666666668e-05, "loss": 0.3929, "step": 15700 }, { "epoch": 0.3490490579452542, "grad_norm": 1.9609375, "learning_rate": 1.3018222222222223e-05, "loss": 0.4396, "step": 15710 }, { "epoch": 0.34927124066832566, "grad_norm": 2.125, "learning_rate": 1.3013777777777778e-05, "loss": 0.4274, "step": 15720 }, { "epoch": 0.3494934233913971, "grad_norm": 2.34375, "learning_rate": 1.3009333333333334e-05, "loss": 0.389, "step": 15730 }, { "epoch": 0.34971560611446856, "grad_norm": 2.4375, "learning_rate": 1.300488888888889e-05, "loss": 0.4186, "step": 15740 }, { "epoch": 0.34993778883754, "grad_norm": 2.046875, "learning_rate": 1.3000444444444444e-05, "loss": 0.3345, "step": 15750 }, { "epoch": 0.35015997156061146, "grad_norm": 2.21875, "learning_rate": 1.2996000000000002e-05, "loss": 0.4343, "step": 15760 }, { "epoch": 0.3503821542836829, "grad_norm": 2.28125, "learning_rate": 1.2991555555555557e-05, "loss": 0.392, "step": 15770 }, { "epoch": 0.35060433700675436, "grad_norm": 2.421875, "learning_rate": 1.2987111111111112e-05, "loss": 0.3902, "step": 15780 }, { "epoch": 0.3508265197298258, "grad_norm": 2.375, "learning_rate": 1.2982666666666669e-05, "loss": 0.4401, "step": 15790 }, { "epoch": 0.35104870245289727, "grad_norm": 2.546875, "learning_rate": 1.2978222222222223e-05, "loss": 0.3895, "step": 15800 }, { "epoch": 0.3512708851759687, "grad_norm": 2.25, "learning_rate": 1.2973777777777778e-05, "loss": 0.3998, "step": 15810 }, { "epoch": 0.35149306789904017, "grad_norm": 1.890625, "learning_rate": 1.2969333333333333e-05, "loss": 0.3912, "step": 15820 }, { "epoch": 0.3517152506221116, "grad_norm": 2.0625, "learning_rate": 1.296488888888889e-05, "loss": 0.3873, "step": 15830 }, { "epoch": 0.35193743334518307, "grad_norm": 2.15625, "learning_rate": 1.2960444444444446e-05, "loss": 0.3816, "step": 15840 }, { "epoch": 0.3521596160682545, "grad_norm": 2.078125, "learning_rate": 1.2956000000000003e-05, "loss": 0.3876, "step": 15850 }, { "epoch": 0.352381798791326, "grad_norm": 2.609375, "learning_rate": 1.2951555555555558e-05, "loss": 0.4435, "step": 15860 }, { "epoch": 0.3526039815143974, "grad_norm": 2.28125, "learning_rate": 1.2947111111111112e-05, "loss": 0.4185, "step": 15870 }, { "epoch": 0.3528261642374689, "grad_norm": 2.265625, "learning_rate": 1.2942666666666667e-05, "loss": 0.4111, "step": 15880 }, { "epoch": 0.3530483469605403, "grad_norm": 2.046875, "learning_rate": 1.2938222222222224e-05, "loss": 0.3857, "step": 15890 }, { "epoch": 0.3532705296836118, "grad_norm": 1.984375, "learning_rate": 1.2933777777777779e-05, "loss": 0.4239, "step": 15900 }, { "epoch": 0.35349271240668323, "grad_norm": 2.359375, "learning_rate": 1.2929333333333333e-05, "loss": 0.4113, "step": 15910 }, { "epoch": 0.35371489512975474, "grad_norm": 2.140625, "learning_rate": 1.2924888888888888e-05, "loss": 0.3829, "step": 15920 }, { "epoch": 0.3539370778528262, "grad_norm": 2.046875, "learning_rate": 1.2920444444444447e-05, "loss": 0.4038, "step": 15930 }, { "epoch": 0.35415926057589764, "grad_norm": 2.765625, "learning_rate": 1.2916000000000001e-05, "loss": 0.4277, "step": 15940 }, { "epoch": 0.3543814432989691, "grad_norm": 2.3125, "learning_rate": 1.2911555555555558e-05, "loss": 0.3843, "step": 15950 }, { "epoch": 0.35460362602204054, "grad_norm": 2.171875, "learning_rate": 1.2907111111111113e-05, "loss": 0.4202, "step": 15960 }, { "epoch": 0.354825808745112, "grad_norm": 2.328125, "learning_rate": 1.2902666666666668e-05, "loss": 0.3613, "step": 15970 }, { "epoch": 0.35504799146818344, "grad_norm": 2.046875, "learning_rate": 1.2898222222222222e-05, "loss": 0.3687, "step": 15980 }, { "epoch": 0.3552701741912549, "grad_norm": 2.46875, "learning_rate": 1.2893777777777779e-05, "loss": 0.3963, "step": 15990 }, { "epoch": 0.35549235691432635, "grad_norm": 2.109375, "learning_rate": 1.2889333333333334e-05, "loss": 0.3936, "step": 16000 }, { "epoch": 0.3557145396373978, "grad_norm": 2.140625, "learning_rate": 1.2884888888888889e-05, "loss": 0.3891, "step": 16010 }, { "epoch": 0.35593672236046925, "grad_norm": 2.296875, "learning_rate": 1.2880444444444447e-05, "loss": 0.406, "step": 16020 }, { "epoch": 0.3561589050835407, "grad_norm": 2.078125, "learning_rate": 1.2876000000000002e-05, "loss": 0.401, "step": 16030 }, { "epoch": 0.35638108780661215, "grad_norm": 2.46875, "learning_rate": 1.2871555555555557e-05, "loss": 0.4107, "step": 16040 }, { "epoch": 0.3566032705296836, "grad_norm": 1.984375, "learning_rate": 1.2867111111111113e-05, "loss": 0.3754, "step": 16050 }, { "epoch": 0.35682545325275505, "grad_norm": 2.4375, "learning_rate": 1.2862666666666668e-05, "loss": 0.3904, "step": 16060 }, { "epoch": 0.3570476359758265, "grad_norm": 2.1875, "learning_rate": 1.2858222222222223e-05, "loss": 0.4039, "step": 16070 }, { "epoch": 0.35726981869889796, "grad_norm": 2.203125, "learning_rate": 1.2853777777777778e-05, "loss": 0.3954, "step": 16080 }, { "epoch": 0.3574920014219694, "grad_norm": 2.296875, "learning_rate": 1.2849333333333334e-05, "loss": 0.3688, "step": 16090 }, { "epoch": 0.35771418414504086, "grad_norm": 2.1875, "learning_rate": 1.2844888888888889e-05, "loss": 0.4089, "step": 16100 }, { "epoch": 0.3579363668681123, "grad_norm": 1.9765625, "learning_rate": 1.2840444444444447e-05, "loss": 0.3689, "step": 16110 }, { "epoch": 0.3581585495911838, "grad_norm": 2.625, "learning_rate": 1.2836000000000002e-05, "loss": 0.3942, "step": 16120 }, { "epoch": 0.35838073231425527, "grad_norm": 2.046875, "learning_rate": 1.2831555555555557e-05, "loss": 0.4092, "step": 16130 }, { "epoch": 0.3586029150373267, "grad_norm": 2.390625, "learning_rate": 1.2827111111111112e-05, "loss": 0.3869, "step": 16140 }, { "epoch": 0.35882509776039817, "grad_norm": 2.546875, "learning_rate": 1.2822666666666668e-05, "loss": 0.4189, "step": 16150 }, { "epoch": 0.3590472804834696, "grad_norm": 2.1875, "learning_rate": 1.2818222222222223e-05, "loss": 0.4198, "step": 16160 }, { "epoch": 0.3592694632065411, "grad_norm": 2.125, "learning_rate": 1.2813777777777778e-05, "loss": 0.4085, "step": 16170 }, { "epoch": 0.3594916459296125, "grad_norm": 2.3125, "learning_rate": 1.2809333333333333e-05, "loss": 0.4151, "step": 16180 }, { "epoch": 0.359713828652684, "grad_norm": 2.53125, "learning_rate": 1.280488888888889e-05, "loss": 0.3998, "step": 16190 }, { "epoch": 0.3599360113757554, "grad_norm": 2.390625, "learning_rate": 1.2800444444444446e-05, "loss": 0.431, "step": 16200 }, { "epoch": 0.3601581940988269, "grad_norm": 2.203125, "learning_rate": 1.2796000000000003e-05, "loss": 0.4133, "step": 16210 }, { "epoch": 0.36038037682189833, "grad_norm": 2.375, "learning_rate": 1.2791555555555557e-05, "loss": 0.3874, "step": 16220 }, { "epoch": 0.3606025595449698, "grad_norm": 2.125, "learning_rate": 1.2787111111111112e-05, "loss": 0.3901, "step": 16230 }, { "epoch": 0.36082474226804123, "grad_norm": 2.625, "learning_rate": 1.2782666666666667e-05, "loss": 0.4536, "step": 16240 }, { "epoch": 0.3610469249911127, "grad_norm": 2.5, "learning_rate": 1.2778222222222224e-05, "loss": 0.3992, "step": 16250 }, { "epoch": 0.36126910771418413, "grad_norm": 2.328125, "learning_rate": 1.2773777777777778e-05, "loss": 0.3939, "step": 16260 }, { "epoch": 0.3614912904372556, "grad_norm": 2.390625, "learning_rate": 1.2769333333333333e-05, "loss": 0.3851, "step": 16270 }, { "epoch": 0.36171347316032704, "grad_norm": 1.984375, "learning_rate": 1.2764888888888888e-05, "loss": 0.4086, "step": 16280 }, { "epoch": 0.3619356558833985, "grad_norm": 1.984375, "learning_rate": 1.2760444444444446e-05, "loss": 0.4101, "step": 16290 }, { "epoch": 0.36215783860646994, "grad_norm": 1.953125, "learning_rate": 1.2756000000000001e-05, "loss": 0.4031, "step": 16300 }, { "epoch": 0.3623800213295414, "grad_norm": 2.390625, "learning_rate": 1.2751555555555558e-05, "loss": 0.3995, "step": 16310 }, { "epoch": 0.36260220405261284, "grad_norm": 1.8203125, "learning_rate": 1.2747111111111113e-05, "loss": 0.3927, "step": 16320 }, { "epoch": 0.36282438677568435, "grad_norm": 2.25, "learning_rate": 1.2742666666666667e-05, "loss": 0.4459, "step": 16330 }, { "epoch": 0.3630465694987558, "grad_norm": 1.84375, "learning_rate": 1.2738222222222222e-05, "loss": 0.3973, "step": 16340 }, { "epoch": 0.36326875222182725, "grad_norm": 1.8203125, "learning_rate": 1.2733777777777779e-05, "loss": 0.3703, "step": 16350 }, { "epoch": 0.3634909349448987, "grad_norm": 1.9140625, "learning_rate": 1.2729333333333334e-05, "loss": 0.4159, "step": 16360 }, { "epoch": 0.36371311766797015, "grad_norm": 1.90625, "learning_rate": 1.2724888888888889e-05, "loss": 0.4324, "step": 16370 }, { "epoch": 0.3639353003910416, "grad_norm": 2.21875, "learning_rate": 1.2720444444444447e-05, "loss": 0.4476, "step": 16380 }, { "epoch": 0.36415748311411306, "grad_norm": 2.078125, "learning_rate": 1.2716000000000002e-05, "loss": 0.4055, "step": 16390 }, { "epoch": 0.3643796658371845, "grad_norm": 2.46875, "learning_rate": 1.2711555555555556e-05, "loss": 0.3853, "step": 16400 }, { "epoch": 0.36460184856025596, "grad_norm": 2.375, "learning_rate": 1.2707111111111113e-05, "loss": 0.4151, "step": 16410 }, { "epoch": 0.3648240312833274, "grad_norm": 2.203125, "learning_rate": 1.2702666666666668e-05, "loss": 0.3948, "step": 16420 }, { "epoch": 0.36504621400639886, "grad_norm": 2.015625, "learning_rate": 1.2698222222222223e-05, "loss": 0.3796, "step": 16430 }, { "epoch": 0.3652683967294703, "grad_norm": 2.359375, "learning_rate": 1.2693777777777778e-05, "loss": 0.428, "step": 16440 }, { "epoch": 0.36549057945254176, "grad_norm": 1.8125, "learning_rate": 1.2689333333333334e-05, "loss": 0.4008, "step": 16450 }, { "epoch": 0.3657127621756132, "grad_norm": 2.171875, "learning_rate": 1.268488888888889e-05, "loss": 0.4065, "step": 16460 }, { "epoch": 0.36593494489868467, "grad_norm": 1.78125, "learning_rate": 1.2680444444444447e-05, "loss": 0.3712, "step": 16470 }, { "epoch": 0.3661571276217561, "grad_norm": 2.125, "learning_rate": 1.2676000000000002e-05, "loss": 0.3857, "step": 16480 }, { "epoch": 0.36637931034482757, "grad_norm": 2.328125, "learning_rate": 1.2671555555555557e-05, "loss": 0.3649, "step": 16490 }, { "epoch": 0.366601493067899, "grad_norm": 2.21875, "learning_rate": 1.2667111111111112e-05, "loss": 0.4072, "step": 16500 }, { "epoch": 0.36682367579097047, "grad_norm": 2.34375, "learning_rate": 1.2662666666666668e-05, "loss": 0.3622, "step": 16510 }, { "epoch": 0.3670458585140419, "grad_norm": 2.171875, "learning_rate": 1.2658222222222223e-05, "loss": 0.3704, "step": 16520 }, { "epoch": 0.36726804123711343, "grad_norm": 2.421875, "learning_rate": 1.2653777777777778e-05, "loss": 0.3951, "step": 16530 }, { "epoch": 0.3674902239601849, "grad_norm": 2.5, "learning_rate": 1.2649333333333333e-05, "loss": 0.3834, "step": 16540 }, { "epoch": 0.36771240668325633, "grad_norm": 2.59375, "learning_rate": 1.2644888888888891e-05, "loss": 0.3831, "step": 16550 }, { "epoch": 0.3679345894063278, "grad_norm": 2.125, "learning_rate": 1.2640444444444446e-05, "loss": 0.4009, "step": 16560 }, { "epoch": 0.36815677212939923, "grad_norm": 2.1875, "learning_rate": 1.2636000000000002e-05, "loss": 0.3755, "step": 16570 }, { "epoch": 0.3683789548524707, "grad_norm": 2.328125, "learning_rate": 1.2631555555555557e-05, "loss": 0.3906, "step": 16580 }, { "epoch": 0.36860113757554214, "grad_norm": 2.265625, "learning_rate": 1.2627111111111112e-05, "loss": 0.386, "step": 16590 }, { "epoch": 0.3688233202986136, "grad_norm": 2.25, "learning_rate": 1.2622666666666667e-05, "loss": 0.3982, "step": 16600 }, { "epoch": 0.36904550302168504, "grad_norm": 2.203125, "learning_rate": 1.2618222222222223e-05, "loss": 0.3969, "step": 16610 }, { "epoch": 0.3692676857447565, "grad_norm": 2.375, "learning_rate": 1.2613777777777778e-05, "loss": 0.3981, "step": 16620 }, { "epoch": 0.36948986846782794, "grad_norm": 2.125, "learning_rate": 1.2609333333333333e-05, "loss": 0.3991, "step": 16630 }, { "epoch": 0.3697120511908994, "grad_norm": 2.234375, "learning_rate": 1.2604888888888891e-05, "loss": 0.3996, "step": 16640 }, { "epoch": 0.36993423391397084, "grad_norm": 1.8125, "learning_rate": 1.2600444444444446e-05, "loss": 0.3984, "step": 16650 }, { "epoch": 0.3701564166370423, "grad_norm": 2.171875, "learning_rate": 1.2596000000000001e-05, "loss": 0.3777, "step": 16660 }, { "epoch": 0.37037859936011375, "grad_norm": 2.09375, "learning_rate": 1.2591555555555558e-05, "loss": 0.4213, "step": 16670 }, { "epoch": 0.3706007820831852, "grad_norm": 1.71875, "learning_rate": 1.2587111111111112e-05, "loss": 0.3577, "step": 16680 }, { "epoch": 0.37082296480625665, "grad_norm": 1.9609375, "learning_rate": 1.2582666666666667e-05, "loss": 0.4019, "step": 16690 }, { "epoch": 0.3710451475293281, "grad_norm": 2.046875, "learning_rate": 1.2578222222222222e-05, "loss": 0.3884, "step": 16700 }, { "epoch": 0.37126733025239955, "grad_norm": 2.40625, "learning_rate": 1.2573777777777779e-05, "loss": 0.3848, "step": 16710 }, { "epoch": 0.371489512975471, "grad_norm": 1.8515625, "learning_rate": 1.2569333333333333e-05, "loss": 0.3885, "step": 16720 }, { "epoch": 0.37171169569854245, "grad_norm": 2.421875, "learning_rate": 1.2564888888888892e-05, "loss": 0.404, "step": 16730 }, { "epoch": 0.37193387842161396, "grad_norm": 2.015625, "learning_rate": 1.2560444444444447e-05, "loss": 0.4297, "step": 16740 }, { "epoch": 0.3721560611446854, "grad_norm": 2.078125, "learning_rate": 1.2556000000000001e-05, "loss": 0.39, "step": 16750 }, { "epoch": 0.37237824386775686, "grad_norm": 2.03125, "learning_rate": 1.2551555555555556e-05, "loss": 0.3653, "step": 16760 }, { "epoch": 0.3726004265908283, "grad_norm": 2.375, "learning_rate": 1.2547111111111113e-05, "loss": 0.4062, "step": 16770 }, { "epoch": 0.37282260931389977, "grad_norm": 2.140625, "learning_rate": 1.2542666666666668e-05, "loss": 0.3857, "step": 16780 }, { "epoch": 0.3730447920369712, "grad_norm": 2.1875, "learning_rate": 1.2538222222222222e-05, "loss": 0.3776, "step": 16790 }, { "epoch": 0.37326697476004267, "grad_norm": 2.15625, "learning_rate": 1.2533777777777777e-05, "loss": 0.391, "step": 16800 }, { "epoch": 0.3734891574831141, "grad_norm": 2.28125, "learning_rate": 1.2529333333333334e-05, "loss": 0.4218, "step": 16810 }, { "epoch": 0.37371134020618557, "grad_norm": 2.75, "learning_rate": 1.252488888888889e-05, "loss": 0.398, "step": 16820 }, { "epoch": 0.373933522929257, "grad_norm": 2.109375, "learning_rate": 1.2520444444444447e-05, "loss": 0.3775, "step": 16830 }, { "epoch": 0.3741557056523285, "grad_norm": 2.21875, "learning_rate": 1.2516000000000002e-05, "loss": 0.3884, "step": 16840 }, { "epoch": 0.3743778883753999, "grad_norm": 2.875, "learning_rate": 1.2511555555555557e-05, "loss": 0.4421, "step": 16850 }, { "epoch": 0.3746000710984714, "grad_norm": 1.9453125, "learning_rate": 1.2507111111111111e-05, "loss": 0.3946, "step": 16860 }, { "epoch": 0.37482225382154283, "grad_norm": 2.390625, "learning_rate": 1.2502666666666668e-05, "loss": 0.4128, "step": 16870 }, { "epoch": 0.3750444365446143, "grad_norm": 2.171875, "learning_rate": 1.2498222222222223e-05, "loss": 0.4037, "step": 16880 }, { "epoch": 0.37526661926768573, "grad_norm": 2.53125, "learning_rate": 1.2493777777777778e-05, "loss": 0.3919, "step": 16890 }, { "epoch": 0.3754888019907572, "grad_norm": 2.28125, "learning_rate": 1.2489333333333333e-05, "loss": 0.3728, "step": 16900 }, { "epoch": 0.37571098471382863, "grad_norm": 2.28125, "learning_rate": 1.248488888888889e-05, "loss": 0.3716, "step": 16910 }, { "epoch": 0.3759331674369001, "grad_norm": 2.1875, "learning_rate": 1.2480444444444446e-05, "loss": 0.4272, "step": 16920 }, { "epoch": 0.37615535015997154, "grad_norm": 1.9765625, "learning_rate": 1.2476000000000002e-05, "loss": 0.3697, "step": 16930 }, { "epoch": 0.37637753288304304, "grad_norm": 2.515625, "learning_rate": 1.2471555555555557e-05, "loss": 0.3907, "step": 16940 }, { "epoch": 0.3765997156061145, "grad_norm": 2.390625, "learning_rate": 1.2467111111111112e-05, "loss": 0.4133, "step": 16950 }, { "epoch": 0.37682189832918594, "grad_norm": 2.09375, "learning_rate": 1.2462666666666667e-05, "loss": 0.4295, "step": 16960 }, { "epoch": 0.3770440810522574, "grad_norm": 2.34375, "learning_rate": 1.2458222222222223e-05, "loss": 0.3938, "step": 16970 }, { "epoch": 0.37726626377532885, "grad_norm": 2.25, "learning_rate": 1.2453777777777778e-05, "loss": 0.4059, "step": 16980 }, { "epoch": 0.3774884464984003, "grad_norm": 2.15625, "learning_rate": 1.2449333333333333e-05, "loss": 0.366, "step": 16990 }, { "epoch": 0.37771062922147175, "grad_norm": 2.546875, "learning_rate": 1.2444888888888891e-05, "loss": 0.4216, "step": 17000 }, { "epoch": 0.3779328119445432, "grad_norm": 2.484375, "learning_rate": 1.2440444444444446e-05, "loss": 0.3814, "step": 17010 }, { "epoch": 0.37815499466761465, "grad_norm": 2.03125, "learning_rate": 1.2436000000000001e-05, "loss": 0.3967, "step": 17020 }, { "epoch": 0.3783771773906861, "grad_norm": 2.15625, "learning_rate": 1.2431555555555557e-05, "loss": 0.399, "step": 17030 }, { "epoch": 0.37859936011375755, "grad_norm": 2.015625, "learning_rate": 1.2427111111111112e-05, "loss": 0.3599, "step": 17040 }, { "epoch": 0.378821542836829, "grad_norm": 2.734375, "learning_rate": 1.2422666666666667e-05, "loss": 0.4512, "step": 17050 }, { "epoch": 0.37904372555990046, "grad_norm": 1.9609375, "learning_rate": 1.2418222222222222e-05, "loss": 0.4067, "step": 17060 }, { "epoch": 0.3792659082829719, "grad_norm": 2.21875, "learning_rate": 1.2413777777777778e-05, "loss": 0.4283, "step": 17070 }, { "epoch": 0.37948809100604336, "grad_norm": 2.65625, "learning_rate": 1.2409333333333335e-05, "loss": 0.4055, "step": 17080 }, { "epoch": 0.3797102737291148, "grad_norm": 2.203125, "learning_rate": 1.2404888888888892e-05, "loss": 0.3983, "step": 17090 }, { "epoch": 0.37993245645218626, "grad_norm": 2.109375, "learning_rate": 1.2400444444444446e-05, "loss": 0.4344, "step": 17100 }, { "epoch": 0.3801546391752577, "grad_norm": 2.203125, "learning_rate": 1.2396000000000001e-05, "loss": 0.4133, "step": 17110 }, { "epoch": 0.38037682189832916, "grad_norm": 2.15625, "learning_rate": 1.2391555555555556e-05, "loss": 0.4089, "step": 17120 }, { "epoch": 0.3805990046214006, "grad_norm": 2.046875, "learning_rate": 1.2387111111111113e-05, "loss": 0.3972, "step": 17130 }, { "epoch": 0.38082118734447207, "grad_norm": 2.375, "learning_rate": 1.2382666666666667e-05, "loss": 0.3929, "step": 17140 }, { "epoch": 0.3810433700675436, "grad_norm": 1.8515625, "learning_rate": 1.2378222222222222e-05, "loss": 0.4264, "step": 17150 }, { "epoch": 0.381265552790615, "grad_norm": 2.28125, "learning_rate": 1.2373777777777777e-05, "loss": 0.3825, "step": 17160 }, { "epoch": 0.3814877355136865, "grad_norm": 2.359375, "learning_rate": 1.2369333333333335e-05, "loss": 0.3898, "step": 17170 }, { "epoch": 0.38170991823675793, "grad_norm": 2.109375, "learning_rate": 1.236488888888889e-05, "loss": 0.3757, "step": 17180 }, { "epoch": 0.3819321009598294, "grad_norm": 2.1875, "learning_rate": 1.2360444444444447e-05, "loss": 0.4031, "step": 17190 }, { "epoch": 0.38215428368290083, "grad_norm": 2.8125, "learning_rate": 1.2356000000000002e-05, "loss": 0.4101, "step": 17200 }, { "epoch": 0.3823764664059723, "grad_norm": 2.078125, "learning_rate": 1.2351555555555556e-05, "loss": 0.4174, "step": 17210 }, { "epoch": 0.38259864912904373, "grad_norm": 2.03125, "learning_rate": 1.2347111111111111e-05, "loss": 0.3623, "step": 17220 }, { "epoch": 0.3828208318521152, "grad_norm": 1.8359375, "learning_rate": 1.2342666666666668e-05, "loss": 0.3731, "step": 17230 }, { "epoch": 0.38304301457518664, "grad_norm": 2.375, "learning_rate": 1.2338222222222223e-05, "loss": 0.4104, "step": 17240 }, { "epoch": 0.3832651972982581, "grad_norm": 2.390625, "learning_rate": 1.2333777777777778e-05, "loss": 0.4195, "step": 17250 }, { "epoch": 0.38348738002132954, "grad_norm": 1.953125, "learning_rate": 1.2329333333333336e-05, "loss": 0.3738, "step": 17260 }, { "epoch": 0.383709562744401, "grad_norm": 1.703125, "learning_rate": 1.232488888888889e-05, "loss": 0.3437, "step": 17270 }, { "epoch": 0.38393174546747244, "grad_norm": 1.84375, "learning_rate": 1.2320444444444445e-05, "loss": 0.3965, "step": 17280 }, { "epoch": 0.3841539281905439, "grad_norm": 2.203125, "learning_rate": 1.2316000000000002e-05, "loss": 0.4006, "step": 17290 }, { "epoch": 0.38437611091361534, "grad_norm": 2.03125, "learning_rate": 1.2311555555555557e-05, "loss": 0.3767, "step": 17300 }, { "epoch": 0.3845982936366868, "grad_norm": 3.0625, "learning_rate": 1.2307111111111112e-05, "loss": 0.3993, "step": 17310 }, { "epoch": 0.38482047635975825, "grad_norm": 2.484375, "learning_rate": 1.2302666666666667e-05, "loss": 0.3828, "step": 17320 }, { "epoch": 0.3850426590828297, "grad_norm": 2.609375, "learning_rate": 1.2298222222222223e-05, "loss": 0.3965, "step": 17330 }, { "epoch": 0.38526484180590115, "grad_norm": 2.296875, "learning_rate": 1.2293777777777778e-05, "loss": 0.3843, "step": 17340 }, { "epoch": 0.3854870245289726, "grad_norm": 2.0, "learning_rate": 1.2289333333333336e-05, "loss": 0.4183, "step": 17350 }, { "epoch": 0.3857092072520441, "grad_norm": 2.140625, "learning_rate": 1.2284888888888891e-05, "loss": 0.399, "step": 17360 }, { "epoch": 0.38593138997511556, "grad_norm": 1.96875, "learning_rate": 1.2280444444444446e-05, "loss": 0.3745, "step": 17370 }, { "epoch": 0.386153572698187, "grad_norm": 2.265625, "learning_rate": 1.2276e-05, "loss": 0.3713, "step": 17380 }, { "epoch": 0.38637575542125846, "grad_norm": 2.359375, "learning_rate": 1.2271555555555557e-05, "loss": 0.4097, "step": 17390 }, { "epoch": 0.3865979381443299, "grad_norm": 2.28125, "learning_rate": 1.2267111111111112e-05, "loss": 0.4068, "step": 17400 }, { "epoch": 0.38682012086740136, "grad_norm": 2.28125, "learning_rate": 1.2262666666666667e-05, "loss": 0.4166, "step": 17410 }, { "epoch": 0.3870423035904728, "grad_norm": 2.046875, "learning_rate": 1.2258222222222222e-05, "loss": 0.4135, "step": 17420 }, { "epoch": 0.38726448631354426, "grad_norm": 2.046875, "learning_rate": 1.2253777777777778e-05, "loss": 0.3857, "step": 17430 }, { "epoch": 0.3874866690366157, "grad_norm": 1.734375, "learning_rate": 1.2249333333333335e-05, "loss": 0.3891, "step": 17440 }, { "epoch": 0.38770885175968717, "grad_norm": 2.09375, "learning_rate": 1.2244888888888891e-05, "loss": 0.3675, "step": 17450 }, { "epoch": 0.3879310344827586, "grad_norm": 2.484375, "learning_rate": 1.2240444444444446e-05, "loss": 0.3991, "step": 17460 }, { "epoch": 0.38815321720583007, "grad_norm": 2.59375, "learning_rate": 1.2236000000000001e-05, "loss": 0.3881, "step": 17470 }, { "epoch": 0.3883753999289015, "grad_norm": 2.25, "learning_rate": 1.2231555555555556e-05, "loss": 0.4251, "step": 17480 }, { "epoch": 0.388597582651973, "grad_norm": 2.234375, "learning_rate": 1.2227111111111112e-05, "loss": 0.4082, "step": 17490 }, { "epoch": 0.3888197653750444, "grad_norm": 2.15625, "learning_rate": 1.2222666666666667e-05, "loss": 0.38, "step": 17500 }, { "epoch": 0.3890419480981159, "grad_norm": 2.1875, "learning_rate": 1.2218222222222222e-05, "loss": 0.4046, "step": 17510 }, { "epoch": 0.3892641308211873, "grad_norm": 2.109375, "learning_rate": 1.2213777777777777e-05, "loss": 0.3826, "step": 17520 }, { "epoch": 0.3894863135442588, "grad_norm": 2.484375, "learning_rate": 1.2209333333333335e-05, "loss": 0.3907, "step": 17530 }, { "epoch": 0.38970849626733023, "grad_norm": 2.234375, "learning_rate": 1.220488888888889e-05, "loss": 0.371, "step": 17540 }, { "epoch": 0.3899306789904017, "grad_norm": 2.296875, "learning_rate": 1.2200444444444447e-05, "loss": 0.3836, "step": 17550 }, { "epoch": 0.3901528617134732, "grad_norm": 2.015625, "learning_rate": 1.2196000000000001e-05, "loss": 0.4352, "step": 17560 }, { "epoch": 0.39037504443654464, "grad_norm": 1.953125, "learning_rate": 1.2191555555555556e-05, "loss": 0.3747, "step": 17570 }, { "epoch": 0.3905972271596161, "grad_norm": 2.53125, "learning_rate": 1.2187111111111111e-05, "loss": 0.3985, "step": 17580 }, { "epoch": 0.39081940988268754, "grad_norm": 2.3125, "learning_rate": 1.2182666666666668e-05, "loss": 0.4214, "step": 17590 }, { "epoch": 0.391041592605759, "grad_norm": 2.046875, "learning_rate": 1.2178222222222223e-05, "loss": 0.3523, "step": 17600 }, { "epoch": 0.39126377532883044, "grad_norm": 2.296875, "learning_rate": 1.2173777777777777e-05, "loss": 0.389, "step": 17610 }, { "epoch": 0.3914859580519019, "grad_norm": 2.6875, "learning_rate": 1.2169333333333336e-05, "loss": 0.4053, "step": 17620 }, { "epoch": 0.39170814077497335, "grad_norm": 2.25, "learning_rate": 1.216488888888889e-05, "loss": 0.3832, "step": 17630 }, { "epoch": 0.3919303234980448, "grad_norm": 2.046875, "learning_rate": 1.2160444444444445e-05, "loss": 0.3999, "step": 17640 }, { "epoch": 0.39215250622111625, "grad_norm": 2.234375, "learning_rate": 1.2156000000000002e-05, "loss": 0.4054, "step": 17650 }, { "epoch": 0.3923746889441877, "grad_norm": 2.3125, "learning_rate": 1.2151555555555557e-05, "loss": 0.4095, "step": 17660 }, { "epoch": 0.39259687166725915, "grad_norm": 2.171875, "learning_rate": 1.2147111111111112e-05, "loss": 0.4301, "step": 17670 }, { "epoch": 0.3928190543903306, "grad_norm": 1.7421875, "learning_rate": 1.2142666666666666e-05, "loss": 0.3823, "step": 17680 }, { "epoch": 0.39304123711340205, "grad_norm": 2.640625, "learning_rate": 1.2138222222222223e-05, "loss": 0.4237, "step": 17690 }, { "epoch": 0.3932634198364735, "grad_norm": 2.25, "learning_rate": 1.213377777777778e-05, "loss": 0.4074, "step": 17700 }, { "epoch": 0.39348560255954496, "grad_norm": 2.234375, "learning_rate": 1.2129333333333336e-05, "loss": 0.4239, "step": 17710 }, { "epoch": 0.3937077852826164, "grad_norm": 2.296875, "learning_rate": 1.2124888888888891e-05, "loss": 0.3956, "step": 17720 }, { "epoch": 0.39392996800568786, "grad_norm": 2.0, "learning_rate": 1.2120444444444446e-05, "loss": 0.3783, "step": 17730 }, { "epoch": 0.3941521507287593, "grad_norm": 1.8984375, "learning_rate": 1.2116e-05, "loss": 0.3882, "step": 17740 }, { "epoch": 0.39437433345183076, "grad_norm": 2.203125, "learning_rate": 1.2111555555555557e-05, "loss": 0.3935, "step": 17750 }, { "epoch": 0.3945965161749022, "grad_norm": 2.296875, "learning_rate": 1.2107111111111112e-05, "loss": 0.4248, "step": 17760 }, { "epoch": 0.3948186988979737, "grad_norm": 2.828125, "learning_rate": 1.2102666666666667e-05, "loss": 0.4196, "step": 17770 }, { "epoch": 0.39504088162104517, "grad_norm": 2.109375, "learning_rate": 1.2098222222222222e-05, "loss": 0.4248, "step": 17780 }, { "epoch": 0.3952630643441166, "grad_norm": 2.234375, "learning_rate": 1.209377777777778e-05, "loss": 0.3692, "step": 17790 }, { "epoch": 0.3954852470671881, "grad_norm": 2.09375, "learning_rate": 1.2089333333333335e-05, "loss": 0.3917, "step": 17800 }, { "epoch": 0.3957074297902595, "grad_norm": 2.046875, "learning_rate": 1.2084888888888891e-05, "loss": 0.3894, "step": 17810 }, { "epoch": 0.395929612513331, "grad_norm": 2.46875, "learning_rate": 1.2080444444444446e-05, "loss": 0.3956, "step": 17820 }, { "epoch": 0.3961517952364024, "grad_norm": 2.15625, "learning_rate": 1.2076000000000001e-05, "loss": 0.3929, "step": 17830 }, { "epoch": 0.3963739779594739, "grad_norm": 2.171875, "learning_rate": 1.2071555555555556e-05, "loss": 0.406, "step": 17840 }, { "epoch": 0.39659616068254533, "grad_norm": 1.9609375, "learning_rate": 1.2067111111111112e-05, "loss": 0.3919, "step": 17850 }, { "epoch": 0.3968183434056168, "grad_norm": 2.109375, "learning_rate": 1.2062666666666667e-05, "loss": 0.417, "step": 17860 }, { "epoch": 0.39704052612868823, "grad_norm": 1.9921875, "learning_rate": 1.2058222222222222e-05, "loss": 0.3916, "step": 17870 }, { "epoch": 0.3972627088517597, "grad_norm": 2.1875, "learning_rate": 1.205377777777778e-05, "loss": 0.4051, "step": 17880 }, { "epoch": 0.39748489157483113, "grad_norm": 2.078125, "learning_rate": 1.2049333333333335e-05, "loss": 0.3559, "step": 17890 }, { "epoch": 0.3977070742979026, "grad_norm": 2.828125, "learning_rate": 1.204488888888889e-05, "loss": 0.4128, "step": 17900 }, { "epoch": 0.39792925702097404, "grad_norm": 2.109375, "learning_rate": 1.2040444444444446e-05, "loss": 0.3935, "step": 17910 }, { "epoch": 0.3981514397440455, "grad_norm": 2.34375, "learning_rate": 1.2036000000000001e-05, "loss": 0.396, "step": 17920 }, { "epoch": 0.39837362246711694, "grad_norm": 2.28125, "learning_rate": 1.2031555555555556e-05, "loss": 0.388, "step": 17930 }, { "epoch": 0.3985958051901884, "grad_norm": 2.28125, "learning_rate": 1.2027111111111111e-05, "loss": 0.4001, "step": 17940 }, { "epoch": 0.39881798791325984, "grad_norm": 2.015625, "learning_rate": 1.2022666666666668e-05, "loss": 0.3741, "step": 17950 }, { "epoch": 0.3990401706363313, "grad_norm": 2.5, "learning_rate": 1.2018222222222222e-05, "loss": 0.4268, "step": 17960 }, { "epoch": 0.3992623533594028, "grad_norm": 2.1875, "learning_rate": 1.2013777777777779e-05, "loss": 0.4013, "step": 17970 }, { "epoch": 0.39948453608247425, "grad_norm": 2.328125, "learning_rate": 1.2009333333333335e-05, "loss": 0.3873, "step": 17980 }, { "epoch": 0.3997067188055457, "grad_norm": 2.046875, "learning_rate": 1.200488888888889e-05, "loss": 0.3654, "step": 17990 }, { "epoch": 0.39992890152861715, "grad_norm": 2.71875, "learning_rate": 1.2000444444444445e-05, "loss": 0.3867, "step": 18000 }, { "epoch": 0.4001510842516886, "grad_norm": 2.109375, "learning_rate": 1.1996000000000002e-05, "loss": 0.415, "step": 18010 }, { "epoch": 0.40037326697476006, "grad_norm": 2.03125, "learning_rate": 1.1991555555555557e-05, "loss": 0.3893, "step": 18020 }, { "epoch": 0.4005954496978315, "grad_norm": 2.296875, "learning_rate": 1.1987111111111111e-05, "loss": 0.398, "step": 18030 }, { "epoch": 0.40081763242090296, "grad_norm": 2.421875, "learning_rate": 1.1982666666666666e-05, "loss": 0.3704, "step": 18040 }, { "epoch": 0.4010398151439744, "grad_norm": 2.3125, "learning_rate": 1.1978222222222223e-05, "loss": 0.3863, "step": 18050 }, { "epoch": 0.40126199786704586, "grad_norm": 2.6875, "learning_rate": 1.197377777777778e-05, "loss": 0.3964, "step": 18060 }, { "epoch": 0.4014841805901173, "grad_norm": 2.09375, "learning_rate": 1.1969333333333336e-05, "loss": 0.3743, "step": 18070 }, { "epoch": 0.40170636331318876, "grad_norm": 2.53125, "learning_rate": 1.196488888888889e-05, "loss": 0.3623, "step": 18080 }, { "epoch": 0.4019285460362602, "grad_norm": 1.96875, "learning_rate": 1.1960444444444446e-05, "loss": 0.358, "step": 18090 }, { "epoch": 0.40215072875933167, "grad_norm": 1.859375, "learning_rate": 1.1956e-05, "loss": 0.4099, "step": 18100 }, { "epoch": 0.4023729114824031, "grad_norm": 2.203125, "learning_rate": 1.1951555555555557e-05, "loss": 0.3787, "step": 18110 }, { "epoch": 0.40259509420547457, "grad_norm": 2.046875, "learning_rate": 1.1947111111111112e-05, "loss": 0.3866, "step": 18120 }, { "epoch": 0.402817276928546, "grad_norm": 2.140625, "learning_rate": 1.1942666666666667e-05, "loss": 0.3891, "step": 18130 }, { "epoch": 0.40303945965161747, "grad_norm": 2.03125, "learning_rate": 1.1938222222222221e-05, "loss": 0.3637, "step": 18140 }, { "epoch": 0.4032616423746889, "grad_norm": 2.203125, "learning_rate": 1.193377777777778e-05, "loss": 0.3642, "step": 18150 }, { "epoch": 0.4034838250977604, "grad_norm": 2.09375, "learning_rate": 1.1929333333333335e-05, "loss": 0.3591, "step": 18160 }, { "epoch": 0.4037060078208318, "grad_norm": 2.5, "learning_rate": 1.1924888888888891e-05, "loss": 0.3992, "step": 18170 }, { "epoch": 0.40392819054390333, "grad_norm": 2.265625, "learning_rate": 1.1920444444444446e-05, "loss": 0.3903, "step": 18180 }, { "epoch": 0.4041503732669748, "grad_norm": 2.140625, "learning_rate": 1.1916e-05, "loss": 0.4211, "step": 18190 }, { "epoch": 0.40437255599004623, "grad_norm": 2.390625, "learning_rate": 1.1911555555555556e-05, "loss": 0.4168, "step": 18200 }, { "epoch": 0.4045947387131177, "grad_norm": 2.359375, "learning_rate": 1.1907111111111112e-05, "loss": 0.4052, "step": 18210 }, { "epoch": 0.40481692143618914, "grad_norm": 2.375, "learning_rate": 1.1902666666666667e-05, "loss": 0.404, "step": 18220 }, { "epoch": 0.4050391041592606, "grad_norm": 2.71875, "learning_rate": 1.1898222222222222e-05, "loss": 0.4127, "step": 18230 }, { "epoch": 0.40526128688233204, "grad_norm": 2.09375, "learning_rate": 1.189377777777778e-05, "loss": 0.3987, "step": 18240 }, { "epoch": 0.4054834696054035, "grad_norm": 1.8203125, "learning_rate": 1.1889333333333335e-05, "loss": 0.3983, "step": 18250 }, { "epoch": 0.40570565232847494, "grad_norm": 2.296875, "learning_rate": 1.188488888888889e-05, "loss": 0.3852, "step": 18260 }, { "epoch": 0.4059278350515464, "grad_norm": 2.625, "learning_rate": 1.1880444444444446e-05, "loss": 0.382, "step": 18270 }, { "epoch": 0.40615001777461784, "grad_norm": 2.34375, "learning_rate": 1.1876000000000001e-05, "loss": 0.3908, "step": 18280 }, { "epoch": 0.4063722004976893, "grad_norm": 2.296875, "learning_rate": 1.1871555555555556e-05, "loss": 0.4179, "step": 18290 }, { "epoch": 0.40659438322076075, "grad_norm": 2.484375, "learning_rate": 1.186711111111111e-05, "loss": 0.4292, "step": 18300 }, { "epoch": 0.4068165659438322, "grad_norm": 2.09375, "learning_rate": 1.1862666666666667e-05, "loss": 0.4112, "step": 18310 }, { "epoch": 0.40703874866690365, "grad_norm": 2.28125, "learning_rate": 1.1858222222222224e-05, "loss": 0.4137, "step": 18320 }, { "epoch": 0.4072609313899751, "grad_norm": 2.359375, "learning_rate": 1.1853777777777779e-05, "loss": 0.4142, "step": 18330 }, { "epoch": 0.40748311411304655, "grad_norm": 2.4375, "learning_rate": 1.1849333333333335e-05, "loss": 0.3724, "step": 18340 }, { "epoch": 0.407705296836118, "grad_norm": 2.125, "learning_rate": 1.184488888888889e-05, "loss": 0.3825, "step": 18350 }, { "epoch": 0.40792747955918945, "grad_norm": 2.5625, "learning_rate": 1.1840444444444445e-05, "loss": 0.4147, "step": 18360 }, { "epoch": 0.4081496622822609, "grad_norm": 2.171875, "learning_rate": 1.1836000000000002e-05, "loss": 0.38, "step": 18370 }, { "epoch": 0.4083718450053324, "grad_norm": 2.3125, "learning_rate": 1.1831555555555556e-05, "loss": 0.3932, "step": 18380 }, { "epoch": 0.40859402772840386, "grad_norm": 2.171875, "learning_rate": 1.1827111111111111e-05, "loss": 0.4194, "step": 18390 }, { "epoch": 0.4088162104514753, "grad_norm": 2.015625, "learning_rate": 1.1822666666666666e-05, "loss": 0.4206, "step": 18400 }, { "epoch": 0.40903839317454677, "grad_norm": 2.234375, "learning_rate": 1.1818222222222224e-05, "loss": 0.3969, "step": 18410 }, { "epoch": 0.4092605758976182, "grad_norm": 2.609375, "learning_rate": 1.1813777777777779e-05, "loss": 0.3743, "step": 18420 }, { "epoch": 0.40948275862068967, "grad_norm": 2.15625, "learning_rate": 1.1809333333333336e-05, "loss": 0.4294, "step": 18430 }, { "epoch": 0.4097049413437611, "grad_norm": 2.171875, "learning_rate": 1.180488888888889e-05, "loss": 0.3588, "step": 18440 }, { "epoch": 0.40992712406683257, "grad_norm": 2.21875, "learning_rate": 1.1800444444444445e-05, "loss": 0.404, "step": 18450 }, { "epoch": 0.410149306789904, "grad_norm": 2.234375, "learning_rate": 1.1796e-05, "loss": 0.422, "step": 18460 }, { "epoch": 0.4103714895129755, "grad_norm": 2.0625, "learning_rate": 1.1791555555555557e-05, "loss": 0.3502, "step": 18470 }, { "epoch": 0.4105936722360469, "grad_norm": 2.09375, "learning_rate": 1.1787111111111112e-05, "loss": 0.3651, "step": 18480 }, { "epoch": 0.4108158549591184, "grad_norm": 2.34375, "learning_rate": 1.1782666666666666e-05, "loss": 0.4026, "step": 18490 }, { "epoch": 0.4110380376821898, "grad_norm": 2.125, "learning_rate": 1.1778222222222225e-05, "loss": 0.4164, "step": 18500 }, { "epoch": 0.4112602204052613, "grad_norm": 1.96875, "learning_rate": 1.177377777777778e-05, "loss": 0.3597, "step": 18510 }, { "epoch": 0.41148240312833273, "grad_norm": 2.0, "learning_rate": 1.1769333333333334e-05, "loss": 0.3407, "step": 18520 }, { "epoch": 0.4117045858514042, "grad_norm": 2.140625, "learning_rate": 1.1764888888888891e-05, "loss": 0.4096, "step": 18530 }, { "epoch": 0.41192676857447563, "grad_norm": 2.25, "learning_rate": 1.1760444444444446e-05, "loss": 0.3792, "step": 18540 }, { "epoch": 0.4121489512975471, "grad_norm": 2.109375, "learning_rate": 1.1756e-05, "loss": 0.391, "step": 18550 }, { "epoch": 0.41237113402061853, "grad_norm": 2.140625, "learning_rate": 1.1751555555555555e-05, "loss": 0.3822, "step": 18560 }, { "epoch": 0.41259331674369, "grad_norm": 2.640625, "learning_rate": 1.1747111111111112e-05, "loss": 0.3866, "step": 18570 }, { "epoch": 0.41281549946676144, "grad_norm": 2.234375, "learning_rate": 1.1742666666666667e-05, "loss": 0.3711, "step": 18580 }, { "epoch": 0.41303768218983294, "grad_norm": 2.59375, "learning_rate": 1.1738222222222223e-05, "loss": 0.4253, "step": 18590 }, { "epoch": 0.4132598649129044, "grad_norm": 2.34375, "learning_rate": 1.173377777777778e-05, "loss": 0.3965, "step": 18600 }, { "epoch": 0.41348204763597585, "grad_norm": 2.234375, "learning_rate": 1.1729333333333335e-05, "loss": 0.4161, "step": 18610 }, { "epoch": 0.4137042303590473, "grad_norm": 2.109375, "learning_rate": 1.172488888888889e-05, "loss": 0.3879, "step": 18620 }, { "epoch": 0.41392641308211875, "grad_norm": 2.78125, "learning_rate": 1.1720444444444446e-05, "loss": 0.4, "step": 18630 }, { "epoch": 0.4141485958051902, "grad_norm": 2.234375, "learning_rate": 1.1716000000000001e-05, "loss": 0.4214, "step": 18640 }, { "epoch": 0.41437077852826165, "grad_norm": 2.265625, "learning_rate": 1.1711555555555556e-05, "loss": 0.4266, "step": 18650 }, { "epoch": 0.4145929612513331, "grad_norm": 2.21875, "learning_rate": 1.170711111111111e-05, "loss": 0.4241, "step": 18660 }, { "epoch": 0.41481514397440455, "grad_norm": 2.21875, "learning_rate": 1.1702666666666667e-05, "loss": 0.3736, "step": 18670 }, { "epoch": 0.415037326697476, "grad_norm": 2.0625, "learning_rate": 1.1698222222222224e-05, "loss": 0.3792, "step": 18680 }, { "epoch": 0.41525950942054746, "grad_norm": 2.03125, "learning_rate": 1.1693777777777779e-05, "loss": 0.3791, "step": 18690 }, { "epoch": 0.4154816921436189, "grad_norm": 2.578125, "learning_rate": 1.1689333333333335e-05, "loss": 0.4218, "step": 18700 }, { "epoch": 0.41570387486669036, "grad_norm": 2.140625, "learning_rate": 1.168488888888889e-05, "loss": 0.3575, "step": 18710 }, { "epoch": 0.4159260575897618, "grad_norm": 2.46875, "learning_rate": 1.1680444444444445e-05, "loss": 0.4201, "step": 18720 }, { "epoch": 0.41614824031283326, "grad_norm": 2.390625, "learning_rate": 1.1676000000000001e-05, "loss": 0.4063, "step": 18730 }, { "epoch": 0.4163704230359047, "grad_norm": 2.171875, "learning_rate": 1.1671555555555556e-05, "loss": 0.4136, "step": 18740 }, { "epoch": 0.41659260575897616, "grad_norm": 2.171875, "learning_rate": 1.1667111111111111e-05, "loss": 0.3875, "step": 18750 }, { "epoch": 0.4168147884820476, "grad_norm": 2.21875, "learning_rate": 1.1662666666666666e-05, "loss": 0.4041, "step": 18760 }, { "epoch": 0.41703697120511907, "grad_norm": 2.296875, "learning_rate": 1.1658222222222224e-05, "loss": 0.368, "step": 18770 }, { "epoch": 0.4172591539281905, "grad_norm": 2.203125, "learning_rate": 1.1653777777777779e-05, "loss": 0.3725, "step": 18780 }, { "epoch": 0.417481336651262, "grad_norm": 2.09375, "learning_rate": 1.1649333333333336e-05, "loss": 0.3615, "step": 18790 }, { "epoch": 0.4177035193743335, "grad_norm": 2.453125, "learning_rate": 1.164488888888889e-05, "loss": 0.3657, "step": 18800 }, { "epoch": 0.4179257020974049, "grad_norm": 2.296875, "learning_rate": 1.1640444444444445e-05, "loss": 0.4285, "step": 18810 }, { "epoch": 0.4181478848204764, "grad_norm": 2.453125, "learning_rate": 1.1636e-05, "loss": 0.3817, "step": 18820 }, { "epoch": 0.41837006754354783, "grad_norm": 1.921875, "learning_rate": 1.1631555555555557e-05, "loss": 0.4134, "step": 18830 }, { "epoch": 0.4185922502666193, "grad_norm": 2.1875, "learning_rate": 1.1627111111111111e-05, "loss": 0.4261, "step": 18840 }, { "epoch": 0.41881443298969073, "grad_norm": 2.5625, "learning_rate": 1.1622666666666666e-05, "loss": 0.3703, "step": 18850 }, { "epoch": 0.4190366157127622, "grad_norm": 1.9765625, "learning_rate": 1.1618222222222225e-05, "loss": 0.3884, "step": 18860 }, { "epoch": 0.41925879843583364, "grad_norm": 2.09375, "learning_rate": 1.161377777777778e-05, "loss": 0.364, "step": 18870 }, { "epoch": 0.4194809811589051, "grad_norm": 1.984375, "learning_rate": 1.1609333333333334e-05, "loss": 0.3927, "step": 18880 }, { "epoch": 0.41970316388197654, "grad_norm": 2.375, "learning_rate": 1.160488888888889e-05, "loss": 0.3564, "step": 18890 }, { "epoch": 0.419925346605048, "grad_norm": 2.53125, "learning_rate": 1.1600444444444446e-05, "loss": 0.3842, "step": 18900 }, { "epoch": 0.42014752932811944, "grad_norm": 1.90625, "learning_rate": 1.1596e-05, "loss": 0.4098, "step": 18910 }, { "epoch": 0.4203697120511909, "grad_norm": 1.890625, "learning_rate": 1.1591555555555555e-05, "loss": 0.3775, "step": 18920 }, { "epoch": 0.42059189477426234, "grad_norm": 2.3125, "learning_rate": 1.1587111111111112e-05, "loss": 0.4061, "step": 18930 }, { "epoch": 0.4208140774973338, "grad_norm": 2.234375, "learning_rate": 1.1582666666666668e-05, "loss": 0.4203, "step": 18940 }, { "epoch": 0.42103626022040525, "grad_norm": 2.03125, "learning_rate": 1.1578222222222223e-05, "loss": 0.4124, "step": 18950 }, { "epoch": 0.4212584429434767, "grad_norm": 2.296875, "learning_rate": 1.157377777777778e-05, "loss": 0.3668, "step": 18960 }, { "epoch": 0.42148062566654815, "grad_norm": 2.390625, "learning_rate": 1.1569333333333335e-05, "loss": 0.3855, "step": 18970 }, { "epoch": 0.4217028083896196, "grad_norm": 2.015625, "learning_rate": 1.156488888888889e-05, "loss": 0.3755, "step": 18980 }, { "epoch": 0.42192499111269105, "grad_norm": 2.3125, "learning_rate": 1.1560444444444446e-05, "loss": 0.4197, "step": 18990 }, { "epoch": 0.42214717383576256, "grad_norm": 2.359375, "learning_rate": 1.1556e-05, "loss": 0.383, "step": 19000 }, { "epoch": 0.422369356558834, "grad_norm": 1.90625, "learning_rate": 1.1551555555555556e-05, "loss": 0.4043, "step": 19010 }, { "epoch": 0.42259153928190546, "grad_norm": 2.46875, "learning_rate": 1.154711111111111e-05, "loss": 0.401, "step": 19020 }, { "epoch": 0.4228137220049769, "grad_norm": 1.7578125, "learning_rate": 1.1542666666666669e-05, "loss": 0.3877, "step": 19030 }, { "epoch": 0.42303590472804836, "grad_norm": 2.5, "learning_rate": 1.1538222222222224e-05, "loss": 0.4399, "step": 19040 }, { "epoch": 0.4232580874511198, "grad_norm": 2.1875, "learning_rate": 1.153377777777778e-05, "loss": 0.4048, "step": 19050 }, { "epoch": 0.42348027017419126, "grad_norm": 2.15625, "learning_rate": 1.1529333333333335e-05, "loss": 0.3851, "step": 19060 }, { "epoch": 0.4237024528972627, "grad_norm": 2.296875, "learning_rate": 1.152488888888889e-05, "loss": 0.4034, "step": 19070 }, { "epoch": 0.42392463562033417, "grad_norm": 2.296875, "learning_rate": 1.1520444444444445e-05, "loss": 0.4198, "step": 19080 }, { "epoch": 0.4241468183434056, "grad_norm": 1.7890625, "learning_rate": 1.1516000000000001e-05, "loss": 0.4207, "step": 19090 }, { "epoch": 0.42436900106647707, "grad_norm": 2.28125, "learning_rate": 1.1511555555555556e-05, "loss": 0.4425, "step": 19100 }, { "epoch": 0.4245911837895485, "grad_norm": 1.984375, "learning_rate": 1.1507111111111111e-05, "loss": 0.3781, "step": 19110 }, { "epoch": 0.42481336651261997, "grad_norm": 2.109375, "learning_rate": 1.1502666666666669e-05, "loss": 0.4106, "step": 19120 }, { "epoch": 0.4250355492356914, "grad_norm": 2.5625, "learning_rate": 1.1498222222222224e-05, "loss": 0.3809, "step": 19130 }, { "epoch": 0.4252577319587629, "grad_norm": 2.015625, "learning_rate": 1.1493777777777779e-05, "loss": 0.3521, "step": 19140 }, { "epoch": 0.4254799146818343, "grad_norm": 2.25, "learning_rate": 1.1489333333333335e-05, "loss": 0.388, "step": 19150 }, { "epoch": 0.4257020974049058, "grad_norm": 2.40625, "learning_rate": 1.148488888888889e-05, "loss": 0.4044, "step": 19160 }, { "epoch": 0.42592428012797723, "grad_norm": 2.15625, "learning_rate": 1.1480444444444445e-05, "loss": 0.4445, "step": 19170 }, { "epoch": 0.4261464628510487, "grad_norm": 1.984375, "learning_rate": 1.1476e-05, "loss": 0.405, "step": 19180 }, { "epoch": 0.42636864557412013, "grad_norm": 2.203125, "learning_rate": 1.1471555555555556e-05, "loss": 0.3559, "step": 19190 }, { "epoch": 0.42659082829719164, "grad_norm": 2.4375, "learning_rate": 1.1467111111111111e-05, "loss": 0.393, "step": 19200 }, { "epoch": 0.4268130110202631, "grad_norm": 2.21875, "learning_rate": 1.1462666666666668e-05, "loss": 0.3441, "step": 19210 }, { "epoch": 0.42703519374333454, "grad_norm": 2.140625, "learning_rate": 1.1458222222222224e-05, "loss": 0.3888, "step": 19220 }, { "epoch": 0.427257376466406, "grad_norm": 2.28125, "learning_rate": 1.145377777777778e-05, "loss": 0.399, "step": 19230 }, { "epoch": 0.42747955918947744, "grad_norm": 2.109375, "learning_rate": 1.1449333333333334e-05, "loss": 0.3745, "step": 19240 }, { "epoch": 0.4277017419125489, "grad_norm": 2.203125, "learning_rate": 1.144488888888889e-05, "loss": 0.3968, "step": 19250 }, { "epoch": 0.42792392463562035, "grad_norm": 2.15625, "learning_rate": 1.1440444444444445e-05, "loss": 0.4016, "step": 19260 }, { "epoch": 0.4281461073586918, "grad_norm": 2.09375, "learning_rate": 1.1436e-05, "loss": 0.3813, "step": 19270 }, { "epoch": 0.42836829008176325, "grad_norm": 2.28125, "learning_rate": 1.1431555555555555e-05, "loss": 0.4106, "step": 19280 }, { "epoch": 0.4285904728048347, "grad_norm": 2.609375, "learning_rate": 1.1427111111111112e-05, "loss": 0.3633, "step": 19290 }, { "epoch": 0.42881265552790615, "grad_norm": 2.171875, "learning_rate": 1.1422666666666668e-05, "loss": 0.3968, "step": 19300 }, { "epoch": 0.4290348382509776, "grad_norm": 2.296875, "learning_rate": 1.1418222222222223e-05, "loss": 0.4016, "step": 19310 }, { "epoch": 0.42925702097404905, "grad_norm": 2.484375, "learning_rate": 1.141377777777778e-05, "loss": 0.3636, "step": 19320 }, { "epoch": 0.4294792036971205, "grad_norm": 1.96875, "learning_rate": 1.1409333333333334e-05, "loss": 0.3992, "step": 19330 }, { "epoch": 0.42970138642019196, "grad_norm": 1.9296875, "learning_rate": 1.140488888888889e-05, "loss": 0.4122, "step": 19340 }, { "epoch": 0.4299235691432634, "grad_norm": 2.640625, "learning_rate": 1.1400444444444446e-05, "loss": 0.3773, "step": 19350 }, { "epoch": 0.43014575186633486, "grad_norm": 2.625, "learning_rate": 1.1396e-05, "loss": 0.3969, "step": 19360 }, { "epoch": 0.4303679345894063, "grad_norm": 1.953125, "learning_rate": 1.1391555555555556e-05, "loss": 0.3679, "step": 19370 }, { "epoch": 0.43059011731247776, "grad_norm": 2.625, "learning_rate": 1.138711111111111e-05, "loss": 0.3742, "step": 19380 }, { "epoch": 0.4308123000355492, "grad_norm": 2.40625, "learning_rate": 1.1382666666666669e-05, "loss": 0.3824, "step": 19390 }, { "epoch": 0.43103448275862066, "grad_norm": 2.5, "learning_rate": 1.1378222222222223e-05, "loss": 0.4468, "step": 19400 }, { "epoch": 0.43125666548169217, "grad_norm": 2.15625, "learning_rate": 1.137377777777778e-05, "loss": 0.3628, "step": 19410 }, { "epoch": 0.4314788482047636, "grad_norm": 2.34375, "learning_rate": 1.1369333333333335e-05, "loss": 0.3756, "step": 19420 }, { "epoch": 0.43170103092783507, "grad_norm": 2.15625, "learning_rate": 1.136488888888889e-05, "loss": 0.3864, "step": 19430 }, { "epoch": 0.4319232136509065, "grad_norm": 2.015625, "learning_rate": 1.1360444444444445e-05, "loss": 0.4071, "step": 19440 }, { "epoch": 0.432145396373978, "grad_norm": 2.53125, "learning_rate": 1.1356000000000001e-05, "loss": 0.4236, "step": 19450 }, { "epoch": 0.4323675790970494, "grad_norm": 2.21875, "learning_rate": 1.1351555555555556e-05, "loss": 0.423, "step": 19460 }, { "epoch": 0.4325897618201209, "grad_norm": 3.03125, "learning_rate": 1.134711111111111e-05, "loss": 0.379, "step": 19470 }, { "epoch": 0.43281194454319233, "grad_norm": 2.546875, "learning_rate": 1.1342666666666669e-05, "loss": 0.4025, "step": 19480 }, { "epoch": 0.4330341272662638, "grad_norm": 2.0625, "learning_rate": 1.1338222222222224e-05, "loss": 0.3735, "step": 19490 }, { "epoch": 0.43325630998933523, "grad_norm": 2.59375, "learning_rate": 1.1333777777777779e-05, "loss": 0.397, "step": 19500 }, { "epoch": 0.4334784927124067, "grad_norm": 2.4375, "learning_rate": 1.1329333333333335e-05, "loss": 0.4139, "step": 19510 }, { "epoch": 0.43370067543547813, "grad_norm": 2.8125, "learning_rate": 1.132488888888889e-05, "loss": 0.391, "step": 19520 }, { "epoch": 0.4339228581585496, "grad_norm": 2.328125, "learning_rate": 1.1320444444444445e-05, "loss": 0.3767, "step": 19530 }, { "epoch": 0.43414504088162104, "grad_norm": 2.375, "learning_rate": 1.1316e-05, "loss": 0.4362, "step": 19540 }, { "epoch": 0.4343672236046925, "grad_norm": 3.0, "learning_rate": 1.1311555555555556e-05, "loss": 0.4181, "step": 19550 }, { "epoch": 0.43458940632776394, "grad_norm": 2.359375, "learning_rate": 1.1307111111111111e-05, "loss": 0.363, "step": 19560 }, { "epoch": 0.4348115890508354, "grad_norm": 2.765625, "learning_rate": 1.1302666666666668e-05, "loss": 0.4238, "step": 19570 }, { "epoch": 0.43503377177390684, "grad_norm": 2.0625, "learning_rate": 1.1298222222222224e-05, "loss": 0.3864, "step": 19580 }, { "epoch": 0.4352559544969783, "grad_norm": 2.0, "learning_rate": 1.1293777777777779e-05, "loss": 0.4356, "step": 19590 }, { "epoch": 0.43547813722004974, "grad_norm": 2.453125, "learning_rate": 1.1289333333333334e-05, "loss": 0.4085, "step": 19600 }, { "epoch": 0.43570031994312125, "grad_norm": 2.03125, "learning_rate": 1.128488888888889e-05, "loss": 0.3822, "step": 19610 }, { "epoch": 0.4359225026661927, "grad_norm": 2.265625, "learning_rate": 1.1280444444444445e-05, "loss": 0.3907, "step": 19620 }, { "epoch": 0.43614468538926415, "grad_norm": 2.3125, "learning_rate": 1.1276e-05, "loss": 0.4203, "step": 19630 }, { "epoch": 0.4363668681123356, "grad_norm": 1.875, "learning_rate": 1.1271555555555555e-05, "loss": 0.4278, "step": 19640 }, { "epoch": 0.43658905083540706, "grad_norm": 2.140625, "learning_rate": 1.1267111111111113e-05, "loss": 0.3878, "step": 19650 }, { "epoch": 0.4368112335584785, "grad_norm": 2.515625, "learning_rate": 1.1262666666666668e-05, "loss": 0.4072, "step": 19660 }, { "epoch": 0.43703341628154996, "grad_norm": 2.265625, "learning_rate": 1.1258222222222223e-05, "loss": 0.3515, "step": 19670 }, { "epoch": 0.4372555990046214, "grad_norm": 2.96875, "learning_rate": 1.125377777777778e-05, "loss": 0.3994, "step": 19680 }, { "epoch": 0.43747778172769286, "grad_norm": 2.921875, "learning_rate": 1.1249333333333334e-05, "loss": 0.4353, "step": 19690 }, { "epoch": 0.4376999644507643, "grad_norm": 2.015625, "learning_rate": 1.1244888888888889e-05, "loss": 0.42, "step": 19700 }, { "epoch": 0.43792214717383576, "grad_norm": 1.9375, "learning_rate": 1.1240444444444446e-05, "loss": 0.3881, "step": 19710 }, { "epoch": 0.4381443298969072, "grad_norm": 2.046875, "learning_rate": 1.1236e-05, "loss": 0.3918, "step": 19720 }, { "epoch": 0.43836651261997867, "grad_norm": 2.234375, "learning_rate": 1.1231555555555555e-05, "loss": 0.3917, "step": 19730 }, { "epoch": 0.4385886953430501, "grad_norm": 2.375, "learning_rate": 1.1227111111111114e-05, "loss": 0.3976, "step": 19740 }, { "epoch": 0.43881087806612157, "grad_norm": 1.75, "learning_rate": 1.1222666666666668e-05, "loss": 0.3924, "step": 19750 }, { "epoch": 0.439033060789193, "grad_norm": 2.34375, "learning_rate": 1.1218222222222223e-05, "loss": 0.3909, "step": 19760 }, { "epoch": 0.43925524351226447, "grad_norm": 1.8984375, "learning_rate": 1.121377777777778e-05, "loss": 0.42, "step": 19770 }, { "epoch": 0.4394774262353359, "grad_norm": 2.109375, "learning_rate": 1.1209333333333335e-05, "loss": 0.3655, "step": 19780 }, { "epoch": 0.4396996089584074, "grad_norm": 2.6875, "learning_rate": 1.120488888888889e-05, "loss": 0.3663, "step": 19790 }, { "epoch": 0.4399217916814788, "grad_norm": 2.125, "learning_rate": 1.1200444444444444e-05, "loss": 0.3854, "step": 19800 }, { "epoch": 0.4401439744045503, "grad_norm": 2.15625, "learning_rate": 1.1196000000000001e-05, "loss": 0.374, "step": 19810 }, { "epoch": 0.4403661571276218, "grad_norm": 1.875, "learning_rate": 1.1191555555555556e-05, "loss": 0.3725, "step": 19820 }, { "epoch": 0.44058833985069323, "grad_norm": 1.8984375, "learning_rate": 1.1187111111111112e-05, "loss": 0.3875, "step": 19830 }, { "epoch": 0.4408105225737647, "grad_norm": 2.84375, "learning_rate": 1.1182666666666669e-05, "loss": 0.4269, "step": 19840 }, { "epoch": 0.44103270529683614, "grad_norm": 2.046875, "learning_rate": 1.1178222222222224e-05, "loss": 0.3947, "step": 19850 }, { "epoch": 0.4412548880199076, "grad_norm": 2.46875, "learning_rate": 1.1173777777777779e-05, "loss": 0.4126, "step": 19860 }, { "epoch": 0.44147707074297904, "grad_norm": 2.640625, "learning_rate": 1.1169333333333335e-05, "loss": 0.4062, "step": 19870 }, { "epoch": 0.4416992534660505, "grad_norm": 2.703125, "learning_rate": 1.116488888888889e-05, "loss": 0.3836, "step": 19880 }, { "epoch": 0.44192143618912194, "grad_norm": 1.9921875, "learning_rate": 1.1160444444444445e-05, "loss": 0.3643, "step": 19890 }, { "epoch": 0.4421436189121934, "grad_norm": 2.03125, "learning_rate": 1.1156e-05, "loss": 0.4177, "step": 19900 }, { "epoch": 0.44236580163526484, "grad_norm": 2.234375, "learning_rate": 1.1151555555555556e-05, "loss": 0.3948, "step": 19910 }, { "epoch": 0.4425879843583363, "grad_norm": 2.546875, "learning_rate": 1.1147111111111113e-05, "loss": 0.3898, "step": 19920 }, { "epoch": 0.44281016708140775, "grad_norm": 1.953125, "learning_rate": 1.1142666666666667e-05, "loss": 0.3983, "step": 19930 }, { "epoch": 0.4430323498044792, "grad_norm": 2.296875, "learning_rate": 1.1138222222222224e-05, "loss": 0.3946, "step": 19940 }, { "epoch": 0.44325453252755065, "grad_norm": 2.375, "learning_rate": 1.1133777777777779e-05, "loss": 0.3744, "step": 19950 }, { "epoch": 0.4434767152506221, "grad_norm": 2.59375, "learning_rate": 1.1129333333333334e-05, "loss": 0.4106, "step": 19960 }, { "epoch": 0.44369889797369355, "grad_norm": 2.46875, "learning_rate": 1.112488888888889e-05, "loss": 0.3814, "step": 19970 }, { "epoch": 0.443921080696765, "grad_norm": 2.265625, "learning_rate": 1.1120444444444445e-05, "loss": 0.4158, "step": 19980 }, { "epoch": 0.44414326341983645, "grad_norm": 2.171875, "learning_rate": 1.1116e-05, "loss": 0.4096, "step": 19990 }, { "epoch": 0.4443654461429079, "grad_norm": 2.578125, "learning_rate": 1.1111555555555555e-05, "loss": 0.3942, "step": 20000 }, { "epoch": 0.44458762886597936, "grad_norm": 2.390625, "learning_rate": 1.1107111111111113e-05, "loss": 0.3741, "step": 20010 }, { "epoch": 0.44480981158905086, "grad_norm": 2.203125, "learning_rate": 1.1102666666666668e-05, "loss": 0.3804, "step": 20020 }, { "epoch": 0.4450319943121223, "grad_norm": 1.8828125, "learning_rate": 1.1098222222222223e-05, "loss": 0.3675, "step": 20030 }, { "epoch": 0.44525417703519377, "grad_norm": 2.4375, "learning_rate": 1.109377777777778e-05, "loss": 0.3957, "step": 20040 }, { "epoch": 0.4454763597582652, "grad_norm": 2.59375, "learning_rate": 1.1089333333333334e-05, "loss": 0.3769, "step": 20050 }, { "epoch": 0.44569854248133667, "grad_norm": 2.0, "learning_rate": 1.1084888888888889e-05, "loss": 0.3605, "step": 20060 }, { "epoch": 0.4459207252044081, "grad_norm": 2.375, "learning_rate": 1.1080444444444445e-05, "loss": 0.4117, "step": 20070 }, { "epoch": 0.44614290792747957, "grad_norm": 1.9609375, "learning_rate": 1.1076e-05, "loss": 0.4063, "step": 20080 }, { "epoch": 0.446365090650551, "grad_norm": 2.21875, "learning_rate": 1.1071555555555555e-05, "loss": 0.4014, "step": 20090 }, { "epoch": 0.4465872733736225, "grad_norm": 2.546875, "learning_rate": 1.1067111111111113e-05, "loss": 0.3968, "step": 20100 }, { "epoch": 0.4468094560966939, "grad_norm": 2.3125, "learning_rate": 1.1062666666666668e-05, "loss": 0.3791, "step": 20110 }, { "epoch": 0.4470316388197654, "grad_norm": 2.453125, "learning_rate": 1.1058222222222223e-05, "loss": 0.4061, "step": 20120 }, { "epoch": 0.4472538215428368, "grad_norm": 2.375, "learning_rate": 1.105377777777778e-05, "loss": 0.4039, "step": 20130 }, { "epoch": 0.4474760042659083, "grad_norm": 1.8125, "learning_rate": 1.1049333333333334e-05, "loss": 0.3768, "step": 20140 }, { "epoch": 0.44769818698897973, "grad_norm": 2.296875, "learning_rate": 1.104488888888889e-05, "loss": 0.388, "step": 20150 }, { "epoch": 0.4479203697120512, "grad_norm": 2.296875, "learning_rate": 1.1040444444444444e-05, "loss": 0.4123, "step": 20160 }, { "epoch": 0.44814255243512263, "grad_norm": 2.484375, "learning_rate": 1.1036e-05, "loss": 0.3996, "step": 20170 }, { "epoch": 0.4483647351581941, "grad_norm": 2.0, "learning_rate": 1.1031555555555556e-05, "loss": 0.3852, "step": 20180 }, { "epoch": 0.44858691788126553, "grad_norm": 2.015625, "learning_rate": 1.1027111111111112e-05, "loss": 0.3636, "step": 20190 }, { "epoch": 0.448809100604337, "grad_norm": 2.046875, "learning_rate": 1.1022666666666669e-05, "loss": 0.4001, "step": 20200 }, { "epoch": 0.44903128332740844, "grad_norm": 1.75, "learning_rate": 1.1018222222222223e-05, "loss": 0.3724, "step": 20210 }, { "epoch": 0.4492534660504799, "grad_norm": 2.453125, "learning_rate": 1.1013777777777778e-05, "loss": 0.3837, "step": 20220 }, { "epoch": 0.4494756487735514, "grad_norm": 2.109375, "learning_rate": 1.1009333333333335e-05, "loss": 0.3334, "step": 20230 }, { "epoch": 0.44969783149662285, "grad_norm": 2.75, "learning_rate": 1.100488888888889e-05, "loss": 0.41, "step": 20240 }, { "epoch": 0.4499200142196943, "grad_norm": 2.0625, "learning_rate": 1.1000444444444445e-05, "loss": 0.4168, "step": 20250 }, { "epoch": 0.45014219694276575, "grad_norm": 2.21875, "learning_rate": 1.0996e-05, "loss": 0.4111, "step": 20260 }, { "epoch": 0.4503643796658372, "grad_norm": 1.8671875, "learning_rate": 1.0991555555555558e-05, "loss": 0.3801, "step": 20270 }, { "epoch": 0.45058656238890865, "grad_norm": 2.375, "learning_rate": 1.0987111111111112e-05, "loss": 0.3671, "step": 20280 }, { "epoch": 0.4508087451119801, "grad_norm": 2.09375, "learning_rate": 1.0982666666666667e-05, "loss": 0.3702, "step": 20290 }, { "epoch": 0.45103092783505155, "grad_norm": 2.46875, "learning_rate": 1.0978222222222224e-05, "loss": 0.4006, "step": 20300 }, { "epoch": 0.451253110558123, "grad_norm": 2.109375, "learning_rate": 1.0973777777777779e-05, "loss": 0.4027, "step": 20310 }, { "epoch": 0.45147529328119446, "grad_norm": 2.390625, "learning_rate": 1.0969333333333334e-05, "loss": 0.3681, "step": 20320 }, { "epoch": 0.4516974760042659, "grad_norm": 1.9375, "learning_rate": 1.096488888888889e-05, "loss": 0.3681, "step": 20330 }, { "epoch": 0.45191965872733736, "grad_norm": 2.390625, "learning_rate": 1.0960444444444445e-05, "loss": 0.3897, "step": 20340 }, { "epoch": 0.4521418414504088, "grad_norm": 2.71875, "learning_rate": 1.0956e-05, "loss": 0.4075, "step": 20350 }, { "epoch": 0.45236402417348026, "grad_norm": 1.8984375, "learning_rate": 1.0951555555555558e-05, "loss": 0.377, "step": 20360 }, { "epoch": 0.4525862068965517, "grad_norm": 2.0, "learning_rate": 1.0947111111111113e-05, "loss": 0.372, "step": 20370 }, { "epoch": 0.45280838961962316, "grad_norm": 2.109375, "learning_rate": 1.0942666666666668e-05, "loss": 0.389, "step": 20380 }, { "epoch": 0.4530305723426946, "grad_norm": 2.125, "learning_rate": 1.0938222222222223e-05, "loss": 0.3631, "step": 20390 }, { "epoch": 0.45325275506576607, "grad_norm": 2.265625, "learning_rate": 1.0933777777777779e-05, "loss": 0.3857, "step": 20400 }, { "epoch": 0.4534749377888375, "grad_norm": 2.109375, "learning_rate": 1.0929333333333334e-05, "loss": 0.3991, "step": 20410 }, { "epoch": 0.45369712051190897, "grad_norm": 2.21875, "learning_rate": 1.0924888888888889e-05, "loss": 0.3523, "step": 20420 }, { "epoch": 0.4539193032349804, "grad_norm": 2.0, "learning_rate": 1.0920444444444445e-05, "loss": 0.4037, "step": 20430 }, { "epoch": 0.4541414859580519, "grad_norm": 2.25, "learning_rate": 1.0916e-05, "loss": 0.3844, "step": 20440 }, { "epoch": 0.4543636686811234, "grad_norm": 1.9609375, "learning_rate": 1.0911555555555557e-05, "loss": 0.3743, "step": 20450 }, { "epoch": 0.45458585140419483, "grad_norm": 1.9765625, "learning_rate": 1.0907111111111113e-05, "loss": 0.3594, "step": 20460 }, { "epoch": 0.4548080341272663, "grad_norm": 2.4375, "learning_rate": 1.0902666666666668e-05, "loss": 0.391, "step": 20470 }, { "epoch": 0.45503021685033773, "grad_norm": 2.171875, "learning_rate": 1.0898222222222223e-05, "loss": 0.3538, "step": 20480 }, { "epoch": 0.4552523995734092, "grad_norm": 2.390625, "learning_rate": 1.089377777777778e-05, "loss": 0.398, "step": 20490 }, { "epoch": 0.45547458229648063, "grad_norm": 2.109375, "learning_rate": 1.0889333333333334e-05, "loss": 0.4254, "step": 20500 }, { "epoch": 0.4556967650195521, "grad_norm": 2.234375, "learning_rate": 1.088488888888889e-05, "loss": 0.4078, "step": 20510 }, { "epoch": 0.45591894774262354, "grad_norm": 2.203125, "learning_rate": 1.0880444444444444e-05, "loss": 0.3843, "step": 20520 }, { "epoch": 0.456141130465695, "grad_norm": 2.421875, "learning_rate": 1.0876e-05, "loss": 0.4057, "step": 20530 }, { "epoch": 0.45636331318876644, "grad_norm": 2.25, "learning_rate": 1.0871555555555557e-05, "loss": 0.3866, "step": 20540 }, { "epoch": 0.4565854959118379, "grad_norm": 2.28125, "learning_rate": 1.0867111111111112e-05, "loss": 0.3896, "step": 20550 }, { "epoch": 0.45680767863490934, "grad_norm": 2.703125, "learning_rate": 1.0862666666666668e-05, "loss": 0.4137, "step": 20560 }, { "epoch": 0.4570298613579808, "grad_norm": 2.015625, "learning_rate": 1.0858222222222223e-05, "loss": 0.3563, "step": 20570 }, { "epoch": 0.45725204408105224, "grad_norm": 2.1875, "learning_rate": 1.0853777777777778e-05, "loss": 0.4091, "step": 20580 }, { "epoch": 0.4574742268041237, "grad_norm": 2.3125, "learning_rate": 1.0849333333333335e-05, "loss": 0.3981, "step": 20590 }, { "epoch": 0.45769640952719515, "grad_norm": 2.59375, "learning_rate": 1.084488888888889e-05, "loss": 0.4079, "step": 20600 }, { "epoch": 0.4579185922502666, "grad_norm": 1.9296875, "learning_rate": 1.0840444444444444e-05, "loss": 0.3801, "step": 20610 }, { "epoch": 0.45814077497333805, "grad_norm": 1.890625, "learning_rate": 1.0836e-05, "loss": 0.3811, "step": 20620 }, { "epoch": 0.4583629576964095, "grad_norm": 2.28125, "learning_rate": 1.0831555555555557e-05, "loss": 0.3834, "step": 20630 }, { "epoch": 0.458585140419481, "grad_norm": 2.359375, "learning_rate": 1.0827111111111112e-05, "loss": 0.394, "step": 20640 }, { "epoch": 0.45880732314255246, "grad_norm": 2.53125, "learning_rate": 1.0822666666666667e-05, "loss": 0.412, "step": 20650 }, { "epoch": 0.4590295058656239, "grad_norm": 2.578125, "learning_rate": 1.0818222222222224e-05, "loss": 0.424, "step": 20660 }, { "epoch": 0.45925168858869536, "grad_norm": 2.109375, "learning_rate": 1.0813777777777779e-05, "loss": 0.3918, "step": 20670 }, { "epoch": 0.4594738713117668, "grad_norm": 2.265625, "learning_rate": 1.0809333333333333e-05, "loss": 0.3855, "step": 20680 }, { "epoch": 0.45969605403483826, "grad_norm": 2.390625, "learning_rate": 1.080488888888889e-05, "loss": 0.3949, "step": 20690 }, { "epoch": 0.4599182367579097, "grad_norm": 1.84375, "learning_rate": 1.0800444444444445e-05, "loss": 0.4035, "step": 20700 }, { "epoch": 0.46014041948098117, "grad_norm": 2.125, "learning_rate": 1.0796e-05, "loss": 0.3553, "step": 20710 }, { "epoch": 0.4603626022040526, "grad_norm": 2.265625, "learning_rate": 1.0791555555555558e-05, "loss": 0.4035, "step": 20720 }, { "epoch": 0.46058478492712407, "grad_norm": 2.359375, "learning_rate": 1.0787111111111113e-05, "loss": 0.3682, "step": 20730 }, { "epoch": 0.4608069676501955, "grad_norm": 2.21875, "learning_rate": 1.0782666666666668e-05, "loss": 0.384, "step": 20740 }, { "epoch": 0.46102915037326697, "grad_norm": 2.234375, "learning_rate": 1.0778222222222224e-05, "loss": 0.3888, "step": 20750 }, { "epoch": 0.4612513330963384, "grad_norm": 2.40625, "learning_rate": 1.0773777777777779e-05, "loss": 0.4149, "step": 20760 }, { "epoch": 0.4614735158194099, "grad_norm": 2.328125, "learning_rate": 1.0769333333333334e-05, "loss": 0.3992, "step": 20770 }, { "epoch": 0.4616956985424813, "grad_norm": 2.40625, "learning_rate": 1.0764888888888889e-05, "loss": 0.4211, "step": 20780 }, { "epoch": 0.4619178812655528, "grad_norm": 2.03125, "learning_rate": 1.0760444444444445e-05, "loss": 0.3719, "step": 20790 }, { "epoch": 0.46214006398862423, "grad_norm": 2.578125, "learning_rate": 1.0756e-05, "loss": 0.3809, "step": 20800 }, { "epoch": 0.4623622467116957, "grad_norm": 2.28125, "learning_rate": 1.0751555555555557e-05, "loss": 0.3777, "step": 20810 }, { "epoch": 0.46258442943476713, "grad_norm": 2.71875, "learning_rate": 1.0747111111111113e-05, "loss": 0.3977, "step": 20820 }, { "epoch": 0.4628066121578386, "grad_norm": 2.65625, "learning_rate": 1.0742666666666668e-05, "loss": 0.3746, "step": 20830 }, { "epoch": 0.46302879488091003, "grad_norm": 2.34375, "learning_rate": 1.0738222222222223e-05, "loss": 0.4218, "step": 20840 }, { "epoch": 0.46325097760398154, "grad_norm": 2.46875, "learning_rate": 1.073377777777778e-05, "loss": 0.4456, "step": 20850 }, { "epoch": 0.463473160327053, "grad_norm": 2.21875, "learning_rate": 1.0729333333333334e-05, "loss": 0.3884, "step": 20860 }, { "epoch": 0.46369534305012444, "grad_norm": 2.078125, "learning_rate": 1.0724888888888889e-05, "loss": 0.417, "step": 20870 }, { "epoch": 0.4639175257731959, "grad_norm": 2.421875, "learning_rate": 1.0720444444444444e-05, "loss": 0.3808, "step": 20880 }, { "epoch": 0.46413970849626734, "grad_norm": 2.390625, "learning_rate": 1.0716000000000002e-05, "loss": 0.3692, "step": 20890 }, { "epoch": 0.4643618912193388, "grad_norm": 2.765625, "learning_rate": 1.0711555555555557e-05, "loss": 0.4041, "step": 20900 }, { "epoch": 0.46458407394241025, "grad_norm": 1.71875, "learning_rate": 1.0707111111111112e-05, "loss": 0.3721, "step": 20910 }, { "epoch": 0.4648062566654817, "grad_norm": 2.21875, "learning_rate": 1.0702666666666668e-05, "loss": 0.4174, "step": 20920 }, { "epoch": 0.46502843938855315, "grad_norm": 2.28125, "learning_rate": 1.0698222222222223e-05, "loss": 0.3944, "step": 20930 }, { "epoch": 0.4652506221116246, "grad_norm": 3.015625, "learning_rate": 1.0693777777777778e-05, "loss": 0.3833, "step": 20940 }, { "epoch": 0.46547280483469605, "grad_norm": 2.28125, "learning_rate": 1.0689333333333335e-05, "loss": 0.4119, "step": 20950 }, { "epoch": 0.4656949875577675, "grad_norm": 1.75, "learning_rate": 1.068488888888889e-05, "loss": 0.3927, "step": 20960 }, { "epoch": 0.46591717028083895, "grad_norm": 2.25, "learning_rate": 1.0680444444444444e-05, "loss": 0.394, "step": 20970 }, { "epoch": 0.4661393530039104, "grad_norm": 2.296875, "learning_rate": 1.0676000000000002e-05, "loss": 0.3588, "step": 20980 }, { "epoch": 0.46636153572698186, "grad_norm": 2.671875, "learning_rate": 1.0671555555555557e-05, "loss": 0.3979, "step": 20990 }, { "epoch": 0.4665837184500533, "grad_norm": 2.171875, "learning_rate": 1.0667111111111112e-05, "loss": 0.3983, "step": 21000 }, { "epoch": 0.46680590117312476, "grad_norm": 1.9453125, "learning_rate": 1.0662666666666667e-05, "loss": 0.3962, "step": 21010 }, { "epoch": 0.4670280838961962, "grad_norm": 2.46875, "learning_rate": 1.0658222222222224e-05, "loss": 0.4133, "step": 21020 }, { "epoch": 0.46725026661926766, "grad_norm": 2.5625, "learning_rate": 1.0653777777777778e-05, "loss": 0.3879, "step": 21030 }, { "epoch": 0.4674724493423391, "grad_norm": 1.8828125, "learning_rate": 1.0649333333333333e-05, "loss": 0.4, "step": 21040 }, { "epoch": 0.4676946320654106, "grad_norm": 2.15625, "learning_rate": 1.064488888888889e-05, "loss": 0.3913, "step": 21050 }, { "epoch": 0.46791681478848207, "grad_norm": 2.65625, "learning_rate": 1.0640444444444445e-05, "loss": 0.3635, "step": 21060 }, { "epoch": 0.4681389975115535, "grad_norm": 2.1875, "learning_rate": 1.0636000000000001e-05, "loss": 0.3726, "step": 21070 }, { "epoch": 0.468361180234625, "grad_norm": 2.3125, "learning_rate": 1.0631555555555558e-05, "loss": 0.3947, "step": 21080 }, { "epoch": 0.4685833629576964, "grad_norm": 1.890625, "learning_rate": 1.0627111111111113e-05, "loss": 0.3738, "step": 21090 }, { "epoch": 0.4688055456807679, "grad_norm": 2.296875, "learning_rate": 1.0622666666666667e-05, "loss": 0.4123, "step": 21100 }, { "epoch": 0.46902772840383933, "grad_norm": 2.484375, "learning_rate": 1.0618222222222224e-05, "loss": 0.4035, "step": 21110 }, { "epoch": 0.4692499111269108, "grad_norm": 2.515625, "learning_rate": 1.0613777777777779e-05, "loss": 0.4067, "step": 21120 }, { "epoch": 0.46947209384998223, "grad_norm": 2.28125, "learning_rate": 1.0609333333333334e-05, "loss": 0.3745, "step": 21130 }, { "epoch": 0.4696942765730537, "grad_norm": 2.40625, "learning_rate": 1.0604888888888888e-05, "loss": 0.3901, "step": 21140 }, { "epoch": 0.46991645929612513, "grad_norm": 2.75, "learning_rate": 1.0600444444444445e-05, "loss": 0.4082, "step": 21150 }, { "epoch": 0.4701386420191966, "grad_norm": 2.796875, "learning_rate": 1.0596000000000002e-05, "loss": 0.3994, "step": 21160 }, { "epoch": 0.47036082474226804, "grad_norm": 2.375, "learning_rate": 1.0591555555555556e-05, "loss": 0.4238, "step": 21170 }, { "epoch": 0.4705830074653395, "grad_norm": 1.9921875, "learning_rate": 1.0587111111111113e-05, "loss": 0.4065, "step": 21180 }, { "epoch": 0.47080519018841094, "grad_norm": 2.0625, "learning_rate": 1.0582666666666668e-05, "loss": 0.4038, "step": 21190 }, { "epoch": 0.4710273729114824, "grad_norm": 2.15625, "learning_rate": 1.0578222222222223e-05, "loss": 0.3967, "step": 21200 }, { "epoch": 0.47124955563455384, "grad_norm": 2.828125, "learning_rate": 1.057377777777778e-05, "loss": 0.3742, "step": 21210 }, { "epoch": 0.4714717383576253, "grad_norm": 1.921875, "learning_rate": 1.0569333333333334e-05, "loss": 0.4002, "step": 21220 }, { "epoch": 0.47169392108069674, "grad_norm": 2.28125, "learning_rate": 1.0564888888888889e-05, "loss": 0.3993, "step": 21230 }, { "epoch": 0.4719161038037682, "grad_norm": 2.40625, "learning_rate": 1.0560444444444444e-05, "loss": 0.4094, "step": 21240 }, { "epoch": 0.47213828652683965, "grad_norm": 2.375, "learning_rate": 1.0556000000000002e-05, "loss": 0.3779, "step": 21250 }, { "epoch": 0.47236046924991115, "grad_norm": 2.578125, "learning_rate": 1.0551555555555557e-05, "loss": 0.3729, "step": 21260 }, { "epoch": 0.4725826519729826, "grad_norm": 2.03125, "learning_rate": 1.0547111111111112e-05, "loss": 0.3719, "step": 21270 }, { "epoch": 0.47280483469605405, "grad_norm": 2.5, "learning_rate": 1.0542666666666668e-05, "loss": 0.385, "step": 21280 }, { "epoch": 0.4730270174191255, "grad_norm": 2.40625, "learning_rate": 1.0538222222222223e-05, "loss": 0.3988, "step": 21290 }, { "epoch": 0.47324920014219696, "grad_norm": 2.609375, "learning_rate": 1.0533777777777778e-05, "loss": 0.3811, "step": 21300 }, { "epoch": 0.4734713828652684, "grad_norm": 2.4375, "learning_rate": 1.0529333333333334e-05, "loss": 0.4309, "step": 21310 }, { "epoch": 0.47369356558833986, "grad_norm": 2.359375, "learning_rate": 1.052488888888889e-05, "loss": 0.3763, "step": 21320 }, { "epoch": 0.4739157483114113, "grad_norm": 2.671875, "learning_rate": 1.0520444444444444e-05, "loss": 0.3886, "step": 21330 }, { "epoch": 0.47413793103448276, "grad_norm": 3.0, "learning_rate": 1.0516000000000002e-05, "loss": 0.3976, "step": 21340 }, { "epoch": 0.4743601137575542, "grad_norm": 2.265625, "learning_rate": 1.0511555555555557e-05, "loss": 0.3736, "step": 21350 }, { "epoch": 0.47458229648062566, "grad_norm": 2.421875, "learning_rate": 1.0507111111111112e-05, "loss": 0.4161, "step": 21360 }, { "epoch": 0.4748044792036971, "grad_norm": 2.328125, "learning_rate": 1.0502666666666667e-05, "loss": 0.4237, "step": 21370 }, { "epoch": 0.47502666192676857, "grad_norm": 2.28125, "learning_rate": 1.0498222222222223e-05, "loss": 0.3793, "step": 21380 }, { "epoch": 0.47524884464984, "grad_norm": 2.21875, "learning_rate": 1.0493777777777778e-05, "loss": 0.4024, "step": 21390 }, { "epoch": 0.47547102737291147, "grad_norm": 2.96875, "learning_rate": 1.0489333333333333e-05, "loss": 0.3968, "step": 21400 }, { "epoch": 0.4756932100959829, "grad_norm": 2.53125, "learning_rate": 1.048488888888889e-05, "loss": 0.402, "step": 21410 }, { "epoch": 0.4759153928190544, "grad_norm": 2.171875, "learning_rate": 1.0480444444444444e-05, "loss": 0.3966, "step": 21420 }, { "epoch": 0.4761375755421258, "grad_norm": 2.53125, "learning_rate": 1.0476000000000001e-05, "loss": 0.4159, "step": 21430 }, { "epoch": 0.4763597582651973, "grad_norm": 2.28125, "learning_rate": 1.0471555555555558e-05, "loss": 0.4008, "step": 21440 }, { "epoch": 0.4765819409882687, "grad_norm": 2.25, "learning_rate": 1.0467111111111112e-05, "loss": 0.3913, "step": 21450 }, { "epoch": 0.47680412371134023, "grad_norm": 2.25, "learning_rate": 1.0462666666666667e-05, "loss": 0.3975, "step": 21460 }, { "epoch": 0.4770263064344117, "grad_norm": 2.28125, "learning_rate": 1.0458222222222224e-05, "loss": 0.3919, "step": 21470 }, { "epoch": 0.47724848915748314, "grad_norm": 2.328125, "learning_rate": 1.0453777777777779e-05, "loss": 0.4096, "step": 21480 }, { "epoch": 0.4774706718805546, "grad_norm": 2.375, "learning_rate": 1.0449333333333333e-05, "loss": 0.3979, "step": 21490 }, { "epoch": 0.47769285460362604, "grad_norm": 2.921875, "learning_rate": 1.0444888888888888e-05, "loss": 0.388, "step": 21500 }, { "epoch": 0.4779150373266975, "grad_norm": 2.140625, "learning_rate": 1.0440444444444447e-05, "loss": 0.3575, "step": 21510 }, { "epoch": 0.47813722004976894, "grad_norm": 2.640625, "learning_rate": 1.0436000000000001e-05, "loss": 0.4031, "step": 21520 }, { "epoch": 0.4783594027728404, "grad_norm": 2.390625, "learning_rate": 1.0431555555555556e-05, "loss": 0.386, "step": 21530 }, { "epoch": 0.47858158549591184, "grad_norm": 2.453125, "learning_rate": 1.0427111111111113e-05, "loss": 0.3975, "step": 21540 }, { "epoch": 0.4788037682189833, "grad_norm": 2.265625, "learning_rate": 1.0422666666666668e-05, "loss": 0.3538, "step": 21550 }, { "epoch": 0.47902595094205475, "grad_norm": 1.9375, "learning_rate": 1.0418222222222222e-05, "loss": 0.3753, "step": 21560 }, { "epoch": 0.4792481336651262, "grad_norm": 2.546875, "learning_rate": 1.0413777777777779e-05, "loss": 0.3741, "step": 21570 }, { "epoch": 0.47947031638819765, "grad_norm": 2.109375, "learning_rate": 1.0409333333333334e-05, "loss": 0.4022, "step": 21580 }, { "epoch": 0.4796924991112691, "grad_norm": 2.234375, "learning_rate": 1.0404888888888889e-05, "loss": 0.3794, "step": 21590 }, { "epoch": 0.47991468183434055, "grad_norm": 2.359375, "learning_rate": 1.0400444444444447e-05, "loss": 0.3702, "step": 21600 }, { "epoch": 0.480136864557412, "grad_norm": 2.46875, "learning_rate": 1.0396000000000002e-05, "loss": 0.435, "step": 21610 }, { "epoch": 0.48035904728048345, "grad_norm": 2.4375, "learning_rate": 1.0391555555555557e-05, "loss": 0.389, "step": 21620 }, { "epoch": 0.4805812300035549, "grad_norm": 2.046875, "learning_rate": 1.0387111111111111e-05, "loss": 0.3863, "step": 21630 }, { "epoch": 0.48080341272662636, "grad_norm": 2.4375, "learning_rate": 1.0382666666666668e-05, "loss": 0.3822, "step": 21640 }, { "epoch": 0.4810255954496978, "grad_norm": 1.9453125, "learning_rate": 1.0378222222222223e-05, "loss": 0.4093, "step": 21650 }, { "epoch": 0.48124777817276926, "grad_norm": 2.21875, "learning_rate": 1.0373777777777778e-05, "loss": 0.3749, "step": 21660 }, { "epoch": 0.48146996089584077, "grad_norm": 3.078125, "learning_rate": 1.0369333333333334e-05, "loss": 0.4006, "step": 21670 }, { "epoch": 0.4816921436189122, "grad_norm": 2.375, "learning_rate": 1.0364888888888889e-05, "loss": 0.4306, "step": 21680 }, { "epoch": 0.48191432634198367, "grad_norm": 2.5, "learning_rate": 1.0360444444444446e-05, "loss": 0.38, "step": 21690 }, { "epoch": 0.4821365090650551, "grad_norm": 2.421875, "learning_rate": 1.0356000000000002e-05, "loss": 0.414, "step": 21700 }, { "epoch": 0.48235869178812657, "grad_norm": 2.296875, "learning_rate": 1.0351555555555557e-05, "loss": 0.403, "step": 21710 }, { "epoch": 0.482580874511198, "grad_norm": 2.25, "learning_rate": 1.0347111111111112e-05, "loss": 0.3978, "step": 21720 }, { "epoch": 0.4828030572342695, "grad_norm": 2.46875, "learning_rate": 1.0342666666666667e-05, "loss": 0.393, "step": 21730 }, { "epoch": 0.4830252399573409, "grad_norm": 2.25, "learning_rate": 1.0338222222222223e-05, "loss": 0.3547, "step": 21740 }, { "epoch": 0.4832474226804124, "grad_norm": 2.296875, "learning_rate": 1.0333777777777778e-05, "loss": 0.4171, "step": 21750 }, { "epoch": 0.4834696054034838, "grad_norm": 2.171875, "learning_rate": 1.0329333333333333e-05, "loss": 0.417, "step": 21760 }, { "epoch": 0.4836917881265553, "grad_norm": 2.46875, "learning_rate": 1.032488888888889e-05, "loss": 0.4144, "step": 21770 }, { "epoch": 0.48391397084962673, "grad_norm": 2.5625, "learning_rate": 1.0320444444444446e-05, "loss": 0.3907, "step": 21780 }, { "epoch": 0.4841361535726982, "grad_norm": 2.328125, "learning_rate": 1.0316e-05, "loss": 0.3828, "step": 21790 }, { "epoch": 0.48435833629576963, "grad_norm": 2.03125, "learning_rate": 1.0311555555555557e-05, "loss": 0.3694, "step": 21800 }, { "epoch": 0.4845805190188411, "grad_norm": 1.9765625, "learning_rate": 1.0307111111111112e-05, "loss": 0.3973, "step": 21810 }, { "epoch": 0.48480270174191253, "grad_norm": 2.078125, "learning_rate": 1.0302666666666667e-05, "loss": 0.3438, "step": 21820 }, { "epoch": 0.485024884464984, "grad_norm": 2.21875, "learning_rate": 1.0298222222222224e-05, "loss": 0.3919, "step": 21830 }, { "epoch": 0.48524706718805544, "grad_norm": 2.28125, "learning_rate": 1.0293777777777778e-05, "loss": 0.3648, "step": 21840 }, { "epoch": 0.4854692499111269, "grad_norm": 2.421875, "learning_rate": 1.0289333333333333e-05, "loss": 0.3749, "step": 21850 }, { "epoch": 0.48569143263419834, "grad_norm": 2.890625, "learning_rate": 1.0284888888888888e-05, "loss": 0.4041, "step": 21860 }, { "epoch": 0.48591361535726985, "grad_norm": 2.484375, "learning_rate": 1.0280444444444446e-05, "loss": 0.4086, "step": 21870 }, { "epoch": 0.4861357980803413, "grad_norm": 2.34375, "learning_rate": 1.0276000000000001e-05, "loss": 0.3983, "step": 21880 }, { "epoch": 0.48635798080341275, "grad_norm": 1.9765625, "learning_rate": 1.0271555555555556e-05, "loss": 0.4025, "step": 21890 }, { "epoch": 0.4865801635264842, "grad_norm": 2.203125, "learning_rate": 1.0267111111111113e-05, "loss": 0.3899, "step": 21900 }, { "epoch": 0.48680234624955565, "grad_norm": 2.421875, "learning_rate": 1.0262666666666667e-05, "loss": 0.3517, "step": 21910 }, { "epoch": 0.4870245289726271, "grad_norm": 3.0625, "learning_rate": 1.0258222222222222e-05, "loss": 0.3603, "step": 21920 }, { "epoch": 0.48724671169569855, "grad_norm": 2.078125, "learning_rate": 1.0253777777777779e-05, "loss": 0.3897, "step": 21930 }, { "epoch": 0.48746889441877, "grad_norm": 2.328125, "learning_rate": 1.0249333333333334e-05, "loss": 0.3824, "step": 21940 }, { "epoch": 0.48769107714184146, "grad_norm": 2.15625, "learning_rate": 1.0244888888888889e-05, "loss": 0.4117, "step": 21950 }, { "epoch": 0.4879132598649129, "grad_norm": 2.3125, "learning_rate": 1.0240444444444447e-05, "loss": 0.3726, "step": 21960 }, { "epoch": 0.48813544258798436, "grad_norm": 2.46875, "learning_rate": 1.0236000000000002e-05, "loss": 0.3954, "step": 21970 }, { "epoch": 0.4883576253110558, "grad_norm": 2.21875, "learning_rate": 1.0231555555555556e-05, "loss": 0.397, "step": 21980 }, { "epoch": 0.48857980803412726, "grad_norm": 2.453125, "learning_rate": 1.0227111111111111e-05, "loss": 0.4118, "step": 21990 }, { "epoch": 0.4888019907571987, "grad_norm": 2.375, "learning_rate": 1.0222666666666668e-05, "loss": 0.4002, "step": 22000 }, { "epoch": 0.48902417348027016, "grad_norm": 2.859375, "learning_rate": 1.0218222222222223e-05, "loss": 0.409, "step": 22010 }, { "epoch": 0.4892463562033416, "grad_norm": 2.59375, "learning_rate": 1.0213777777777778e-05, "loss": 0.3543, "step": 22020 }, { "epoch": 0.48946853892641307, "grad_norm": 2.109375, "learning_rate": 1.0209333333333334e-05, "loss": 0.4016, "step": 22030 }, { "epoch": 0.4896907216494845, "grad_norm": 3.03125, "learning_rate": 1.0204888888888889e-05, "loss": 0.3772, "step": 22040 }, { "epoch": 0.48991290437255597, "grad_norm": 2.375, "learning_rate": 1.0200444444444445e-05, "loss": 0.3841, "step": 22050 }, { "epoch": 0.4901350870956274, "grad_norm": 2.34375, "learning_rate": 1.0196000000000002e-05, "loss": 0.3945, "step": 22060 }, { "epoch": 0.49035726981869887, "grad_norm": 2.578125, "learning_rate": 1.0191555555555557e-05, "loss": 0.357, "step": 22070 }, { "epoch": 0.4905794525417704, "grad_norm": 2.625, "learning_rate": 1.0187111111111112e-05, "loss": 0.4068, "step": 22080 }, { "epoch": 0.49080163526484183, "grad_norm": 2.65625, "learning_rate": 1.0182666666666667e-05, "loss": 0.4001, "step": 22090 }, { "epoch": 0.4910238179879133, "grad_norm": 2.46875, "learning_rate": 1.0178222222222223e-05, "loss": 0.3655, "step": 22100 }, { "epoch": 0.49124600071098473, "grad_norm": 2.375, "learning_rate": 1.0173777777777778e-05, "loss": 0.3908, "step": 22110 }, { "epoch": 0.4914681834340562, "grad_norm": 2.625, "learning_rate": 1.0169333333333333e-05, "loss": 0.4094, "step": 22120 }, { "epoch": 0.49169036615712763, "grad_norm": 2.109375, "learning_rate": 1.0164888888888891e-05, "loss": 0.3689, "step": 22130 }, { "epoch": 0.4919125488801991, "grad_norm": 2.046875, "learning_rate": 1.0160444444444446e-05, "loss": 0.382, "step": 22140 }, { "epoch": 0.49213473160327054, "grad_norm": 2.515625, "learning_rate": 1.0156e-05, "loss": 0.3878, "step": 22150 }, { "epoch": 0.492356914326342, "grad_norm": 2.328125, "learning_rate": 1.0151555555555557e-05, "loss": 0.4285, "step": 22160 }, { "epoch": 0.49257909704941344, "grad_norm": 3.03125, "learning_rate": 1.0147111111111112e-05, "loss": 0.356, "step": 22170 }, { "epoch": 0.4928012797724849, "grad_norm": 2.25, "learning_rate": 1.0142666666666667e-05, "loss": 0.3692, "step": 22180 }, { "epoch": 0.49302346249555634, "grad_norm": 2.0625, "learning_rate": 1.0138222222222223e-05, "loss": 0.3603, "step": 22190 }, { "epoch": 0.4932456452186278, "grad_norm": 2.125, "learning_rate": 1.0133777777777778e-05, "loss": 0.409, "step": 22200 }, { "epoch": 0.49346782794169924, "grad_norm": 2.25, "learning_rate": 1.0129333333333333e-05, "loss": 0.3898, "step": 22210 }, { "epoch": 0.4936900106647707, "grad_norm": 2.671875, "learning_rate": 1.0124888888888891e-05, "loss": 0.4, "step": 22220 }, { "epoch": 0.49391219338784215, "grad_norm": 1.8984375, "learning_rate": 1.0120444444444446e-05, "loss": 0.3752, "step": 22230 }, { "epoch": 0.4941343761109136, "grad_norm": 2.625, "learning_rate": 1.0116000000000001e-05, "loss": 0.4245, "step": 22240 }, { "epoch": 0.49435655883398505, "grad_norm": 2.890625, "learning_rate": 1.0111555555555556e-05, "loss": 0.4112, "step": 22250 }, { "epoch": 0.4945787415570565, "grad_norm": 2.171875, "learning_rate": 1.0107111111111112e-05, "loss": 0.3718, "step": 22260 }, { "epoch": 0.49480092428012795, "grad_norm": 2.265625, "learning_rate": 1.0102666666666667e-05, "loss": 0.3746, "step": 22270 }, { "epoch": 0.49502310700319946, "grad_norm": 1.7890625, "learning_rate": 1.0098222222222222e-05, "loss": 0.3785, "step": 22280 }, { "epoch": 0.4952452897262709, "grad_norm": 2.0, "learning_rate": 1.0093777777777779e-05, "loss": 0.3779, "step": 22290 }, { "epoch": 0.49546747244934236, "grad_norm": 2.421875, "learning_rate": 1.0089333333333334e-05, "loss": 0.3781, "step": 22300 }, { "epoch": 0.4956896551724138, "grad_norm": 2.3125, "learning_rate": 1.008488888888889e-05, "loss": 0.3549, "step": 22310 }, { "epoch": 0.49591183789548526, "grad_norm": 3.125, "learning_rate": 1.0080444444444447e-05, "loss": 0.417, "step": 22320 }, { "epoch": 0.4961340206185567, "grad_norm": 2.109375, "learning_rate": 1.0076000000000001e-05, "loss": 0.3762, "step": 22330 }, { "epoch": 0.49635620334162817, "grad_norm": 2.109375, "learning_rate": 1.0071555555555556e-05, "loss": 0.4215, "step": 22340 }, { "epoch": 0.4965783860646996, "grad_norm": 2.484375, "learning_rate": 1.0067111111111111e-05, "loss": 0.4219, "step": 22350 }, { "epoch": 0.49680056878777107, "grad_norm": 2.734375, "learning_rate": 1.0062666666666668e-05, "loss": 0.3871, "step": 22360 }, { "epoch": 0.4970227515108425, "grad_norm": 2.0, "learning_rate": 1.0058222222222223e-05, "loss": 0.3559, "step": 22370 }, { "epoch": 0.49724493423391397, "grad_norm": 2.40625, "learning_rate": 1.0053777777777777e-05, "loss": 0.3805, "step": 22380 }, { "epoch": 0.4974671169569854, "grad_norm": 1.9453125, "learning_rate": 1.0049333333333334e-05, "loss": 0.4216, "step": 22390 }, { "epoch": 0.4976892996800569, "grad_norm": 1.9296875, "learning_rate": 1.004488888888889e-05, "loss": 0.3915, "step": 22400 }, { "epoch": 0.4979114824031283, "grad_norm": 2.1875, "learning_rate": 1.0040444444444445e-05, "loss": 0.3722, "step": 22410 }, { "epoch": 0.4981336651261998, "grad_norm": 2.453125, "learning_rate": 1.0036000000000002e-05, "loss": 0.4061, "step": 22420 }, { "epoch": 0.4983558478492712, "grad_norm": 2.046875, "learning_rate": 1.0031555555555557e-05, "loss": 0.4162, "step": 22430 }, { "epoch": 0.4985780305723427, "grad_norm": 2.265625, "learning_rate": 1.0027111111111112e-05, "loss": 0.4154, "step": 22440 }, { "epoch": 0.49880021329541413, "grad_norm": 2.625, "learning_rate": 1.0022666666666666e-05, "loss": 0.3969, "step": 22450 }, { "epoch": 0.4990223960184856, "grad_norm": 2.421875, "learning_rate": 1.0018222222222223e-05, "loss": 0.3987, "step": 22460 }, { "epoch": 0.49924457874155703, "grad_norm": 2.96875, "learning_rate": 1.0013777777777778e-05, "loss": 0.387, "step": 22470 }, { "epoch": 0.4994667614646285, "grad_norm": 2.875, "learning_rate": 1.0009333333333333e-05, "loss": 0.3965, "step": 22480 }, { "epoch": 0.4996889441877, "grad_norm": 2.234375, "learning_rate": 1.000488888888889e-05, "loss": 0.4201, "step": 22490 }, { "epoch": 0.49991112691077144, "grad_norm": 2.109375, "learning_rate": 1.0000444444444446e-05, "loss": 0.407, "step": 22500 }, { "epoch": 0.5001333096338428, "grad_norm": 2.546875, "learning_rate": 9.996e-06, "loss": 0.4298, "step": 22510 }, { "epoch": 0.5003554923569143, "grad_norm": 2.640625, "learning_rate": 9.991555555555557e-06, "loss": 0.3779, "step": 22520 }, { "epoch": 0.5005776750799857, "grad_norm": 2.515625, "learning_rate": 9.987111111111112e-06, "loss": 0.3997, "step": 22530 }, { "epoch": 0.5007998578030572, "grad_norm": 2.421875, "learning_rate": 9.982666666666667e-06, "loss": 0.3894, "step": 22540 }, { "epoch": 0.5010220405261286, "grad_norm": 2.40625, "learning_rate": 9.978222222222223e-06, "loss": 0.4136, "step": 22550 }, { "epoch": 0.5012442232492001, "grad_norm": 2.171875, "learning_rate": 9.973777777777778e-06, "loss": 0.3812, "step": 22560 }, { "epoch": 0.5014664059722715, "grad_norm": 2.15625, "learning_rate": 9.969333333333335e-06, "loss": 0.4132, "step": 22570 }, { "epoch": 0.501688588695343, "grad_norm": 2.34375, "learning_rate": 9.96488888888889e-06, "loss": 0.3851, "step": 22580 }, { "epoch": 0.5019107714184146, "grad_norm": 2.125, "learning_rate": 9.960444444444444e-06, "loss": 0.3617, "step": 22590 }, { "epoch": 0.502132954141486, "grad_norm": 2.546875, "learning_rate": 9.956000000000001e-06, "loss": 0.3363, "step": 22600 }, { "epoch": 0.5023551368645575, "grad_norm": 2.40625, "learning_rate": 9.951555555555556e-06, "loss": 0.3396, "step": 22610 }, { "epoch": 0.5025773195876289, "grad_norm": 2.359375, "learning_rate": 9.947111111111112e-06, "loss": 0.3656, "step": 22620 }, { "epoch": 0.5027995023107004, "grad_norm": 2.0, "learning_rate": 9.942666666666667e-06, "loss": 0.4024, "step": 22630 }, { "epoch": 0.5030216850337718, "grad_norm": 1.8828125, "learning_rate": 9.938222222222224e-06, "loss": 0.4049, "step": 22640 }, { "epoch": 0.5032438677568433, "grad_norm": 2.28125, "learning_rate": 9.933777777777779e-06, "loss": 0.4076, "step": 22650 }, { "epoch": 0.5034660504799147, "grad_norm": 2.5, "learning_rate": 9.929333333333333e-06, "loss": 0.3608, "step": 22660 }, { "epoch": 0.5036882332029862, "grad_norm": 2.484375, "learning_rate": 9.92488888888889e-06, "loss": 0.4079, "step": 22670 }, { "epoch": 0.5039104159260576, "grad_norm": 2.40625, "learning_rate": 9.920444444444445e-06, "loss": 0.4066, "step": 22680 }, { "epoch": 0.5041325986491291, "grad_norm": 2.1875, "learning_rate": 9.916000000000001e-06, "loss": 0.4185, "step": 22690 }, { "epoch": 0.5043547813722005, "grad_norm": 2.21875, "learning_rate": 9.911555555555556e-06, "loss": 0.3507, "step": 22700 }, { "epoch": 0.504576964095272, "grad_norm": 2.125, "learning_rate": 9.907111111111111e-06, "loss": 0.3864, "step": 22710 }, { "epoch": 0.5047991468183434, "grad_norm": 2.484375, "learning_rate": 9.902666666666668e-06, "loss": 0.4213, "step": 22720 }, { "epoch": 0.5050213295414149, "grad_norm": 2.328125, "learning_rate": 9.898222222222224e-06, "loss": 0.3925, "step": 22730 }, { "epoch": 0.5052435122644863, "grad_norm": 2.546875, "learning_rate": 9.893777777777779e-06, "loss": 0.4311, "step": 22740 }, { "epoch": 0.5054656949875578, "grad_norm": 2.328125, "learning_rate": 9.889333333333334e-06, "loss": 0.3781, "step": 22750 }, { "epoch": 0.5056878777106292, "grad_norm": 2.25, "learning_rate": 9.884888888888889e-06, "loss": 0.3717, "step": 22760 }, { "epoch": 0.5059100604337007, "grad_norm": 2.109375, "learning_rate": 9.880444444444445e-06, "loss": 0.3664, "step": 22770 }, { "epoch": 0.5061322431567721, "grad_norm": 2.203125, "learning_rate": 9.876000000000002e-06, "loss": 0.3805, "step": 22780 }, { "epoch": 0.5063544258798436, "grad_norm": 2.75, "learning_rate": 9.871555555555557e-06, "loss": 0.4212, "step": 22790 }, { "epoch": 0.5065766086029151, "grad_norm": 2.578125, "learning_rate": 9.867111111111111e-06, "loss": 0.4189, "step": 22800 }, { "epoch": 0.5067987913259865, "grad_norm": 2.0, "learning_rate": 9.862666666666668e-06, "loss": 0.3924, "step": 22810 }, { "epoch": 0.507020974049058, "grad_norm": 2.390625, "learning_rate": 9.858222222222223e-06, "loss": 0.3742, "step": 22820 }, { "epoch": 0.5072431567721294, "grad_norm": 2.546875, "learning_rate": 9.85377777777778e-06, "loss": 0.3877, "step": 22830 }, { "epoch": 0.5074653394952009, "grad_norm": 2.3125, "learning_rate": 9.849333333333334e-06, "loss": 0.4126, "step": 22840 }, { "epoch": 0.5076875222182723, "grad_norm": 2.578125, "learning_rate": 9.844888888888889e-06, "loss": 0.3659, "step": 22850 }, { "epoch": 0.5079097049413438, "grad_norm": 2.359375, "learning_rate": 9.840444444444446e-06, "loss": 0.4144, "step": 22860 }, { "epoch": 0.5081318876644152, "grad_norm": 2.234375, "learning_rate": 9.836e-06, "loss": 0.3972, "step": 22870 }, { "epoch": 0.5083540703874867, "grad_norm": 2.515625, "learning_rate": 9.831555555555557e-06, "loss": 0.364, "step": 22880 }, { "epoch": 0.5085762531105581, "grad_norm": 2.96875, "learning_rate": 9.827111111111112e-06, "loss": 0.4129, "step": 22890 }, { "epoch": 0.5087984358336296, "grad_norm": 2.484375, "learning_rate": 9.822666666666667e-06, "loss": 0.4082, "step": 22900 }, { "epoch": 0.509020618556701, "grad_norm": 3.046875, "learning_rate": 9.818222222222223e-06, "loss": 0.4159, "step": 22910 }, { "epoch": 0.5092428012797725, "grad_norm": 2.421875, "learning_rate": 9.813777777777778e-06, "loss": 0.3992, "step": 22920 }, { "epoch": 0.5094649840028439, "grad_norm": 2.75, "learning_rate": 9.809333333333335e-06, "loss": 0.3495, "step": 22930 }, { "epoch": 0.5096871667259154, "grad_norm": 2.390625, "learning_rate": 9.80488888888889e-06, "loss": 0.376, "step": 22940 }, { "epoch": 0.5099093494489868, "grad_norm": 2.234375, "learning_rate": 9.800444444444446e-06, "loss": 0.3861, "step": 22950 }, { "epoch": 0.5101315321720583, "grad_norm": 2.359375, "learning_rate": 9.796e-06, "loss": 0.4001, "step": 22960 }, { "epoch": 0.5103537148951297, "grad_norm": 1.984375, "learning_rate": 9.791555555555556e-06, "loss": 0.4003, "step": 22970 }, { "epoch": 0.5105758976182012, "grad_norm": 2.59375, "learning_rate": 9.787111111111112e-06, "loss": 0.3659, "step": 22980 }, { "epoch": 0.5107980803412726, "grad_norm": 2.09375, "learning_rate": 9.782666666666667e-06, "loss": 0.3836, "step": 22990 }, { "epoch": 0.5110202630643441, "grad_norm": 2.234375, "learning_rate": 9.778222222222224e-06, "loss": 0.3947, "step": 23000 }, { "epoch": 0.5112424457874156, "grad_norm": 2.46875, "learning_rate": 9.773777777777778e-06, "loss": 0.3722, "step": 23010 }, { "epoch": 0.511464628510487, "grad_norm": 2.3125, "learning_rate": 9.769333333333333e-06, "loss": 0.384, "step": 23020 }, { "epoch": 0.5116868112335585, "grad_norm": 2.375, "learning_rate": 9.76488888888889e-06, "loss": 0.3549, "step": 23030 }, { "epoch": 0.5119089939566299, "grad_norm": 2.140625, "learning_rate": 9.760444444444446e-06, "loss": 0.395, "step": 23040 }, { "epoch": 0.5121311766797014, "grad_norm": 2.1875, "learning_rate": 9.756000000000001e-06, "loss": 0.34, "step": 23050 }, { "epoch": 0.5123533594027728, "grad_norm": 2.15625, "learning_rate": 9.751555555555556e-06, "loss": 0.3952, "step": 23060 }, { "epoch": 0.5125755421258443, "grad_norm": 2.359375, "learning_rate": 9.74711111111111e-06, "loss": 0.4082, "step": 23070 }, { "epoch": 0.5127977248489157, "grad_norm": 2.359375, "learning_rate": 9.742666666666667e-06, "loss": 0.3826, "step": 23080 }, { "epoch": 0.5130199075719872, "grad_norm": 2.421875, "learning_rate": 9.738222222222224e-06, "loss": 0.3905, "step": 23090 }, { "epoch": 0.5132420902950586, "grad_norm": 1.9609375, "learning_rate": 9.733777777777779e-06, "loss": 0.3996, "step": 23100 }, { "epoch": 0.5134642730181301, "grad_norm": 2.359375, "learning_rate": 9.729333333333334e-06, "loss": 0.3944, "step": 23110 }, { "epoch": 0.5136864557412015, "grad_norm": 2.21875, "learning_rate": 9.724888888888888e-06, "loss": 0.3675, "step": 23120 }, { "epoch": 0.513908638464273, "grad_norm": 2.328125, "learning_rate": 9.720444444444445e-06, "loss": 0.4111, "step": 23130 }, { "epoch": 0.5141308211873444, "grad_norm": 2.59375, "learning_rate": 9.716000000000002e-06, "loss": 0.4227, "step": 23140 }, { "epoch": 0.5143530039104159, "grad_norm": 2.265625, "learning_rate": 9.711555555555556e-06, "loss": 0.396, "step": 23150 }, { "epoch": 0.5145751866334873, "grad_norm": 2.625, "learning_rate": 9.707111111111111e-06, "loss": 0.3875, "step": 23160 }, { "epoch": 0.5147973693565588, "grad_norm": 2.5625, "learning_rate": 9.702666666666668e-06, "loss": 0.387, "step": 23170 }, { "epoch": 0.5150195520796302, "grad_norm": 2.484375, "learning_rate": 9.698222222222223e-06, "loss": 0.3835, "step": 23180 }, { "epoch": 0.5152417348027017, "grad_norm": 2.5, "learning_rate": 9.693777777777779e-06, "loss": 0.3879, "step": 23190 }, { "epoch": 0.5154639175257731, "grad_norm": 2.640625, "learning_rate": 9.689333333333334e-06, "loss": 0.3927, "step": 23200 }, { "epoch": 0.5156861002488446, "grad_norm": 2.625, "learning_rate": 9.684888888888889e-06, "loss": 0.3956, "step": 23210 }, { "epoch": 0.5159082829719162, "grad_norm": 2.46875, "learning_rate": 9.680444444444445e-06, "loss": 0.3682, "step": 23220 }, { "epoch": 0.5161304656949876, "grad_norm": 2.5, "learning_rate": 9.676e-06, "loss": 0.4165, "step": 23230 }, { "epoch": 0.5163526484180591, "grad_norm": 2.453125, "learning_rate": 9.671555555555557e-06, "loss": 0.3805, "step": 23240 }, { "epoch": 0.5165748311411305, "grad_norm": 2.203125, "learning_rate": 9.667111111111112e-06, "loss": 0.3986, "step": 23250 }, { "epoch": 0.516797013864202, "grad_norm": 2.390625, "learning_rate": 9.662666666666668e-06, "loss": 0.3789, "step": 23260 }, { "epoch": 0.5170191965872734, "grad_norm": 1.9921875, "learning_rate": 9.658222222222223e-06, "loss": 0.4063, "step": 23270 }, { "epoch": 0.5172413793103449, "grad_norm": 2.890625, "learning_rate": 9.653777777777778e-06, "loss": 0.4068, "step": 23280 }, { "epoch": 0.5174635620334163, "grad_norm": 2.140625, "learning_rate": 9.649333333333334e-06, "loss": 0.3881, "step": 23290 }, { "epoch": 0.5176857447564878, "grad_norm": 2.234375, "learning_rate": 9.64488888888889e-06, "loss": 0.403, "step": 23300 }, { "epoch": 0.5179079274795592, "grad_norm": 2.3125, "learning_rate": 9.640444444444446e-06, "loss": 0.411, "step": 23310 }, { "epoch": 0.5181301102026307, "grad_norm": 2.71875, "learning_rate": 9.636e-06, "loss": 0.411, "step": 23320 }, { "epoch": 0.5183522929257021, "grad_norm": 2.875, "learning_rate": 9.631555555555555e-06, "loss": 0.3763, "step": 23330 }, { "epoch": 0.5185744756487736, "grad_norm": 2.15625, "learning_rate": 9.627111111111112e-06, "loss": 0.4007, "step": 23340 }, { "epoch": 0.518796658371845, "grad_norm": 2.90625, "learning_rate": 9.622666666666668e-06, "loss": 0.3983, "step": 23350 }, { "epoch": 0.5190188410949165, "grad_norm": 2.171875, "learning_rate": 9.618222222222223e-06, "loss": 0.4572, "step": 23360 }, { "epoch": 0.5192410238179879, "grad_norm": 2.265625, "learning_rate": 9.613777777777778e-06, "loss": 0.3783, "step": 23370 }, { "epoch": 0.5194632065410594, "grad_norm": 2.109375, "learning_rate": 9.609333333333333e-06, "loss": 0.3842, "step": 23380 }, { "epoch": 0.5196853892641308, "grad_norm": 2.5, "learning_rate": 9.60488888888889e-06, "loss": 0.3901, "step": 23390 }, { "epoch": 0.5199075719872023, "grad_norm": 2.5, "learning_rate": 9.600444444444446e-06, "loss": 0.4148, "step": 23400 }, { "epoch": 0.5201297547102738, "grad_norm": 2.609375, "learning_rate": 9.596000000000001e-06, "loss": 0.3899, "step": 23410 }, { "epoch": 0.5203519374333452, "grad_norm": 2.65625, "learning_rate": 9.591555555555556e-06, "loss": 0.4579, "step": 23420 }, { "epoch": 0.5205741201564167, "grad_norm": 2.75, "learning_rate": 9.58711111111111e-06, "loss": 0.3828, "step": 23430 }, { "epoch": 0.5207963028794881, "grad_norm": 1.9296875, "learning_rate": 9.582666666666667e-06, "loss": 0.3632, "step": 23440 }, { "epoch": 0.5210184856025596, "grad_norm": 2.046875, "learning_rate": 9.578222222222224e-06, "loss": 0.4083, "step": 23450 }, { "epoch": 0.521240668325631, "grad_norm": 2.3125, "learning_rate": 9.573777777777779e-06, "loss": 0.37, "step": 23460 }, { "epoch": 0.5214628510487025, "grad_norm": 1.78125, "learning_rate": 9.569333333333333e-06, "loss": 0.3641, "step": 23470 }, { "epoch": 0.5216850337717739, "grad_norm": 1.9140625, "learning_rate": 9.56488888888889e-06, "loss": 0.3725, "step": 23480 }, { "epoch": 0.5219072164948454, "grad_norm": 2.296875, "learning_rate": 9.560444444444445e-06, "loss": 0.3971, "step": 23490 }, { "epoch": 0.5221293992179168, "grad_norm": 2.453125, "learning_rate": 9.556000000000001e-06, "loss": 0.3923, "step": 23500 }, { "epoch": 0.5223515819409883, "grad_norm": 2.453125, "learning_rate": 9.551555555555556e-06, "loss": 0.3926, "step": 23510 }, { "epoch": 0.5225737646640597, "grad_norm": 2.546875, "learning_rate": 9.547111111111111e-06, "loss": 0.3701, "step": 23520 }, { "epoch": 0.5227959473871312, "grad_norm": 2.5625, "learning_rate": 9.542666666666668e-06, "loss": 0.3761, "step": 23530 }, { "epoch": 0.5230181301102026, "grad_norm": 1.9765625, "learning_rate": 9.538222222222222e-06, "loss": 0.3749, "step": 23540 }, { "epoch": 0.5232403128332741, "grad_norm": 2.0, "learning_rate": 9.533777777777779e-06, "loss": 0.401, "step": 23550 }, { "epoch": 0.5234624955563455, "grad_norm": 3.125, "learning_rate": 9.529333333333334e-06, "loss": 0.3932, "step": 23560 }, { "epoch": 0.523684678279417, "grad_norm": 2.5, "learning_rate": 9.52488888888889e-06, "loss": 0.409, "step": 23570 }, { "epoch": 0.5239068610024884, "grad_norm": 2.359375, "learning_rate": 9.520444444444445e-06, "loss": 0.3663, "step": 23580 }, { "epoch": 0.5241290437255599, "grad_norm": 2.59375, "learning_rate": 9.516e-06, "loss": 0.4164, "step": 23590 }, { "epoch": 0.5243512264486313, "grad_norm": 2.234375, "learning_rate": 9.511555555555557e-06, "loss": 0.3705, "step": 23600 }, { "epoch": 0.5245734091717028, "grad_norm": 1.8828125, "learning_rate": 9.507111111111111e-06, "loss": 0.3507, "step": 23610 }, { "epoch": 0.5247955918947743, "grad_norm": 2.53125, "learning_rate": 9.502666666666668e-06, "loss": 0.4186, "step": 23620 }, { "epoch": 0.5250177746178457, "grad_norm": 2.703125, "learning_rate": 9.498222222222223e-06, "loss": 0.4051, "step": 23630 }, { "epoch": 0.5252399573409172, "grad_norm": 2.40625, "learning_rate": 9.493777777777778e-06, "loss": 0.4121, "step": 23640 }, { "epoch": 0.5254621400639886, "grad_norm": 2.4375, "learning_rate": 9.489333333333334e-06, "loss": 0.372, "step": 23650 }, { "epoch": 0.5256843227870601, "grad_norm": 2.875, "learning_rate": 9.48488888888889e-06, "loss": 0.4198, "step": 23660 }, { "epoch": 0.5259065055101315, "grad_norm": 2.5, "learning_rate": 9.480444444444446e-06, "loss": 0.368, "step": 23670 }, { "epoch": 0.526128688233203, "grad_norm": 2.625, "learning_rate": 9.476e-06, "loss": 0.3731, "step": 23680 }, { "epoch": 0.5263508709562744, "grad_norm": 2.328125, "learning_rate": 9.471555555555555e-06, "loss": 0.3682, "step": 23690 }, { "epoch": 0.5265730536793459, "grad_norm": 2.96875, "learning_rate": 9.467111111111112e-06, "loss": 0.3844, "step": 23700 }, { "epoch": 0.5267952364024173, "grad_norm": 2.28125, "learning_rate": 9.462666666666668e-06, "loss": 0.3779, "step": 23710 }, { "epoch": 0.5270174191254888, "grad_norm": 2.453125, "learning_rate": 9.458222222222223e-06, "loss": 0.3784, "step": 23720 }, { "epoch": 0.5272396018485602, "grad_norm": 2.765625, "learning_rate": 9.453777777777778e-06, "loss": 0.3681, "step": 23730 }, { "epoch": 0.5274617845716317, "grad_norm": 2.203125, "learning_rate": 9.449333333333333e-06, "loss": 0.3966, "step": 23740 }, { "epoch": 0.5276839672947031, "grad_norm": 2.5, "learning_rate": 9.44488888888889e-06, "loss": 0.36, "step": 23750 }, { "epoch": 0.5279061500177746, "grad_norm": 2.171875, "learning_rate": 9.440444444444446e-06, "loss": 0.3675, "step": 23760 }, { "epoch": 0.528128332740846, "grad_norm": 2.375, "learning_rate": 9.436e-06, "loss": 0.3679, "step": 23770 }, { "epoch": 0.5283505154639175, "grad_norm": 2.109375, "learning_rate": 9.431555555555556e-06, "loss": 0.3862, "step": 23780 }, { "epoch": 0.5285726981869889, "grad_norm": 2.453125, "learning_rate": 9.427111111111112e-06, "loss": 0.365, "step": 23790 }, { "epoch": 0.5287948809100604, "grad_norm": 2.21875, "learning_rate": 9.422666666666667e-06, "loss": 0.3974, "step": 23800 }, { "epoch": 0.5290170636331318, "grad_norm": 2.1875, "learning_rate": 9.418222222222224e-06, "loss": 0.35, "step": 23810 }, { "epoch": 0.5292392463562033, "grad_norm": 2.84375, "learning_rate": 9.413777777777778e-06, "loss": 0.423, "step": 23820 }, { "epoch": 0.5294614290792748, "grad_norm": 2.203125, "learning_rate": 9.409333333333333e-06, "loss": 0.3607, "step": 23830 }, { "epoch": 0.5296836118023462, "grad_norm": 2.765625, "learning_rate": 9.40488888888889e-06, "loss": 0.3973, "step": 23840 }, { "epoch": 0.5299057945254178, "grad_norm": 2.0625, "learning_rate": 9.400444444444445e-06, "loss": 0.4067, "step": 23850 }, { "epoch": 0.5301279772484891, "grad_norm": 2.1875, "learning_rate": 9.396000000000001e-06, "loss": 0.4117, "step": 23860 }, { "epoch": 0.5303501599715607, "grad_norm": 2.1875, "learning_rate": 9.391555555555556e-06, "loss": 0.3517, "step": 23870 }, { "epoch": 0.530572342694632, "grad_norm": 2.375, "learning_rate": 9.387111111111113e-06, "loss": 0.3877, "step": 23880 }, { "epoch": 0.5307945254177036, "grad_norm": 2.40625, "learning_rate": 9.382666666666667e-06, "loss": 0.4231, "step": 23890 }, { "epoch": 0.531016708140775, "grad_norm": 2.8125, "learning_rate": 9.378222222222222e-06, "loss": 0.3845, "step": 23900 }, { "epoch": 0.5312388908638465, "grad_norm": 2.078125, "learning_rate": 9.373777777777779e-06, "loss": 0.436, "step": 23910 }, { "epoch": 0.5314610735869179, "grad_norm": 2.40625, "learning_rate": 9.369333333333334e-06, "loss": 0.3605, "step": 23920 }, { "epoch": 0.5316832563099894, "grad_norm": 2.078125, "learning_rate": 9.36488888888889e-06, "loss": 0.3881, "step": 23930 }, { "epoch": 0.5319054390330608, "grad_norm": 2.46875, "learning_rate": 9.360444444444445e-06, "loss": 0.4014, "step": 23940 }, { "epoch": 0.5321276217561323, "grad_norm": 2.8125, "learning_rate": 9.356e-06, "loss": 0.3885, "step": 23950 }, { "epoch": 0.5323498044792037, "grad_norm": 2.296875, "learning_rate": 9.351555555555556e-06, "loss": 0.3429, "step": 23960 }, { "epoch": 0.5325719872022752, "grad_norm": 2.640625, "learning_rate": 9.347111111111113e-06, "loss": 0.4116, "step": 23970 }, { "epoch": 0.5327941699253466, "grad_norm": 2.203125, "learning_rate": 9.342666666666668e-06, "loss": 0.4032, "step": 23980 }, { "epoch": 0.5330163526484181, "grad_norm": 1.875, "learning_rate": 9.338222222222223e-06, "loss": 0.3314, "step": 23990 }, { "epoch": 0.5332385353714895, "grad_norm": 2.40625, "learning_rate": 9.333777777777777e-06, "loss": 0.3691, "step": 24000 }, { "epoch": 0.533460718094561, "grad_norm": 2.125, "learning_rate": 9.329333333333334e-06, "loss": 0.3932, "step": 24010 }, { "epoch": 0.5336829008176324, "grad_norm": 2.25, "learning_rate": 9.32488888888889e-06, "loss": 0.3472, "step": 24020 }, { "epoch": 0.5339050835407039, "grad_norm": 2.71875, "learning_rate": 9.320444444444445e-06, "loss": 0.4251, "step": 24030 }, { "epoch": 0.5341272662637754, "grad_norm": 2.375, "learning_rate": 9.316e-06, "loss": 0.3884, "step": 24040 }, { "epoch": 0.5343494489868468, "grad_norm": 1.953125, "learning_rate": 9.311555555555555e-06, "loss": 0.3509, "step": 24050 }, { "epoch": 0.5345716317099183, "grad_norm": 2.140625, "learning_rate": 9.307111111111112e-06, "loss": 0.3587, "step": 24060 }, { "epoch": 0.5347938144329897, "grad_norm": 2.34375, "learning_rate": 9.302666666666668e-06, "loss": 0.4026, "step": 24070 }, { "epoch": 0.5350159971560612, "grad_norm": 2.390625, "learning_rate": 9.298222222222223e-06, "loss": 0.3798, "step": 24080 }, { "epoch": 0.5352381798791326, "grad_norm": 2.46875, "learning_rate": 9.293777777777778e-06, "loss": 0.3895, "step": 24090 }, { "epoch": 0.5354603626022041, "grad_norm": 2.09375, "learning_rate": 9.289333333333334e-06, "loss": 0.3804, "step": 24100 }, { "epoch": 0.5356825453252755, "grad_norm": 2.125, "learning_rate": 9.28488888888889e-06, "loss": 0.3708, "step": 24110 }, { "epoch": 0.535904728048347, "grad_norm": 2.5625, "learning_rate": 9.280444444444446e-06, "loss": 0.3976, "step": 24120 }, { "epoch": 0.5361269107714184, "grad_norm": 2.484375, "learning_rate": 9.276e-06, "loss": 0.3709, "step": 24130 }, { "epoch": 0.5363490934944899, "grad_norm": 2.328125, "learning_rate": 9.271555555555555e-06, "loss": 0.3845, "step": 24140 }, { "epoch": 0.5365712762175613, "grad_norm": 2.21875, "learning_rate": 9.267111111111112e-06, "loss": 0.3443, "step": 24150 }, { "epoch": 0.5367934589406328, "grad_norm": 1.9765625, "learning_rate": 9.262666666666667e-06, "loss": 0.3965, "step": 24160 }, { "epoch": 0.5370156416637042, "grad_norm": 2.5, "learning_rate": 9.258222222222223e-06, "loss": 0.3619, "step": 24170 }, { "epoch": 0.5372378243867757, "grad_norm": 2.671875, "learning_rate": 9.253777777777778e-06, "loss": 0.4331, "step": 24180 }, { "epoch": 0.5374600071098471, "grad_norm": 2.953125, "learning_rate": 9.249333333333335e-06, "loss": 0.3977, "step": 24190 }, { "epoch": 0.5376821898329186, "grad_norm": 2.4375, "learning_rate": 9.24488888888889e-06, "loss": 0.404, "step": 24200 }, { "epoch": 0.53790437255599, "grad_norm": 2.8125, "learning_rate": 9.240444444444444e-06, "loss": 0.4076, "step": 24210 }, { "epoch": 0.5381265552790615, "grad_norm": 2.5, "learning_rate": 9.236000000000001e-06, "loss": 0.4087, "step": 24220 }, { "epoch": 0.538348738002133, "grad_norm": 2.84375, "learning_rate": 9.231555555555556e-06, "loss": 0.4065, "step": 24230 }, { "epoch": 0.5385709207252044, "grad_norm": 2.703125, "learning_rate": 9.227111111111112e-06, "loss": 0.4035, "step": 24240 }, { "epoch": 0.5387931034482759, "grad_norm": 2.234375, "learning_rate": 9.222666666666667e-06, "loss": 0.3972, "step": 24250 }, { "epoch": 0.5390152861713473, "grad_norm": 2.328125, "learning_rate": 9.218222222222222e-06, "loss": 0.4168, "step": 24260 }, { "epoch": 0.5392374688944188, "grad_norm": 2.453125, "learning_rate": 9.213777777777779e-06, "loss": 0.3985, "step": 24270 }, { "epoch": 0.5394596516174902, "grad_norm": 2.515625, "learning_rate": 9.209333333333335e-06, "loss": 0.3861, "step": 24280 }, { "epoch": 0.5396818343405617, "grad_norm": 2.1875, "learning_rate": 9.20488888888889e-06, "loss": 0.3952, "step": 24290 }, { "epoch": 0.5399040170636331, "grad_norm": 2.46875, "learning_rate": 9.200444444444445e-06, "loss": 0.3793, "step": 24300 }, { "epoch": 0.5401261997867046, "grad_norm": 2.546875, "learning_rate": 9.196e-06, "loss": 0.3831, "step": 24310 }, { "epoch": 0.540348382509776, "grad_norm": 2.9375, "learning_rate": 9.191555555555556e-06, "loss": 0.3802, "step": 24320 }, { "epoch": 0.5405705652328475, "grad_norm": 2.5, "learning_rate": 9.187111111111113e-06, "loss": 0.4176, "step": 24330 }, { "epoch": 0.5407927479559189, "grad_norm": 2.59375, "learning_rate": 9.182666666666668e-06, "loss": 0.342, "step": 24340 }, { "epoch": 0.5410149306789904, "grad_norm": 2.359375, "learning_rate": 9.178222222222222e-06, "loss": 0.3505, "step": 24350 }, { "epoch": 0.5412371134020618, "grad_norm": 2.328125, "learning_rate": 9.173777777777777e-06, "loss": 0.4308, "step": 24360 }, { "epoch": 0.5414592961251333, "grad_norm": 2.375, "learning_rate": 9.169333333333334e-06, "loss": 0.3593, "step": 24370 }, { "epoch": 0.5416814788482047, "grad_norm": 2.546875, "learning_rate": 9.16488888888889e-06, "loss": 0.3972, "step": 24380 }, { "epoch": 0.5419036615712762, "grad_norm": 2.234375, "learning_rate": 9.160444444444445e-06, "loss": 0.3626, "step": 24390 }, { "epoch": 0.5421258442943476, "grad_norm": 2.78125, "learning_rate": 9.156e-06, "loss": 0.3998, "step": 24400 }, { "epoch": 0.5423480270174191, "grad_norm": 2.8125, "learning_rate": 9.151555555555557e-06, "loss": 0.4031, "step": 24410 }, { "epoch": 0.5425702097404905, "grad_norm": 2.53125, "learning_rate": 9.147111111111111e-06, "loss": 0.3568, "step": 24420 }, { "epoch": 0.542792392463562, "grad_norm": 2.578125, "learning_rate": 9.142666666666668e-06, "loss": 0.3548, "step": 24430 }, { "epoch": 0.5430145751866335, "grad_norm": 2.484375, "learning_rate": 9.138222222222223e-06, "loss": 0.3947, "step": 24440 }, { "epoch": 0.5432367579097049, "grad_norm": 2.4375, "learning_rate": 9.133777777777778e-06, "loss": 0.4046, "step": 24450 }, { "epoch": 0.5434589406327764, "grad_norm": 2.65625, "learning_rate": 9.129333333333334e-06, "loss": 0.4305, "step": 24460 }, { "epoch": 0.5436811233558478, "grad_norm": 2.515625, "learning_rate": 9.124888888888889e-06, "loss": 0.413, "step": 24470 }, { "epoch": 0.5439033060789193, "grad_norm": 2.078125, "learning_rate": 9.120444444444446e-06, "loss": 0.3882, "step": 24480 }, { "epoch": 0.5441254888019907, "grad_norm": 2.390625, "learning_rate": 9.116e-06, "loss": 0.4162, "step": 24490 }, { "epoch": 0.5443476715250622, "grad_norm": 2.296875, "learning_rate": 9.111555555555557e-06, "loss": 0.402, "step": 24500 }, { "epoch": 0.5445698542481336, "grad_norm": 2.34375, "learning_rate": 9.107111111111112e-06, "loss": 0.4079, "step": 24510 }, { "epoch": 0.5447920369712052, "grad_norm": 2.640625, "learning_rate": 9.102666666666667e-06, "loss": 0.3852, "step": 24520 }, { "epoch": 0.5450142196942765, "grad_norm": 2.84375, "learning_rate": 9.098222222222223e-06, "loss": 0.3792, "step": 24530 }, { "epoch": 0.545236402417348, "grad_norm": 2.609375, "learning_rate": 9.093777777777778e-06, "loss": 0.3756, "step": 24540 }, { "epoch": 0.5454585851404194, "grad_norm": 2.5625, "learning_rate": 9.089333333333335e-06, "loss": 0.379, "step": 24550 }, { "epoch": 0.545680767863491, "grad_norm": 2.25, "learning_rate": 9.08488888888889e-06, "loss": 0.3756, "step": 24560 }, { "epoch": 0.5459029505865624, "grad_norm": 2.703125, "learning_rate": 9.080444444444444e-06, "loss": 0.3867, "step": 24570 }, { "epoch": 0.5461251333096339, "grad_norm": 2.1875, "learning_rate": 9.076000000000001e-06, "loss": 0.3852, "step": 24580 }, { "epoch": 0.5463473160327053, "grad_norm": 2.03125, "learning_rate": 9.071555555555557e-06, "loss": 0.3832, "step": 24590 }, { "epoch": 0.5465694987557768, "grad_norm": 1.9296875, "learning_rate": 9.067111111111112e-06, "loss": 0.363, "step": 24600 }, { "epoch": 0.5467916814788482, "grad_norm": 2.609375, "learning_rate": 9.062666666666667e-06, "loss": 0.4217, "step": 24610 }, { "epoch": 0.5470138642019197, "grad_norm": 2.21875, "learning_rate": 9.058222222222222e-06, "loss": 0.3823, "step": 24620 }, { "epoch": 0.5472360469249911, "grad_norm": 2.59375, "learning_rate": 9.053777777777778e-06, "loss": 0.3898, "step": 24630 }, { "epoch": 0.5474582296480626, "grad_norm": 3.078125, "learning_rate": 9.049333333333335e-06, "loss": 0.3935, "step": 24640 }, { "epoch": 0.5476804123711341, "grad_norm": 2.453125, "learning_rate": 9.04488888888889e-06, "loss": 0.3809, "step": 24650 }, { "epoch": 0.5479025950942055, "grad_norm": 2.390625, "learning_rate": 9.040444444444445e-06, "loss": 0.3726, "step": 24660 }, { "epoch": 0.548124777817277, "grad_norm": 2.453125, "learning_rate": 9.036e-06, "loss": 0.3664, "step": 24670 }, { "epoch": 0.5483469605403484, "grad_norm": 2.203125, "learning_rate": 9.031555555555556e-06, "loss": 0.3716, "step": 24680 }, { "epoch": 0.5485691432634199, "grad_norm": 2.09375, "learning_rate": 9.027111111111113e-06, "loss": 0.4025, "step": 24690 }, { "epoch": 0.5487913259864913, "grad_norm": 2.21875, "learning_rate": 9.022666666666667e-06, "loss": 0.3298, "step": 24700 }, { "epoch": 0.5490135087095628, "grad_norm": 2.078125, "learning_rate": 9.018222222222222e-06, "loss": 0.3857, "step": 24710 }, { "epoch": 0.5492356914326342, "grad_norm": 2.609375, "learning_rate": 9.013777777777779e-06, "loss": 0.3528, "step": 24720 }, { "epoch": 0.5494578741557057, "grad_norm": 2.21875, "learning_rate": 9.009333333333334e-06, "loss": 0.3563, "step": 24730 }, { "epoch": 0.5496800568787771, "grad_norm": 2.4375, "learning_rate": 9.00488888888889e-06, "loss": 0.3696, "step": 24740 }, { "epoch": 0.5499022396018486, "grad_norm": 2.328125, "learning_rate": 9.000444444444445e-06, "loss": 0.3847, "step": 24750 }, { "epoch": 0.55012442232492, "grad_norm": 2.359375, "learning_rate": 8.996e-06, "loss": 0.3865, "step": 24760 }, { "epoch": 0.5503466050479915, "grad_norm": 2.890625, "learning_rate": 8.991555555555556e-06, "loss": 0.3664, "step": 24770 }, { "epoch": 0.5505687877710629, "grad_norm": 2.625, "learning_rate": 8.987111111111111e-06, "loss": 0.4078, "step": 24780 }, { "epoch": 0.5507909704941344, "grad_norm": 2.453125, "learning_rate": 8.982666666666668e-06, "loss": 0.3581, "step": 24790 }, { "epoch": 0.5510131532172058, "grad_norm": 2.25, "learning_rate": 8.978222222222223e-06, "loss": 0.3675, "step": 24800 }, { "epoch": 0.5512353359402773, "grad_norm": 2.65625, "learning_rate": 8.97377777777778e-06, "loss": 0.4063, "step": 24810 }, { "epoch": 0.5514575186633487, "grad_norm": 2.453125, "learning_rate": 8.969333333333334e-06, "loss": 0.385, "step": 24820 }, { "epoch": 0.5516797013864202, "grad_norm": 2.625, "learning_rate": 8.964888888888889e-06, "loss": 0.4183, "step": 24830 }, { "epoch": 0.5519018841094916, "grad_norm": 3.484375, "learning_rate": 8.960444444444445e-06, "loss": 0.4121, "step": 24840 }, { "epoch": 0.5521240668325631, "grad_norm": 2.0625, "learning_rate": 8.956e-06, "loss": 0.3987, "step": 24850 }, { "epoch": 0.5523462495556346, "grad_norm": 2.484375, "learning_rate": 8.951555555555557e-06, "loss": 0.3913, "step": 24860 }, { "epoch": 0.552568432278706, "grad_norm": 2.15625, "learning_rate": 8.947111111111112e-06, "loss": 0.4085, "step": 24870 }, { "epoch": 0.5527906150017775, "grad_norm": 2.296875, "learning_rate": 8.942666666666667e-06, "loss": 0.3699, "step": 24880 }, { "epoch": 0.5530127977248489, "grad_norm": 2.578125, "learning_rate": 8.938222222222223e-06, "loss": 0.3996, "step": 24890 }, { "epoch": 0.5532349804479204, "grad_norm": 2.1875, "learning_rate": 8.93377777777778e-06, "loss": 0.378, "step": 24900 }, { "epoch": 0.5534571631709918, "grad_norm": 2.75, "learning_rate": 8.929333333333334e-06, "loss": 0.4091, "step": 24910 }, { "epoch": 0.5536793458940633, "grad_norm": 2.765625, "learning_rate": 8.92488888888889e-06, "loss": 0.3781, "step": 24920 }, { "epoch": 0.5539015286171347, "grad_norm": 2.5, "learning_rate": 8.920444444444444e-06, "loss": 0.3566, "step": 24930 }, { "epoch": 0.5541237113402062, "grad_norm": 2.265625, "learning_rate": 8.916e-06, "loss": 0.3532, "step": 24940 }, { "epoch": 0.5543458940632776, "grad_norm": 2.4375, "learning_rate": 8.911555555555557e-06, "loss": 0.3517, "step": 24950 }, { "epoch": 0.5545680767863491, "grad_norm": 2.625, "learning_rate": 8.907111111111112e-06, "loss": 0.3661, "step": 24960 }, { "epoch": 0.5547902595094205, "grad_norm": 2.203125, "learning_rate": 8.902666666666667e-06, "loss": 0.3667, "step": 24970 }, { "epoch": 0.555012442232492, "grad_norm": 2.9375, "learning_rate": 8.898222222222222e-06, "loss": 0.384, "step": 24980 }, { "epoch": 0.5552346249555634, "grad_norm": 2.46875, "learning_rate": 8.893777777777778e-06, "loss": 0.3959, "step": 24990 }, { "epoch": 0.5554568076786349, "grad_norm": 2.21875, "learning_rate": 8.889333333333335e-06, "loss": 0.3585, "step": 25000 }, { "epoch": 0.5556789904017063, "grad_norm": 2.546875, "learning_rate": 8.88488888888889e-06, "loss": 0.3841, "step": 25010 }, { "epoch": 0.5559011731247778, "grad_norm": 2.78125, "learning_rate": 8.880444444444445e-06, "loss": 0.3772, "step": 25020 }, { "epoch": 0.5561233558478492, "grad_norm": 2.4375, "learning_rate": 8.876e-06, "loss": 0.3723, "step": 25030 }, { "epoch": 0.5563455385709207, "grad_norm": 2.46875, "learning_rate": 8.871555555555556e-06, "loss": 0.4183, "step": 25040 }, { "epoch": 0.5565677212939921, "grad_norm": 2.765625, "learning_rate": 8.867111111111112e-06, "loss": 0.3735, "step": 25050 }, { "epoch": 0.5567899040170636, "grad_norm": 2.328125, "learning_rate": 8.862666666666667e-06, "loss": 0.3475, "step": 25060 }, { "epoch": 0.5570120867401351, "grad_norm": 2.3125, "learning_rate": 8.858222222222222e-06, "loss": 0.3926, "step": 25070 }, { "epoch": 0.5572342694632065, "grad_norm": 2.78125, "learning_rate": 8.853777777777779e-06, "loss": 0.4119, "step": 25080 }, { "epoch": 0.557456452186278, "grad_norm": 2.84375, "learning_rate": 8.849333333333334e-06, "loss": 0.3947, "step": 25090 }, { "epoch": 0.5576786349093494, "grad_norm": 2.5625, "learning_rate": 8.84488888888889e-06, "loss": 0.4107, "step": 25100 }, { "epoch": 0.5579008176324209, "grad_norm": 2.609375, "learning_rate": 8.840444444444445e-06, "loss": 0.4038, "step": 25110 }, { "epoch": 0.5581230003554923, "grad_norm": 2.421875, "learning_rate": 8.836000000000001e-06, "loss": 0.3674, "step": 25120 }, { "epoch": 0.5583451830785638, "grad_norm": 2.765625, "learning_rate": 8.831555555555556e-06, "loss": 0.428, "step": 25130 }, { "epoch": 0.5585673658016352, "grad_norm": 2.3125, "learning_rate": 8.827111111111111e-06, "loss": 0.3403, "step": 25140 }, { "epoch": 0.5587895485247067, "grad_norm": 2.328125, "learning_rate": 8.822666666666668e-06, "loss": 0.3861, "step": 25150 }, { "epoch": 0.5590117312477781, "grad_norm": 2.625, "learning_rate": 8.818222222222223e-06, "loss": 0.3654, "step": 25160 }, { "epoch": 0.5592339139708496, "grad_norm": 2.5, "learning_rate": 8.813777777777779e-06, "loss": 0.3821, "step": 25170 }, { "epoch": 0.559456096693921, "grad_norm": 2.578125, "learning_rate": 8.809333333333334e-06, "loss": 0.3817, "step": 25180 }, { "epoch": 0.5596782794169926, "grad_norm": 2.515625, "learning_rate": 8.804888888888889e-06, "loss": 0.3875, "step": 25190 }, { "epoch": 0.559900462140064, "grad_norm": 3.578125, "learning_rate": 8.800444444444445e-06, "loss": 0.4084, "step": 25200 }, { "epoch": 0.5601226448631355, "grad_norm": 2.5, "learning_rate": 8.796000000000002e-06, "loss": 0.4038, "step": 25210 }, { "epoch": 0.5603448275862069, "grad_norm": 1.984375, "learning_rate": 8.791555555555557e-06, "loss": 0.3568, "step": 25220 }, { "epoch": 0.5605670103092784, "grad_norm": 2.078125, "learning_rate": 8.787111111111112e-06, "loss": 0.3638, "step": 25230 }, { "epoch": 0.5607891930323498, "grad_norm": 2.515625, "learning_rate": 8.782666666666666e-06, "loss": 0.4023, "step": 25240 }, { "epoch": 0.5610113757554213, "grad_norm": 2.3125, "learning_rate": 8.778222222222223e-06, "loss": 0.3715, "step": 25250 }, { "epoch": 0.5612335584784928, "grad_norm": 2.65625, "learning_rate": 8.77377777777778e-06, "loss": 0.3963, "step": 25260 }, { "epoch": 0.5614557412015642, "grad_norm": 2.90625, "learning_rate": 8.769333333333334e-06, "loss": 0.3837, "step": 25270 }, { "epoch": 0.5616779239246357, "grad_norm": 2.296875, "learning_rate": 8.764888888888889e-06, "loss": 0.4131, "step": 25280 }, { "epoch": 0.5619001066477071, "grad_norm": 2.046875, "learning_rate": 8.760444444444444e-06, "loss": 0.3561, "step": 25290 }, { "epoch": 0.5621222893707786, "grad_norm": 2.109375, "learning_rate": 8.756e-06, "loss": 0.3487, "step": 25300 }, { "epoch": 0.56234447209385, "grad_norm": 2.421875, "learning_rate": 8.751555555555557e-06, "loss": 0.388, "step": 25310 }, { "epoch": 0.5625666548169215, "grad_norm": 2.296875, "learning_rate": 8.747111111111112e-06, "loss": 0.3671, "step": 25320 }, { "epoch": 0.5627888375399929, "grad_norm": 2.53125, "learning_rate": 8.742666666666667e-06, "loss": 0.4174, "step": 25330 }, { "epoch": 0.5630110202630644, "grad_norm": 2.546875, "learning_rate": 8.738222222222222e-06, "loss": 0.405, "step": 25340 }, { "epoch": 0.5632332029861358, "grad_norm": 2.40625, "learning_rate": 8.733777777777778e-06, "loss": 0.3833, "step": 25350 }, { "epoch": 0.5634553857092073, "grad_norm": 2.328125, "learning_rate": 8.729333333333335e-06, "loss": 0.4073, "step": 25360 }, { "epoch": 0.5636775684322787, "grad_norm": 2.78125, "learning_rate": 8.72488888888889e-06, "loss": 0.3715, "step": 25370 }, { "epoch": 0.5638997511553502, "grad_norm": 2.28125, "learning_rate": 8.720444444444444e-06, "loss": 0.3855, "step": 25380 }, { "epoch": 0.5641219338784216, "grad_norm": 2.265625, "learning_rate": 8.716000000000001e-06, "loss": 0.3614, "step": 25390 }, { "epoch": 0.5643441166014931, "grad_norm": 2.125, "learning_rate": 8.711555555555556e-06, "loss": 0.3656, "step": 25400 }, { "epoch": 0.5645662993245645, "grad_norm": 2.1875, "learning_rate": 8.707111111111112e-06, "loss": 0.3981, "step": 25410 }, { "epoch": 0.564788482047636, "grad_norm": 2.3125, "learning_rate": 8.702666666666667e-06, "loss": 0.4118, "step": 25420 }, { "epoch": 0.5650106647707074, "grad_norm": 2.703125, "learning_rate": 8.698222222222224e-06, "loss": 0.3816, "step": 25430 }, { "epoch": 0.5652328474937789, "grad_norm": 2.484375, "learning_rate": 8.693777777777779e-06, "loss": 0.3911, "step": 25440 }, { "epoch": 0.5654550302168503, "grad_norm": 1.984375, "learning_rate": 8.689333333333333e-06, "loss": 0.3567, "step": 25450 }, { "epoch": 0.5656772129399218, "grad_norm": 2.546875, "learning_rate": 8.68488888888889e-06, "loss": 0.3928, "step": 25460 }, { "epoch": 0.5658993956629933, "grad_norm": 1.671875, "learning_rate": 8.680444444444445e-06, "loss": 0.3439, "step": 25470 }, { "epoch": 0.5661215783860647, "grad_norm": 2.65625, "learning_rate": 8.676000000000001e-06, "loss": 0.3818, "step": 25480 }, { "epoch": 0.5663437611091362, "grad_norm": 3.171875, "learning_rate": 8.671555555555556e-06, "loss": 0.3726, "step": 25490 }, { "epoch": 0.5665659438322076, "grad_norm": 2.8125, "learning_rate": 8.667111111111111e-06, "loss": 0.3898, "step": 25500 }, { "epoch": 0.5667881265552791, "grad_norm": 2.515625, "learning_rate": 8.662666666666668e-06, "loss": 0.4124, "step": 25510 }, { "epoch": 0.5670103092783505, "grad_norm": 2.234375, "learning_rate": 8.658222222222224e-06, "loss": 0.3822, "step": 25520 }, { "epoch": 0.567232492001422, "grad_norm": 2.1875, "learning_rate": 8.653777777777779e-06, "loss": 0.3788, "step": 25530 }, { "epoch": 0.5674546747244934, "grad_norm": 2.125, "learning_rate": 8.649333333333334e-06, "loss": 0.3916, "step": 25540 }, { "epoch": 0.5676768574475649, "grad_norm": 2.40625, "learning_rate": 8.644888888888889e-06, "loss": 0.3715, "step": 25550 }, { "epoch": 0.5678990401706363, "grad_norm": 2.65625, "learning_rate": 8.640444444444445e-06, "loss": 0.3836, "step": 25560 }, { "epoch": 0.5681212228937078, "grad_norm": 2.203125, "learning_rate": 8.636000000000002e-06, "loss": 0.4102, "step": 25570 }, { "epoch": 0.5683434056167792, "grad_norm": 2.109375, "learning_rate": 8.631555555555557e-06, "loss": 0.3541, "step": 25580 }, { "epoch": 0.5685655883398507, "grad_norm": 2.4375, "learning_rate": 8.627111111111111e-06, "loss": 0.3905, "step": 25590 }, { "epoch": 0.5687877710629221, "grad_norm": 2.359375, "learning_rate": 8.622666666666666e-06, "loss": 0.3719, "step": 25600 }, { "epoch": 0.5690099537859936, "grad_norm": 2.5, "learning_rate": 8.618222222222223e-06, "loss": 0.3946, "step": 25610 }, { "epoch": 0.569232136509065, "grad_norm": 2.578125, "learning_rate": 8.61377777777778e-06, "loss": 0.4057, "step": 25620 }, { "epoch": 0.5694543192321365, "grad_norm": 2.265625, "learning_rate": 8.609333333333334e-06, "loss": 0.4055, "step": 25630 }, { "epoch": 0.5696765019552079, "grad_norm": 2.25, "learning_rate": 8.604888888888889e-06, "loss": 0.4136, "step": 25640 }, { "epoch": 0.5698986846782794, "grad_norm": 2.40625, "learning_rate": 8.600444444444444e-06, "loss": 0.396, "step": 25650 }, { "epoch": 0.5701208674013508, "grad_norm": 2.90625, "learning_rate": 8.596e-06, "loss": 0.3647, "step": 25660 }, { "epoch": 0.5703430501244223, "grad_norm": 1.96875, "learning_rate": 8.591555555555557e-06, "loss": 0.3717, "step": 25670 }, { "epoch": 0.5705652328474938, "grad_norm": 2.265625, "learning_rate": 8.587111111111112e-06, "loss": 0.3827, "step": 25680 }, { "epoch": 0.5707874155705652, "grad_norm": 2.515625, "learning_rate": 8.582666666666667e-06, "loss": 0.4217, "step": 25690 }, { "epoch": 0.5710095982936367, "grad_norm": 2.640625, "learning_rate": 8.578222222222223e-06, "loss": 0.4349, "step": 25700 }, { "epoch": 0.5712317810167081, "grad_norm": 2.203125, "learning_rate": 8.573777777777778e-06, "loss": 0.3873, "step": 25710 }, { "epoch": 0.5714539637397796, "grad_norm": 2.015625, "learning_rate": 8.569333333333335e-06, "loss": 0.4171, "step": 25720 }, { "epoch": 0.571676146462851, "grad_norm": 2.53125, "learning_rate": 8.56488888888889e-06, "loss": 0.4147, "step": 25730 }, { "epoch": 0.5718983291859225, "grad_norm": 2.296875, "learning_rate": 8.560444444444446e-06, "loss": 0.4042, "step": 25740 }, { "epoch": 0.5721205119089939, "grad_norm": 2.703125, "learning_rate": 8.556e-06, "loss": 0.4124, "step": 25750 }, { "epoch": 0.5723426946320654, "grad_norm": 2.078125, "learning_rate": 8.551555555555556e-06, "loss": 0.3899, "step": 25760 }, { "epoch": 0.5725648773551368, "grad_norm": 2.15625, "learning_rate": 8.547111111111112e-06, "loss": 0.397, "step": 25770 }, { "epoch": 0.5727870600782083, "grad_norm": 2.453125, "learning_rate": 8.542666666666667e-06, "loss": 0.3999, "step": 25780 }, { "epoch": 0.5730092428012797, "grad_norm": 2.53125, "learning_rate": 8.538222222222224e-06, "loss": 0.3891, "step": 25790 }, { "epoch": 0.5732314255243512, "grad_norm": 2.78125, "learning_rate": 8.533777777777778e-06, "loss": 0.4335, "step": 25800 }, { "epoch": 0.5734536082474226, "grad_norm": 2.203125, "learning_rate": 8.529333333333333e-06, "loss": 0.4054, "step": 25810 }, { "epoch": 0.5736757909704941, "grad_norm": 2.171875, "learning_rate": 8.52488888888889e-06, "loss": 0.3576, "step": 25820 }, { "epoch": 0.5738979736935655, "grad_norm": 2.640625, "learning_rate": 8.520444444444446e-06, "loss": 0.428, "step": 25830 }, { "epoch": 0.574120156416637, "grad_norm": 2.078125, "learning_rate": 8.516000000000001e-06, "loss": 0.362, "step": 25840 }, { "epoch": 0.5743423391397084, "grad_norm": 2.0625, "learning_rate": 8.511555555555556e-06, "loss": 0.3951, "step": 25850 }, { "epoch": 0.57456452186278, "grad_norm": 2.375, "learning_rate": 8.50711111111111e-06, "loss": 0.3886, "step": 25860 }, { "epoch": 0.5747867045858513, "grad_norm": 2.8125, "learning_rate": 8.502666666666667e-06, "loss": 0.4179, "step": 25870 }, { "epoch": 0.5750088873089229, "grad_norm": 1.9921875, "learning_rate": 8.498222222222224e-06, "loss": 0.3508, "step": 25880 }, { "epoch": 0.5752310700319944, "grad_norm": 2.71875, "learning_rate": 8.493777777777779e-06, "loss": 0.3949, "step": 25890 }, { "epoch": 0.5754532527550658, "grad_norm": 2.53125, "learning_rate": 8.489333333333334e-06, "loss": 0.4156, "step": 25900 }, { "epoch": 0.5756754354781373, "grad_norm": 2.15625, "learning_rate": 8.484888888888888e-06, "loss": 0.3551, "step": 25910 }, { "epoch": 0.5758976182012087, "grad_norm": 2.15625, "learning_rate": 8.480444444444445e-06, "loss": 0.3424, "step": 25920 }, { "epoch": 0.5761198009242802, "grad_norm": 2.578125, "learning_rate": 8.476000000000002e-06, "loss": 0.4259, "step": 25930 }, { "epoch": 0.5763419836473516, "grad_norm": 2.4375, "learning_rate": 8.471555555555556e-06, "loss": 0.3698, "step": 25940 }, { "epoch": 0.5765641663704231, "grad_norm": 2.78125, "learning_rate": 8.467111111111111e-06, "loss": 0.3744, "step": 25950 }, { "epoch": 0.5767863490934945, "grad_norm": 2.015625, "learning_rate": 8.462666666666666e-06, "loss": 0.3816, "step": 25960 }, { "epoch": 0.577008531816566, "grad_norm": 2.515625, "learning_rate": 8.458222222222223e-06, "loss": 0.3592, "step": 25970 }, { "epoch": 0.5772307145396374, "grad_norm": 2.0625, "learning_rate": 8.453777777777779e-06, "loss": 0.361, "step": 25980 }, { "epoch": 0.5774528972627089, "grad_norm": 2.5625, "learning_rate": 8.449333333333334e-06, "loss": 0.3623, "step": 25990 }, { "epoch": 0.5776750799857803, "grad_norm": 2.28125, "learning_rate": 8.444888888888889e-06, "loss": 0.4267, "step": 26000 }, { "epoch": 0.5778972627088518, "grad_norm": 2.4375, "learning_rate": 8.440444444444445e-06, "loss": 0.4347, "step": 26010 }, { "epoch": 0.5781194454319232, "grad_norm": 2.453125, "learning_rate": 8.436e-06, "loss": 0.3921, "step": 26020 }, { "epoch": 0.5783416281549947, "grad_norm": 2.484375, "learning_rate": 8.431555555555557e-06, "loss": 0.3784, "step": 26030 }, { "epoch": 0.5785638108780661, "grad_norm": 2.25, "learning_rate": 8.427111111111112e-06, "loss": 0.3643, "step": 26040 }, { "epoch": 0.5787859936011376, "grad_norm": 2.640625, "learning_rate": 8.422666666666668e-06, "loss": 0.3954, "step": 26050 }, { "epoch": 0.579008176324209, "grad_norm": 2.109375, "learning_rate": 8.418222222222223e-06, "loss": 0.3729, "step": 26060 }, { "epoch": 0.5792303590472805, "grad_norm": 2.421875, "learning_rate": 8.413777777777778e-06, "loss": 0.4098, "step": 26070 }, { "epoch": 0.579452541770352, "grad_norm": 2.5, "learning_rate": 8.409333333333334e-06, "loss": 0.3831, "step": 26080 }, { "epoch": 0.5796747244934234, "grad_norm": 2.40625, "learning_rate": 8.40488888888889e-06, "loss": 0.3327, "step": 26090 }, { "epoch": 0.5798969072164949, "grad_norm": 2.296875, "learning_rate": 8.400444444444446e-06, "loss": 0.389, "step": 26100 }, { "epoch": 0.5801190899395663, "grad_norm": 2.453125, "learning_rate": 8.396e-06, "loss": 0.4266, "step": 26110 }, { "epoch": 0.5803412726626378, "grad_norm": 2.40625, "learning_rate": 8.391555555555555e-06, "loss": 0.3712, "step": 26120 }, { "epoch": 0.5805634553857092, "grad_norm": 2.59375, "learning_rate": 8.387111111111112e-06, "loss": 0.3867, "step": 26130 }, { "epoch": 0.5807856381087807, "grad_norm": 2.359375, "learning_rate": 8.382666666666669e-06, "loss": 0.4288, "step": 26140 }, { "epoch": 0.5810078208318521, "grad_norm": 2.46875, "learning_rate": 8.378222222222223e-06, "loss": 0.3929, "step": 26150 }, { "epoch": 0.5812300035549236, "grad_norm": 2.375, "learning_rate": 8.373777777777778e-06, "loss": 0.3626, "step": 26160 }, { "epoch": 0.581452186277995, "grad_norm": 2.71875, "learning_rate": 8.369333333333333e-06, "loss": 0.3701, "step": 26170 }, { "epoch": 0.5816743690010665, "grad_norm": 2.25, "learning_rate": 8.36488888888889e-06, "loss": 0.3522, "step": 26180 }, { "epoch": 0.5818965517241379, "grad_norm": 2.390625, "learning_rate": 8.360444444444446e-06, "loss": 0.3935, "step": 26190 }, { "epoch": 0.5821187344472094, "grad_norm": 2.0625, "learning_rate": 8.356000000000001e-06, "loss": 0.3453, "step": 26200 }, { "epoch": 0.5823409171702808, "grad_norm": 2.1875, "learning_rate": 8.351555555555556e-06, "loss": 0.3886, "step": 26210 }, { "epoch": 0.5825630998933523, "grad_norm": 2.8125, "learning_rate": 8.34711111111111e-06, "loss": 0.4037, "step": 26220 }, { "epoch": 0.5827852826164237, "grad_norm": 2.421875, "learning_rate": 8.342666666666667e-06, "loss": 0.3791, "step": 26230 }, { "epoch": 0.5830074653394952, "grad_norm": 2.015625, "learning_rate": 8.338222222222224e-06, "loss": 0.3771, "step": 26240 }, { "epoch": 0.5832296480625666, "grad_norm": 2.53125, "learning_rate": 8.333777777777779e-06, "loss": 0.3691, "step": 26250 }, { "epoch": 0.5834518307856381, "grad_norm": 2.53125, "learning_rate": 8.329333333333333e-06, "loss": 0.3587, "step": 26260 }, { "epoch": 0.5836740135087095, "grad_norm": 2.53125, "learning_rate": 8.324888888888888e-06, "loss": 0.3607, "step": 26270 }, { "epoch": 0.583896196231781, "grad_norm": 2.671875, "learning_rate": 8.320444444444445e-06, "loss": 0.3732, "step": 26280 }, { "epoch": 0.5841183789548525, "grad_norm": 2.578125, "learning_rate": 8.316000000000001e-06, "loss": 0.3848, "step": 26290 }, { "epoch": 0.5843405616779239, "grad_norm": 2.515625, "learning_rate": 8.311555555555556e-06, "loss": 0.3833, "step": 26300 }, { "epoch": 0.5845627444009954, "grad_norm": 2.390625, "learning_rate": 8.307111111111111e-06, "loss": 0.351, "step": 26310 }, { "epoch": 0.5847849271240668, "grad_norm": 2.46875, "learning_rate": 8.302666666666668e-06, "loss": 0.3716, "step": 26320 }, { "epoch": 0.5850071098471383, "grad_norm": 2.3125, "learning_rate": 8.298222222222222e-06, "loss": 0.3664, "step": 26330 }, { "epoch": 0.5852292925702097, "grad_norm": 3.125, "learning_rate": 8.293777777777779e-06, "loss": 0.4228, "step": 26340 }, { "epoch": 0.5854514752932812, "grad_norm": 3.15625, "learning_rate": 8.289333333333334e-06, "loss": 0.3897, "step": 26350 }, { "epoch": 0.5856736580163526, "grad_norm": 2.890625, "learning_rate": 8.28488888888889e-06, "loss": 0.3806, "step": 26360 }, { "epoch": 0.5858958407394241, "grad_norm": 2.28125, "learning_rate": 8.280444444444445e-06, "loss": 0.378, "step": 26370 }, { "epoch": 0.5861180234624955, "grad_norm": 2.65625, "learning_rate": 8.276e-06, "loss": 0.3796, "step": 26380 }, { "epoch": 0.586340206185567, "grad_norm": 2.125, "learning_rate": 8.271555555555557e-06, "loss": 0.3603, "step": 26390 }, { "epoch": 0.5865623889086384, "grad_norm": 2.171875, "learning_rate": 8.267111111111111e-06, "loss": 0.3759, "step": 26400 }, { "epoch": 0.5867845716317099, "grad_norm": 2.53125, "learning_rate": 8.262666666666668e-06, "loss": 0.3788, "step": 26410 }, { "epoch": 0.5870067543547813, "grad_norm": 3.0625, "learning_rate": 8.258222222222223e-06, "loss": 0.3692, "step": 26420 }, { "epoch": 0.5872289370778528, "grad_norm": 2.53125, "learning_rate": 8.253777777777778e-06, "loss": 0.4502, "step": 26430 }, { "epoch": 0.5874511198009242, "grad_norm": 2.609375, "learning_rate": 8.249333333333334e-06, "loss": 0.3761, "step": 26440 }, { "epoch": 0.5876733025239957, "grad_norm": 2.546875, "learning_rate": 8.24488888888889e-06, "loss": 0.3842, "step": 26450 }, { "epoch": 0.5878954852470671, "grad_norm": 2.078125, "learning_rate": 8.240444444444446e-06, "loss": 0.3799, "step": 26460 }, { "epoch": 0.5881176679701386, "grad_norm": 2.4375, "learning_rate": 8.236e-06, "loss": 0.3935, "step": 26470 }, { "epoch": 0.58833985069321, "grad_norm": 3.0625, "learning_rate": 8.231555555555555e-06, "loss": 0.4229, "step": 26480 }, { "epoch": 0.5885620334162815, "grad_norm": 2.15625, "learning_rate": 8.227111111111112e-06, "loss": 0.39, "step": 26490 }, { "epoch": 0.5887842161393531, "grad_norm": 2.359375, "learning_rate": 8.222666666666668e-06, "loss": 0.386, "step": 26500 }, { "epoch": 0.5890063988624245, "grad_norm": 2.515625, "learning_rate": 8.218222222222223e-06, "loss": 0.3853, "step": 26510 }, { "epoch": 0.589228581585496, "grad_norm": 2.3125, "learning_rate": 8.213777777777778e-06, "loss": 0.4081, "step": 26520 }, { "epoch": 0.5894507643085674, "grad_norm": 2.484375, "learning_rate": 8.209333333333333e-06, "loss": 0.3724, "step": 26530 }, { "epoch": 0.5896729470316389, "grad_norm": 2.140625, "learning_rate": 8.20488888888889e-06, "loss": 0.3663, "step": 26540 }, { "epoch": 0.5898951297547103, "grad_norm": 2.25, "learning_rate": 8.200444444444446e-06, "loss": 0.402, "step": 26550 }, { "epoch": 0.5901173124777818, "grad_norm": 2.578125, "learning_rate": 8.196e-06, "loss": 0.3919, "step": 26560 }, { "epoch": 0.5903394952008532, "grad_norm": 2.390625, "learning_rate": 8.191555555555556e-06, "loss": 0.3896, "step": 26570 }, { "epoch": 0.5905616779239247, "grad_norm": 2.421875, "learning_rate": 8.18711111111111e-06, "loss": 0.4163, "step": 26580 }, { "epoch": 0.5907838606469961, "grad_norm": 2.0625, "learning_rate": 8.182666666666667e-06, "loss": 0.3443, "step": 26590 }, { "epoch": 0.5910060433700676, "grad_norm": 2.328125, "learning_rate": 8.178222222222224e-06, "loss": 0.3769, "step": 26600 }, { "epoch": 0.591228226093139, "grad_norm": 2.6875, "learning_rate": 8.173777777777778e-06, "loss": 0.3943, "step": 26610 }, { "epoch": 0.5914504088162105, "grad_norm": 2.4375, "learning_rate": 8.169333333333333e-06, "loss": 0.406, "step": 26620 }, { "epoch": 0.5916725915392819, "grad_norm": 2.234375, "learning_rate": 8.16488888888889e-06, "loss": 0.3781, "step": 26630 }, { "epoch": 0.5918947742623534, "grad_norm": 2.421875, "learning_rate": 8.160444444444445e-06, "loss": 0.3836, "step": 26640 }, { "epoch": 0.5921169569854248, "grad_norm": 2.25, "learning_rate": 8.156000000000001e-06, "loss": 0.4014, "step": 26650 }, { "epoch": 0.5923391397084963, "grad_norm": 2.375, "learning_rate": 8.151555555555556e-06, "loss": 0.3852, "step": 26660 }, { "epoch": 0.5925613224315677, "grad_norm": 2.40625, "learning_rate": 8.147111111111113e-06, "loss": 0.3652, "step": 26670 }, { "epoch": 0.5927835051546392, "grad_norm": 2.234375, "learning_rate": 8.142666666666667e-06, "loss": 0.3818, "step": 26680 }, { "epoch": 0.5930056878777106, "grad_norm": 2.78125, "learning_rate": 8.138222222222222e-06, "loss": 0.3772, "step": 26690 }, { "epoch": 0.5932278706007821, "grad_norm": 3.140625, "learning_rate": 8.133777777777779e-06, "loss": 0.4421, "step": 26700 }, { "epoch": 0.5934500533238536, "grad_norm": 2.28125, "learning_rate": 8.129333333333334e-06, "loss": 0.3549, "step": 26710 }, { "epoch": 0.593672236046925, "grad_norm": 2.625, "learning_rate": 8.12488888888889e-06, "loss": 0.4144, "step": 26720 }, { "epoch": 0.5938944187699965, "grad_norm": 2.265625, "learning_rate": 8.120444444444445e-06, "loss": 0.3973, "step": 26730 }, { "epoch": 0.5941166014930679, "grad_norm": 2.3125, "learning_rate": 8.116e-06, "loss": 0.4206, "step": 26740 }, { "epoch": 0.5943387842161394, "grad_norm": 2.359375, "learning_rate": 8.111555555555556e-06, "loss": 0.3941, "step": 26750 }, { "epoch": 0.5945609669392108, "grad_norm": 2.1875, "learning_rate": 8.107111111111113e-06, "loss": 0.3912, "step": 26760 }, { "epoch": 0.5947831496622823, "grad_norm": 2.234375, "learning_rate": 8.102666666666668e-06, "loss": 0.4154, "step": 26770 }, { "epoch": 0.5950053323853537, "grad_norm": 2.421875, "learning_rate": 8.098222222222223e-06, "loss": 0.3884, "step": 26780 }, { "epoch": 0.5952275151084252, "grad_norm": 2.484375, "learning_rate": 8.093777777777777e-06, "loss": 0.3626, "step": 26790 }, { "epoch": 0.5954496978314966, "grad_norm": 2.015625, "learning_rate": 8.089333333333334e-06, "loss": 0.4196, "step": 26800 }, { "epoch": 0.5956718805545681, "grad_norm": 2.3125, "learning_rate": 8.08488888888889e-06, "loss": 0.365, "step": 26810 }, { "epoch": 0.5958940632776395, "grad_norm": 2.484375, "learning_rate": 8.080444444444445e-06, "loss": 0.36, "step": 26820 }, { "epoch": 0.596116246000711, "grad_norm": 2.265625, "learning_rate": 8.076e-06, "loss": 0.362, "step": 26830 }, { "epoch": 0.5963384287237824, "grad_norm": 2.375, "learning_rate": 8.071555555555555e-06, "loss": 0.3744, "step": 26840 }, { "epoch": 0.5965606114468539, "grad_norm": 1.9765625, "learning_rate": 8.067111111111112e-06, "loss": 0.3593, "step": 26850 }, { "epoch": 0.5967827941699253, "grad_norm": 2.375, "learning_rate": 8.062666666666668e-06, "loss": 0.3708, "step": 26860 }, { "epoch": 0.5970049768929968, "grad_norm": 2.390625, "learning_rate": 8.058222222222223e-06, "loss": 0.372, "step": 26870 }, { "epoch": 0.5972271596160682, "grad_norm": 2.53125, "learning_rate": 8.053777777777778e-06, "loss": 0.3874, "step": 26880 }, { "epoch": 0.5974493423391397, "grad_norm": 2.53125, "learning_rate": 8.049333333333333e-06, "loss": 0.3587, "step": 26890 }, { "epoch": 0.5976715250622112, "grad_norm": 2.25, "learning_rate": 8.04488888888889e-06, "loss": 0.3558, "step": 26900 }, { "epoch": 0.5978937077852826, "grad_norm": 2.484375, "learning_rate": 8.040444444444446e-06, "loss": 0.3847, "step": 26910 }, { "epoch": 0.5981158905083541, "grad_norm": 2.3125, "learning_rate": 8.036e-06, "loss": 0.4165, "step": 26920 }, { "epoch": 0.5983380732314255, "grad_norm": 2.546875, "learning_rate": 8.031555555555555e-06, "loss": 0.3804, "step": 26930 }, { "epoch": 0.598560255954497, "grad_norm": 2.703125, "learning_rate": 8.027111111111112e-06, "loss": 0.3987, "step": 26940 }, { "epoch": 0.5987824386775684, "grad_norm": 3.0, "learning_rate": 8.022666666666667e-06, "loss": 0.4194, "step": 26950 }, { "epoch": 0.5990046214006399, "grad_norm": 2.234375, "learning_rate": 8.018222222222223e-06, "loss": 0.3575, "step": 26960 }, { "epoch": 0.5992268041237113, "grad_norm": 2.0625, "learning_rate": 8.013777777777778e-06, "loss": 0.3862, "step": 26970 }, { "epoch": 0.5994489868467828, "grad_norm": 2.640625, "learning_rate": 8.009333333333335e-06, "loss": 0.3968, "step": 26980 }, { "epoch": 0.5996711695698542, "grad_norm": 2.859375, "learning_rate": 8.00488888888889e-06, "loss": 0.3703, "step": 26990 }, { "epoch": 0.5998933522929257, "grad_norm": 1.921875, "learning_rate": 8.000444444444444e-06, "loss": 0.388, "step": 27000 }, { "epoch": 0.6001155350159971, "grad_norm": 2.671875, "learning_rate": 7.996000000000001e-06, "loss": 0.3826, "step": 27010 }, { "epoch": 0.6003377177390686, "grad_norm": 2.140625, "learning_rate": 7.991555555555556e-06, "loss": 0.382, "step": 27020 }, { "epoch": 0.60055990046214, "grad_norm": 2.671875, "learning_rate": 7.987111111111112e-06, "loss": 0.3621, "step": 27030 }, { "epoch": 0.6007820831852115, "grad_norm": 2.5625, "learning_rate": 7.982666666666667e-06, "loss": 0.4083, "step": 27040 }, { "epoch": 0.6010042659082829, "grad_norm": 2.40625, "learning_rate": 7.978222222222222e-06, "loss": 0.3844, "step": 27050 }, { "epoch": 0.6012264486313544, "grad_norm": 2.53125, "learning_rate": 7.973777777777779e-06, "loss": 0.3781, "step": 27060 }, { "epoch": 0.6014486313544258, "grad_norm": 2.265625, "learning_rate": 7.969333333333335e-06, "loss": 0.3586, "step": 27070 }, { "epoch": 0.6016708140774973, "grad_norm": 2.46875, "learning_rate": 7.96488888888889e-06, "loss": 0.3765, "step": 27080 }, { "epoch": 0.6018929968005687, "grad_norm": 2.859375, "learning_rate": 7.960444444444445e-06, "loss": 0.3895, "step": 27090 }, { "epoch": 0.6021151795236402, "grad_norm": 2.296875, "learning_rate": 7.956e-06, "loss": 0.3906, "step": 27100 }, { "epoch": 0.6023373622467117, "grad_norm": 2.21875, "learning_rate": 7.951555555555556e-06, "loss": 0.3882, "step": 27110 }, { "epoch": 0.6025595449697831, "grad_norm": 2.078125, "learning_rate": 7.947111111111113e-06, "loss": 0.3914, "step": 27120 }, { "epoch": 0.6027817276928547, "grad_norm": 2.46875, "learning_rate": 7.942666666666668e-06, "loss": 0.3816, "step": 27130 }, { "epoch": 0.603003910415926, "grad_norm": 2.140625, "learning_rate": 7.938222222222222e-06, "loss": 0.3587, "step": 27140 }, { "epoch": 0.6032260931389976, "grad_norm": 2.453125, "learning_rate": 7.933777777777777e-06, "loss": 0.4219, "step": 27150 }, { "epoch": 0.603448275862069, "grad_norm": 2.53125, "learning_rate": 7.929333333333334e-06, "loss": 0.3842, "step": 27160 }, { "epoch": 0.6036704585851405, "grad_norm": 2.171875, "learning_rate": 7.92488888888889e-06, "loss": 0.3762, "step": 27170 }, { "epoch": 0.6038926413082119, "grad_norm": 2.265625, "learning_rate": 7.920444444444445e-06, "loss": 0.3777, "step": 27180 }, { "epoch": 0.6041148240312834, "grad_norm": 2.96875, "learning_rate": 7.916e-06, "loss": 0.389, "step": 27190 }, { "epoch": 0.6043370067543548, "grad_norm": 2.46875, "learning_rate": 7.911555555555555e-06, "loss": 0.3915, "step": 27200 }, { "epoch": 0.6045591894774263, "grad_norm": 2.265625, "learning_rate": 7.907111111111111e-06, "loss": 0.3741, "step": 27210 }, { "epoch": 0.6047813722004977, "grad_norm": 2.421875, "learning_rate": 7.902666666666668e-06, "loss": 0.3503, "step": 27220 }, { "epoch": 0.6050035549235692, "grad_norm": 2.71875, "learning_rate": 7.898222222222223e-06, "loss": 0.3603, "step": 27230 }, { "epoch": 0.6052257376466406, "grad_norm": 2.640625, "learning_rate": 7.893777777777778e-06, "loss": 0.3596, "step": 27240 }, { "epoch": 0.6054479203697121, "grad_norm": 2.65625, "learning_rate": 7.889333333333334e-06, "loss": 0.4017, "step": 27250 }, { "epoch": 0.6056701030927835, "grad_norm": 2.21875, "learning_rate": 7.884888888888889e-06, "loss": 0.375, "step": 27260 }, { "epoch": 0.605892285815855, "grad_norm": 2.671875, "learning_rate": 7.880444444444446e-06, "loss": 0.3834, "step": 27270 }, { "epoch": 0.6061144685389264, "grad_norm": 2.21875, "learning_rate": 7.876e-06, "loss": 0.3794, "step": 27280 }, { "epoch": 0.6063366512619979, "grad_norm": 2.359375, "learning_rate": 7.871555555555557e-06, "loss": 0.3893, "step": 27290 }, { "epoch": 0.6065588339850693, "grad_norm": 2.0625, "learning_rate": 7.867111111111112e-06, "loss": 0.3511, "step": 27300 }, { "epoch": 0.6067810167081408, "grad_norm": 2.5, "learning_rate": 7.862666666666667e-06, "loss": 0.3741, "step": 27310 }, { "epoch": 0.6070031994312123, "grad_norm": 2.921875, "learning_rate": 7.858222222222223e-06, "loss": 0.4052, "step": 27320 }, { "epoch": 0.6072253821542837, "grad_norm": 2.609375, "learning_rate": 7.853777777777778e-06, "loss": 0.4087, "step": 27330 }, { "epoch": 0.6074475648773552, "grad_norm": 2.28125, "learning_rate": 7.849333333333335e-06, "loss": 0.3986, "step": 27340 }, { "epoch": 0.6076697476004266, "grad_norm": 2.71875, "learning_rate": 7.84488888888889e-06, "loss": 0.3892, "step": 27350 }, { "epoch": 0.6078919303234981, "grad_norm": 2.4375, "learning_rate": 7.840444444444444e-06, "loss": 0.3862, "step": 27360 }, { "epoch": 0.6081141130465695, "grad_norm": 2.8125, "learning_rate": 7.836000000000001e-06, "loss": 0.3889, "step": 27370 }, { "epoch": 0.608336295769641, "grad_norm": 2.640625, "learning_rate": 7.831555555555557e-06, "loss": 0.3702, "step": 27380 }, { "epoch": 0.6085584784927124, "grad_norm": 2.625, "learning_rate": 7.827111111111112e-06, "loss": 0.3859, "step": 27390 }, { "epoch": 0.6087806612157839, "grad_norm": 2.78125, "learning_rate": 7.822666666666667e-06, "loss": 0.4044, "step": 27400 }, { "epoch": 0.6090028439388553, "grad_norm": 2.28125, "learning_rate": 7.818222222222222e-06, "loss": 0.3996, "step": 27410 }, { "epoch": 0.6092250266619268, "grad_norm": 2.078125, "learning_rate": 7.813777777777778e-06, "loss": 0.3879, "step": 27420 }, { "epoch": 0.6094472093849982, "grad_norm": 3.0, "learning_rate": 7.809333333333335e-06, "loss": 0.3552, "step": 27430 }, { "epoch": 0.6096693921080697, "grad_norm": 2.328125, "learning_rate": 7.80488888888889e-06, "loss": 0.3508, "step": 27440 }, { "epoch": 0.6098915748311411, "grad_norm": 1.9453125, "learning_rate": 7.800444444444445e-06, "loss": 0.3573, "step": 27450 }, { "epoch": 0.6101137575542126, "grad_norm": 2.4375, "learning_rate": 7.796e-06, "loss": 0.385, "step": 27460 }, { "epoch": 0.610335940277284, "grad_norm": 2.296875, "learning_rate": 7.791555555555556e-06, "loss": 0.3949, "step": 27470 }, { "epoch": 0.6105581230003555, "grad_norm": 2.78125, "learning_rate": 7.787111111111113e-06, "loss": 0.393, "step": 27480 }, { "epoch": 0.6107803057234269, "grad_norm": 2.4375, "learning_rate": 7.782666666666667e-06, "loss": 0.3738, "step": 27490 }, { "epoch": 0.6110024884464984, "grad_norm": 2.859375, "learning_rate": 7.778222222222222e-06, "loss": 0.4097, "step": 27500 }, { "epoch": 0.6112246711695698, "grad_norm": 2.109375, "learning_rate": 7.773777777777777e-06, "loss": 0.3911, "step": 27510 }, { "epoch": 0.6114468538926413, "grad_norm": 2.5, "learning_rate": 7.769333333333334e-06, "loss": 0.4197, "step": 27520 }, { "epoch": 0.6116690366157128, "grad_norm": 2.375, "learning_rate": 7.76488888888889e-06, "loss": 0.3551, "step": 27530 }, { "epoch": 0.6118912193387842, "grad_norm": 2.8125, "learning_rate": 7.760444444444445e-06, "loss": 0.3914, "step": 27540 }, { "epoch": 0.6121134020618557, "grad_norm": 3.125, "learning_rate": 7.756e-06, "loss": 0.371, "step": 27550 }, { "epoch": 0.6123355847849271, "grad_norm": 2.328125, "learning_rate": 7.751555555555556e-06, "loss": 0.3976, "step": 27560 }, { "epoch": 0.6125577675079986, "grad_norm": 2.734375, "learning_rate": 7.747111111111111e-06, "loss": 0.3953, "step": 27570 }, { "epoch": 0.61277995023107, "grad_norm": 2.765625, "learning_rate": 7.742666666666668e-06, "loss": 0.3821, "step": 27580 }, { "epoch": 0.6130021329541415, "grad_norm": 2.296875, "learning_rate": 7.738222222222223e-06, "loss": 0.3977, "step": 27590 }, { "epoch": 0.6132243156772129, "grad_norm": 2.390625, "learning_rate": 7.73377777777778e-06, "loss": 0.392, "step": 27600 }, { "epoch": 0.6134464984002844, "grad_norm": 2.0625, "learning_rate": 7.729333333333334e-06, "loss": 0.3432, "step": 27610 }, { "epoch": 0.6136686811233558, "grad_norm": 2.46875, "learning_rate": 7.724888888888889e-06, "loss": 0.381, "step": 27620 }, { "epoch": 0.6138908638464273, "grad_norm": 2.703125, "learning_rate": 7.720444444444445e-06, "loss": 0.3911, "step": 27630 }, { "epoch": 0.6141130465694987, "grad_norm": 2.34375, "learning_rate": 7.716e-06, "loss": 0.3796, "step": 27640 }, { "epoch": 0.6143352292925702, "grad_norm": 2.578125, "learning_rate": 7.711555555555557e-06, "loss": 0.3854, "step": 27650 }, { "epoch": 0.6145574120156416, "grad_norm": 2.578125, "learning_rate": 7.707111111111112e-06, "loss": 0.4328, "step": 27660 }, { "epoch": 0.6147795947387131, "grad_norm": 2.46875, "learning_rate": 7.702666666666667e-06, "loss": 0.3718, "step": 27670 }, { "epoch": 0.6150017774617845, "grad_norm": 2.359375, "learning_rate": 7.698222222222223e-06, "loss": 0.3647, "step": 27680 }, { "epoch": 0.615223960184856, "grad_norm": 2.3125, "learning_rate": 7.69377777777778e-06, "loss": 0.4028, "step": 27690 }, { "epoch": 0.6154461429079274, "grad_norm": 2.453125, "learning_rate": 7.689333333333334e-06, "loss": 0.399, "step": 27700 }, { "epoch": 0.6156683256309989, "grad_norm": 2.15625, "learning_rate": 7.68488888888889e-06, "loss": 0.3847, "step": 27710 }, { "epoch": 0.6158905083540704, "grad_norm": 2.578125, "learning_rate": 7.680444444444444e-06, "loss": 0.3966, "step": 27720 }, { "epoch": 0.6161126910771418, "grad_norm": 2.59375, "learning_rate": 7.676e-06, "loss": 0.4005, "step": 27730 }, { "epoch": 0.6163348738002133, "grad_norm": 2.25, "learning_rate": 7.671555555555557e-06, "loss": 0.3959, "step": 27740 }, { "epoch": 0.6165570565232847, "grad_norm": 2.328125, "learning_rate": 7.667111111111112e-06, "loss": 0.3794, "step": 27750 }, { "epoch": 0.6167792392463562, "grad_norm": 2.578125, "learning_rate": 7.662666666666667e-06, "loss": 0.376, "step": 27760 }, { "epoch": 0.6170014219694276, "grad_norm": 2.21875, "learning_rate": 7.658222222222222e-06, "loss": 0.388, "step": 27770 }, { "epoch": 0.6172236046924992, "grad_norm": 1.828125, "learning_rate": 7.653777777777778e-06, "loss": 0.398, "step": 27780 }, { "epoch": 0.6174457874155705, "grad_norm": 2.5625, "learning_rate": 7.649333333333335e-06, "loss": 0.3529, "step": 27790 }, { "epoch": 0.617667970138642, "grad_norm": 3.046875, "learning_rate": 7.64488888888889e-06, "loss": 0.3881, "step": 27800 }, { "epoch": 0.6178901528617134, "grad_norm": 2.140625, "learning_rate": 7.640444444444445e-06, "loss": 0.357, "step": 27810 }, { "epoch": 0.618112335584785, "grad_norm": 2.703125, "learning_rate": 7.636e-06, "loss": 0.3694, "step": 27820 }, { "epoch": 0.6183345183078564, "grad_norm": 2.25, "learning_rate": 7.631555555555556e-06, "loss": 0.396, "step": 27830 }, { "epoch": 0.6185567010309279, "grad_norm": 3.09375, "learning_rate": 7.627111111111112e-06, "loss": 0.4501, "step": 27840 }, { "epoch": 0.6187788837539993, "grad_norm": 2.421875, "learning_rate": 7.622666666666667e-06, "loss": 0.3959, "step": 27850 }, { "epoch": 0.6190010664770708, "grad_norm": 3.0625, "learning_rate": 7.618222222222222e-06, "loss": 0.3726, "step": 27860 }, { "epoch": 0.6192232492001422, "grad_norm": 2.453125, "learning_rate": 7.613777777777779e-06, "loss": 0.3592, "step": 27870 }, { "epoch": 0.6194454319232137, "grad_norm": 2.171875, "learning_rate": 7.609333333333334e-06, "loss": 0.3812, "step": 27880 }, { "epoch": 0.6196676146462851, "grad_norm": 2.71875, "learning_rate": 7.604888888888889e-06, "loss": 0.3947, "step": 27890 }, { "epoch": 0.6198897973693566, "grad_norm": 2.3125, "learning_rate": 7.600444444444445e-06, "loss": 0.3816, "step": 27900 }, { "epoch": 0.620111980092428, "grad_norm": 2.359375, "learning_rate": 7.5960000000000015e-06, "loss": 0.3579, "step": 27910 }, { "epoch": 0.6203341628154995, "grad_norm": 2.5, "learning_rate": 7.591555555555556e-06, "loss": 0.3766, "step": 27920 }, { "epoch": 0.620556345538571, "grad_norm": 2.21875, "learning_rate": 7.587111111111112e-06, "loss": 0.4472, "step": 27930 }, { "epoch": 0.6207785282616424, "grad_norm": 2.140625, "learning_rate": 7.582666666666667e-06, "loss": 0.3622, "step": 27940 }, { "epoch": 0.6210007109847139, "grad_norm": 2.609375, "learning_rate": 7.5782222222222225e-06, "loss": 0.3767, "step": 27950 }, { "epoch": 0.6212228937077853, "grad_norm": 2.34375, "learning_rate": 7.573777777777779e-06, "loss": 0.3808, "step": 27960 }, { "epoch": 0.6214450764308568, "grad_norm": 2.359375, "learning_rate": 7.569333333333334e-06, "loss": 0.3676, "step": 27970 }, { "epoch": 0.6216672591539282, "grad_norm": 2.734375, "learning_rate": 7.56488888888889e-06, "loss": 0.3983, "step": 27980 }, { "epoch": 0.6218894418769997, "grad_norm": 2.421875, "learning_rate": 7.5604444444444445e-06, "loss": 0.4203, "step": 27990 }, { "epoch": 0.6221116246000711, "grad_norm": 2.203125, "learning_rate": 7.556000000000001e-06, "loss": 0.4059, "step": 28000 }, { "epoch": 0.6223338073231426, "grad_norm": 2.25, "learning_rate": 7.551555555555557e-06, "loss": 0.3762, "step": 28010 }, { "epoch": 0.622555990046214, "grad_norm": 2.359375, "learning_rate": 7.5471111111111115e-06, "loss": 0.3727, "step": 28020 }, { "epoch": 0.6227781727692855, "grad_norm": 2.5625, "learning_rate": 7.542666666666667e-06, "loss": 0.3787, "step": 28030 }, { "epoch": 0.6230003554923569, "grad_norm": 2.515625, "learning_rate": 7.538222222222222e-06, "loss": 0.3643, "step": 28040 }, { "epoch": 0.6232225382154284, "grad_norm": 2.453125, "learning_rate": 7.533777777777779e-06, "loss": 0.3842, "step": 28050 }, { "epoch": 0.6234447209384998, "grad_norm": 2.59375, "learning_rate": 7.529333333333334e-06, "loss": 0.3752, "step": 28060 }, { "epoch": 0.6236669036615713, "grad_norm": 2.484375, "learning_rate": 7.524888888888889e-06, "loss": 0.3572, "step": 28070 }, { "epoch": 0.6238890863846427, "grad_norm": 2.28125, "learning_rate": 7.520444444444445e-06, "loss": 0.3834, "step": 28080 }, { "epoch": 0.6241112691077142, "grad_norm": 2.40625, "learning_rate": 7.516000000000001e-06, "loss": 0.3519, "step": 28090 }, { "epoch": 0.6243334518307856, "grad_norm": 2.265625, "learning_rate": 7.511555555555556e-06, "loss": 0.3824, "step": 28100 }, { "epoch": 0.6245556345538571, "grad_norm": 2.59375, "learning_rate": 7.507111111111112e-06, "loss": 0.3752, "step": 28110 }, { "epoch": 0.6247778172769285, "grad_norm": 2.25, "learning_rate": 7.502666666666667e-06, "loss": 0.3523, "step": 28120 }, { "epoch": 0.625, "grad_norm": 2.40625, "learning_rate": 7.4982222222222225e-06, "loss": 0.3832, "step": 28130 }, { "epoch": 0.6252221827230715, "grad_norm": 2.484375, "learning_rate": 7.493777777777779e-06, "loss": 0.4148, "step": 28140 }, { "epoch": 0.6254443654461429, "grad_norm": 2.484375, "learning_rate": 7.489333333333334e-06, "loss": 0.366, "step": 28150 }, { "epoch": 0.6256665481692144, "grad_norm": 2.6875, "learning_rate": 7.4848888888888895e-06, "loss": 0.3789, "step": 28160 }, { "epoch": 0.6258887308922858, "grad_norm": 2.15625, "learning_rate": 7.480444444444444e-06, "loss": 0.3716, "step": 28170 }, { "epoch": 0.6261109136153573, "grad_norm": 2.296875, "learning_rate": 7.476000000000001e-06, "loss": 0.3479, "step": 28180 }, { "epoch": 0.6263330963384287, "grad_norm": 2.296875, "learning_rate": 7.471555555555557e-06, "loss": 0.3754, "step": 28190 }, { "epoch": 0.6265552790615002, "grad_norm": 2.59375, "learning_rate": 7.4671111111111115e-06, "loss": 0.3692, "step": 28200 }, { "epoch": 0.6267774617845716, "grad_norm": 2.390625, "learning_rate": 7.462666666666667e-06, "loss": 0.3499, "step": 28210 }, { "epoch": 0.6269996445076431, "grad_norm": 2.53125, "learning_rate": 7.458222222222224e-06, "loss": 0.3593, "step": 28220 }, { "epoch": 0.6272218272307145, "grad_norm": 2.515625, "learning_rate": 7.4537777777777785e-06, "loss": 0.3982, "step": 28230 }, { "epoch": 0.627444009953786, "grad_norm": 2.21875, "learning_rate": 7.449333333333334e-06, "loss": 0.3793, "step": 28240 }, { "epoch": 0.6276661926768574, "grad_norm": 2.609375, "learning_rate": 7.444888888888889e-06, "loss": 0.3641, "step": 28250 }, { "epoch": 0.6278883753999289, "grad_norm": 2.15625, "learning_rate": 7.440444444444445e-06, "loss": 0.3766, "step": 28260 }, { "epoch": 0.6281105581230003, "grad_norm": 2.5625, "learning_rate": 7.436000000000001e-06, "loss": 0.3702, "step": 28270 }, { "epoch": 0.6283327408460718, "grad_norm": 2.703125, "learning_rate": 7.431555555555556e-06, "loss": 0.4161, "step": 28280 }, { "epoch": 0.6285549235691432, "grad_norm": 2.796875, "learning_rate": 7.427111111111112e-06, "loss": 0.3946, "step": 28290 }, { "epoch": 0.6287771062922147, "grad_norm": 2.375, "learning_rate": 7.422666666666667e-06, "loss": 0.4037, "step": 28300 }, { "epoch": 0.6289992890152861, "grad_norm": 2.5625, "learning_rate": 7.418222222222223e-06, "loss": 0.3448, "step": 28310 }, { "epoch": 0.6292214717383576, "grad_norm": 2.296875, "learning_rate": 7.413777777777779e-06, "loss": 0.3975, "step": 28320 }, { "epoch": 0.629443654461429, "grad_norm": 2.734375, "learning_rate": 7.409333333333334e-06, "loss": 0.3917, "step": 28330 }, { "epoch": 0.6296658371845005, "grad_norm": 2.40625, "learning_rate": 7.4048888888888895e-06, "loss": 0.3971, "step": 28340 }, { "epoch": 0.629888019907572, "grad_norm": 2.40625, "learning_rate": 7.400444444444444e-06, "loss": 0.3488, "step": 28350 }, { "epoch": 0.6301102026306434, "grad_norm": 2.9375, "learning_rate": 7.396000000000001e-06, "loss": 0.3703, "step": 28360 }, { "epoch": 0.6303323853537149, "grad_norm": 2.546875, "learning_rate": 7.3915555555555565e-06, "loss": 0.3957, "step": 28370 }, { "epoch": 0.6305545680767863, "grad_norm": 2.765625, "learning_rate": 7.387111111111111e-06, "loss": 0.3904, "step": 28380 }, { "epoch": 0.6307767507998578, "grad_norm": 2.75, "learning_rate": 7.382666666666667e-06, "loss": 0.3894, "step": 28390 }, { "epoch": 0.6309989335229292, "grad_norm": 2.25, "learning_rate": 7.378222222222224e-06, "loss": 0.3837, "step": 28400 }, { "epoch": 0.6312211162460007, "grad_norm": 2.234375, "learning_rate": 7.3737777777777785e-06, "loss": 0.3542, "step": 28410 }, { "epoch": 0.6314432989690721, "grad_norm": 2.265625, "learning_rate": 7.369333333333334e-06, "loss": 0.4, "step": 28420 }, { "epoch": 0.6316654816921436, "grad_norm": 2.640625, "learning_rate": 7.364888888888889e-06, "loss": 0.3896, "step": 28430 }, { "epoch": 0.631887664415215, "grad_norm": 2.40625, "learning_rate": 7.360444444444445e-06, "loss": 0.3741, "step": 28440 }, { "epoch": 0.6321098471382866, "grad_norm": 2.96875, "learning_rate": 7.356000000000001e-06, "loss": 0.386, "step": 28450 }, { "epoch": 0.632332029861358, "grad_norm": 2.71875, "learning_rate": 7.351555555555556e-06, "loss": 0.372, "step": 28460 }, { "epoch": 0.6325542125844295, "grad_norm": 2.484375, "learning_rate": 7.347111111111112e-06, "loss": 0.3576, "step": 28470 }, { "epoch": 0.6327763953075008, "grad_norm": 2.578125, "learning_rate": 7.342666666666667e-06, "loss": 0.3976, "step": 28480 }, { "epoch": 0.6329985780305724, "grad_norm": 2.640625, "learning_rate": 7.338222222222223e-06, "loss": 0.3562, "step": 28490 }, { "epoch": 0.6332207607536438, "grad_norm": 2.5, "learning_rate": 7.333777777777779e-06, "loss": 0.3926, "step": 28500 }, { "epoch": 0.6334429434767153, "grad_norm": 3.015625, "learning_rate": 7.329333333333334e-06, "loss": 0.3995, "step": 28510 }, { "epoch": 0.6336651261997867, "grad_norm": 2.140625, "learning_rate": 7.324888888888889e-06, "loss": 0.3772, "step": 28520 }, { "epoch": 0.6338873089228582, "grad_norm": 2.546875, "learning_rate": 7.320444444444446e-06, "loss": 0.3619, "step": 28530 }, { "epoch": 0.6341094916459296, "grad_norm": 2.703125, "learning_rate": 7.316000000000001e-06, "loss": 0.3671, "step": 28540 }, { "epoch": 0.6343316743690011, "grad_norm": 2.578125, "learning_rate": 7.3115555555555565e-06, "loss": 0.3554, "step": 28550 }, { "epoch": 0.6345538570920726, "grad_norm": 2.421875, "learning_rate": 7.307111111111111e-06, "loss": 0.3984, "step": 28560 }, { "epoch": 0.634776039815144, "grad_norm": 2.359375, "learning_rate": 7.302666666666667e-06, "loss": 0.3698, "step": 28570 }, { "epoch": 0.6349982225382155, "grad_norm": 2.515625, "learning_rate": 7.2982222222222235e-06, "loss": 0.358, "step": 28580 }, { "epoch": 0.6352204052612869, "grad_norm": 2.03125, "learning_rate": 7.293777777777778e-06, "loss": 0.3577, "step": 28590 }, { "epoch": 0.6354425879843584, "grad_norm": 2.59375, "learning_rate": 7.289333333333334e-06, "loss": 0.4029, "step": 28600 }, { "epoch": 0.6356647707074298, "grad_norm": 2.0625, "learning_rate": 7.284888888888889e-06, "loss": 0.3996, "step": 28610 }, { "epoch": 0.6358869534305013, "grad_norm": 2.015625, "learning_rate": 7.2804444444444455e-06, "loss": 0.3708, "step": 28620 }, { "epoch": 0.6361091361535727, "grad_norm": 2.125, "learning_rate": 7.276000000000001e-06, "loss": 0.3876, "step": 28630 }, { "epoch": 0.6363313188766442, "grad_norm": 2.390625, "learning_rate": 7.271555555555556e-06, "loss": 0.3658, "step": 28640 }, { "epoch": 0.6365535015997156, "grad_norm": 2.4375, "learning_rate": 7.267111111111112e-06, "loss": 0.3854, "step": 28650 }, { "epoch": 0.6367756843227871, "grad_norm": 2.328125, "learning_rate": 7.2626666666666665e-06, "loss": 0.3653, "step": 28660 }, { "epoch": 0.6369978670458585, "grad_norm": 2.640625, "learning_rate": 7.258222222222223e-06, "loss": 0.3889, "step": 28670 }, { "epoch": 0.63722004976893, "grad_norm": 2.546875, "learning_rate": 7.253777777777779e-06, "loss": 0.3712, "step": 28680 }, { "epoch": 0.6374422324920014, "grad_norm": 2.203125, "learning_rate": 7.249333333333334e-06, "loss": 0.3837, "step": 28690 }, { "epoch": 0.6376644152150729, "grad_norm": 2.375, "learning_rate": 7.244888888888889e-06, "loss": 0.3379, "step": 28700 }, { "epoch": 0.6378865979381443, "grad_norm": 2.484375, "learning_rate": 7.240444444444446e-06, "loss": 0.3851, "step": 28710 }, { "epoch": 0.6381087806612158, "grad_norm": 2.953125, "learning_rate": 7.236000000000001e-06, "loss": 0.3968, "step": 28720 }, { "epoch": 0.6383309633842872, "grad_norm": 2.15625, "learning_rate": 7.231555555555556e-06, "loss": 0.3234, "step": 28730 }, { "epoch": 0.6385531461073587, "grad_norm": 2.125, "learning_rate": 7.227111111111111e-06, "loss": 0.3903, "step": 28740 }, { "epoch": 0.6387753288304302, "grad_norm": 2.109375, "learning_rate": 7.222666666666667e-06, "loss": 0.359, "step": 28750 }, { "epoch": 0.6389975115535016, "grad_norm": 2.84375, "learning_rate": 7.2182222222222235e-06, "loss": 0.3954, "step": 28760 }, { "epoch": 0.6392196942765731, "grad_norm": 2.265625, "learning_rate": 7.213777777777778e-06, "loss": 0.4062, "step": 28770 }, { "epoch": 0.6394418769996445, "grad_norm": 2.28125, "learning_rate": 7.209333333333334e-06, "loss": 0.3806, "step": 28780 }, { "epoch": 0.639664059722716, "grad_norm": 2.421875, "learning_rate": 7.204888888888889e-06, "loss": 0.3964, "step": 28790 }, { "epoch": 0.6398862424457874, "grad_norm": 2.53125, "learning_rate": 7.200444444444445e-06, "loss": 0.3821, "step": 28800 }, { "epoch": 0.6401084251688589, "grad_norm": 3.140625, "learning_rate": 7.196000000000001e-06, "loss": 0.3998, "step": 28810 }, { "epoch": 0.6403306078919303, "grad_norm": 2.71875, "learning_rate": 7.191555555555556e-06, "loss": 0.3813, "step": 28820 }, { "epoch": 0.6405527906150018, "grad_norm": 2.4375, "learning_rate": 7.187111111111112e-06, "loss": 0.437, "step": 28830 }, { "epoch": 0.6407749733380732, "grad_norm": 3.140625, "learning_rate": 7.182666666666668e-06, "loss": 0.3915, "step": 28840 }, { "epoch": 0.6409971560611447, "grad_norm": 2.609375, "learning_rate": 7.178222222222223e-06, "loss": 0.372, "step": 28850 }, { "epoch": 0.6412193387842161, "grad_norm": 2.4375, "learning_rate": 7.173777777777779e-06, "loss": 0.3675, "step": 28860 }, { "epoch": 0.6414415215072876, "grad_norm": 2.453125, "learning_rate": 7.1693333333333335e-06, "loss": 0.3891, "step": 28870 }, { "epoch": 0.641663704230359, "grad_norm": 2.15625, "learning_rate": 7.164888888888889e-06, "loss": 0.3852, "step": 28880 }, { "epoch": 0.6418858869534305, "grad_norm": 2.328125, "learning_rate": 7.160444444444446e-06, "loss": 0.3704, "step": 28890 }, { "epoch": 0.6421080696765019, "grad_norm": 2.34375, "learning_rate": 7.156000000000001e-06, "loss": 0.4069, "step": 28900 }, { "epoch": 0.6423302523995734, "grad_norm": 2.453125, "learning_rate": 7.151555555555556e-06, "loss": 0.3886, "step": 28910 }, { "epoch": 0.6425524351226448, "grad_norm": 2.25, "learning_rate": 7.147111111111111e-06, "loss": 0.3955, "step": 28920 }, { "epoch": 0.6427746178457163, "grad_norm": 2.34375, "learning_rate": 7.142666666666668e-06, "loss": 0.3679, "step": 28930 }, { "epoch": 0.6429968005687877, "grad_norm": 2.3125, "learning_rate": 7.138222222222223e-06, "loss": 0.3426, "step": 28940 }, { "epoch": 0.6432189832918592, "grad_norm": 2.0625, "learning_rate": 7.133777777777778e-06, "loss": 0.378, "step": 28950 }, { "epoch": 0.6434411660149307, "grad_norm": 2.265625, "learning_rate": 7.129333333333334e-06, "loss": 0.4086, "step": 28960 }, { "epoch": 0.6436633487380021, "grad_norm": 2.84375, "learning_rate": 7.124888888888889e-06, "loss": 0.3604, "step": 28970 }, { "epoch": 0.6438855314610736, "grad_norm": 3.0, "learning_rate": 7.120444444444445e-06, "loss": 0.41, "step": 28980 }, { "epoch": 0.644107714184145, "grad_norm": 2.234375, "learning_rate": 7.116000000000001e-06, "loss": 0.3723, "step": 28990 }, { "epoch": 0.6443298969072165, "grad_norm": 2.546875, "learning_rate": 7.111555555555556e-06, "loss": 0.4027, "step": 29000 }, { "epoch": 0.6445520796302879, "grad_norm": 2.484375, "learning_rate": 7.1071111111111115e-06, "loss": 0.3697, "step": 29010 }, { "epoch": 0.6447742623533594, "grad_norm": 2.46875, "learning_rate": 7.102666666666668e-06, "loss": 0.3818, "step": 29020 }, { "epoch": 0.6449964450764308, "grad_norm": 2.3125, "learning_rate": 7.098222222222223e-06, "loss": 0.3822, "step": 29030 }, { "epoch": 0.6452186277995023, "grad_norm": 2.34375, "learning_rate": 7.093777777777779e-06, "loss": 0.358, "step": 29040 }, { "epoch": 0.6454408105225737, "grad_norm": 2.109375, "learning_rate": 7.0893333333333334e-06, "loss": 0.4125, "step": 29050 }, { "epoch": 0.6456629932456452, "grad_norm": 2.484375, "learning_rate": 7.084888888888889e-06, "loss": 0.3836, "step": 29060 }, { "epoch": 0.6458851759687166, "grad_norm": 2.4375, "learning_rate": 7.080444444444446e-06, "loss": 0.3953, "step": 29070 }, { "epoch": 0.6461073586917881, "grad_norm": 2.84375, "learning_rate": 7.0760000000000005e-06, "loss": 0.4048, "step": 29080 }, { "epoch": 0.6463295414148595, "grad_norm": 2.046875, "learning_rate": 7.071555555555556e-06, "loss": 0.3697, "step": 29090 }, { "epoch": 0.646551724137931, "grad_norm": 2.6875, "learning_rate": 7.067111111111111e-06, "loss": 0.3944, "step": 29100 }, { "epoch": 0.6467739068610024, "grad_norm": 2.171875, "learning_rate": 7.062666666666668e-06, "loss": 0.3524, "step": 29110 }, { "epoch": 0.646996089584074, "grad_norm": 2.46875, "learning_rate": 7.058222222222223e-06, "loss": 0.3895, "step": 29120 }, { "epoch": 0.6472182723071453, "grad_norm": 2.265625, "learning_rate": 7.053777777777778e-06, "loss": 0.4205, "step": 29130 }, { "epoch": 0.6474404550302169, "grad_norm": 2.53125, "learning_rate": 7.049333333333334e-06, "loss": 0.3825, "step": 29140 }, { "epoch": 0.6476626377532883, "grad_norm": 2.890625, "learning_rate": 7.04488888888889e-06, "loss": 0.4281, "step": 29150 }, { "epoch": 0.6478848204763598, "grad_norm": 2.34375, "learning_rate": 7.040444444444445e-06, "loss": 0.3488, "step": 29160 }, { "epoch": 0.6481070031994313, "grad_norm": 2.078125, "learning_rate": 7.036000000000001e-06, "loss": 0.3986, "step": 29170 }, { "epoch": 0.6483291859225027, "grad_norm": 3.15625, "learning_rate": 7.031555555555556e-06, "loss": 0.3739, "step": 29180 }, { "epoch": 0.6485513686455742, "grad_norm": 2.171875, "learning_rate": 7.0271111111111114e-06, "loss": 0.3763, "step": 29190 }, { "epoch": 0.6487735513686456, "grad_norm": 2.671875, "learning_rate": 7.022666666666668e-06, "loss": 0.3906, "step": 29200 }, { "epoch": 0.6489957340917171, "grad_norm": 2.734375, "learning_rate": 7.018222222222223e-06, "loss": 0.3285, "step": 29210 }, { "epoch": 0.6492179168147885, "grad_norm": 2.5, "learning_rate": 7.0137777777777785e-06, "loss": 0.4033, "step": 29220 }, { "epoch": 0.64944009953786, "grad_norm": 2.65625, "learning_rate": 7.009333333333333e-06, "loss": 0.4007, "step": 29230 }, { "epoch": 0.6496622822609314, "grad_norm": 2.28125, "learning_rate": 7.00488888888889e-06, "loss": 0.4126, "step": 29240 }, { "epoch": 0.6498844649840029, "grad_norm": 2.5625, "learning_rate": 7.000444444444446e-06, "loss": 0.357, "step": 29250 }, { "epoch": 0.6501066477070743, "grad_norm": 2.171875, "learning_rate": 6.9960000000000004e-06, "loss": 0.3915, "step": 29260 }, { "epoch": 0.6503288304301458, "grad_norm": 2.71875, "learning_rate": 6.991555555555556e-06, "loss": 0.3613, "step": 29270 }, { "epoch": 0.6505510131532172, "grad_norm": 2.4375, "learning_rate": 6.987111111111111e-06, "loss": 0.3869, "step": 29280 }, { "epoch": 0.6507731958762887, "grad_norm": 3.578125, "learning_rate": 6.9826666666666675e-06, "loss": 0.3879, "step": 29290 }, { "epoch": 0.6509953785993601, "grad_norm": 2.203125, "learning_rate": 6.978222222222223e-06, "loss": 0.3714, "step": 29300 }, { "epoch": 0.6512175613224316, "grad_norm": 2.375, "learning_rate": 6.973777777777778e-06, "loss": 0.426, "step": 29310 }, { "epoch": 0.651439744045503, "grad_norm": 2.765625, "learning_rate": 6.969333333333334e-06, "loss": 0.3995, "step": 29320 }, { "epoch": 0.6516619267685745, "grad_norm": 2.46875, "learning_rate": 6.96488888888889e-06, "loss": 0.3908, "step": 29330 }, { "epoch": 0.6518841094916459, "grad_norm": 2.453125, "learning_rate": 6.960444444444445e-06, "loss": 0.3951, "step": 29340 }, { "epoch": 0.6521062922147174, "grad_norm": 2.171875, "learning_rate": 6.956000000000001e-06, "loss": 0.3983, "step": 29350 }, { "epoch": 0.6523284749377888, "grad_norm": 2.625, "learning_rate": 6.951555555555556e-06, "loss": 0.3974, "step": 29360 }, { "epoch": 0.6525506576608603, "grad_norm": 2.59375, "learning_rate": 6.947111111111111e-06, "loss": 0.3993, "step": 29370 }, { "epoch": 0.6527728403839318, "grad_norm": 2.859375, "learning_rate": 6.942666666666668e-06, "loss": 0.3831, "step": 29380 }, { "epoch": 0.6529950231070032, "grad_norm": 2.671875, "learning_rate": 6.938222222222223e-06, "loss": 0.412, "step": 29390 }, { "epoch": 0.6532172058300747, "grad_norm": 2.734375, "learning_rate": 6.9337777777777784e-06, "loss": 0.3932, "step": 29400 }, { "epoch": 0.6534393885531461, "grad_norm": 2.515625, "learning_rate": 6.929333333333333e-06, "loss": 0.3707, "step": 29410 }, { "epoch": 0.6536615712762176, "grad_norm": 2.859375, "learning_rate": 6.92488888888889e-06, "loss": 0.3733, "step": 29420 }, { "epoch": 0.653883753999289, "grad_norm": 2.46875, "learning_rate": 6.9204444444444455e-06, "loss": 0.3707, "step": 29430 }, { "epoch": 0.6541059367223605, "grad_norm": 3.203125, "learning_rate": 6.916e-06, "loss": 0.3863, "step": 29440 }, { "epoch": 0.6543281194454319, "grad_norm": 1.8046875, "learning_rate": 6.911555555555556e-06, "loss": 0.3652, "step": 29450 }, { "epoch": 0.6545503021685034, "grad_norm": 2.671875, "learning_rate": 6.907111111111113e-06, "loss": 0.3804, "step": 29460 }, { "epoch": 0.6547724848915748, "grad_norm": 2.578125, "learning_rate": 6.9026666666666674e-06, "loss": 0.3855, "step": 29470 }, { "epoch": 0.6549946676146463, "grad_norm": 2.15625, "learning_rate": 6.898222222222223e-06, "loss": 0.3665, "step": 29480 }, { "epoch": 0.6552168503377177, "grad_norm": 2.40625, "learning_rate": 6.893777777777778e-06, "loss": 0.3597, "step": 29490 }, { "epoch": 0.6554390330607892, "grad_norm": 3.25, "learning_rate": 6.889333333333334e-06, "loss": 0.358, "step": 29500 }, { "epoch": 0.6556612157838606, "grad_norm": 2.265625, "learning_rate": 6.88488888888889e-06, "loss": 0.3633, "step": 29510 }, { "epoch": 0.6558833985069321, "grad_norm": 2.734375, "learning_rate": 6.880444444444445e-06, "loss": 0.3802, "step": 29520 }, { "epoch": 0.6561055812300035, "grad_norm": 2.5625, "learning_rate": 6.876000000000001e-06, "loss": 0.3688, "step": 29530 }, { "epoch": 0.656327763953075, "grad_norm": 2.234375, "learning_rate": 6.871555555555556e-06, "loss": 0.3919, "step": 29540 }, { "epoch": 0.6565499466761464, "grad_norm": 2.375, "learning_rate": 6.867111111111112e-06, "loss": 0.394, "step": 29550 }, { "epoch": 0.6567721293992179, "grad_norm": 2.375, "learning_rate": 6.862666666666668e-06, "loss": 0.3783, "step": 29560 }, { "epoch": 0.6569943121222894, "grad_norm": 2.375, "learning_rate": 6.858222222222223e-06, "loss": 0.3726, "step": 29570 }, { "epoch": 0.6572164948453608, "grad_norm": 2.296875, "learning_rate": 6.853777777777778e-06, "loss": 0.3872, "step": 29580 }, { "epoch": 0.6574386775684323, "grad_norm": 2.5, "learning_rate": 6.849333333333333e-06, "loss": 0.3658, "step": 29590 }, { "epoch": 0.6576608602915037, "grad_norm": 2.484375, "learning_rate": 6.84488888888889e-06, "loss": 0.3468, "step": 29600 }, { "epoch": 0.6578830430145752, "grad_norm": 2.625, "learning_rate": 6.8404444444444454e-06, "loss": 0.4079, "step": 29610 }, { "epoch": 0.6581052257376466, "grad_norm": 2.609375, "learning_rate": 6.836e-06, "loss": 0.3706, "step": 29620 }, { "epoch": 0.6583274084607181, "grad_norm": 2.453125, "learning_rate": 6.831555555555556e-06, "loss": 0.3939, "step": 29630 }, { "epoch": 0.6585495911837895, "grad_norm": 2.859375, "learning_rate": 6.8271111111111125e-06, "loss": 0.4351, "step": 29640 }, { "epoch": 0.658771773906861, "grad_norm": 2.46875, "learning_rate": 6.822666666666667e-06, "loss": 0.4096, "step": 29650 }, { "epoch": 0.6589939566299324, "grad_norm": 2.15625, "learning_rate": 6.818222222222223e-06, "loss": 0.3693, "step": 29660 }, { "epoch": 0.6592161393530039, "grad_norm": 2.171875, "learning_rate": 6.813777777777778e-06, "loss": 0.3968, "step": 29670 }, { "epoch": 0.6594383220760753, "grad_norm": 2.265625, "learning_rate": 6.809333333333334e-06, "loss": 0.3775, "step": 29680 }, { "epoch": 0.6596605047991468, "grad_norm": 2.296875, "learning_rate": 6.80488888888889e-06, "loss": 0.3893, "step": 29690 }, { "epoch": 0.6598826875222182, "grad_norm": 2.734375, "learning_rate": 6.800444444444445e-06, "loss": 0.3933, "step": 29700 }, { "epoch": 0.6601048702452897, "grad_norm": 2.40625, "learning_rate": 6.796000000000001e-06, "loss": 0.412, "step": 29710 }, { "epoch": 0.6603270529683611, "grad_norm": 2.3125, "learning_rate": 6.7915555555555555e-06, "loss": 0.3521, "step": 29720 }, { "epoch": 0.6605492356914326, "grad_norm": 2.1875, "learning_rate": 6.787111111111112e-06, "loss": 0.3412, "step": 29730 }, { "epoch": 0.660771418414504, "grad_norm": 2.453125, "learning_rate": 6.782666666666668e-06, "loss": 0.367, "step": 29740 }, { "epoch": 0.6609936011375755, "grad_norm": 2.359375, "learning_rate": 6.778222222222223e-06, "loss": 0.3539, "step": 29750 }, { "epoch": 0.6612157838606469, "grad_norm": 2.171875, "learning_rate": 6.773777777777778e-06, "loss": 0.3605, "step": 29760 }, { "epoch": 0.6614379665837185, "grad_norm": 2.15625, "learning_rate": 6.769333333333335e-06, "loss": 0.3604, "step": 29770 }, { "epoch": 0.66166014930679, "grad_norm": 2.265625, "learning_rate": 6.76488888888889e-06, "loss": 0.3803, "step": 29780 }, { "epoch": 0.6618823320298614, "grad_norm": 2.65625, "learning_rate": 6.760444444444445e-06, "loss": 0.4003, "step": 29790 }, { "epoch": 0.6621045147529329, "grad_norm": 2.03125, "learning_rate": 6.756e-06, "loss": 0.3609, "step": 29800 }, { "epoch": 0.6623266974760043, "grad_norm": 2.1875, "learning_rate": 6.751555555555556e-06, "loss": 0.3755, "step": 29810 }, { "epoch": 0.6625488801990758, "grad_norm": 2.484375, "learning_rate": 6.7471111111111124e-06, "loss": 0.3864, "step": 29820 }, { "epoch": 0.6627710629221472, "grad_norm": 2.625, "learning_rate": 6.742666666666667e-06, "loss": 0.3745, "step": 29830 }, { "epoch": 0.6629932456452187, "grad_norm": 2.125, "learning_rate": 6.738222222222223e-06, "loss": 0.3283, "step": 29840 }, { "epoch": 0.6632154283682901, "grad_norm": 2.703125, "learning_rate": 6.733777777777778e-06, "loss": 0.3893, "step": 29850 }, { "epoch": 0.6634376110913616, "grad_norm": 2.21875, "learning_rate": 6.729333333333334e-06, "loss": 0.3899, "step": 29860 }, { "epoch": 0.663659793814433, "grad_norm": 3.125, "learning_rate": 6.72488888888889e-06, "loss": 0.3545, "step": 29870 }, { "epoch": 0.6638819765375045, "grad_norm": 2.421875, "learning_rate": 6.720444444444445e-06, "loss": 0.357, "step": 29880 }, { "epoch": 0.6641041592605759, "grad_norm": 2.40625, "learning_rate": 6.716000000000001e-06, "loss": 0.369, "step": 29890 }, { "epoch": 0.6643263419836474, "grad_norm": 2.515625, "learning_rate": 6.7115555555555554e-06, "loss": 0.4066, "step": 29900 }, { "epoch": 0.6645485247067188, "grad_norm": 2.375, "learning_rate": 6.707111111111112e-06, "loss": 0.3894, "step": 29910 }, { "epoch": 0.6647707074297903, "grad_norm": 2.703125, "learning_rate": 6.702666666666668e-06, "loss": 0.3918, "step": 29920 }, { "epoch": 0.6649928901528617, "grad_norm": 2.609375, "learning_rate": 6.6982222222222225e-06, "loss": 0.402, "step": 29930 }, { "epoch": 0.6652150728759332, "grad_norm": 3.09375, "learning_rate": 6.693777777777778e-06, "loss": 0.3942, "step": 29940 }, { "epoch": 0.6654372555990046, "grad_norm": 2.234375, "learning_rate": 6.689333333333335e-06, "loss": 0.3799, "step": 29950 }, { "epoch": 0.6656594383220761, "grad_norm": 2.375, "learning_rate": 6.68488888888889e-06, "loss": 0.4013, "step": 29960 }, { "epoch": 0.6658816210451475, "grad_norm": 3.0625, "learning_rate": 6.680444444444445e-06, "loss": 0.406, "step": 29970 }, { "epoch": 0.666103803768219, "grad_norm": 2.34375, "learning_rate": 6.676e-06, "loss": 0.3626, "step": 29980 }, { "epoch": 0.6663259864912905, "grad_norm": 2.890625, "learning_rate": 6.671555555555556e-06, "loss": 0.4231, "step": 29990 }, { "epoch": 0.6665481692143619, "grad_norm": 2.359375, "learning_rate": 6.667111111111112e-06, "loss": 0.4099, "step": 30000 }, { "epoch": 0.6667703519374334, "grad_norm": 2.609375, "learning_rate": 6.662666666666667e-06, "loss": 0.3699, "step": 30010 }, { "epoch": 0.6669925346605048, "grad_norm": 2.640625, "learning_rate": 6.658222222222223e-06, "loss": 0.3677, "step": 30020 }, { "epoch": 0.6672147173835763, "grad_norm": 2.4375, "learning_rate": 6.653777777777778e-06, "loss": 0.3877, "step": 30030 }, { "epoch": 0.6674369001066477, "grad_norm": 2.859375, "learning_rate": 6.649333333333334e-06, "loss": 0.3941, "step": 30040 }, { "epoch": 0.6676590828297192, "grad_norm": 2.3125, "learning_rate": 6.64488888888889e-06, "loss": 0.3895, "step": 30050 }, { "epoch": 0.6678812655527906, "grad_norm": 2.671875, "learning_rate": 6.640444444444445e-06, "loss": 0.4168, "step": 30060 }, { "epoch": 0.6681034482758621, "grad_norm": 2.984375, "learning_rate": 6.6360000000000005e-06, "loss": 0.3704, "step": 30070 }, { "epoch": 0.6683256309989335, "grad_norm": 2.921875, "learning_rate": 6.631555555555557e-06, "loss": 0.3571, "step": 30080 }, { "epoch": 0.668547813722005, "grad_norm": 2.828125, "learning_rate": 6.627111111111112e-06, "loss": 0.3695, "step": 30090 }, { "epoch": 0.6687699964450764, "grad_norm": 2.515625, "learning_rate": 6.622666666666668e-06, "loss": 0.3893, "step": 30100 }, { "epoch": 0.6689921791681479, "grad_norm": 2.5, "learning_rate": 6.618222222222222e-06, "loss": 0.3676, "step": 30110 }, { "epoch": 0.6692143618912193, "grad_norm": 2.84375, "learning_rate": 6.613777777777778e-06, "loss": 0.3457, "step": 30120 }, { "epoch": 0.6694365446142908, "grad_norm": 2.453125, "learning_rate": 6.609333333333335e-06, "loss": 0.388, "step": 30130 }, { "epoch": 0.6696587273373622, "grad_norm": 2.328125, "learning_rate": 6.6048888888888895e-06, "loss": 0.3966, "step": 30140 }, { "epoch": 0.6698809100604337, "grad_norm": 2.609375, "learning_rate": 6.600444444444445e-06, "loss": 0.3633, "step": 30150 }, { "epoch": 0.6701030927835051, "grad_norm": 3.234375, "learning_rate": 6.596e-06, "loss": 0.3536, "step": 30160 }, { "epoch": 0.6703252755065766, "grad_norm": 2.359375, "learning_rate": 6.5915555555555566e-06, "loss": 0.3891, "step": 30170 }, { "epoch": 0.670547458229648, "grad_norm": 2.34375, "learning_rate": 6.587111111111112e-06, "loss": 0.379, "step": 30180 }, { "epoch": 0.6707696409527195, "grad_norm": 2.328125, "learning_rate": 6.582666666666667e-06, "loss": 0.379, "step": 30190 }, { "epoch": 0.670991823675791, "grad_norm": 2.640625, "learning_rate": 6.578222222222223e-06, "loss": 0.3898, "step": 30200 }, { "epoch": 0.6712140063988624, "grad_norm": 2.671875, "learning_rate": 6.573777777777778e-06, "loss": 0.3695, "step": 30210 }, { "epoch": 0.6714361891219339, "grad_norm": 2.875, "learning_rate": 6.569333333333334e-06, "loss": 0.3822, "step": 30220 }, { "epoch": 0.6716583718450053, "grad_norm": 2.140625, "learning_rate": 6.56488888888889e-06, "loss": 0.3455, "step": 30230 }, { "epoch": 0.6718805545680768, "grad_norm": 2.390625, "learning_rate": 6.560444444444445e-06, "loss": 0.3591, "step": 30240 }, { "epoch": 0.6721027372911482, "grad_norm": 2.75, "learning_rate": 6.556e-06, "loss": 0.3789, "step": 30250 }, { "epoch": 0.6723249200142197, "grad_norm": 2.53125, "learning_rate": 6.551555555555557e-06, "loss": 0.3421, "step": 30260 }, { "epoch": 0.6725471027372911, "grad_norm": 2.5625, "learning_rate": 6.547111111111112e-06, "loss": 0.3677, "step": 30270 }, { "epoch": 0.6727692854603626, "grad_norm": 2.984375, "learning_rate": 6.5426666666666675e-06, "loss": 0.3675, "step": 30280 }, { "epoch": 0.672991468183434, "grad_norm": 2.375, "learning_rate": 6.538222222222222e-06, "loss": 0.3698, "step": 30290 }, { "epoch": 0.6732136509065055, "grad_norm": 2.828125, "learning_rate": 6.533777777777778e-06, "loss": 0.3691, "step": 30300 }, { "epoch": 0.6734358336295769, "grad_norm": 2.375, "learning_rate": 6.5293333333333346e-06, "loss": 0.3903, "step": 30310 }, { "epoch": 0.6736580163526484, "grad_norm": 2.46875, "learning_rate": 6.524888888888889e-06, "loss": 0.3781, "step": 30320 }, { "epoch": 0.6738801990757198, "grad_norm": 2.53125, "learning_rate": 6.520444444444445e-06, "loss": 0.3968, "step": 30330 }, { "epoch": 0.6741023817987913, "grad_norm": 2.3125, "learning_rate": 6.516e-06, "loss": 0.3784, "step": 30340 }, { "epoch": 0.6743245645218627, "grad_norm": 2.28125, "learning_rate": 6.5115555555555565e-06, "loss": 0.3678, "step": 30350 }, { "epoch": 0.6745467472449342, "grad_norm": 2.375, "learning_rate": 6.507111111111112e-06, "loss": 0.3828, "step": 30360 }, { "epoch": 0.6747689299680056, "grad_norm": 2.296875, "learning_rate": 6.502666666666667e-06, "loss": 0.3999, "step": 30370 }, { "epoch": 0.6749911126910771, "grad_norm": 2.171875, "learning_rate": 6.498222222222223e-06, "loss": 0.3861, "step": 30380 }, { "epoch": 0.6752132954141487, "grad_norm": 2.578125, "learning_rate": 6.493777777777779e-06, "loss": 0.3735, "step": 30390 }, { "epoch": 0.67543547813722, "grad_norm": 2.59375, "learning_rate": 6.489333333333334e-06, "loss": 0.3889, "step": 30400 }, { "epoch": 0.6756576608602916, "grad_norm": 2.734375, "learning_rate": 6.48488888888889e-06, "loss": 0.3923, "step": 30410 }, { "epoch": 0.675879843583363, "grad_norm": 2.5625, "learning_rate": 6.480444444444445e-06, "loss": 0.3722, "step": 30420 }, { "epoch": 0.6761020263064345, "grad_norm": 2.234375, "learning_rate": 6.476e-06, "loss": 0.4002, "step": 30430 }, { "epoch": 0.6763242090295059, "grad_norm": 2.828125, "learning_rate": 6.471555555555557e-06, "loss": 0.4155, "step": 30440 }, { "epoch": 0.6765463917525774, "grad_norm": 2.421875, "learning_rate": 6.467111111111112e-06, "loss": 0.4074, "step": 30450 }, { "epoch": 0.6767685744756488, "grad_norm": 2.765625, "learning_rate": 6.462666666666667e-06, "loss": 0.3554, "step": 30460 }, { "epoch": 0.6769907571987203, "grad_norm": 2.234375, "learning_rate": 6.458222222222222e-06, "loss": 0.3737, "step": 30470 }, { "epoch": 0.6772129399217917, "grad_norm": 2.34375, "learning_rate": 6.453777777777779e-06, "loss": 0.3717, "step": 30480 }, { "epoch": 0.6774351226448632, "grad_norm": 2.21875, "learning_rate": 6.4493333333333345e-06, "loss": 0.3333, "step": 30490 }, { "epoch": 0.6776573053679346, "grad_norm": 2.1875, "learning_rate": 6.444888888888889e-06, "loss": 0.405, "step": 30500 }, { "epoch": 0.6778794880910061, "grad_norm": 2.234375, "learning_rate": 6.440444444444445e-06, "loss": 0.3492, "step": 30510 }, { "epoch": 0.6781016708140775, "grad_norm": 2.796875, "learning_rate": 6.436e-06, "loss": 0.4077, "step": 30520 }, { "epoch": 0.678323853537149, "grad_norm": 2.765625, "learning_rate": 6.431555555555556e-06, "loss": 0.3826, "step": 30530 }, { "epoch": 0.6785460362602204, "grad_norm": 2.578125, "learning_rate": 6.427111111111112e-06, "loss": 0.3546, "step": 30540 }, { "epoch": 0.6787682189832919, "grad_norm": 2.578125, "learning_rate": 6.422666666666667e-06, "loss": 0.3804, "step": 30550 }, { "epoch": 0.6789904017063633, "grad_norm": 2.546875, "learning_rate": 6.418222222222223e-06, "loss": 0.4225, "step": 30560 }, { "epoch": 0.6792125844294348, "grad_norm": 2.96875, "learning_rate": 6.413777777777779e-06, "loss": 0.3724, "step": 30570 }, { "epoch": 0.6794347671525062, "grad_norm": 2.28125, "learning_rate": 6.409333333333334e-06, "loss": 0.3994, "step": 30580 }, { "epoch": 0.6796569498755777, "grad_norm": 2.03125, "learning_rate": 6.40488888888889e-06, "loss": 0.3607, "step": 30590 }, { "epoch": 0.6798791325986492, "grad_norm": 1.921875, "learning_rate": 6.4004444444444446e-06, "loss": 0.393, "step": 30600 }, { "epoch": 0.6801013153217206, "grad_norm": 2.296875, "learning_rate": 6.396e-06, "loss": 0.3754, "step": 30610 }, { "epoch": 0.6803234980447921, "grad_norm": 2.765625, "learning_rate": 6.391555555555557e-06, "loss": 0.4055, "step": 30620 }, { "epoch": 0.6805456807678635, "grad_norm": 2.359375, "learning_rate": 6.387111111111112e-06, "loss": 0.3717, "step": 30630 }, { "epoch": 0.680767863490935, "grad_norm": 2.375, "learning_rate": 6.382666666666667e-06, "loss": 0.3224, "step": 30640 }, { "epoch": 0.6809900462140064, "grad_norm": 2.5625, "learning_rate": 6.378222222222222e-06, "loss": 0.3843, "step": 30650 }, { "epoch": 0.6812122289370779, "grad_norm": 2.0, "learning_rate": 6.373777777777779e-06, "loss": 0.4126, "step": 30660 }, { "epoch": 0.6814344116601493, "grad_norm": 2.375, "learning_rate": 6.369333333333334e-06, "loss": 0.3901, "step": 30670 }, { "epoch": 0.6816565943832208, "grad_norm": 2.78125, "learning_rate": 6.364888888888889e-06, "loss": 0.3607, "step": 30680 }, { "epoch": 0.6818787771062922, "grad_norm": 2.796875, "learning_rate": 6.360444444444445e-06, "loss": 0.3835, "step": 30690 }, { "epoch": 0.6821009598293637, "grad_norm": 2.515625, "learning_rate": 6.356000000000001e-06, "loss": 0.3999, "step": 30700 }, { "epoch": 0.6823231425524351, "grad_norm": 2.828125, "learning_rate": 6.351555555555556e-06, "loss": 0.3683, "step": 30710 }, { "epoch": 0.6825453252755066, "grad_norm": 3.421875, "learning_rate": 6.347111111111112e-06, "loss": 0.3753, "step": 30720 }, { "epoch": 0.682767507998578, "grad_norm": 2.28125, "learning_rate": 6.342666666666667e-06, "loss": 0.375, "step": 30730 }, { "epoch": 0.6829896907216495, "grad_norm": 2.390625, "learning_rate": 6.3382222222222226e-06, "loss": 0.3567, "step": 30740 }, { "epoch": 0.6832118734447209, "grad_norm": 2.625, "learning_rate": 6.333777777777779e-06, "loss": 0.3455, "step": 30750 }, { "epoch": 0.6834340561677924, "grad_norm": 2.3125, "learning_rate": 6.329333333333334e-06, "loss": 0.3536, "step": 30760 }, { "epoch": 0.6836562388908638, "grad_norm": 2.6875, "learning_rate": 6.32488888888889e-06, "loss": 0.3903, "step": 30770 }, { "epoch": 0.6838784216139353, "grad_norm": 2.625, "learning_rate": 6.3204444444444445e-06, "loss": 0.402, "step": 30780 }, { "epoch": 0.6841006043370067, "grad_norm": 2.828125, "learning_rate": 6.316000000000001e-06, "loss": 0.3951, "step": 30790 }, { "epoch": 0.6843227870600782, "grad_norm": 2.265625, "learning_rate": 6.311555555555557e-06, "loss": 0.3408, "step": 30800 }, { "epoch": 0.6845449697831497, "grad_norm": 2.328125, "learning_rate": 6.3071111111111116e-06, "loss": 0.3605, "step": 30810 }, { "epoch": 0.6847671525062211, "grad_norm": 2.171875, "learning_rate": 6.302666666666667e-06, "loss": 0.4112, "step": 30820 }, { "epoch": 0.6849893352292926, "grad_norm": 2.453125, "learning_rate": 6.298222222222222e-06, "loss": 0.379, "step": 30830 }, { "epoch": 0.685211517952364, "grad_norm": 3.140625, "learning_rate": 6.293777777777779e-06, "loss": 0.4127, "step": 30840 }, { "epoch": 0.6854337006754355, "grad_norm": 2.96875, "learning_rate": 6.289333333333334e-06, "loss": 0.4008, "step": 30850 }, { "epoch": 0.6856558833985069, "grad_norm": 2.28125, "learning_rate": 6.284888888888889e-06, "loss": 0.4076, "step": 30860 }, { "epoch": 0.6858780661215784, "grad_norm": 3.203125, "learning_rate": 6.280444444444445e-06, "loss": 0.3614, "step": 30870 }, { "epoch": 0.6861002488446498, "grad_norm": 2.265625, "learning_rate": 6.2760000000000006e-06, "loss": 0.357, "step": 30880 }, { "epoch": 0.6863224315677213, "grad_norm": 2.390625, "learning_rate": 6.271555555555556e-06, "loss": 0.3925, "step": 30890 }, { "epoch": 0.6865446142907927, "grad_norm": 2.59375, "learning_rate": 6.267111111111112e-06, "loss": 0.393, "step": 30900 }, { "epoch": 0.6867667970138642, "grad_norm": 2.171875, "learning_rate": 6.262666666666667e-06, "loss": 0.4081, "step": 30910 }, { "epoch": 0.6869889797369356, "grad_norm": 2.125, "learning_rate": 6.2582222222222225e-06, "loss": 0.4138, "step": 30920 }, { "epoch": 0.6872111624600071, "grad_norm": 2.609375, "learning_rate": 6.253777777777779e-06, "loss": 0.38, "step": 30930 }, { "epoch": 0.6874333451830785, "grad_norm": 3.125, "learning_rate": 6.249333333333334e-06, "loss": 0.397, "step": 30940 }, { "epoch": 0.68765552790615, "grad_norm": 2.328125, "learning_rate": 6.2448888888888896e-06, "loss": 0.4009, "step": 30950 }, { "epoch": 0.6878777106292214, "grad_norm": 3.0625, "learning_rate": 6.240444444444444e-06, "loss": 0.3613, "step": 30960 }, { "epoch": 0.6880998933522929, "grad_norm": 2.546875, "learning_rate": 6.236000000000001e-06, "loss": 0.4026, "step": 30970 }, { "epoch": 0.6883220760753643, "grad_norm": 2.40625, "learning_rate": 6.231555555555557e-06, "loss": 0.3755, "step": 30980 }, { "epoch": 0.6885442587984358, "grad_norm": 2.328125, "learning_rate": 6.2271111111111115e-06, "loss": 0.4011, "step": 30990 }, { "epoch": 0.6887664415215072, "grad_norm": 2.59375, "learning_rate": 6.222666666666667e-06, "loss": 0.3851, "step": 31000 }, { "epoch": 0.6889886242445787, "grad_norm": 2.53125, "learning_rate": 6.218222222222223e-06, "loss": 0.3872, "step": 31010 }, { "epoch": 0.6892108069676502, "grad_norm": 2.3125, "learning_rate": 6.2137777777777786e-06, "loss": 0.3654, "step": 31020 }, { "epoch": 0.6894329896907216, "grad_norm": 2.25, "learning_rate": 6.209333333333334e-06, "loss": 0.3944, "step": 31030 }, { "epoch": 0.6896551724137931, "grad_norm": 2.8125, "learning_rate": 6.204888888888889e-06, "loss": 0.3752, "step": 31040 }, { "epoch": 0.6898773551368645, "grad_norm": 2.671875, "learning_rate": 6.200444444444445e-06, "loss": 0.3726, "step": 31050 }, { "epoch": 0.690099537859936, "grad_norm": 2.515625, "learning_rate": 6.196000000000001e-06, "loss": 0.3663, "step": 31060 }, { "epoch": 0.6903217205830074, "grad_norm": 2.140625, "learning_rate": 6.191555555555556e-06, "loss": 0.3583, "step": 31070 }, { "epoch": 0.690543903306079, "grad_norm": 2.265625, "learning_rate": 6.187111111111112e-06, "loss": 0.3539, "step": 31080 }, { "epoch": 0.6907660860291504, "grad_norm": 2.59375, "learning_rate": 6.182666666666667e-06, "loss": 0.4004, "step": 31090 }, { "epoch": 0.6909882687522219, "grad_norm": 2.703125, "learning_rate": 6.178222222222223e-06, "loss": 0.3762, "step": 31100 }, { "epoch": 0.6912104514752933, "grad_norm": 2.046875, "learning_rate": 6.173777777777779e-06, "loss": 0.3915, "step": 31110 }, { "epoch": 0.6914326341983648, "grad_norm": 2.84375, "learning_rate": 6.169333333333334e-06, "loss": 0.4161, "step": 31120 }, { "epoch": 0.6916548169214362, "grad_norm": 1.9453125, "learning_rate": 6.1648888888888895e-06, "loss": 0.333, "step": 31130 }, { "epoch": 0.6918769996445077, "grad_norm": 2.265625, "learning_rate": 6.160444444444444e-06, "loss": 0.3612, "step": 31140 }, { "epoch": 0.6920991823675791, "grad_norm": 2.234375, "learning_rate": 6.156000000000001e-06, "loss": 0.3741, "step": 31150 }, { "epoch": 0.6923213650906506, "grad_norm": 2.203125, "learning_rate": 6.1515555555555566e-06, "loss": 0.3687, "step": 31160 }, { "epoch": 0.692543547813722, "grad_norm": 2.71875, "learning_rate": 6.147111111111111e-06, "loss": 0.4184, "step": 31170 }, { "epoch": 0.6927657305367935, "grad_norm": 2.0625, "learning_rate": 6.142666666666667e-06, "loss": 0.3746, "step": 31180 }, { "epoch": 0.6929879132598649, "grad_norm": 2.890625, "learning_rate": 6.138222222222223e-06, "loss": 0.4053, "step": 31190 }, { "epoch": 0.6932100959829364, "grad_norm": 1.984375, "learning_rate": 6.1337777777777785e-06, "loss": 0.3382, "step": 31200 }, { "epoch": 0.6934322787060079, "grad_norm": 2.609375, "learning_rate": 6.129333333333334e-06, "loss": 0.4028, "step": 31210 }, { "epoch": 0.6936544614290793, "grad_norm": 2.6875, "learning_rate": 6.124888888888889e-06, "loss": 0.4148, "step": 31220 }, { "epoch": 0.6938766441521508, "grad_norm": 2.296875, "learning_rate": 6.120444444444445e-06, "loss": 0.3841, "step": 31230 }, { "epoch": 0.6940988268752222, "grad_norm": 2.765625, "learning_rate": 6.116000000000001e-06, "loss": 0.4052, "step": 31240 }, { "epoch": 0.6943210095982937, "grad_norm": 3.359375, "learning_rate": 6.111555555555556e-06, "loss": 0.3667, "step": 31250 }, { "epoch": 0.6945431923213651, "grad_norm": 2.546875, "learning_rate": 6.107111111111112e-06, "loss": 0.3724, "step": 31260 }, { "epoch": 0.6947653750444366, "grad_norm": 2.515625, "learning_rate": 6.102666666666667e-06, "loss": 0.3665, "step": 31270 }, { "epoch": 0.694987557767508, "grad_norm": 2.71875, "learning_rate": 6.098222222222223e-06, "loss": 0.3945, "step": 31280 }, { "epoch": 0.6952097404905795, "grad_norm": 2.75, "learning_rate": 6.093777777777779e-06, "loss": 0.3324, "step": 31290 }, { "epoch": 0.6954319232136509, "grad_norm": 2.546875, "learning_rate": 6.089333333333334e-06, "loss": 0.3721, "step": 31300 }, { "epoch": 0.6956541059367224, "grad_norm": 2.375, "learning_rate": 6.084888888888889e-06, "loss": 0.3802, "step": 31310 }, { "epoch": 0.6958762886597938, "grad_norm": 2.328125, "learning_rate": 6.080444444444445e-06, "loss": 0.3598, "step": 31320 }, { "epoch": 0.6960984713828653, "grad_norm": 2.375, "learning_rate": 6.076000000000001e-06, "loss": 0.3883, "step": 31330 }, { "epoch": 0.6963206541059367, "grad_norm": 2.078125, "learning_rate": 6.0715555555555565e-06, "loss": 0.3485, "step": 31340 }, { "epoch": 0.6965428368290082, "grad_norm": 2.53125, "learning_rate": 6.067111111111111e-06, "loss": 0.4005, "step": 31350 }, { "epoch": 0.6967650195520796, "grad_norm": 2.4375, "learning_rate": 6.062666666666667e-06, "loss": 0.3968, "step": 31360 }, { "epoch": 0.6969872022751511, "grad_norm": 2.25, "learning_rate": 6.058222222222223e-06, "loss": 0.3712, "step": 31370 }, { "epoch": 0.6972093849982225, "grad_norm": 2.5, "learning_rate": 6.053777777777778e-06, "loss": 0.4079, "step": 31380 }, { "epoch": 0.697431567721294, "grad_norm": 2.3125, "learning_rate": 6.049333333333334e-06, "loss": 0.369, "step": 31390 }, { "epoch": 0.6976537504443654, "grad_norm": 2.546875, "learning_rate": 6.044888888888889e-06, "loss": 0.3667, "step": 31400 }, { "epoch": 0.6978759331674369, "grad_norm": 2.171875, "learning_rate": 6.0404444444444455e-06, "loss": 0.3936, "step": 31410 }, { "epoch": 0.6980981158905084, "grad_norm": 3.046875, "learning_rate": 6.036000000000001e-06, "loss": 0.415, "step": 31420 }, { "epoch": 0.6983202986135798, "grad_norm": 2.421875, "learning_rate": 6.031555555555556e-06, "loss": 0.3716, "step": 31430 }, { "epoch": 0.6985424813366513, "grad_norm": 2.515625, "learning_rate": 6.027111111111112e-06, "loss": 0.3485, "step": 31440 }, { "epoch": 0.6987646640597227, "grad_norm": 2.15625, "learning_rate": 6.0226666666666665e-06, "loss": 0.3572, "step": 31450 }, { "epoch": 0.6989868467827942, "grad_norm": 2.671875, "learning_rate": 6.018222222222223e-06, "loss": 0.3455, "step": 31460 }, { "epoch": 0.6992090295058656, "grad_norm": 2.46875, "learning_rate": 6.013777777777779e-06, "loss": 0.38, "step": 31470 }, { "epoch": 0.6994312122289371, "grad_norm": 2.9375, "learning_rate": 6.009333333333334e-06, "loss": 0.3581, "step": 31480 }, { "epoch": 0.6996533949520085, "grad_norm": 2.265625, "learning_rate": 6.004888888888889e-06, "loss": 0.4038, "step": 31490 }, { "epoch": 0.69987557767508, "grad_norm": 2.4375, "learning_rate": 6.000444444444445e-06, "loss": 0.3667, "step": 31500 }, { "epoch": 0.7000977603981514, "grad_norm": 2.34375, "learning_rate": 5.996000000000001e-06, "loss": 0.3834, "step": 31510 }, { "epoch": 0.7003199431212229, "grad_norm": 2.546875, "learning_rate": 5.991555555555556e-06, "loss": 0.364, "step": 31520 }, { "epoch": 0.7005421258442943, "grad_norm": 2.671875, "learning_rate": 5.987111111111111e-06, "loss": 0.4001, "step": 31530 }, { "epoch": 0.7007643085673658, "grad_norm": 2.9375, "learning_rate": 5.982666666666667e-06, "loss": 0.3787, "step": 31540 }, { "epoch": 0.7009864912904372, "grad_norm": 2.90625, "learning_rate": 5.978222222222223e-06, "loss": 0.3874, "step": 31550 }, { "epoch": 0.7012086740135087, "grad_norm": 2.28125, "learning_rate": 5.973777777777778e-06, "loss": 0.3811, "step": 31560 }, { "epoch": 0.7014308567365801, "grad_norm": 2.859375, "learning_rate": 5.969333333333334e-06, "loss": 0.4122, "step": 31570 }, { "epoch": 0.7016530394596516, "grad_norm": 2.328125, "learning_rate": 5.964888888888889e-06, "loss": 0.4028, "step": 31580 }, { "epoch": 0.701875222182723, "grad_norm": 2.71875, "learning_rate": 5.960444444444445e-06, "loss": 0.4028, "step": 31590 }, { "epoch": 0.7020974049057945, "grad_norm": 2.25, "learning_rate": 5.956000000000001e-06, "loss": 0.3656, "step": 31600 }, { "epoch": 0.7023195876288659, "grad_norm": 2.3125, "learning_rate": 5.951555555555556e-06, "loss": 0.3823, "step": 31610 }, { "epoch": 0.7025417703519374, "grad_norm": 2.328125, "learning_rate": 5.947111111111112e-06, "loss": 0.3883, "step": 31620 }, { "epoch": 0.7027639530750089, "grad_norm": 2.21875, "learning_rate": 5.942666666666667e-06, "loss": 0.3654, "step": 31630 }, { "epoch": 0.7029861357980803, "grad_norm": 2.359375, "learning_rate": 5.938222222222223e-06, "loss": 0.4223, "step": 31640 }, { "epoch": 0.7032083185211518, "grad_norm": 2.375, "learning_rate": 5.933777777777779e-06, "loss": 0.3847, "step": 31650 }, { "epoch": 0.7034305012442232, "grad_norm": 2.34375, "learning_rate": 5.9293333333333335e-06, "loss": 0.3774, "step": 31660 }, { "epoch": 0.7036526839672947, "grad_norm": 2.625, "learning_rate": 5.924888888888889e-06, "loss": 0.3562, "step": 31670 }, { "epoch": 0.7038748666903661, "grad_norm": 2.171875, "learning_rate": 5.920444444444445e-06, "loss": 0.3559, "step": 31680 }, { "epoch": 0.7040970494134376, "grad_norm": 2.390625, "learning_rate": 5.916000000000001e-06, "loss": 0.4212, "step": 31690 }, { "epoch": 0.704319232136509, "grad_norm": 2.4375, "learning_rate": 5.911555555555556e-06, "loss": 0.3567, "step": 31700 }, { "epoch": 0.7045414148595806, "grad_norm": 2.609375, "learning_rate": 5.907111111111111e-06, "loss": 0.3667, "step": 31710 }, { "epoch": 0.704763597582652, "grad_norm": 2.265625, "learning_rate": 5.902666666666668e-06, "loss": 0.3793, "step": 31720 }, { "epoch": 0.7049857803057235, "grad_norm": 2.84375, "learning_rate": 5.8982222222222225e-06, "loss": 0.3814, "step": 31730 }, { "epoch": 0.7052079630287948, "grad_norm": 2.640625, "learning_rate": 5.893777777777778e-06, "loss": 0.3853, "step": 31740 }, { "epoch": 0.7054301457518664, "grad_norm": 2.546875, "learning_rate": 5.889333333333334e-06, "loss": 0.4039, "step": 31750 }, { "epoch": 0.7056523284749378, "grad_norm": 2.625, "learning_rate": 5.884888888888889e-06, "loss": 0.3638, "step": 31760 }, { "epoch": 0.7058745111980093, "grad_norm": 2.75, "learning_rate": 5.880444444444445e-06, "loss": 0.433, "step": 31770 }, { "epoch": 0.7060966939210807, "grad_norm": 2.703125, "learning_rate": 5.876000000000001e-06, "loss": 0.434, "step": 31780 }, { "epoch": 0.7063188766441522, "grad_norm": 2.53125, "learning_rate": 5.871555555555556e-06, "loss": 0.384, "step": 31790 }, { "epoch": 0.7065410593672236, "grad_norm": 2.578125, "learning_rate": 5.8671111111111115e-06, "loss": 0.3686, "step": 31800 }, { "epoch": 0.7067632420902951, "grad_norm": 2.375, "learning_rate": 5.862666666666667e-06, "loss": 0.3816, "step": 31810 }, { "epoch": 0.7069854248133665, "grad_norm": 2.171875, "learning_rate": 5.858222222222223e-06, "loss": 0.3428, "step": 31820 }, { "epoch": 0.707207607536438, "grad_norm": 2.84375, "learning_rate": 5.853777777777779e-06, "loss": 0.3623, "step": 31830 }, { "epoch": 0.7074297902595095, "grad_norm": 2.453125, "learning_rate": 5.8493333333333335e-06, "loss": 0.4127, "step": 31840 }, { "epoch": 0.7076519729825809, "grad_norm": 2.71875, "learning_rate": 5.844888888888889e-06, "loss": 0.4007, "step": 31850 }, { "epoch": 0.7078741557056524, "grad_norm": 2.953125, "learning_rate": 5.840444444444445e-06, "loss": 0.4189, "step": 31860 }, { "epoch": 0.7080963384287238, "grad_norm": 2.453125, "learning_rate": 5.8360000000000005e-06, "loss": 0.3812, "step": 31870 }, { "epoch": 0.7083185211517953, "grad_norm": 2.328125, "learning_rate": 5.831555555555556e-06, "loss": 0.3905, "step": 31880 }, { "epoch": 0.7085407038748667, "grad_norm": 2.390625, "learning_rate": 5.827111111111111e-06, "loss": 0.3401, "step": 31890 }, { "epoch": 0.7087628865979382, "grad_norm": 2.46875, "learning_rate": 5.822666666666668e-06, "loss": 0.3723, "step": 31900 }, { "epoch": 0.7089850693210096, "grad_norm": 2.1875, "learning_rate": 5.818222222222223e-06, "loss": 0.3983, "step": 31910 }, { "epoch": 0.7092072520440811, "grad_norm": 2.65625, "learning_rate": 5.813777777777778e-06, "loss": 0.3972, "step": 31920 }, { "epoch": 0.7094294347671525, "grad_norm": 2.390625, "learning_rate": 5.809333333333334e-06, "loss": 0.3765, "step": 31930 }, { "epoch": 0.709651617490224, "grad_norm": 2.234375, "learning_rate": 5.8048888888888895e-06, "loss": 0.4079, "step": 31940 }, { "epoch": 0.7098738002132954, "grad_norm": 2.609375, "learning_rate": 5.800444444444445e-06, "loss": 0.3835, "step": 31950 }, { "epoch": 0.7100959829363669, "grad_norm": 2.109375, "learning_rate": 5.796000000000001e-06, "loss": 0.3712, "step": 31960 }, { "epoch": 0.7103181656594383, "grad_norm": 2.6875, "learning_rate": 5.791555555555556e-06, "loss": 0.3595, "step": 31970 }, { "epoch": 0.7105403483825098, "grad_norm": 2.734375, "learning_rate": 5.7871111111111115e-06, "loss": 0.4171, "step": 31980 }, { "epoch": 0.7107625311055812, "grad_norm": 2.5625, "learning_rate": 5.782666666666667e-06, "loss": 0.375, "step": 31990 }, { "epoch": 0.7109847138286527, "grad_norm": 2.078125, "learning_rate": 5.778222222222223e-06, "loss": 0.3762, "step": 32000 }, { "epoch": 0.7112068965517241, "grad_norm": 2.390625, "learning_rate": 5.7737777777777785e-06, "loss": 0.3487, "step": 32010 }, { "epoch": 0.7114290792747956, "grad_norm": 2.59375, "learning_rate": 5.769333333333333e-06, "loss": 0.387, "step": 32020 }, { "epoch": 0.711651261997867, "grad_norm": 2.28125, "learning_rate": 5.76488888888889e-06, "loss": 0.3527, "step": 32030 }, { "epoch": 0.7118734447209385, "grad_norm": 2.5, "learning_rate": 5.760444444444445e-06, "loss": 0.3559, "step": 32040 }, { "epoch": 0.71209562744401, "grad_norm": 2.1875, "learning_rate": 5.7560000000000005e-06, "loss": 0.3764, "step": 32050 }, { "epoch": 0.7123178101670814, "grad_norm": 2.078125, "learning_rate": 5.751555555555556e-06, "loss": 0.4182, "step": 32060 }, { "epoch": 0.7125399928901529, "grad_norm": 2.625, "learning_rate": 5.747111111111111e-06, "loss": 0.4145, "step": 32070 }, { "epoch": 0.7127621756132243, "grad_norm": 2.546875, "learning_rate": 5.7426666666666675e-06, "loss": 0.3925, "step": 32080 }, { "epoch": 0.7129843583362958, "grad_norm": 2.671875, "learning_rate": 5.738222222222223e-06, "loss": 0.3647, "step": 32090 }, { "epoch": 0.7132065410593672, "grad_norm": 2.765625, "learning_rate": 5.733777777777778e-06, "loss": 0.4259, "step": 32100 }, { "epoch": 0.7134287237824387, "grad_norm": 3.03125, "learning_rate": 5.729333333333334e-06, "loss": 0.363, "step": 32110 }, { "epoch": 0.7136509065055101, "grad_norm": 2.78125, "learning_rate": 5.7248888888888895e-06, "loss": 0.385, "step": 32120 }, { "epoch": 0.7138730892285816, "grad_norm": 3.0, "learning_rate": 5.720444444444445e-06, "loss": 0.3991, "step": 32130 }, { "epoch": 0.714095271951653, "grad_norm": 2.9375, "learning_rate": 5.716000000000001e-06, "loss": 0.374, "step": 32140 }, { "epoch": 0.7143174546747245, "grad_norm": 2.609375, "learning_rate": 5.711555555555556e-06, "loss": 0.3876, "step": 32150 }, { "epoch": 0.7145396373977959, "grad_norm": 2.59375, "learning_rate": 5.707111111111111e-06, "loss": 0.4159, "step": 32160 }, { "epoch": 0.7147618201208674, "grad_norm": 2.46875, "learning_rate": 5.702666666666667e-06, "loss": 0.3928, "step": 32170 }, { "epoch": 0.7149840028439388, "grad_norm": 2.875, "learning_rate": 5.698222222222223e-06, "loss": 0.3942, "step": 32180 }, { "epoch": 0.7152061855670103, "grad_norm": 2.359375, "learning_rate": 5.6937777777777785e-06, "loss": 0.3635, "step": 32190 }, { "epoch": 0.7154283682900817, "grad_norm": 2.484375, "learning_rate": 5.689333333333333e-06, "loss": 0.3683, "step": 32200 }, { "epoch": 0.7156505510131532, "grad_norm": 2.125, "learning_rate": 5.68488888888889e-06, "loss": 0.3795, "step": 32210 }, { "epoch": 0.7158727337362246, "grad_norm": 2.890625, "learning_rate": 5.680444444444445e-06, "loss": 0.3697, "step": 32220 }, { "epoch": 0.7160949164592961, "grad_norm": 2.40625, "learning_rate": 5.676e-06, "loss": 0.3657, "step": 32230 }, { "epoch": 0.7163170991823676, "grad_norm": 2.765625, "learning_rate": 5.671555555555556e-06, "loss": 0.3692, "step": 32240 }, { "epoch": 0.716539281905439, "grad_norm": 2.515625, "learning_rate": 5.667111111111112e-06, "loss": 0.3662, "step": 32250 }, { "epoch": 0.7167614646285105, "grad_norm": 2.6875, "learning_rate": 5.6626666666666675e-06, "loss": 0.3842, "step": 32260 }, { "epoch": 0.7169836473515819, "grad_norm": 2.765625, "learning_rate": 5.658222222222223e-06, "loss": 0.3638, "step": 32270 }, { "epoch": 0.7172058300746534, "grad_norm": 2.28125, "learning_rate": 5.653777777777778e-06, "loss": 0.4022, "step": 32280 }, { "epoch": 0.7174280127977248, "grad_norm": 3.25, "learning_rate": 5.649333333333334e-06, "loss": 0.3911, "step": 32290 }, { "epoch": 0.7176501955207963, "grad_norm": 2.28125, "learning_rate": 5.644888888888889e-06, "loss": 0.3572, "step": 32300 }, { "epoch": 0.7178723782438677, "grad_norm": 2.5625, "learning_rate": 5.640444444444445e-06, "loss": 0.3877, "step": 32310 }, { "epoch": 0.7180945609669392, "grad_norm": 2.46875, "learning_rate": 5.636000000000001e-06, "loss": 0.4065, "step": 32320 }, { "epoch": 0.7183167436900106, "grad_norm": 2.46875, "learning_rate": 5.631555555555556e-06, "loss": 0.3379, "step": 32330 }, { "epoch": 0.7185389264130821, "grad_norm": 2.140625, "learning_rate": 5.627111111111112e-06, "loss": 0.3465, "step": 32340 }, { "epoch": 0.7187611091361535, "grad_norm": 2.515625, "learning_rate": 5.622666666666667e-06, "loss": 0.4159, "step": 32350 }, { "epoch": 0.718983291859225, "grad_norm": 2.703125, "learning_rate": 5.618222222222223e-06, "loss": 0.3727, "step": 32360 }, { "epoch": 0.7192054745822964, "grad_norm": 2.265625, "learning_rate": 5.613777777777778e-06, "loss": 0.3809, "step": 32370 }, { "epoch": 0.719427657305368, "grad_norm": 2.453125, "learning_rate": 5.609333333333333e-06, "loss": 0.35, "step": 32380 }, { "epoch": 0.7196498400284393, "grad_norm": 2.1875, "learning_rate": 5.60488888888889e-06, "loss": 0.4121, "step": 32390 }, { "epoch": 0.7198720227515109, "grad_norm": 2.546875, "learning_rate": 5.600444444444445e-06, "loss": 0.435, "step": 32400 }, { "epoch": 0.7200942054745822, "grad_norm": 2.546875, "learning_rate": 5.596e-06, "loss": 0.3537, "step": 32410 }, { "epoch": 0.7203163881976538, "grad_norm": 2.796875, "learning_rate": 5.591555555555556e-06, "loss": 0.3979, "step": 32420 }, { "epoch": 0.7205385709207252, "grad_norm": 2.40625, "learning_rate": 5.587111111111112e-06, "loss": 0.3518, "step": 32430 }, { "epoch": 0.7207607536437967, "grad_norm": 2.484375, "learning_rate": 5.582666666666667e-06, "loss": 0.3972, "step": 32440 }, { "epoch": 0.7209829363668682, "grad_norm": 2.4375, "learning_rate": 5.578222222222223e-06, "loss": 0.399, "step": 32450 }, { "epoch": 0.7212051190899396, "grad_norm": 2.203125, "learning_rate": 5.573777777777778e-06, "loss": 0.4075, "step": 32460 }, { "epoch": 0.7214273018130111, "grad_norm": 2.65625, "learning_rate": 5.569333333333334e-06, "loss": 0.383, "step": 32470 }, { "epoch": 0.7216494845360825, "grad_norm": 2.53125, "learning_rate": 5.564888888888889e-06, "loss": 0.4107, "step": 32480 }, { "epoch": 0.721871667259154, "grad_norm": 2.734375, "learning_rate": 5.560444444444445e-06, "loss": 0.3835, "step": 32490 }, { "epoch": 0.7220938499822254, "grad_norm": 2.0625, "learning_rate": 5.556000000000001e-06, "loss": 0.4052, "step": 32500 }, { "epoch": 0.7223160327052969, "grad_norm": 2.421875, "learning_rate": 5.5515555555555555e-06, "loss": 0.3796, "step": 32510 }, { "epoch": 0.7225382154283683, "grad_norm": 3.296875, "learning_rate": 5.547111111111112e-06, "loss": 0.386, "step": 32520 }, { "epoch": 0.7227603981514398, "grad_norm": 2.5, "learning_rate": 5.542666666666667e-06, "loss": 0.3719, "step": 32530 }, { "epoch": 0.7229825808745112, "grad_norm": 2.703125, "learning_rate": 5.538222222222223e-06, "loss": 0.3423, "step": 32540 }, { "epoch": 0.7232047635975827, "grad_norm": 2.71875, "learning_rate": 5.533777777777778e-06, "loss": 0.378, "step": 32550 }, { "epoch": 0.7234269463206541, "grad_norm": 2.265625, "learning_rate": 5.529333333333334e-06, "loss": 0.3324, "step": 32560 }, { "epoch": 0.7236491290437256, "grad_norm": 2.265625, "learning_rate": 5.52488888888889e-06, "loss": 0.3906, "step": 32570 }, { "epoch": 0.723871311766797, "grad_norm": 2.515625, "learning_rate": 5.5204444444444445e-06, "loss": 0.3532, "step": 32580 }, { "epoch": 0.7240934944898685, "grad_norm": 2.390625, "learning_rate": 5.516e-06, "loss": 0.4058, "step": 32590 }, { "epoch": 0.7243156772129399, "grad_norm": 2.8125, "learning_rate": 5.511555555555556e-06, "loss": 0.3908, "step": 32600 }, { "epoch": 0.7245378599360114, "grad_norm": 2.8125, "learning_rate": 5.507111111111112e-06, "loss": 0.3788, "step": 32610 }, { "epoch": 0.7247600426590828, "grad_norm": 2.75, "learning_rate": 5.502666666666667e-06, "loss": 0.4041, "step": 32620 }, { "epoch": 0.7249822253821543, "grad_norm": 2.34375, "learning_rate": 5.498222222222223e-06, "loss": 0.3679, "step": 32630 }, { "epoch": 0.7252044081052257, "grad_norm": 2.796875, "learning_rate": 5.493777777777778e-06, "loss": 0.4047, "step": 32640 }, { "epoch": 0.7254265908282972, "grad_norm": 2.296875, "learning_rate": 5.489333333333334e-06, "loss": 0.3897, "step": 32650 }, { "epoch": 0.7256487735513687, "grad_norm": 2.4375, "learning_rate": 5.484888888888889e-06, "loss": 0.3811, "step": 32660 }, { "epoch": 0.7258709562744401, "grad_norm": 2.15625, "learning_rate": 5.480444444444445e-06, "loss": 0.3456, "step": 32670 }, { "epoch": 0.7260931389975116, "grad_norm": 2.40625, "learning_rate": 5.476000000000001e-06, "loss": 0.3679, "step": 32680 }, { "epoch": 0.726315321720583, "grad_norm": 2.71875, "learning_rate": 5.4715555555555554e-06, "loss": 0.3522, "step": 32690 }, { "epoch": 0.7265375044436545, "grad_norm": 2.765625, "learning_rate": 5.467111111111112e-06, "loss": 0.4007, "step": 32700 }, { "epoch": 0.7267596871667259, "grad_norm": 2.078125, "learning_rate": 5.462666666666667e-06, "loss": 0.3458, "step": 32710 }, { "epoch": 0.7269818698897974, "grad_norm": 2.65625, "learning_rate": 5.4582222222222225e-06, "loss": 0.3631, "step": 32720 }, { "epoch": 0.7272040526128688, "grad_norm": 2.296875, "learning_rate": 5.453777777777778e-06, "loss": 0.379, "step": 32730 }, { "epoch": 0.7274262353359403, "grad_norm": 2.328125, "learning_rate": 5.449333333333334e-06, "loss": 0.3544, "step": 32740 }, { "epoch": 0.7276484180590117, "grad_norm": 2.953125, "learning_rate": 5.44488888888889e-06, "loss": 0.3485, "step": 32750 }, { "epoch": 0.7278706007820832, "grad_norm": 2.578125, "learning_rate": 5.4404444444444444e-06, "loss": 0.4027, "step": 32760 }, { "epoch": 0.7280927835051546, "grad_norm": 2.6875, "learning_rate": 5.436e-06, "loss": 0.4076, "step": 32770 }, { "epoch": 0.7283149662282261, "grad_norm": 2.203125, "learning_rate": 5.431555555555556e-06, "loss": 0.3965, "step": 32780 }, { "epoch": 0.7285371489512975, "grad_norm": 2.25, "learning_rate": 5.4271111111111115e-06, "loss": 0.3601, "step": 32790 }, { "epoch": 0.728759331674369, "grad_norm": 2.1875, "learning_rate": 5.422666666666667e-06, "loss": 0.3642, "step": 32800 }, { "epoch": 0.7289815143974404, "grad_norm": 2.609375, "learning_rate": 5.418222222222223e-06, "loss": 0.3831, "step": 32810 }, { "epoch": 0.7292036971205119, "grad_norm": 2.59375, "learning_rate": 5.413777777777778e-06, "loss": 0.3745, "step": 32820 }, { "epoch": 0.7294258798435833, "grad_norm": 2.40625, "learning_rate": 5.409333333333334e-06, "loss": 0.3593, "step": 32830 }, { "epoch": 0.7296480625666548, "grad_norm": 2.4375, "learning_rate": 5.404888888888889e-06, "loss": 0.3802, "step": 32840 }, { "epoch": 0.7298702452897262, "grad_norm": 2.703125, "learning_rate": 5.400444444444445e-06, "loss": 0.3895, "step": 32850 }, { "epoch": 0.7300924280127977, "grad_norm": 2.6875, "learning_rate": 5.3960000000000005e-06, "loss": 0.3854, "step": 32860 }, { "epoch": 0.7303146107358692, "grad_norm": 2.796875, "learning_rate": 5.391555555555556e-06, "loss": 0.389, "step": 32870 }, { "epoch": 0.7305367934589406, "grad_norm": 3.140625, "learning_rate": 5.387111111111112e-06, "loss": 0.3785, "step": 32880 }, { "epoch": 0.7307589761820121, "grad_norm": 2.359375, "learning_rate": 5.382666666666667e-06, "loss": 0.4139, "step": 32890 }, { "epoch": 0.7309811589050835, "grad_norm": 2.5625, "learning_rate": 5.3782222222222224e-06, "loss": 0.3552, "step": 32900 }, { "epoch": 0.731203341628155, "grad_norm": 2.46875, "learning_rate": 5.373777777777778e-06, "loss": 0.3713, "step": 32910 }, { "epoch": 0.7314255243512264, "grad_norm": 2.421875, "learning_rate": 5.369333333333334e-06, "loss": 0.3602, "step": 32920 }, { "epoch": 0.7316477070742979, "grad_norm": 2.34375, "learning_rate": 5.3648888888888895e-06, "loss": 0.3437, "step": 32930 }, { "epoch": 0.7318698897973693, "grad_norm": 2.25, "learning_rate": 5.360444444444445e-06, "loss": 0.3611, "step": 32940 }, { "epoch": 0.7320920725204408, "grad_norm": 2.8125, "learning_rate": 5.356e-06, "loss": 0.3704, "step": 32950 }, { "epoch": 0.7323142552435122, "grad_norm": 2.515625, "learning_rate": 5.351555555555557e-06, "loss": 0.4018, "step": 32960 }, { "epoch": 0.7325364379665837, "grad_norm": 2.53125, "learning_rate": 5.3471111111111114e-06, "loss": 0.4128, "step": 32970 }, { "epoch": 0.7327586206896551, "grad_norm": 2.046875, "learning_rate": 5.342666666666667e-06, "loss": 0.3732, "step": 32980 }, { "epoch": 0.7329808034127266, "grad_norm": 2.421875, "learning_rate": 5.338222222222223e-06, "loss": 0.3742, "step": 32990 }, { "epoch": 0.733202986135798, "grad_norm": 2.40625, "learning_rate": 5.333777777777778e-06, "loss": 0.3759, "step": 33000 }, { "epoch": 0.7334251688588695, "grad_norm": 3.125, "learning_rate": 5.329333333333334e-06, "loss": 0.3769, "step": 33010 }, { "epoch": 0.7336473515819409, "grad_norm": 2.828125, "learning_rate": 5.324888888888889e-06, "loss": 0.3619, "step": 33020 }, { "epoch": 0.7338695343050124, "grad_norm": 2.484375, "learning_rate": 5.320444444444445e-06, "loss": 0.4092, "step": 33030 }, { "epoch": 0.7340917170280838, "grad_norm": 2.625, "learning_rate": 5.3160000000000004e-06, "loss": 0.4058, "step": 33040 }, { "epoch": 0.7343138997511554, "grad_norm": 2.65625, "learning_rate": 5.311555555555556e-06, "loss": 0.4174, "step": 33050 }, { "epoch": 0.7345360824742269, "grad_norm": 2.3125, "learning_rate": 5.307111111111112e-06, "loss": 0.4063, "step": 33060 }, { "epoch": 0.7347582651972983, "grad_norm": 2.75, "learning_rate": 5.302666666666667e-06, "loss": 0.3657, "step": 33070 }, { "epoch": 0.7349804479203698, "grad_norm": 2.328125, "learning_rate": 5.298222222222222e-06, "loss": 0.3567, "step": 33080 }, { "epoch": 0.7352026306434412, "grad_norm": 2.578125, "learning_rate": 5.293777777777778e-06, "loss": 0.395, "step": 33090 }, { "epoch": 0.7354248133665127, "grad_norm": 2.421875, "learning_rate": 5.289333333333334e-06, "loss": 0.3675, "step": 33100 }, { "epoch": 0.7356469960895841, "grad_norm": 2.515625, "learning_rate": 5.2848888888888894e-06, "loss": 0.4294, "step": 33110 }, { "epoch": 0.7358691788126556, "grad_norm": 2.65625, "learning_rate": 5.280444444444445e-06, "loss": 0.3893, "step": 33120 }, { "epoch": 0.736091361535727, "grad_norm": 2.8125, "learning_rate": 5.276e-06, "loss": 0.3707, "step": 33130 }, { "epoch": 0.7363135442587985, "grad_norm": 2.375, "learning_rate": 5.2715555555555565e-06, "loss": 0.4016, "step": 33140 }, { "epoch": 0.7365357269818699, "grad_norm": 2.484375, "learning_rate": 5.267111111111111e-06, "loss": 0.3467, "step": 33150 }, { "epoch": 0.7367579097049414, "grad_norm": 2.078125, "learning_rate": 5.262666666666667e-06, "loss": 0.3979, "step": 33160 }, { "epoch": 0.7369800924280128, "grad_norm": 2.421875, "learning_rate": 5.258222222222223e-06, "loss": 0.4023, "step": 33170 }, { "epoch": 0.7372022751510843, "grad_norm": 2.171875, "learning_rate": 5.2537777777777784e-06, "loss": 0.3649, "step": 33180 }, { "epoch": 0.7374244578741557, "grad_norm": 2.5625, "learning_rate": 5.249333333333334e-06, "loss": 0.3974, "step": 33190 }, { "epoch": 0.7376466405972272, "grad_norm": 2.59375, "learning_rate": 5.244888888888889e-06, "loss": 0.3581, "step": 33200 }, { "epoch": 0.7378688233202986, "grad_norm": 2.90625, "learning_rate": 5.240444444444445e-06, "loss": 0.3588, "step": 33210 }, { "epoch": 0.7380910060433701, "grad_norm": 2.796875, "learning_rate": 5.236e-06, "loss": 0.367, "step": 33220 }, { "epoch": 0.7383131887664415, "grad_norm": 2.703125, "learning_rate": 5.231555555555556e-06, "loss": 0.4256, "step": 33230 }, { "epoch": 0.738535371489513, "grad_norm": 2.5625, "learning_rate": 5.227111111111112e-06, "loss": 0.3773, "step": 33240 }, { "epoch": 0.7387575542125844, "grad_norm": 2.46875, "learning_rate": 5.222666666666667e-06, "loss": 0.3831, "step": 33250 }, { "epoch": 0.7389797369356559, "grad_norm": 2.703125, "learning_rate": 5.218222222222222e-06, "loss": 0.3948, "step": 33260 }, { "epoch": 0.7392019196587274, "grad_norm": 2.65625, "learning_rate": 5.213777777777779e-06, "loss": 0.3287, "step": 33270 }, { "epoch": 0.7394241023817988, "grad_norm": 2.59375, "learning_rate": 5.209333333333334e-06, "loss": 0.3519, "step": 33280 }, { "epoch": 0.7396462851048703, "grad_norm": 2.53125, "learning_rate": 5.204888888888889e-06, "loss": 0.3808, "step": 33290 }, { "epoch": 0.7398684678279417, "grad_norm": 2.40625, "learning_rate": 5.200444444444445e-06, "loss": 0.3694, "step": 33300 }, { "epoch": 0.7400906505510132, "grad_norm": 2.3125, "learning_rate": 5.196e-06, "loss": 0.3577, "step": 33310 }, { "epoch": 0.7403128332740846, "grad_norm": 2.875, "learning_rate": 5.1915555555555564e-06, "loss": 0.4061, "step": 33320 }, { "epoch": 0.7405350159971561, "grad_norm": 2.703125, "learning_rate": 5.187111111111111e-06, "loss": 0.3947, "step": 33330 }, { "epoch": 0.7407571987202275, "grad_norm": 2.359375, "learning_rate": 5.182666666666667e-06, "loss": 0.3818, "step": 33340 }, { "epoch": 0.740979381443299, "grad_norm": 2.296875, "learning_rate": 5.178222222222223e-06, "loss": 0.3778, "step": 33350 }, { "epoch": 0.7412015641663704, "grad_norm": 2.0, "learning_rate": 5.173777777777778e-06, "loss": 0.3661, "step": 33360 }, { "epoch": 0.7414237468894419, "grad_norm": 2.59375, "learning_rate": 5.169333333333334e-06, "loss": 0.364, "step": 33370 }, { "epoch": 0.7416459296125133, "grad_norm": 2.578125, "learning_rate": 5.164888888888889e-06, "loss": 0.3494, "step": 33380 }, { "epoch": 0.7418681123355848, "grad_norm": 2.515625, "learning_rate": 5.160444444444445e-06, "loss": 0.3959, "step": 33390 }, { "epoch": 0.7420902950586562, "grad_norm": 2.703125, "learning_rate": 5.156e-06, "loss": 0.3567, "step": 33400 }, { "epoch": 0.7423124777817277, "grad_norm": 2.421875, "learning_rate": 5.151555555555556e-06, "loss": 0.3257, "step": 33410 }, { "epoch": 0.7425346605047991, "grad_norm": 2.265625, "learning_rate": 5.147111111111112e-06, "loss": 0.3331, "step": 33420 }, { "epoch": 0.7427568432278706, "grad_norm": 3.046875, "learning_rate": 5.1426666666666665e-06, "loss": 0.3841, "step": 33430 }, { "epoch": 0.742979025950942, "grad_norm": 2.484375, "learning_rate": 5.138222222222222e-06, "loss": 0.3922, "step": 33440 }, { "epoch": 0.7432012086740135, "grad_norm": 2.1875, "learning_rate": 5.133777777777779e-06, "loss": 0.3382, "step": 33450 }, { "epoch": 0.7434233913970849, "grad_norm": 2.484375, "learning_rate": 5.129333333333334e-06, "loss": 0.3916, "step": 33460 }, { "epoch": 0.7436455741201564, "grad_norm": 2.546875, "learning_rate": 5.124888888888889e-06, "loss": 0.3768, "step": 33470 }, { "epoch": 0.7438677568432279, "grad_norm": 2.5, "learning_rate": 5.120444444444445e-06, "loss": 0.35, "step": 33480 }, { "epoch": 0.7440899395662993, "grad_norm": 2.578125, "learning_rate": 5.116000000000001e-06, "loss": 0.3907, "step": 33490 }, { "epoch": 0.7443121222893708, "grad_norm": 2.265625, "learning_rate": 5.111555555555556e-06, "loss": 0.4041, "step": 33500 }, { "epoch": 0.7445343050124422, "grad_norm": 2.5625, "learning_rate": 5.107111111111111e-06, "loss": 0.3545, "step": 33510 }, { "epoch": 0.7447564877355137, "grad_norm": 2.359375, "learning_rate": 5.102666666666667e-06, "loss": 0.4054, "step": 33520 }, { "epoch": 0.7449786704585851, "grad_norm": 2.75, "learning_rate": 5.0982222222222226e-06, "loss": 0.3445, "step": 33530 }, { "epoch": 0.7452008531816566, "grad_norm": 2.4375, "learning_rate": 5.093777777777778e-06, "loss": 0.377, "step": 33540 }, { "epoch": 0.745423035904728, "grad_norm": 2.265625, "learning_rate": 5.089333333333334e-06, "loss": 0.3659, "step": 33550 }, { "epoch": 0.7456452186277995, "grad_norm": 2.4375, "learning_rate": 5.084888888888889e-06, "loss": 0.3905, "step": 33560 }, { "epoch": 0.7458674013508709, "grad_norm": 3.0625, "learning_rate": 5.0804444444444445e-06, "loss": 0.4162, "step": 33570 }, { "epoch": 0.7460895840739424, "grad_norm": 2.5625, "learning_rate": 5.076000000000001e-06, "loss": 0.4015, "step": 33580 }, { "epoch": 0.7463117667970138, "grad_norm": 2.296875, "learning_rate": 5.071555555555556e-06, "loss": 0.3701, "step": 33590 }, { "epoch": 0.7465339495200853, "grad_norm": 2.90625, "learning_rate": 5.0671111111111116e-06, "loss": 0.3764, "step": 33600 }, { "epoch": 0.7467561322431567, "grad_norm": 1.828125, "learning_rate": 5.062666666666666e-06, "loss": 0.3787, "step": 33610 }, { "epoch": 0.7469783149662282, "grad_norm": 2.328125, "learning_rate": 5.058222222222222e-06, "loss": 0.3511, "step": 33620 }, { "epoch": 0.7472004976892996, "grad_norm": 2.234375, "learning_rate": 5.053777777777779e-06, "loss": 0.386, "step": 33630 }, { "epoch": 0.7474226804123711, "grad_norm": 2.546875, "learning_rate": 5.0493333333333335e-06, "loss": 0.4056, "step": 33640 }, { "epoch": 0.7476448631354425, "grad_norm": 2.625, "learning_rate": 5.044888888888889e-06, "loss": 0.4146, "step": 33650 }, { "epoch": 0.747867045858514, "grad_norm": 2.453125, "learning_rate": 5.040444444444445e-06, "loss": 0.3825, "step": 33660 }, { "epoch": 0.7480892285815854, "grad_norm": 2.640625, "learning_rate": 5.0360000000000006e-06, "loss": 0.3307, "step": 33670 }, { "epoch": 0.748311411304657, "grad_norm": 2.5, "learning_rate": 5.031555555555556e-06, "loss": 0.3789, "step": 33680 }, { "epoch": 0.7485335940277285, "grad_norm": 2.625, "learning_rate": 5.027111111111111e-06, "loss": 0.3696, "step": 33690 }, { "epoch": 0.7487557767507999, "grad_norm": 2.5, "learning_rate": 5.022666666666667e-06, "loss": 0.4013, "step": 33700 }, { "epoch": 0.7489779594738714, "grad_norm": 2.5625, "learning_rate": 5.0182222222222225e-06, "loss": 0.3663, "step": 33710 }, { "epoch": 0.7492001421969428, "grad_norm": 2.53125, "learning_rate": 5.013777777777778e-06, "loss": 0.3976, "step": 33720 }, { "epoch": 0.7494223249200143, "grad_norm": 2.71875, "learning_rate": 5.009333333333334e-06, "loss": 0.3801, "step": 33730 }, { "epoch": 0.7496445076430857, "grad_norm": 2.140625, "learning_rate": 5.004888888888889e-06, "loss": 0.3946, "step": 33740 }, { "epoch": 0.7498666903661572, "grad_norm": 2.1875, "learning_rate": 5.000444444444444e-06, "loss": 0.3812, "step": 33750 }, { "epoch": 0.7500888730892286, "grad_norm": 2.765625, "learning_rate": 4.996e-06, "loss": 0.4265, "step": 33760 }, { "epoch": 0.7503110558123001, "grad_norm": 2.359375, "learning_rate": 4.991555555555556e-06, "loss": 0.3479, "step": 33770 }, { "epoch": 0.7505332385353715, "grad_norm": 2.484375, "learning_rate": 4.9871111111111115e-06, "loss": 0.3982, "step": 33780 }, { "epoch": 0.750755421258443, "grad_norm": 2.796875, "learning_rate": 4.982666666666667e-06, "loss": 0.4098, "step": 33790 }, { "epoch": 0.7509776039815144, "grad_norm": 2.78125, "learning_rate": 4.978222222222223e-06, "loss": 0.4078, "step": 33800 }, { "epoch": 0.7511997867045859, "grad_norm": 2.21875, "learning_rate": 4.973777777777778e-06, "loss": 0.3804, "step": 33810 }, { "epoch": 0.7514219694276573, "grad_norm": 2.453125, "learning_rate": 4.969333333333333e-06, "loss": 0.3774, "step": 33820 }, { "epoch": 0.7516441521507288, "grad_norm": 2.421875, "learning_rate": 4.964888888888889e-06, "loss": 0.3881, "step": 33830 }, { "epoch": 0.7518663348738002, "grad_norm": 2.71875, "learning_rate": 4.960444444444445e-06, "loss": 0.3709, "step": 33840 }, { "epoch": 0.7520885175968717, "grad_norm": 2.5, "learning_rate": 4.9560000000000005e-06, "loss": 0.3514, "step": 33850 }, { "epoch": 0.7523107003199431, "grad_norm": 2.296875, "learning_rate": 4.951555555555556e-06, "loss": 0.3587, "step": 33860 }, { "epoch": 0.7525328830430146, "grad_norm": 2.390625, "learning_rate": 4.947111111111111e-06, "loss": 0.3717, "step": 33870 }, { "epoch": 0.7527550657660861, "grad_norm": 2.828125, "learning_rate": 4.9426666666666676e-06, "loss": 0.3629, "step": 33880 }, { "epoch": 0.7529772484891575, "grad_norm": 2.171875, "learning_rate": 4.938222222222222e-06, "loss": 0.3995, "step": 33890 }, { "epoch": 0.753199431212229, "grad_norm": 2.890625, "learning_rate": 4.933777777777778e-06, "loss": 0.3856, "step": 33900 }, { "epoch": 0.7534216139353004, "grad_norm": 2.859375, "learning_rate": 4.929333333333334e-06, "loss": 0.382, "step": 33910 }, { "epoch": 0.7536437966583719, "grad_norm": 2.578125, "learning_rate": 4.924888888888889e-06, "loss": 0.3794, "step": 33920 }, { "epoch": 0.7538659793814433, "grad_norm": 2.4375, "learning_rate": 4.920444444444445e-06, "loss": 0.3611, "step": 33930 }, { "epoch": 0.7540881621045148, "grad_norm": 2.640625, "learning_rate": 4.916e-06, "loss": 0.3722, "step": 33940 }, { "epoch": 0.7543103448275862, "grad_norm": 2.265625, "learning_rate": 4.911555555555556e-06, "loss": 0.366, "step": 33950 }, { "epoch": 0.7545325275506577, "grad_norm": 2.390625, "learning_rate": 4.907111111111111e-06, "loss": 0.3687, "step": 33960 }, { "epoch": 0.7547547102737291, "grad_norm": 2.421875, "learning_rate": 4.902666666666667e-06, "loss": 0.3423, "step": 33970 }, { "epoch": 0.7549768929968006, "grad_norm": 2.46875, "learning_rate": 4.898222222222223e-06, "loss": 0.4181, "step": 33980 }, { "epoch": 0.755199075719872, "grad_norm": 3.125, "learning_rate": 4.8937777777777785e-06, "loss": 0.3907, "step": 33990 }, { "epoch": 0.7554212584429435, "grad_norm": 2.828125, "learning_rate": 4.889333333333333e-06, "loss": 0.3739, "step": 34000 }, { "epoch": 0.7556434411660149, "grad_norm": 2.578125, "learning_rate": 4.884888888888889e-06, "loss": 0.3817, "step": 34010 }, { "epoch": 0.7558656238890864, "grad_norm": 2.328125, "learning_rate": 4.880444444444445e-06, "loss": 0.3424, "step": 34020 }, { "epoch": 0.7560878066121578, "grad_norm": 2.328125, "learning_rate": 4.876e-06, "loss": 0.3504, "step": 34030 }, { "epoch": 0.7563099893352293, "grad_norm": 2.796875, "learning_rate": 4.871555555555556e-06, "loss": 0.4067, "step": 34040 }, { "epoch": 0.7565321720583007, "grad_norm": 2.890625, "learning_rate": 4.867111111111111e-06, "loss": 0.3779, "step": 34050 }, { "epoch": 0.7567543547813722, "grad_norm": 2.296875, "learning_rate": 4.8626666666666675e-06, "loss": 0.3708, "step": 34060 }, { "epoch": 0.7569765375044436, "grad_norm": 2.0625, "learning_rate": 4.858222222222222e-06, "loss": 0.3694, "step": 34070 }, { "epoch": 0.7571987202275151, "grad_norm": 2.5625, "learning_rate": 4.853777777777778e-06, "loss": 0.3966, "step": 34080 }, { "epoch": 0.7574209029505866, "grad_norm": 2.390625, "learning_rate": 4.849333333333334e-06, "loss": 0.3978, "step": 34090 }, { "epoch": 0.757643085673658, "grad_norm": 2.3125, "learning_rate": 4.844888888888889e-06, "loss": 0.3731, "step": 34100 }, { "epoch": 0.7578652683967295, "grad_norm": 2.21875, "learning_rate": 4.840444444444445e-06, "loss": 0.352, "step": 34110 }, { "epoch": 0.7580874511198009, "grad_norm": 2.328125, "learning_rate": 4.836e-06, "loss": 0.367, "step": 34120 }, { "epoch": 0.7583096338428724, "grad_norm": 2.4375, "learning_rate": 4.831555555555556e-06, "loss": 0.3731, "step": 34130 }, { "epoch": 0.7585318165659438, "grad_norm": 2.078125, "learning_rate": 4.827111111111111e-06, "loss": 0.3773, "step": 34140 }, { "epoch": 0.7587539992890153, "grad_norm": 2.171875, "learning_rate": 4.822666666666667e-06, "loss": 0.4028, "step": 34150 }, { "epoch": 0.7589761820120867, "grad_norm": 2.734375, "learning_rate": 4.818222222222223e-06, "loss": 0.3595, "step": 34160 }, { "epoch": 0.7591983647351582, "grad_norm": 3.046875, "learning_rate": 4.813777777777778e-06, "loss": 0.3652, "step": 34170 }, { "epoch": 0.7594205474582296, "grad_norm": 2.3125, "learning_rate": 4.809333333333333e-06, "loss": 0.3695, "step": 34180 }, { "epoch": 0.7596427301813011, "grad_norm": 2.375, "learning_rate": 4.80488888888889e-06, "loss": 0.3856, "step": 34190 }, { "epoch": 0.7598649129043725, "grad_norm": 2.40625, "learning_rate": 4.800444444444445e-06, "loss": 0.3393, "step": 34200 }, { "epoch": 0.760087095627444, "grad_norm": 2.59375, "learning_rate": 4.796e-06, "loss": 0.3476, "step": 34210 }, { "epoch": 0.7603092783505154, "grad_norm": 2.90625, "learning_rate": 4.791555555555556e-06, "loss": 0.3445, "step": 34220 }, { "epoch": 0.7605314610735869, "grad_norm": 2.59375, "learning_rate": 4.787111111111111e-06, "loss": 0.3893, "step": 34230 }, { "epoch": 0.7607536437966583, "grad_norm": 2.5, "learning_rate": 4.782666666666667e-06, "loss": 0.4079, "step": 34240 }, { "epoch": 0.7609758265197298, "grad_norm": 2.515625, "learning_rate": 4.778222222222222e-06, "loss": 0.3897, "step": 34250 }, { "epoch": 0.7611980092428012, "grad_norm": 2.765625, "learning_rate": 4.773777777777778e-06, "loss": 0.3997, "step": 34260 }, { "epoch": 0.7614201919658727, "grad_norm": 2.34375, "learning_rate": 4.769333333333334e-06, "loss": 0.3641, "step": 34270 }, { "epoch": 0.7616423746889441, "grad_norm": 2.515625, "learning_rate": 4.764888888888889e-06, "loss": 0.3564, "step": 34280 }, { "epoch": 0.7618645574120156, "grad_norm": 2.625, "learning_rate": 4.760444444444445e-06, "loss": 0.3899, "step": 34290 }, { "epoch": 0.7620867401350871, "grad_norm": 2.546875, "learning_rate": 4.756000000000001e-06, "loss": 0.3494, "step": 34300 }, { "epoch": 0.7623089228581585, "grad_norm": 2.484375, "learning_rate": 4.7515555555555556e-06, "loss": 0.4105, "step": 34310 }, { "epoch": 0.76253110558123, "grad_norm": 2.4375, "learning_rate": 4.747111111111111e-06, "loss": 0.3266, "step": 34320 }, { "epoch": 0.7627532883043014, "grad_norm": 2.3125, "learning_rate": 4.742666666666667e-06, "loss": 0.3259, "step": 34330 }, { "epoch": 0.762975471027373, "grad_norm": 2.921875, "learning_rate": 4.738222222222223e-06, "loss": 0.3529, "step": 34340 }, { "epoch": 0.7631976537504443, "grad_norm": 2.375, "learning_rate": 4.733777777777778e-06, "loss": 0.4033, "step": 34350 }, { "epoch": 0.7634198364735159, "grad_norm": 2.53125, "learning_rate": 4.729333333333333e-06, "loss": 0.3745, "step": 34360 }, { "epoch": 0.7636420191965873, "grad_norm": 2.09375, "learning_rate": 4.72488888888889e-06, "loss": 0.3941, "step": 34370 }, { "epoch": 0.7638642019196588, "grad_norm": 2.078125, "learning_rate": 4.7204444444444446e-06, "loss": 0.3911, "step": 34380 }, { "epoch": 0.7640863846427302, "grad_norm": 2.65625, "learning_rate": 4.716e-06, "loss": 0.3599, "step": 34390 }, { "epoch": 0.7643085673658017, "grad_norm": 2.984375, "learning_rate": 4.711555555555556e-06, "loss": 0.4085, "step": 34400 }, { "epoch": 0.7645307500888731, "grad_norm": 2.1875, "learning_rate": 4.707111111111112e-06, "loss": 0.3546, "step": 34410 }, { "epoch": 0.7647529328119446, "grad_norm": 3.125, "learning_rate": 4.702666666666667e-06, "loss": 0.4001, "step": 34420 }, { "epoch": 0.764975115535016, "grad_norm": 2.65625, "learning_rate": 4.698222222222222e-06, "loss": 0.4021, "step": 34430 }, { "epoch": 0.7651972982580875, "grad_norm": 3.296875, "learning_rate": 4.693777777777778e-06, "loss": 0.3948, "step": 34440 }, { "epoch": 0.7654194809811589, "grad_norm": 2.203125, "learning_rate": 4.6893333333333336e-06, "loss": 0.3804, "step": 34450 }, { "epoch": 0.7656416637042304, "grad_norm": 2.484375, "learning_rate": 4.684888888888889e-06, "loss": 0.3928, "step": 34460 }, { "epoch": 0.7658638464273018, "grad_norm": 2.75, "learning_rate": 4.680444444444445e-06, "loss": 0.3607, "step": 34470 }, { "epoch": 0.7660860291503733, "grad_norm": 2.390625, "learning_rate": 4.676000000000001e-06, "loss": 0.4113, "step": 34480 }, { "epoch": 0.7663082118734447, "grad_norm": 2.203125, "learning_rate": 4.6715555555555555e-06, "loss": 0.3627, "step": 34490 }, { "epoch": 0.7665303945965162, "grad_norm": 2.34375, "learning_rate": 4.667111111111112e-06, "loss": 0.3499, "step": 34500 }, { "epoch": 0.7667525773195877, "grad_norm": 2.46875, "learning_rate": 4.662666666666667e-06, "loss": 0.3732, "step": 34510 }, { "epoch": 0.7669747600426591, "grad_norm": 1.953125, "learning_rate": 4.6582222222222226e-06, "loss": 0.3323, "step": 34520 }, { "epoch": 0.7671969427657306, "grad_norm": 2.5625, "learning_rate": 4.653777777777778e-06, "loss": 0.3621, "step": 34530 }, { "epoch": 0.767419125488802, "grad_norm": 2.625, "learning_rate": 4.649333333333333e-06, "loss": 0.4036, "step": 34540 }, { "epoch": 0.7676413082118735, "grad_norm": 2.609375, "learning_rate": 4.64488888888889e-06, "loss": 0.3702, "step": 34550 }, { "epoch": 0.7678634909349449, "grad_norm": 2.34375, "learning_rate": 4.6404444444444445e-06, "loss": 0.3808, "step": 34560 }, { "epoch": 0.7680856736580164, "grad_norm": 2.6875, "learning_rate": 4.636e-06, "loss": 0.3658, "step": 34570 }, { "epoch": 0.7683078563810878, "grad_norm": 2.84375, "learning_rate": 4.631555555555556e-06, "loss": 0.3817, "step": 34580 }, { "epoch": 0.7685300391041593, "grad_norm": 2.65625, "learning_rate": 4.6271111111111116e-06, "loss": 0.3879, "step": 34590 }, { "epoch": 0.7687522218272307, "grad_norm": 2.359375, "learning_rate": 4.622666666666667e-06, "loss": 0.3624, "step": 34600 }, { "epoch": 0.7689744045503022, "grad_norm": 2.40625, "learning_rate": 4.618222222222223e-06, "loss": 0.3752, "step": 34610 }, { "epoch": 0.7691965872733736, "grad_norm": 2.078125, "learning_rate": 4.613777777777778e-06, "loss": 0.3627, "step": 34620 }, { "epoch": 0.7694187699964451, "grad_norm": 2.890625, "learning_rate": 4.6093333333333335e-06, "loss": 0.389, "step": 34630 }, { "epoch": 0.7696409527195165, "grad_norm": 2.71875, "learning_rate": 4.604888888888889e-06, "loss": 0.3609, "step": 34640 }, { "epoch": 0.769863135442588, "grad_norm": 2.484375, "learning_rate": 4.600444444444445e-06, "loss": 0.3974, "step": 34650 }, { "epoch": 0.7700853181656594, "grad_norm": 2.5, "learning_rate": 4.5960000000000006e-06, "loss": 0.4181, "step": 34660 }, { "epoch": 0.7703075008887309, "grad_norm": 2.453125, "learning_rate": 4.591555555555555e-06, "loss": 0.3805, "step": 34670 }, { "epoch": 0.7705296836118023, "grad_norm": 2.59375, "learning_rate": 4.587111111111112e-06, "loss": 0.374, "step": 34680 }, { "epoch": 0.7707518663348738, "grad_norm": 2.21875, "learning_rate": 4.582666666666667e-06, "loss": 0.3311, "step": 34690 }, { "epoch": 0.7709740490579452, "grad_norm": 2.4375, "learning_rate": 4.5782222222222225e-06, "loss": 0.3876, "step": 34700 }, { "epoch": 0.7711962317810167, "grad_norm": 2.640625, "learning_rate": 4.573777777777778e-06, "loss": 0.3584, "step": 34710 }, { "epoch": 0.7714184145040882, "grad_norm": 2.546875, "learning_rate": 4.569333333333334e-06, "loss": 0.4144, "step": 34720 }, { "epoch": 0.7716405972271596, "grad_norm": 2.71875, "learning_rate": 4.5648888888888895e-06, "loss": 0.3428, "step": 34730 }, { "epoch": 0.7718627799502311, "grad_norm": 2.859375, "learning_rate": 4.560444444444444e-06, "loss": 0.3809, "step": 34740 }, { "epoch": 0.7720849626733025, "grad_norm": 2.46875, "learning_rate": 4.556e-06, "loss": 0.4017, "step": 34750 }, { "epoch": 0.772307145396374, "grad_norm": 2.28125, "learning_rate": 4.551555555555556e-06, "loss": 0.3841, "step": 34760 }, { "epoch": 0.7725293281194454, "grad_norm": 3.078125, "learning_rate": 4.5471111111111115e-06, "loss": 0.3991, "step": 34770 }, { "epoch": 0.7727515108425169, "grad_norm": 2.625, "learning_rate": 4.542666666666667e-06, "loss": 0.4064, "step": 34780 }, { "epoch": 0.7729736935655883, "grad_norm": 2.90625, "learning_rate": 4.538222222222223e-06, "loss": 0.3668, "step": 34790 }, { "epoch": 0.7731958762886598, "grad_norm": 2.515625, "learning_rate": 4.533777777777778e-06, "loss": 0.4388, "step": 34800 }, { "epoch": 0.7734180590117312, "grad_norm": 2.59375, "learning_rate": 4.529333333333334e-06, "loss": 0.3686, "step": 34810 }, { "epoch": 0.7736402417348027, "grad_norm": 2.390625, "learning_rate": 4.524888888888889e-06, "loss": 0.3894, "step": 34820 }, { "epoch": 0.7738624244578741, "grad_norm": 2.859375, "learning_rate": 4.520444444444445e-06, "loss": 0.3523, "step": 34830 }, { "epoch": 0.7740846071809456, "grad_norm": 2.546875, "learning_rate": 4.5160000000000005e-06, "loss": 0.3411, "step": 34840 }, { "epoch": 0.774306789904017, "grad_norm": 3.046875, "learning_rate": 4.511555555555555e-06, "loss": 0.376, "step": 34850 }, { "epoch": 0.7745289726270885, "grad_norm": 2.875, "learning_rate": 4.507111111111112e-06, "loss": 0.3893, "step": 34860 }, { "epoch": 0.7747511553501599, "grad_norm": 2.3125, "learning_rate": 4.502666666666667e-06, "loss": 0.3377, "step": 34870 }, { "epoch": 0.7749733380732314, "grad_norm": 2.5625, "learning_rate": 4.498222222222222e-06, "loss": 0.396, "step": 34880 }, { "epoch": 0.7751955207963028, "grad_norm": 2.3125, "learning_rate": 4.493777777777778e-06, "loss": 0.3816, "step": 34890 }, { "epoch": 0.7754177035193743, "grad_norm": 2.390625, "learning_rate": 4.489333333333334e-06, "loss": 0.3772, "step": 34900 }, { "epoch": 0.7756398862424458, "grad_norm": 2.5625, "learning_rate": 4.4848888888888895e-06, "loss": 0.3348, "step": 34910 }, { "epoch": 0.7758620689655172, "grad_norm": 2.390625, "learning_rate": 4.480444444444445e-06, "loss": 0.4088, "step": 34920 }, { "epoch": 0.7760842516885887, "grad_norm": 2.703125, "learning_rate": 4.476e-06, "loss": 0.4077, "step": 34930 }, { "epoch": 0.7763064344116601, "grad_norm": 2.296875, "learning_rate": 4.471555555555556e-06, "loss": 0.3465, "step": 34940 }, { "epoch": 0.7765286171347316, "grad_norm": 2.9375, "learning_rate": 4.467111111111111e-06, "loss": 0.3538, "step": 34950 }, { "epoch": 0.776750799857803, "grad_norm": 2.734375, "learning_rate": 4.462666666666667e-06, "loss": 0.3795, "step": 34960 }, { "epoch": 0.7769729825808745, "grad_norm": 2.609375, "learning_rate": 4.458222222222223e-06, "loss": 0.3513, "step": 34970 }, { "epoch": 0.777195165303946, "grad_norm": 3.5625, "learning_rate": 4.453777777777778e-06, "loss": 0.3852, "step": 34980 }, { "epoch": 0.7774173480270175, "grad_norm": 3.203125, "learning_rate": 4.449333333333334e-06, "loss": 0.3594, "step": 34990 }, { "epoch": 0.7776395307500888, "grad_norm": 2.125, "learning_rate": 4.444888888888889e-06, "loss": 0.3742, "step": 35000 }, { "epoch": 0.7778617134731604, "grad_norm": 2.40625, "learning_rate": 4.440444444444445e-06, "loss": 0.4009, "step": 35010 }, { "epoch": 0.7780838961962318, "grad_norm": 2.375, "learning_rate": 4.436e-06, "loss": 0.3819, "step": 35020 }, { "epoch": 0.7783060789193033, "grad_norm": 2.59375, "learning_rate": 4.431555555555556e-06, "loss": 0.3715, "step": 35030 }, { "epoch": 0.7785282616423747, "grad_norm": 2.84375, "learning_rate": 4.427111111111112e-06, "loss": 0.3911, "step": 35040 }, { "epoch": 0.7787504443654462, "grad_norm": 2.5, "learning_rate": 4.422666666666667e-06, "loss": 0.4007, "step": 35050 }, { "epoch": 0.7789726270885176, "grad_norm": 2.359375, "learning_rate": 4.418222222222222e-06, "loss": 0.3836, "step": 35060 }, { "epoch": 0.7791948098115891, "grad_norm": 2.453125, "learning_rate": 4.413777777777778e-06, "loss": 0.3945, "step": 35070 }, { "epoch": 0.7794169925346605, "grad_norm": 2.546875, "learning_rate": 4.409333333333334e-06, "loss": 0.3924, "step": 35080 }, { "epoch": 0.779639175257732, "grad_norm": 2.515625, "learning_rate": 4.404888888888889e-06, "loss": 0.3972, "step": 35090 }, { "epoch": 0.7798613579808034, "grad_norm": 2.75, "learning_rate": 4.400444444444445e-06, "loss": 0.4165, "step": 35100 }, { "epoch": 0.7800835407038749, "grad_norm": 2.8125, "learning_rate": 4.396e-06, "loss": 0.3743, "step": 35110 }, { "epoch": 0.7803057234269464, "grad_norm": 3.046875, "learning_rate": 4.3915555555555565e-06, "loss": 0.3532, "step": 35120 }, { "epoch": 0.7805279061500178, "grad_norm": 2.328125, "learning_rate": 4.387111111111111e-06, "loss": 0.373, "step": 35130 }, { "epoch": 0.7807500888730893, "grad_norm": 2.75, "learning_rate": 4.382666666666667e-06, "loss": 0.3815, "step": 35140 }, { "epoch": 0.7809722715961607, "grad_norm": 2.484375, "learning_rate": 4.378222222222223e-06, "loss": 0.3535, "step": 35150 }, { "epoch": 0.7811944543192322, "grad_norm": 2.453125, "learning_rate": 4.3737777777777775e-06, "loss": 0.371, "step": 35160 }, { "epoch": 0.7814166370423036, "grad_norm": 2.25, "learning_rate": 4.369333333333334e-06, "loss": 0.3683, "step": 35170 }, { "epoch": 0.7816388197653751, "grad_norm": 3.359375, "learning_rate": 4.364888888888889e-06, "loss": 0.3765, "step": 35180 }, { "epoch": 0.7818610024884465, "grad_norm": 2.390625, "learning_rate": 4.360444444444445e-06, "loss": 0.3667, "step": 35190 }, { "epoch": 0.782083185211518, "grad_norm": 2.125, "learning_rate": 4.356e-06, "loss": 0.3965, "step": 35200 }, { "epoch": 0.7823053679345894, "grad_norm": 2.703125, "learning_rate": 4.351555555555556e-06, "loss": 0.3767, "step": 35210 }, { "epoch": 0.7825275506576609, "grad_norm": 2.890625, "learning_rate": 4.347111111111112e-06, "loss": 0.3617, "step": 35220 }, { "epoch": 0.7827497333807323, "grad_norm": 2.203125, "learning_rate": 4.342666666666667e-06, "loss": 0.3547, "step": 35230 }, { "epoch": 0.7829719161038038, "grad_norm": 2.09375, "learning_rate": 4.338222222222222e-06, "loss": 0.3716, "step": 35240 }, { "epoch": 0.7831940988268752, "grad_norm": 2.375, "learning_rate": 4.333777777777778e-06, "loss": 0.378, "step": 35250 }, { "epoch": 0.7834162815499467, "grad_norm": 2.6875, "learning_rate": 4.329333333333334e-06, "loss": 0.3583, "step": 35260 }, { "epoch": 0.7836384642730181, "grad_norm": 2.90625, "learning_rate": 4.324888888888889e-06, "loss": 0.3724, "step": 35270 }, { "epoch": 0.7838606469960896, "grad_norm": 2.640625, "learning_rate": 4.320444444444445e-06, "loss": 0.3697, "step": 35280 }, { "epoch": 0.784082829719161, "grad_norm": 2.171875, "learning_rate": 4.316e-06, "loss": 0.3935, "step": 35290 }, { "epoch": 0.7843050124422325, "grad_norm": 2.140625, "learning_rate": 4.311555555555556e-06, "loss": 0.3499, "step": 35300 }, { "epoch": 0.7845271951653039, "grad_norm": 2.515625, "learning_rate": 4.307111111111111e-06, "loss": 0.4539, "step": 35310 }, { "epoch": 0.7847493778883754, "grad_norm": 2.546875, "learning_rate": 4.302666666666667e-06, "loss": 0.3557, "step": 35320 }, { "epoch": 0.7849715606114469, "grad_norm": 2.71875, "learning_rate": 4.298222222222223e-06, "loss": 0.3916, "step": 35330 }, { "epoch": 0.7851937433345183, "grad_norm": 2.5, "learning_rate": 4.293777777777778e-06, "loss": 0.3796, "step": 35340 }, { "epoch": 0.7854159260575898, "grad_norm": 2.6875, "learning_rate": 4.289333333333334e-06, "loss": 0.4244, "step": 35350 }, { "epoch": 0.7856381087806612, "grad_norm": 2.46875, "learning_rate": 4.284888888888889e-06, "loss": 0.3966, "step": 35360 }, { "epoch": 0.7858602915037327, "grad_norm": 1.7421875, "learning_rate": 4.2804444444444445e-06, "loss": 0.3677, "step": 35370 }, { "epoch": 0.7860824742268041, "grad_norm": 2.296875, "learning_rate": 4.276e-06, "loss": 0.3921, "step": 35380 }, { "epoch": 0.7863046569498756, "grad_norm": 2.859375, "learning_rate": 4.271555555555556e-06, "loss": 0.4123, "step": 35390 }, { "epoch": 0.786526839672947, "grad_norm": 2.34375, "learning_rate": 4.267111111111112e-06, "loss": 0.3939, "step": 35400 }, { "epoch": 0.7867490223960185, "grad_norm": 2.453125, "learning_rate": 4.262666666666667e-06, "loss": 0.3451, "step": 35410 }, { "epoch": 0.7869712051190899, "grad_norm": 2.296875, "learning_rate": 4.258222222222222e-06, "loss": 0.4128, "step": 35420 }, { "epoch": 0.7871933878421614, "grad_norm": 2.515625, "learning_rate": 4.253777777777779e-06, "loss": 0.3621, "step": 35430 }, { "epoch": 0.7874155705652328, "grad_norm": 2.5625, "learning_rate": 4.2493333333333335e-06, "loss": 0.3936, "step": 35440 }, { "epoch": 0.7876377532883043, "grad_norm": 2.296875, "learning_rate": 4.244888888888889e-06, "loss": 0.3349, "step": 35450 }, { "epoch": 0.7878599360113757, "grad_norm": 2.28125, "learning_rate": 4.240444444444445e-06, "loss": 0.3618, "step": 35460 }, { "epoch": 0.7880821187344472, "grad_norm": 2.203125, "learning_rate": 4.236e-06, "loss": 0.3727, "step": 35470 }, { "epoch": 0.7883043014575186, "grad_norm": 2.453125, "learning_rate": 4.231555555555556e-06, "loss": 0.36, "step": 35480 }, { "epoch": 0.7885264841805901, "grad_norm": 2.40625, "learning_rate": 4.227111111111111e-06, "loss": 0.3867, "step": 35490 }, { "epoch": 0.7887486669036615, "grad_norm": 2.390625, "learning_rate": 4.222666666666667e-06, "loss": 0.3428, "step": 35500 }, { "epoch": 0.788970849626733, "grad_norm": 2.203125, "learning_rate": 4.2182222222222225e-06, "loss": 0.3838, "step": 35510 }, { "epoch": 0.7891930323498044, "grad_norm": 2.546875, "learning_rate": 4.213777777777778e-06, "loss": 0.3552, "step": 35520 }, { "epoch": 0.7894152150728759, "grad_norm": 2.265625, "learning_rate": 4.209333333333334e-06, "loss": 0.3469, "step": 35530 }, { "epoch": 0.7896373977959474, "grad_norm": 2.640625, "learning_rate": 4.20488888888889e-06, "loss": 0.3799, "step": 35540 }, { "epoch": 0.7898595805190188, "grad_norm": 2.078125, "learning_rate": 4.2004444444444445e-06, "loss": 0.351, "step": 35550 }, { "epoch": 0.7900817632420903, "grad_norm": 2.734375, "learning_rate": 4.196e-06, "loss": 0.334, "step": 35560 }, { "epoch": 0.7903039459651617, "grad_norm": 2.15625, "learning_rate": 4.191555555555556e-06, "loss": 0.3548, "step": 35570 }, { "epoch": 0.7905261286882332, "grad_norm": 3.109375, "learning_rate": 4.1871111111111115e-06, "loss": 0.3785, "step": 35580 }, { "epoch": 0.7907483114113046, "grad_norm": 2.390625, "learning_rate": 4.182666666666667e-06, "loss": 0.3791, "step": 35590 }, { "epoch": 0.7909704941343761, "grad_norm": 2.40625, "learning_rate": 4.178222222222222e-06, "loss": 0.3997, "step": 35600 }, { "epoch": 0.7911926768574475, "grad_norm": 3.515625, "learning_rate": 4.173777777777779e-06, "loss": 0.3568, "step": 35610 }, { "epoch": 0.791414859580519, "grad_norm": 2.65625, "learning_rate": 4.1693333333333335e-06, "loss": 0.3632, "step": 35620 }, { "epoch": 0.7916370423035904, "grad_norm": 2.859375, "learning_rate": 4.164888888888889e-06, "loss": 0.4123, "step": 35630 }, { "epoch": 0.791859225026662, "grad_norm": 2.40625, "learning_rate": 4.160444444444445e-06, "loss": 0.3667, "step": 35640 }, { "epoch": 0.7920814077497333, "grad_norm": 2.5625, "learning_rate": 4.1560000000000005e-06, "loss": 0.3872, "step": 35650 }, { "epoch": 0.7923035904728049, "grad_norm": 2.5, "learning_rate": 4.151555555555556e-06, "loss": 0.3686, "step": 35660 }, { "epoch": 0.7925257731958762, "grad_norm": 3.3125, "learning_rate": 4.147111111111111e-06, "loss": 0.3643, "step": 35670 }, { "epoch": 0.7927479559189478, "grad_norm": 2.71875, "learning_rate": 4.142666666666667e-06, "loss": 0.3725, "step": 35680 }, { "epoch": 0.7929701386420192, "grad_norm": 2.46875, "learning_rate": 4.1382222222222224e-06, "loss": 0.4178, "step": 35690 }, { "epoch": 0.7931923213650907, "grad_norm": 2.15625, "learning_rate": 4.133777777777778e-06, "loss": 0.3527, "step": 35700 }, { "epoch": 0.793414504088162, "grad_norm": 2.4375, "learning_rate": 4.129333333333334e-06, "loss": 0.4105, "step": 35710 }, { "epoch": 0.7936366868112336, "grad_norm": 2.671875, "learning_rate": 4.1248888888888895e-06, "loss": 0.3827, "step": 35720 }, { "epoch": 0.7938588695343051, "grad_norm": 2.109375, "learning_rate": 4.120444444444444e-06, "loss": 0.3481, "step": 35730 }, { "epoch": 0.7940810522573765, "grad_norm": 2.484375, "learning_rate": 4.116000000000001e-06, "loss": 0.378, "step": 35740 }, { "epoch": 0.794303234980448, "grad_norm": 2.9375, "learning_rate": 4.111555555555556e-06, "loss": 0.3797, "step": 35750 }, { "epoch": 0.7945254177035194, "grad_norm": 3.125, "learning_rate": 4.1071111111111114e-06, "loss": 0.4157, "step": 35760 }, { "epoch": 0.7947476004265909, "grad_norm": 2.828125, "learning_rate": 4.102666666666667e-06, "loss": 0.3935, "step": 35770 }, { "epoch": 0.7949697831496623, "grad_norm": 2.984375, "learning_rate": 4.098222222222222e-06, "loss": 0.395, "step": 35780 }, { "epoch": 0.7951919658727338, "grad_norm": 2.734375, "learning_rate": 4.0937777777777785e-06, "loss": 0.3708, "step": 35790 }, { "epoch": 0.7954141485958052, "grad_norm": 2.78125, "learning_rate": 4.089333333333333e-06, "loss": 0.3731, "step": 35800 }, { "epoch": 0.7956363313188767, "grad_norm": 2.59375, "learning_rate": 4.084888888888889e-06, "loss": 0.3528, "step": 35810 }, { "epoch": 0.7958585140419481, "grad_norm": 2.625, "learning_rate": 4.080444444444445e-06, "loss": 0.3945, "step": 35820 }, { "epoch": 0.7960806967650196, "grad_norm": 2.765625, "learning_rate": 4.0760000000000004e-06, "loss": 0.3837, "step": 35830 }, { "epoch": 0.796302879488091, "grad_norm": 1.953125, "learning_rate": 4.071555555555556e-06, "loss": 0.3751, "step": 35840 }, { "epoch": 0.7965250622111625, "grad_norm": 2.21875, "learning_rate": 4.067111111111112e-06, "loss": 0.3691, "step": 35850 }, { "epoch": 0.7967472449342339, "grad_norm": 2.109375, "learning_rate": 4.062666666666667e-06, "loss": 0.3627, "step": 35860 }, { "epoch": 0.7969694276573054, "grad_norm": 2.59375, "learning_rate": 4.058222222222222e-06, "loss": 0.3748, "step": 35870 }, { "epoch": 0.7971916103803768, "grad_norm": 2.40625, "learning_rate": 4.053777777777778e-06, "loss": 0.3858, "step": 35880 }, { "epoch": 0.7974137931034483, "grad_norm": 2.421875, "learning_rate": 4.049333333333334e-06, "loss": 0.3567, "step": 35890 }, { "epoch": 0.7976359758265197, "grad_norm": 2.28125, "learning_rate": 4.0448888888888894e-06, "loss": 0.3555, "step": 35900 }, { "epoch": 0.7978581585495912, "grad_norm": 2.4375, "learning_rate": 4.040444444444444e-06, "loss": 0.3841, "step": 35910 }, { "epoch": 0.7980803412726626, "grad_norm": 2.5625, "learning_rate": 4.036000000000001e-06, "loss": 0.3513, "step": 35920 }, { "epoch": 0.7983025239957341, "grad_norm": 2.171875, "learning_rate": 4.031555555555556e-06, "loss": 0.3697, "step": 35930 }, { "epoch": 0.7985247067188056, "grad_norm": 2.578125, "learning_rate": 4.027111111111111e-06, "loss": 0.3785, "step": 35940 }, { "epoch": 0.798746889441877, "grad_norm": 2.625, "learning_rate": 4.022666666666667e-06, "loss": 0.3818, "step": 35950 }, { "epoch": 0.7989690721649485, "grad_norm": 2.34375, "learning_rate": 4.018222222222223e-06, "loss": 0.3765, "step": 35960 }, { "epoch": 0.7991912548880199, "grad_norm": 2.578125, "learning_rate": 4.0137777777777784e-06, "loss": 0.3733, "step": 35970 }, { "epoch": 0.7994134376110914, "grad_norm": 2.03125, "learning_rate": 4.009333333333333e-06, "loss": 0.3615, "step": 35980 }, { "epoch": 0.7996356203341628, "grad_norm": 2.953125, "learning_rate": 4.004888888888889e-06, "loss": 0.3991, "step": 35990 }, { "epoch": 0.7998578030572343, "grad_norm": 2.71875, "learning_rate": 4.000444444444445e-06, "loss": 0.4218, "step": 36000 }, { "epoch": 0.8000799857803057, "grad_norm": 2.78125, "learning_rate": 3.996e-06, "loss": 0.4153, "step": 36010 }, { "epoch": 0.8003021685033772, "grad_norm": 2.609375, "learning_rate": 3.991555555555556e-06, "loss": 0.416, "step": 36020 }, { "epoch": 0.8005243512264486, "grad_norm": 2.71875, "learning_rate": 3.987111111111112e-06, "loss": 0.3752, "step": 36030 }, { "epoch": 0.8007465339495201, "grad_norm": 2.25, "learning_rate": 3.982666666666667e-06, "loss": 0.4093, "step": 36040 }, { "epoch": 0.8009687166725915, "grad_norm": 2.546875, "learning_rate": 3.978222222222223e-06, "loss": 0.3814, "step": 36050 }, { "epoch": 0.801190899395663, "grad_norm": 2.1875, "learning_rate": 3.973777777777778e-06, "loss": 0.3947, "step": 36060 }, { "epoch": 0.8014130821187344, "grad_norm": 2.34375, "learning_rate": 3.969333333333334e-06, "loss": 0.4031, "step": 36070 }, { "epoch": 0.8016352648418059, "grad_norm": 2.375, "learning_rate": 3.964888888888889e-06, "loss": 0.3883, "step": 36080 }, { "epoch": 0.8018574475648773, "grad_norm": 3.046875, "learning_rate": 3.960444444444444e-06, "loss": 0.3801, "step": 36090 }, { "epoch": 0.8020796302879488, "grad_norm": 2.578125, "learning_rate": 3.956000000000001e-06, "loss": 0.4001, "step": 36100 }, { "epoch": 0.8023018130110202, "grad_norm": 2.78125, "learning_rate": 3.951555555555556e-06, "loss": 0.3976, "step": 36110 }, { "epoch": 0.8025239957340917, "grad_norm": 2.53125, "learning_rate": 3.947111111111111e-06, "loss": 0.4109, "step": 36120 }, { "epoch": 0.8027461784571631, "grad_norm": 2.828125, "learning_rate": 3.942666666666667e-06, "loss": 0.3503, "step": 36130 }, { "epoch": 0.8029683611802346, "grad_norm": 2.703125, "learning_rate": 3.938222222222223e-06, "loss": 0.3853, "step": 36140 }, { "epoch": 0.8031905439033061, "grad_norm": 2.890625, "learning_rate": 3.933777777777778e-06, "loss": 0.4025, "step": 36150 }, { "epoch": 0.8034127266263775, "grad_norm": 2.53125, "learning_rate": 3.929333333333334e-06, "loss": 0.3681, "step": 36160 }, { "epoch": 0.803634909349449, "grad_norm": 2.140625, "learning_rate": 3.924888888888889e-06, "loss": 0.3452, "step": 36170 }, { "epoch": 0.8038570920725204, "grad_norm": 2.515625, "learning_rate": 3.920444444444445e-06, "loss": 0.3523, "step": 36180 }, { "epoch": 0.8040792747955919, "grad_norm": 2.28125, "learning_rate": 3.916e-06, "loss": 0.3614, "step": 36190 }, { "epoch": 0.8043014575186633, "grad_norm": 2.6875, "learning_rate": 3.911555555555556e-06, "loss": 0.4246, "step": 36200 }, { "epoch": 0.8045236402417348, "grad_norm": 2.6875, "learning_rate": 3.907111111111112e-06, "loss": 0.3876, "step": 36210 }, { "epoch": 0.8047458229648062, "grad_norm": 2.5625, "learning_rate": 3.9026666666666665e-06, "loss": 0.3782, "step": 36220 }, { "epoch": 0.8049680056878777, "grad_norm": 2.53125, "learning_rate": 3.898222222222223e-06, "loss": 0.4159, "step": 36230 }, { "epoch": 0.8051901884109491, "grad_norm": 2.65625, "learning_rate": 3.893777777777778e-06, "loss": 0.3788, "step": 36240 }, { "epoch": 0.8054123711340206, "grad_norm": 2.09375, "learning_rate": 3.889333333333334e-06, "loss": 0.3817, "step": 36250 }, { "epoch": 0.805634553857092, "grad_norm": 2.546875, "learning_rate": 3.884888888888889e-06, "loss": 0.4005, "step": 36260 }, { "epoch": 0.8058567365801635, "grad_norm": 3.0625, "learning_rate": 3.880444444444445e-06, "loss": 0.3908, "step": 36270 }, { "epoch": 0.8060789193032349, "grad_norm": 2.8125, "learning_rate": 3.876000000000001e-06, "loss": 0.3803, "step": 36280 }, { "epoch": 0.8063011020263064, "grad_norm": 1.953125, "learning_rate": 3.8715555555555555e-06, "loss": 0.3769, "step": 36290 }, { "epoch": 0.8065232847493778, "grad_norm": 2.328125, "learning_rate": 3.867111111111111e-06, "loss": 0.3546, "step": 36300 }, { "epoch": 0.8067454674724494, "grad_norm": 2.4375, "learning_rate": 3.862666666666667e-06, "loss": 0.4036, "step": 36310 }, { "epoch": 0.8069676501955207, "grad_norm": 3.203125, "learning_rate": 3.858222222222223e-06, "loss": 0.3939, "step": 36320 }, { "epoch": 0.8071898329185923, "grad_norm": 2.9375, "learning_rate": 3.853777777777778e-06, "loss": 0.4026, "step": 36330 }, { "epoch": 0.8074120156416636, "grad_norm": 2.484375, "learning_rate": 3.849333333333334e-06, "loss": 0.4102, "step": 36340 }, { "epoch": 0.8076341983647352, "grad_norm": 2.671875, "learning_rate": 3.844888888888889e-06, "loss": 0.3906, "step": 36350 }, { "epoch": 0.8078563810878067, "grad_norm": 2.421875, "learning_rate": 3.840444444444445e-06, "loss": 0.328, "step": 36360 }, { "epoch": 0.8080785638108781, "grad_norm": 2.875, "learning_rate": 3.836e-06, "loss": 0.409, "step": 36370 }, { "epoch": 0.8083007465339496, "grad_norm": 2.5625, "learning_rate": 3.831555555555556e-06, "loss": 0.3531, "step": 36380 }, { "epoch": 0.808522929257021, "grad_norm": 2.765625, "learning_rate": 3.827111111111112e-06, "loss": 0.3441, "step": 36390 }, { "epoch": 0.8087451119800925, "grad_norm": 2.359375, "learning_rate": 3.8226666666666664e-06, "loss": 0.3811, "step": 36400 }, { "epoch": 0.8089672947031639, "grad_norm": 2.390625, "learning_rate": 3.818222222222223e-06, "loss": 0.3645, "step": 36410 }, { "epoch": 0.8091894774262354, "grad_norm": 2.625, "learning_rate": 3.813777777777778e-06, "loss": 0.3758, "step": 36420 }, { "epoch": 0.8094116601493068, "grad_norm": 2.46875, "learning_rate": 3.809333333333334e-06, "loss": 0.3884, "step": 36430 }, { "epoch": 0.8096338428723783, "grad_norm": 1.984375, "learning_rate": 3.804888888888889e-06, "loss": 0.399, "step": 36440 }, { "epoch": 0.8098560255954497, "grad_norm": 2.46875, "learning_rate": 3.800444444444445e-06, "loss": 0.3821, "step": 36450 }, { "epoch": 0.8100782083185212, "grad_norm": 2.546875, "learning_rate": 3.796e-06, "loss": 0.3441, "step": 36460 }, { "epoch": 0.8103003910415926, "grad_norm": 2.34375, "learning_rate": 3.7915555555555563e-06, "loss": 0.3482, "step": 36470 }, { "epoch": 0.8105225737646641, "grad_norm": 2.265625, "learning_rate": 3.7871111111111115e-06, "loss": 0.3525, "step": 36480 }, { "epoch": 0.8107447564877355, "grad_norm": 2.484375, "learning_rate": 3.782666666666667e-06, "loss": 0.3821, "step": 36490 }, { "epoch": 0.810966939210807, "grad_norm": 2.75, "learning_rate": 3.7782222222222225e-06, "loss": 0.364, "step": 36500 }, { "epoch": 0.8111891219338784, "grad_norm": 3.421875, "learning_rate": 3.7737777777777778e-06, "loss": 0.3863, "step": 36510 }, { "epoch": 0.8114113046569499, "grad_norm": 2.421875, "learning_rate": 3.769333333333334e-06, "loss": 0.3838, "step": 36520 }, { "epoch": 0.8116334873800213, "grad_norm": 2.734375, "learning_rate": 3.764888888888889e-06, "loss": 0.3697, "step": 36530 }, { "epoch": 0.8118556701030928, "grad_norm": 2.40625, "learning_rate": 3.760444444444445e-06, "loss": 0.3424, "step": 36540 }, { "epoch": 0.8120778528261643, "grad_norm": 2.546875, "learning_rate": 3.756e-06, "loss": 0.3625, "step": 36550 }, { "epoch": 0.8123000355492357, "grad_norm": 2.5625, "learning_rate": 3.7515555555555562e-06, "loss": 0.3735, "step": 36560 }, { "epoch": 0.8125222182723072, "grad_norm": 2.703125, "learning_rate": 3.7471111111111115e-06, "loss": 0.4031, "step": 36570 }, { "epoch": 0.8127444009953786, "grad_norm": 2.5625, "learning_rate": 3.742666666666667e-06, "loss": 0.3842, "step": 36580 }, { "epoch": 0.8129665837184501, "grad_norm": 2.484375, "learning_rate": 3.7382222222222225e-06, "loss": 0.36, "step": 36590 }, { "epoch": 0.8131887664415215, "grad_norm": 2.515625, "learning_rate": 3.7337777777777777e-06, "loss": 0.3619, "step": 36600 }, { "epoch": 0.813410949164593, "grad_norm": 2.515625, "learning_rate": 3.729333333333334e-06, "loss": 0.3943, "step": 36610 }, { "epoch": 0.8136331318876644, "grad_norm": 2.78125, "learning_rate": 3.724888888888889e-06, "loss": 0.3786, "step": 36620 }, { "epoch": 0.8138553146107359, "grad_norm": 2.6875, "learning_rate": 3.720444444444445e-06, "loss": 0.3729, "step": 36630 }, { "epoch": 0.8140774973338073, "grad_norm": 2.21875, "learning_rate": 3.716e-06, "loss": 0.3617, "step": 36640 }, { "epoch": 0.8142996800568788, "grad_norm": 3.28125, "learning_rate": 3.711555555555556e-06, "loss": 0.4053, "step": 36650 }, { "epoch": 0.8145218627799502, "grad_norm": 1.9453125, "learning_rate": 3.7071111111111115e-06, "loss": 0.3896, "step": 36660 }, { "epoch": 0.8147440455030217, "grad_norm": 2.828125, "learning_rate": 3.702666666666667e-06, "loss": 0.4024, "step": 36670 }, { "epoch": 0.8149662282260931, "grad_norm": 2.453125, "learning_rate": 3.6982222222222224e-06, "loss": 0.3581, "step": 36680 }, { "epoch": 0.8151884109491646, "grad_norm": 2.6875, "learning_rate": 3.6937777777777785e-06, "loss": 0.3698, "step": 36690 }, { "epoch": 0.815410593672236, "grad_norm": 2.59375, "learning_rate": 3.689333333333334e-06, "loss": 0.3626, "step": 36700 }, { "epoch": 0.8156327763953075, "grad_norm": 2.265625, "learning_rate": 3.684888888888889e-06, "loss": 0.4152, "step": 36710 }, { "epoch": 0.8158549591183789, "grad_norm": 2.328125, "learning_rate": 3.6804444444444448e-06, "loss": 0.3904, "step": 36720 }, { "epoch": 0.8160771418414504, "grad_norm": 2.3125, "learning_rate": 3.676e-06, "loss": 0.3647, "step": 36730 }, { "epoch": 0.8162993245645218, "grad_norm": 2.265625, "learning_rate": 3.671555555555556e-06, "loss": 0.3711, "step": 36740 }, { "epoch": 0.8165215072875933, "grad_norm": 2.140625, "learning_rate": 3.6671111111111114e-06, "loss": 0.3562, "step": 36750 }, { "epoch": 0.8167436900106648, "grad_norm": 2.640625, "learning_rate": 3.662666666666667e-06, "loss": 0.3905, "step": 36760 }, { "epoch": 0.8169658727337362, "grad_norm": 2.34375, "learning_rate": 3.6582222222222224e-06, "loss": 0.3713, "step": 36770 }, { "epoch": 0.8171880554568077, "grad_norm": 2.484375, "learning_rate": 3.6537777777777785e-06, "loss": 0.3718, "step": 36780 }, { "epoch": 0.8174102381798791, "grad_norm": 2.421875, "learning_rate": 3.6493333333333338e-06, "loss": 0.3669, "step": 36790 }, { "epoch": 0.8176324209029506, "grad_norm": 2.5625, "learning_rate": 3.644888888888889e-06, "loss": 0.3661, "step": 36800 }, { "epoch": 0.817854603626022, "grad_norm": 2.796875, "learning_rate": 3.6404444444444447e-06, "loss": 0.4021, "step": 36810 }, { "epoch": 0.8180767863490935, "grad_norm": 2.1875, "learning_rate": 3.636e-06, "loss": 0.3581, "step": 36820 }, { "epoch": 0.8182989690721649, "grad_norm": 2.359375, "learning_rate": 3.631555555555556e-06, "loss": 0.3789, "step": 36830 }, { "epoch": 0.8185211517952364, "grad_norm": 2.03125, "learning_rate": 3.6271111111111114e-06, "loss": 0.3591, "step": 36840 }, { "epoch": 0.8187433345183078, "grad_norm": 2.484375, "learning_rate": 3.622666666666667e-06, "loss": 0.3703, "step": 36850 }, { "epoch": 0.8189655172413793, "grad_norm": 2.5625, "learning_rate": 3.6182222222222223e-06, "loss": 0.4259, "step": 36860 }, { "epoch": 0.8191876999644507, "grad_norm": 2.328125, "learning_rate": 3.6137777777777785e-06, "loss": 0.3824, "step": 36870 }, { "epoch": 0.8194098826875222, "grad_norm": 3.3125, "learning_rate": 3.6093333333333337e-06, "loss": 0.3941, "step": 36880 }, { "epoch": 0.8196320654105936, "grad_norm": 2.515625, "learning_rate": 3.6048888888888894e-06, "loss": 0.3845, "step": 36890 }, { "epoch": 0.8198542481336651, "grad_norm": 2.40625, "learning_rate": 3.6004444444444447e-06, "loss": 0.3932, "step": 36900 }, { "epoch": 0.8200764308567365, "grad_norm": 2.734375, "learning_rate": 3.596e-06, "loss": 0.3741, "step": 36910 }, { "epoch": 0.820298613579808, "grad_norm": 2.40625, "learning_rate": 3.591555555555556e-06, "loss": 0.3793, "step": 36920 }, { "epoch": 0.8205207963028794, "grad_norm": 2.78125, "learning_rate": 3.5871111111111113e-06, "loss": 0.3621, "step": 36930 }, { "epoch": 0.820742979025951, "grad_norm": 2.359375, "learning_rate": 3.582666666666667e-06, "loss": 0.3599, "step": 36940 }, { "epoch": 0.8209651617490223, "grad_norm": 3.765625, "learning_rate": 3.5782222222222223e-06, "loss": 0.4144, "step": 36950 }, { "epoch": 0.8211873444720938, "grad_norm": 2.734375, "learning_rate": 3.5737777777777784e-06, "loss": 0.3619, "step": 36960 }, { "epoch": 0.8214095271951654, "grad_norm": 2.90625, "learning_rate": 3.5693333333333337e-06, "loss": 0.3865, "step": 36970 }, { "epoch": 0.8216317099182368, "grad_norm": 2.375, "learning_rate": 3.5648888888888894e-06, "loss": 0.3761, "step": 36980 }, { "epoch": 0.8218538926413083, "grad_norm": 2.5, "learning_rate": 3.5604444444444447e-06, "loss": 0.3877, "step": 36990 }, { "epoch": 0.8220760753643797, "grad_norm": 2.328125, "learning_rate": 3.5560000000000008e-06, "loss": 0.3583, "step": 37000 }, { "epoch": 0.8222982580874512, "grad_norm": 2.265625, "learning_rate": 3.551555555555556e-06, "loss": 0.3937, "step": 37010 }, { "epoch": 0.8225204408105226, "grad_norm": 2.609375, "learning_rate": 3.5471111111111113e-06, "loss": 0.4002, "step": 37020 }, { "epoch": 0.8227426235335941, "grad_norm": 3.015625, "learning_rate": 3.542666666666667e-06, "loss": 0.4044, "step": 37030 }, { "epoch": 0.8229648062566655, "grad_norm": 2.859375, "learning_rate": 3.5382222222222223e-06, "loss": 0.3772, "step": 37040 }, { "epoch": 0.823186988979737, "grad_norm": 2.390625, "learning_rate": 3.5337777777777784e-06, "loss": 0.3827, "step": 37050 }, { "epoch": 0.8234091717028084, "grad_norm": 2.421875, "learning_rate": 3.5293333333333336e-06, "loss": 0.3963, "step": 37060 }, { "epoch": 0.8236313544258799, "grad_norm": 2.890625, "learning_rate": 3.5248888888888893e-06, "loss": 0.4422, "step": 37070 }, { "epoch": 0.8238535371489513, "grad_norm": 2.578125, "learning_rate": 3.5204444444444446e-06, "loss": 0.4013, "step": 37080 }, { "epoch": 0.8240757198720228, "grad_norm": 2.890625, "learning_rate": 3.5160000000000007e-06, "loss": 0.3957, "step": 37090 }, { "epoch": 0.8242979025950942, "grad_norm": 2.71875, "learning_rate": 3.511555555555556e-06, "loss": 0.3686, "step": 37100 }, { "epoch": 0.8245200853181657, "grad_norm": 2.515625, "learning_rate": 3.5071111111111113e-06, "loss": 0.3786, "step": 37110 }, { "epoch": 0.8247422680412371, "grad_norm": 2.203125, "learning_rate": 3.502666666666667e-06, "loss": 0.3427, "step": 37120 }, { "epoch": 0.8249644507643086, "grad_norm": 2.421875, "learning_rate": 3.4982222222222222e-06, "loss": 0.3927, "step": 37130 }, { "epoch": 0.82518663348738, "grad_norm": 2.8125, "learning_rate": 3.4937777777777783e-06, "loss": 0.395, "step": 37140 }, { "epoch": 0.8254088162104515, "grad_norm": 2.703125, "learning_rate": 3.4893333333333336e-06, "loss": 0.4044, "step": 37150 }, { "epoch": 0.8256309989335229, "grad_norm": 2.703125, "learning_rate": 3.4848888888888893e-06, "loss": 0.3699, "step": 37160 }, { "epoch": 0.8258531816565944, "grad_norm": 2.375, "learning_rate": 3.4804444444444446e-06, "loss": 0.3468, "step": 37170 }, { "epoch": 0.8260753643796659, "grad_norm": 2.96875, "learning_rate": 3.4760000000000007e-06, "loss": 0.3921, "step": 37180 }, { "epoch": 0.8262975471027373, "grad_norm": 2.234375, "learning_rate": 3.471555555555556e-06, "loss": 0.3611, "step": 37190 }, { "epoch": 0.8265197298258088, "grad_norm": 2.65625, "learning_rate": 3.4671111111111116e-06, "loss": 0.361, "step": 37200 }, { "epoch": 0.8267419125488802, "grad_norm": 2.578125, "learning_rate": 3.462666666666667e-06, "loss": 0.3554, "step": 37210 }, { "epoch": 0.8269640952719517, "grad_norm": 2.46875, "learning_rate": 3.458222222222222e-06, "loss": 0.3574, "step": 37220 }, { "epoch": 0.8271862779950231, "grad_norm": 2.46875, "learning_rate": 3.4537777777777783e-06, "loss": 0.3913, "step": 37230 }, { "epoch": 0.8274084607180946, "grad_norm": 2.28125, "learning_rate": 3.4493333333333336e-06, "loss": 0.3534, "step": 37240 }, { "epoch": 0.827630643441166, "grad_norm": 2.53125, "learning_rate": 3.4448888888888893e-06, "loss": 0.3659, "step": 37250 }, { "epoch": 0.8278528261642375, "grad_norm": 2.875, "learning_rate": 3.4404444444444445e-06, "loss": 0.3847, "step": 37260 }, { "epoch": 0.8280750088873089, "grad_norm": 2.40625, "learning_rate": 3.4360000000000006e-06, "loss": 0.3676, "step": 37270 }, { "epoch": 0.8282971916103804, "grad_norm": 2.234375, "learning_rate": 3.431555555555556e-06, "loss": 0.3727, "step": 37280 }, { "epoch": 0.8285193743334518, "grad_norm": 2.4375, "learning_rate": 3.4271111111111116e-06, "loss": 0.36, "step": 37290 }, { "epoch": 0.8287415570565233, "grad_norm": 2.171875, "learning_rate": 3.422666666666667e-06, "loss": 0.3995, "step": 37300 }, { "epoch": 0.8289637397795947, "grad_norm": 2.734375, "learning_rate": 3.4182222222222226e-06, "loss": 0.3648, "step": 37310 }, { "epoch": 0.8291859225026662, "grad_norm": 3.359375, "learning_rate": 3.4137777777777783e-06, "loss": 0.4034, "step": 37320 }, { "epoch": 0.8294081052257376, "grad_norm": 2.421875, "learning_rate": 3.4093333333333335e-06, "loss": 0.3502, "step": 37330 }, { "epoch": 0.8296302879488091, "grad_norm": 2.46875, "learning_rate": 3.4048888888888892e-06, "loss": 0.3505, "step": 37340 }, { "epoch": 0.8298524706718805, "grad_norm": 2.703125, "learning_rate": 3.4004444444444445e-06, "loss": 0.3957, "step": 37350 }, { "epoch": 0.830074653394952, "grad_norm": 2.859375, "learning_rate": 3.3960000000000006e-06, "loss": 0.3776, "step": 37360 }, { "epoch": 0.8302968361180235, "grad_norm": 2.546875, "learning_rate": 3.391555555555556e-06, "loss": 0.3782, "step": 37370 }, { "epoch": 0.8305190188410949, "grad_norm": 2.421875, "learning_rate": 3.3871111111111116e-06, "loss": 0.3891, "step": 37380 }, { "epoch": 0.8307412015641664, "grad_norm": 2.234375, "learning_rate": 3.382666666666667e-06, "loss": 0.3471, "step": 37390 }, { "epoch": 0.8309633842872378, "grad_norm": 2.765625, "learning_rate": 3.3782222222222225e-06, "loss": 0.4007, "step": 37400 }, { "epoch": 0.8311855670103093, "grad_norm": 2.359375, "learning_rate": 3.3737777777777782e-06, "loss": 0.3866, "step": 37410 }, { "epoch": 0.8314077497333807, "grad_norm": 2.59375, "learning_rate": 3.3693333333333335e-06, "loss": 0.3708, "step": 37420 }, { "epoch": 0.8316299324564522, "grad_norm": 2.375, "learning_rate": 3.364888888888889e-06, "loss": 0.3908, "step": 37430 }, { "epoch": 0.8318521151795236, "grad_norm": 2.65625, "learning_rate": 3.3604444444444444e-06, "loss": 0.3849, "step": 37440 }, { "epoch": 0.8320742979025951, "grad_norm": 2.75, "learning_rate": 3.3560000000000006e-06, "loss": 0.3863, "step": 37450 }, { "epoch": 0.8322964806256665, "grad_norm": 2.53125, "learning_rate": 3.351555555555556e-06, "loss": 0.3868, "step": 37460 }, { "epoch": 0.832518663348738, "grad_norm": 2.609375, "learning_rate": 3.3471111111111115e-06, "loss": 0.3625, "step": 37470 }, { "epoch": 0.8327408460718094, "grad_norm": 2.5625, "learning_rate": 3.342666666666667e-06, "loss": 0.3774, "step": 37480 }, { "epoch": 0.8329630287948809, "grad_norm": 2.453125, "learning_rate": 3.3382222222222225e-06, "loss": 0.4237, "step": 37490 }, { "epoch": 0.8331852115179523, "grad_norm": 2.8125, "learning_rate": 3.333777777777778e-06, "loss": 0.3394, "step": 37500 }, { "epoch": 0.8334073942410238, "grad_norm": 2.765625, "learning_rate": 3.329333333333334e-06, "loss": 0.4392, "step": 37510 }, { "epoch": 0.8336295769640952, "grad_norm": 3.015625, "learning_rate": 3.324888888888889e-06, "loss": 0.3995, "step": 37520 }, { "epoch": 0.8338517596871667, "grad_norm": 2.171875, "learning_rate": 3.3204444444444444e-06, "loss": 0.3664, "step": 37530 }, { "epoch": 0.8340739424102381, "grad_norm": 2.265625, "learning_rate": 3.3160000000000005e-06, "loss": 0.3388, "step": 37540 }, { "epoch": 0.8342961251333096, "grad_norm": 2.421875, "learning_rate": 3.311555555555556e-06, "loss": 0.3804, "step": 37550 }, { "epoch": 0.834518307856381, "grad_norm": 2.71875, "learning_rate": 3.3071111111111115e-06, "loss": 0.3699, "step": 37560 }, { "epoch": 0.8347404905794525, "grad_norm": 2.4375, "learning_rate": 3.3026666666666668e-06, "loss": 0.3796, "step": 37570 }, { "epoch": 0.834962673302524, "grad_norm": 2.640625, "learning_rate": 3.298222222222223e-06, "loss": 0.3949, "step": 37580 }, { "epoch": 0.8351848560255954, "grad_norm": 2.046875, "learning_rate": 3.293777777777778e-06, "loss": 0.3359, "step": 37590 }, { "epoch": 0.835407038748667, "grad_norm": 2.5625, "learning_rate": 3.289333333333334e-06, "loss": 0.3812, "step": 37600 }, { "epoch": 0.8356292214717383, "grad_norm": 2.59375, "learning_rate": 3.284888888888889e-06, "loss": 0.3936, "step": 37610 }, { "epoch": 0.8358514041948099, "grad_norm": 3.0, "learning_rate": 3.280444444444445e-06, "loss": 0.396, "step": 37620 }, { "epoch": 0.8360735869178813, "grad_norm": 2.546875, "learning_rate": 3.2760000000000005e-06, "loss": 0.4006, "step": 37630 }, { "epoch": 0.8362957696409528, "grad_norm": 2.640625, "learning_rate": 3.2715555555555558e-06, "loss": 0.4128, "step": 37640 }, { "epoch": 0.8365179523640242, "grad_norm": 2.859375, "learning_rate": 3.2671111111111114e-06, "loss": 0.3945, "step": 37650 }, { "epoch": 0.8367401350870957, "grad_norm": 2.328125, "learning_rate": 3.2626666666666667e-06, "loss": 0.3853, "step": 37660 }, { "epoch": 0.8369623178101671, "grad_norm": 2.6875, "learning_rate": 3.258222222222223e-06, "loss": 0.3778, "step": 37670 }, { "epoch": 0.8371845005332386, "grad_norm": 2.390625, "learning_rate": 3.253777777777778e-06, "loss": 0.3505, "step": 37680 }, { "epoch": 0.83740668325631, "grad_norm": 2.109375, "learning_rate": 3.249333333333334e-06, "loss": 0.3252, "step": 37690 }, { "epoch": 0.8376288659793815, "grad_norm": 2.734375, "learning_rate": 3.244888888888889e-06, "loss": 0.3809, "step": 37700 }, { "epoch": 0.8378510487024529, "grad_norm": 2.703125, "learning_rate": 3.2404444444444448e-06, "loss": 0.3932, "step": 37710 }, { "epoch": 0.8380732314255244, "grad_norm": 2.6875, "learning_rate": 3.2360000000000004e-06, "loss": 0.3969, "step": 37720 }, { "epoch": 0.8382954141485958, "grad_norm": 2.703125, "learning_rate": 3.2315555555555557e-06, "loss": 0.3881, "step": 37730 }, { "epoch": 0.8385175968716673, "grad_norm": 2.421875, "learning_rate": 3.2271111111111114e-06, "loss": 0.402, "step": 37740 }, { "epoch": 0.8387397795947387, "grad_norm": 2.578125, "learning_rate": 3.2226666666666667e-06, "loss": 0.3891, "step": 37750 }, { "epoch": 0.8389619623178102, "grad_norm": 2.640625, "learning_rate": 3.2182222222222228e-06, "loss": 0.3798, "step": 37760 }, { "epoch": 0.8391841450408816, "grad_norm": 2.359375, "learning_rate": 3.213777777777778e-06, "loss": 0.381, "step": 37770 }, { "epoch": 0.8394063277639531, "grad_norm": 2.5625, "learning_rate": 3.2093333333333337e-06, "loss": 0.3803, "step": 37780 }, { "epoch": 0.8396285104870246, "grad_norm": 2.453125, "learning_rate": 3.204888888888889e-06, "loss": 0.3757, "step": 37790 }, { "epoch": 0.839850693210096, "grad_norm": 2.515625, "learning_rate": 3.2004444444444447e-06, "loss": 0.3806, "step": 37800 }, { "epoch": 0.8400728759331675, "grad_norm": 2.40625, "learning_rate": 3.1960000000000004e-06, "loss": 0.4007, "step": 37810 }, { "epoch": 0.8402950586562389, "grad_norm": 2.765625, "learning_rate": 3.191555555555556e-06, "loss": 0.4154, "step": 37820 }, { "epoch": 0.8405172413793104, "grad_norm": 2.53125, "learning_rate": 3.1871111111111114e-06, "loss": 0.4186, "step": 37830 }, { "epoch": 0.8407394241023818, "grad_norm": 2.640625, "learning_rate": 3.1826666666666666e-06, "loss": 0.4009, "step": 37840 }, { "epoch": 0.8409616068254533, "grad_norm": 2.71875, "learning_rate": 3.1782222222222227e-06, "loss": 0.4162, "step": 37850 }, { "epoch": 0.8411837895485247, "grad_norm": 2.4375, "learning_rate": 3.173777777777778e-06, "loss": 0.3911, "step": 37860 }, { "epoch": 0.8414059722715962, "grad_norm": 2.65625, "learning_rate": 3.1693333333333337e-06, "loss": 0.3891, "step": 37870 }, { "epoch": 0.8416281549946676, "grad_norm": 2.984375, "learning_rate": 3.164888888888889e-06, "loss": 0.3632, "step": 37880 }, { "epoch": 0.8418503377177391, "grad_norm": 2.015625, "learning_rate": 3.1604444444444447e-06, "loss": 0.3702, "step": 37890 }, { "epoch": 0.8420725204408105, "grad_norm": 2.421875, "learning_rate": 3.1560000000000004e-06, "loss": 0.3673, "step": 37900 }, { "epoch": 0.842294703163882, "grad_norm": 2.375, "learning_rate": 3.151555555555556e-06, "loss": 0.3518, "step": 37910 }, { "epoch": 0.8425168858869534, "grad_norm": 2.28125, "learning_rate": 3.1471111111111113e-06, "loss": 0.382, "step": 37920 }, { "epoch": 0.8427390686100249, "grad_norm": 2.640625, "learning_rate": 3.142666666666667e-06, "loss": 0.377, "step": 37930 }, { "epoch": 0.8429612513330963, "grad_norm": 2.453125, "learning_rate": 3.1382222222222227e-06, "loss": 0.4045, "step": 37940 }, { "epoch": 0.8431834340561678, "grad_norm": 3.25, "learning_rate": 3.133777777777778e-06, "loss": 0.3554, "step": 37950 }, { "epoch": 0.8434056167792392, "grad_norm": 2.75, "learning_rate": 3.1293333333333337e-06, "loss": 0.3976, "step": 37960 }, { "epoch": 0.8436277995023107, "grad_norm": 2.609375, "learning_rate": 3.124888888888889e-06, "loss": 0.4088, "step": 37970 }, { "epoch": 0.8438499822253821, "grad_norm": 2.359375, "learning_rate": 3.1204444444444446e-06, "loss": 0.3825, "step": 37980 }, { "epoch": 0.8440721649484536, "grad_norm": 2.15625, "learning_rate": 3.1160000000000003e-06, "loss": 0.3749, "step": 37990 }, { "epoch": 0.8442943476715251, "grad_norm": 2.828125, "learning_rate": 3.111555555555556e-06, "loss": 0.3941, "step": 38000 }, { "epoch": 0.8445165303945965, "grad_norm": 2.515625, "learning_rate": 3.1071111111111113e-06, "loss": 0.3638, "step": 38010 }, { "epoch": 0.844738713117668, "grad_norm": 2.5625, "learning_rate": 3.102666666666667e-06, "loss": 0.3978, "step": 38020 }, { "epoch": 0.8449608958407394, "grad_norm": 2.546875, "learning_rate": 3.0982222222222227e-06, "loss": 0.3633, "step": 38030 }, { "epoch": 0.8451830785638109, "grad_norm": 2.953125, "learning_rate": 3.093777777777778e-06, "loss": 0.3862, "step": 38040 }, { "epoch": 0.8454052612868823, "grad_norm": 2.984375, "learning_rate": 3.0893333333333336e-06, "loss": 0.3785, "step": 38050 }, { "epoch": 0.8456274440099538, "grad_norm": 2.578125, "learning_rate": 3.084888888888889e-06, "loss": 0.389, "step": 38060 }, { "epoch": 0.8458496267330252, "grad_norm": 2.875, "learning_rate": 3.0804444444444446e-06, "loss": 0.3706, "step": 38070 }, { "epoch": 0.8460718094560967, "grad_norm": 2.015625, "learning_rate": 3.0760000000000003e-06, "loss": 0.3671, "step": 38080 }, { "epoch": 0.8462939921791681, "grad_norm": 2.765625, "learning_rate": 3.071555555555556e-06, "loss": 0.4127, "step": 38090 }, { "epoch": 0.8465161749022396, "grad_norm": 2.609375, "learning_rate": 3.0671111111111112e-06, "loss": 0.3619, "step": 38100 }, { "epoch": 0.846738357625311, "grad_norm": 2.0625, "learning_rate": 3.062666666666667e-06, "loss": 0.3844, "step": 38110 }, { "epoch": 0.8469605403483825, "grad_norm": 2.5625, "learning_rate": 3.0582222222222226e-06, "loss": 0.4027, "step": 38120 }, { "epoch": 0.8471827230714539, "grad_norm": 2.609375, "learning_rate": 3.0537777777777783e-06, "loss": 0.379, "step": 38130 }, { "epoch": 0.8474049057945254, "grad_norm": 2.28125, "learning_rate": 3.0493333333333336e-06, "loss": 0.4034, "step": 38140 }, { "epoch": 0.8476270885175968, "grad_norm": 2.6875, "learning_rate": 3.044888888888889e-06, "loss": 0.4175, "step": 38150 }, { "epoch": 0.8478492712406683, "grad_norm": 2.640625, "learning_rate": 3.0404444444444445e-06, "loss": 0.3888, "step": 38160 }, { "epoch": 0.8480714539637397, "grad_norm": 2.390625, "learning_rate": 3.0360000000000002e-06, "loss": 0.349, "step": 38170 }, { "epoch": 0.8482936366868112, "grad_norm": 2.375, "learning_rate": 3.031555555555556e-06, "loss": 0.3918, "step": 38180 }, { "epoch": 0.8485158194098826, "grad_norm": 2.734375, "learning_rate": 3.027111111111111e-06, "loss": 0.3855, "step": 38190 }, { "epoch": 0.8487380021329541, "grad_norm": 2.515625, "learning_rate": 3.022666666666667e-06, "loss": 0.3281, "step": 38200 }, { "epoch": 0.8489601848560256, "grad_norm": 2.28125, "learning_rate": 3.0182222222222226e-06, "loss": 0.3725, "step": 38210 }, { "epoch": 0.849182367579097, "grad_norm": 2.046875, "learning_rate": 3.0137777777777783e-06, "loss": 0.3511, "step": 38220 }, { "epoch": 0.8494045503021685, "grad_norm": 2.703125, "learning_rate": 3.0093333333333335e-06, "loss": 0.3693, "step": 38230 }, { "epoch": 0.8496267330252399, "grad_norm": 2.234375, "learning_rate": 3.0048888888888892e-06, "loss": 0.3991, "step": 38240 }, { "epoch": 0.8498489157483115, "grad_norm": 2.609375, "learning_rate": 3.0004444444444445e-06, "loss": 0.3783, "step": 38250 }, { "epoch": 0.8500710984713828, "grad_norm": 2.78125, "learning_rate": 2.996e-06, "loss": 0.3797, "step": 38260 }, { "epoch": 0.8502932811944544, "grad_norm": 2.359375, "learning_rate": 2.991555555555556e-06, "loss": 0.3732, "step": 38270 }, { "epoch": 0.8505154639175257, "grad_norm": 2.125, "learning_rate": 2.987111111111111e-06, "loss": 0.3435, "step": 38280 }, { "epoch": 0.8507376466405973, "grad_norm": 1.96875, "learning_rate": 2.982666666666667e-06, "loss": 0.3585, "step": 38290 }, { "epoch": 0.8509598293636687, "grad_norm": 2.90625, "learning_rate": 2.9782222222222225e-06, "loss": 0.3673, "step": 38300 }, { "epoch": 0.8511820120867402, "grad_norm": 2.671875, "learning_rate": 2.9737777777777782e-06, "loss": 0.3812, "step": 38310 }, { "epoch": 0.8514041948098116, "grad_norm": 2.453125, "learning_rate": 2.9693333333333335e-06, "loss": 0.3847, "step": 38320 }, { "epoch": 0.8516263775328831, "grad_norm": 2.984375, "learning_rate": 2.964888888888889e-06, "loss": 0.379, "step": 38330 }, { "epoch": 0.8518485602559545, "grad_norm": 2.953125, "learning_rate": 2.9604444444444445e-06, "loss": 0.3578, "step": 38340 }, { "epoch": 0.852070742979026, "grad_norm": 2.9375, "learning_rate": 2.956e-06, "loss": 0.3734, "step": 38350 }, { "epoch": 0.8522929257020974, "grad_norm": 2.8125, "learning_rate": 2.951555555555556e-06, "loss": 0.3857, "step": 38360 }, { "epoch": 0.8525151084251689, "grad_norm": 2.65625, "learning_rate": 2.947111111111111e-06, "loss": 0.3981, "step": 38370 }, { "epoch": 0.8527372911482403, "grad_norm": 2.359375, "learning_rate": 2.942666666666667e-06, "loss": 0.3664, "step": 38380 }, { "epoch": 0.8529594738713118, "grad_norm": 2.546875, "learning_rate": 2.9382222222222225e-06, "loss": 0.3549, "step": 38390 }, { "epoch": 0.8531816565943833, "grad_norm": 2.90625, "learning_rate": 2.933777777777778e-06, "loss": 0.3706, "step": 38400 }, { "epoch": 0.8534038393174547, "grad_norm": 2.578125, "learning_rate": 2.9293333333333335e-06, "loss": 0.3724, "step": 38410 }, { "epoch": 0.8536260220405262, "grad_norm": 2.53125, "learning_rate": 2.924888888888889e-06, "loss": 0.411, "step": 38420 }, { "epoch": 0.8538482047635976, "grad_norm": 2.65625, "learning_rate": 2.9204444444444444e-06, "loss": 0.3301, "step": 38430 }, { "epoch": 0.8540703874866691, "grad_norm": 2.75, "learning_rate": 2.9160000000000005e-06, "loss": 0.3766, "step": 38440 }, { "epoch": 0.8542925702097405, "grad_norm": 3.015625, "learning_rate": 2.911555555555556e-06, "loss": 0.3619, "step": 38450 }, { "epoch": 0.854514752932812, "grad_norm": 2.578125, "learning_rate": 2.907111111111111e-06, "loss": 0.3743, "step": 38460 }, { "epoch": 0.8547369356558834, "grad_norm": 2.25, "learning_rate": 2.9026666666666668e-06, "loss": 0.3742, "step": 38470 }, { "epoch": 0.8549591183789549, "grad_norm": 2.25, "learning_rate": 2.8982222222222225e-06, "loss": 0.3755, "step": 38480 }, { "epoch": 0.8551813011020263, "grad_norm": 2.140625, "learning_rate": 2.893777777777778e-06, "loss": 0.3777, "step": 38490 }, { "epoch": 0.8554034838250978, "grad_norm": 2.203125, "learning_rate": 2.8893333333333334e-06, "loss": 0.3395, "step": 38500 }, { "epoch": 0.8556256665481692, "grad_norm": 2.359375, "learning_rate": 2.884888888888889e-06, "loss": 0.3713, "step": 38510 }, { "epoch": 0.8558478492712407, "grad_norm": 2.84375, "learning_rate": 2.880444444444445e-06, "loss": 0.3555, "step": 38520 }, { "epoch": 0.8560700319943121, "grad_norm": 2.203125, "learning_rate": 2.8760000000000005e-06, "loss": 0.3666, "step": 38530 }, { "epoch": 0.8562922147173836, "grad_norm": 2.3125, "learning_rate": 2.8715555555555558e-06, "loss": 0.3908, "step": 38540 }, { "epoch": 0.856514397440455, "grad_norm": 2.40625, "learning_rate": 2.8671111111111115e-06, "loss": 0.3939, "step": 38550 }, { "epoch": 0.8567365801635265, "grad_norm": 2.484375, "learning_rate": 2.8626666666666667e-06, "loss": 0.402, "step": 38560 }, { "epoch": 0.8569587628865979, "grad_norm": 2.59375, "learning_rate": 2.8582222222222224e-06, "loss": 0.3823, "step": 38570 }, { "epoch": 0.8571809456096694, "grad_norm": 2.4375, "learning_rate": 2.853777777777778e-06, "loss": 0.3862, "step": 38580 }, { "epoch": 0.8574031283327408, "grad_norm": 2.9375, "learning_rate": 2.8493333333333334e-06, "loss": 0.3953, "step": 38590 }, { "epoch": 0.8576253110558123, "grad_norm": 2.640625, "learning_rate": 2.844888888888889e-06, "loss": 0.392, "step": 38600 }, { "epoch": 0.8578474937788838, "grad_norm": 2.453125, "learning_rate": 2.8404444444444448e-06, "loss": 0.3912, "step": 38610 }, { "epoch": 0.8580696765019552, "grad_norm": 2.671875, "learning_rate": 2.8360000000000005e-06, "loss": 0.3748, "step": 38620 }, { "epoch": 0.8582918592250267, "grad_norm": 2.390625, "learning_rate": 2.8315555555555557e-06, "loss": 0.3653, "step": 38630 }, { "epoch": 0.8585140419480981, "grad_norm": 2.375, "learning_rate": 2.8271111111111114e-06, "loss": 0.3418, "step": 38640 }, { "epoch": 0.8587362246711696, "grad_norm": 3.03125, "learning_rate": 2.8226666666666667e-06, "loss": 0.3835, "step": 38650 }, { "epoch": 0.858958407394241, "grad_norm": 2.484375, "learning_rate": 2.8182222222222224e-06, "loss": 0.3827, "step": 38660 }, { "epoch": 0.8591805901173125, "grad_norm": 2.375, "learning_rate": 2.813777777777778e-06, "loss": 0.3809, "step": 38670 }, { "epoch": 0.8594027728403839, "grad_norm": 3.296875, "learning_rate": 2.8093333333333333e-06, "loss": 0.3883, "step": 38680 }, { "epoch": 0.8596249555634554, "grad_norm": 2.59375, "learning_rate": 2.804888888888889e-06, "loss": 0.3659, "step": 38690 }, { "epoch": 0.8598471382865268, "grad_norm": 2.953125, "learning_rate": 2.8004444444444447e-06, "loss": 0.3739, "step": 38700 }, { "epoch": 0.8600693210095983, "grad_norm": 2.484375, "learning_rate": 2.7960000000000004e-06, "loss": 0.354, "step": 38710 }, { "epoch": 0.8602915037326697, "grad_norm": 2.46875, "learning_rate": 2.7915555555555557e-06, "loss": 0.3551, "step": 38720 }, { "epoch": 0.8605136864557412, "grad_norm": 2.5, "learning_rate": 2.7871111111111114e-06, "loss": 0.3688, "step": 38730 }, { "epoch": 0.8607358691788126, "grad_norm": 3.28125, "learning_rate": 2.7826666666666666e-06, "loss": 0.3777, "step": 38740 }, { "epoch": 0.8609580519018841, "grad_norm": 2.671875, "learning_rate": 2.7782222222222228e-06, "loss": 0.4131, "step": 38750 }, { "epoch": 0.8611802346249555, "grad_norm": 3.0, "learning_rate": 2.773777777777778e-06, "loss": 0.3677, "step": 38760 }, { "epoch": 0.861402417348027, "grad_norm": 2.515625, "learning_rate": 2.7693333333333333e-06, "loss": 0.3922, "step": 38770 }, { "epoch": 0.8616246000710984, "grad_norm": 2.875, "learning_rate": 2.764888888888889e-06, "loss": 0.378, "step": 38780 }, { "epoch": 0.8618467827941699, "grad_norm": 3.1875, "learning_rate": 2.7604444444444447e-06, "loss": 0.3579, "step": 38790 }, { "epoch": 0.8620689655172413, "grad_norm": 2.359375, "learning_rate": 2.7560000000000004e-06, "loss": 0.3672, "step": 38800 }, { "epoch": 0.8622911482403128, "grad_norm": 2.5, "learning_rate": 2.7515555555555556e-06, "loss": 0.3729, "step": 38810 }, { "epoch": 0.8625133309633843, "grad_norm": 3.28125, "learning_rate": 2.7471111111111113e-06, "loss": 0.3626, "step": 38820 }, { "epoch": 0.8627355136864557, "grad_norm": 2.71875, "learning_rate": 2.7426666666666666e-06, "loss": 0.4237, "step": 38830 }, { "epoch": 0.8629576964095272, "grad_norm": 2.4375, "learning_rate": 2.7382222222222227e-06, "loss": 0.4017, "step": 38840 }, { "epoch": 0.8631798791325986, "grad_norm": 2.59375, "learning_rate": 2.733777777777778e-06, "loss": 0.3437, "step": 38850 }, { "epoch": 0.8634020618556701, "grad_norm": 2.53125, "learning_rate": 2.7293333333333333e-06, "loss": 0.3578, "step": 38860 }, { "epoch": 0.8636242445787415, "grad_norm": 2.171875, "learning_rate": 2.724888888888889e-06, "loss": 0.3849, "step": 38870 }, { "epoch": 0.863846427301813, "grad_norm": 2.859375, "learning_rate": 2.7204444444444446e-06, "loss": 0.3795, "step": 38880 }, { "epoch": 0.8640686100248844, "grad_norm": 2.484375, "learning_rate": 2.7160000000000003e-06, "loss": 0.3713, "step": 38890 }, { "epoch": 0.864290792747956, "grad_norm": 2.28125, "learning_rate": 2.7115555555555556e-06, "loss": 0.3595, "step": 38900 }, { "epoch": 0.8645129754710273, "grad_norm": 2.6875, "learning_rate": 2.7071111111111113e-06, "loss": 0.3469, "step": 38910 }, { "epoch": 0.8647351581940989, "grad_norm": 2.734375, "learning_rate": 2.7026666666666666e-06, "loss": 0.3666, "step": 38920 }, { "epoch": 0.8649573409171702, "grad_norm": 2.46875, "learning_rate": 2.6982222222222227e-06, "loss": 0.3731, "step": 38930 }, { "epoch": 0.8651795236402418, "grad_norm": 2.578125, "learning_rate": 2.693777777777778e-06, "loss": 0.385, "step": 38940 }, { "epoch": 0.8654017063633132, "grad_norm": 2.671875, "learning_rate": 2.6893333333333336e-06, "loss": 0.3696, "step": 38950 }, { "epoch": 0.8656238890863847, "grad_norm": 2.609375, "learning_rate": 2.684888888888889e-06, "loss": 0.4151, "step": 38960 }, { "epoch": 0.865846071809456, "grad_norm": 2.6875, "learning_rate": 2.6804444444444446e-06, "loss": 0.3815, "step": 38970 }, { "epoch": 0.8660682545325276, "grad_norm": 3.34375, "learning_rate": 2.6760000000000003e-06, "loss": 0.3737, "step": 38980 }, { "epoch": 0.866290437255599, "grad_norm": 2.75, "learning_rate": 2.6715555555555556e-06, "loss": 0.378, "step": 38990 }, { "epoch": 0.8665126199786705, "grad_norm": 2.40625, "learning_rate": 2.6671111111111113e-06, "loss": 0.3694, "step": 39000 }, { "epoch": 0.8667348027017419, "grad_norm": 2.859375, "learning_rate": 2.6626666666666665e-06, "loss": 0.3726, "step": 39010 }, { "epoch": 0.8669569854248134, "grad_norm": 2.59375, "learning_rate": 2.6582222222222226e-06, "loss": 0.3751, "step": 39020 }, { "epoch": 0.8671791681478849, "grad_norm": 2.515625, "learning_rate": 2.653777777777778e-06, "loss": 0.3833, "step": 39030 }, { "epoch": 0.8674013508709563, "grad_norm": 2.9375, "learning_rate": 2.6493333333333336e-06, "loss": 0.4114, "step": 39040 }, { "epoch": 0.8676235335940278, "grad_norm": 2.984375, "learning_rate": 2.644888888888889e-06, "loss": 0.3759, "step": 39050 }, { "epoch": 0.8678457163170992, "grad_norm": 2.46875, "learning_rate": 2.640444444444445e-06, "loss": 0.3894, "step": 39060 }, { "epoch": 0.8680678990401707, "grad_norm": 2.625, "learning_rate": 2.6360000000000003e-06, "loss": 0.3688, "step": 39070 }, { "epoch": 0.8682900817632421, "grad_norm": 2.359375, "learning_rate": 2.6315555555555555e-06, "loss": 0.3668, "step": 39080 }, { "epoch": 0.8685122644863136, "grad_norm": 2.734375, "learning_rate": 2.6271111111111112e-06, "loss": 0.3603, "step": 39090 }, { "epoch": 0.868734447209385, "grad_norm": 2.46875, "learning_rate": 2.6226666666666665e-06, "loss": 0.3859, "step": 39100 }, { "epoch": 0.8689566299324565, "grad_norm": 3.078125, "learning_rate": 2.6182222222222226e-06, "loss": 0.3455, "step": 39110 }, { "epoch": 0.8691788126555279, "grad_norm": 2.59375, "learning_rate": 2.613777777777778e-06, "loss": 0.3751, "step": 39120 }, { "epoch": 0.8694009953785994, "grad_norm": 2.1875, "learning_rate": 2.6093333333333336e-06, "loss": 0.3494, "step": 39130 }, { "epoch": 0.8696231781016708, "grad_norm": 2.21875, "learning_rate": 2.604888888888889e-06, "loss": 0.4063, "step": 39140 }, { "epoch": 0.8698453608247423, "grad_norm": 2.6875, "learning_rate": 2.600444444444445e-06, "loss": 0.4022, "step": 39150 }, { "epoch": 0.8700675435478137, "grad_norm": 3.296875, "learning_rate": 2.5960000000000002e-06, "loss": 0.3903, "step": 39160 }, { "epoch": 0.8702897262708852, "grad_norm": 2.21875, "learning_rate": 2.5915555555555555e-06, "loss": 0.4057, "step": 39170 }, { "epoch": 0.8705119089939566, "grad_norm": 2.3125, "learning_rate": 2.587111111111111e-06, "loss": 0.3593, "step": 39180 }, { "epoch": 0.8707340917170281, "grad_norm": 2.953125, "learning_rate": 2.5826666666666664e-06, "loss": 0.3396, "step": 39190 }, { "epoch": 0.8709562744400995, "grad_norm": 2.375, "learning_rate": 2.5782222222222226e-06, "loss": 0.3696, "step": 39200 }, { "epoch": 0.871178457163171, "grad_norm": 2.234375, "learning_rate": 2.573777777777778e-06, "loss": 0.3804, "step": 39210 }, { "epoch": 0.8714006398862425, "grad_norm": 2.265625, "learning_rate": 2.5693333333333335e-06, "loss": 0.4056, "step": 39220 }, { "epoch": 0.8716228226093139, "grad_norm": 2.375, "learning_rate": 2.564888888888889e-06, "loss": 0.3712, "step": 39230 }, { "epoch": 0.8718450053323854, "grad_norm": 2.6875, "learning_rate": 2.560444444444445e-06, "loss": 0.3899, "step": 39240 }, { "epoch": 0.8720671880554568, "grad_norm": 3.09375, "learning_rate": 2.556e-06, "loss": 0.3869, "step": 39250 }, { "epoch": 0.8722893707785283, "grad_norm": 2.34375, "learning_rate": 2.551555555555556e-06, "loss": 0.3569, "step": 39260 }, { "epoch": 0.8725115535015997, "grad_norm": 2.453125, "learning_rate": 2.547111111111111e-06, "loss": 0.3602, "step": 39270 }, { "epoch": 0.8727337362246712, "grad_norm": 2.453125, "learning_rate": 2.5426666666666664e-06, "loss": 0.3651, "step": 39280 }, { "epoch": 0.8729559189477426, "grad_norm": 2.453125, "learning_rate": 2.5382222222222225e-06, "loss": 0.4032, "step": 39290 }, { "epoch": 0.8731781016708141, "grad_norm": 2.390625, "learning_rate": 2.533777777777778e-06, "loss": 0.4008, "step": 39300 }, { "epoch": 0.8734002843938855, "grad_norm": 2.328125, "learning_rate": 2.5293333333333335e-06, "loss": 0.3781, "step": 39310 }, { "epoch": 0.873622467116957, "grad_norm": 3.234375, "learning_rate": 2.5248888888888888e-06, "loss": 0.3624, "step": 39320 }, { "epoch": 0.8738446498400284, "grad_norm": 2.765625, "learning_rate": 2.520444444444445e-06, "loss": 0.3744, "step": 39330 }, { "epoch": 0.8740668325630999, "grad_norm": 2.984375, "learning_rate": 2.516e-06, "loss": 0.3678, "step": 39340 }, { "epoch": 0.8742890152861713, "grad_norm": 2.078125, "learning_rate": 2.511555555555556e-06, "loss": 0.3681, "step": 39350 }, { "epoch": 0.8745111980092428, "grad_norm": 2.5625, "learning_rate": 2.507111111111111e-06, "loss": 0.3679, "step": 39360 }, { "epoch": 0.8747333807323142, "grad_norm": 2.296875, "learning_rate": 2.5026666666666672e-06, "loss": 0.3866, "step": 39370 }, { "epoch": 0.8749555634553857, "grad_norm": 3.3125, "learning_rate": 2.4982222222222225e-06, "loss": 0.3862, "step": 39380 }, { "epoch": 0.8751777461784571, "grad_norm": 2.203125, "learning_rate": 2.493777777777778e-06, "loss": 0.3422, "step": 39390 }, { "epoch": 0.8753999289015286, "grad_norm": 2.4375, "learning_rate": 2.4893333333333334e-06, "loss": 0.3805, "step": 39400 }, { "epoch": 0.8756221116246, "grad_norm": 2.546875, "learning_rate": 2.484888888888889e-06, "loss": 0.4086, "step": 39410 }, { "epoch": 0.8758442943476715, "grad_norm": 2.328125, "learning_rate": 2.480444444444445e-06, "loss": 0.3776, "step": 39420 }, { "epoch": 0.876066477070743, "grad_norm": 2.453125, "learning_rate": 2.476e-06, "loss": 0.3834, "step": 39430 }, { "epoch": 0.8762886597938144, "grad_norm": 2.859375, "learning_rate": 2.4715555555555558e-06, "loss": 0.3931, "step": 39440 }, { "epoch": 0.8765108425168859, "grad_norm": 2.96875, "learning_rate": 2.467111111111111e-06, "loss": 0.3288, "step": 39450 }, { "epoch": 0.8767330252399573, "grad_norm": 2.6875, "learning_rate": 2.4626666666666667e-06, "loss": 0.4021, "step": 39460 }, { "epoch": 0.8769552079630288, "grad_norm": 2.3125, "learning_rate": 2.4582222222222224e-06, "loss": 0.3973, "step": 39470 }, { "epoch": 0.8771773906861002, "grad_norm": 2.578125, "learning_rate": 2.453777777777778e-06, "loss": 0.3773, "step": 39480 }, { "epoch": 0.8773995734091717, "grad_norm": 2.265625, "learning_rate": 2.4493333333333334e-06, "loss": 0.3593, "step": 39490 }, { "epoch": 0.8776217561322431, "grad_norm": 2.46875, "learning_rate": 2.444888888888889e-06, "loss": 0.4039, "step": 39500 }, { "epoch": 0.8778439388553146, "grad_norm": 2.765625, "learning_rate": 2.4404444444444448e-06, "loss": 0.3548, "step": 39510 }, { "epoch": 0.878066121578386, "grad_norm": 2.625, "learning_rate": 2.4360000000000005e-06, "loss": 0.3488, "step": 39520 }, { "epoch": 0.8782883043014575, "grad_norm": 2.484375, "learning_rate": 2.4315555555555557e-06, "loss": 0.3653, "step": 39530 }, { "epoch": 0.8785104870245289, "grad_norm": 2.3125, "learning_rate": 2.427111111111111e-06, "loss": 0.3435, "step": 39540 }, { "epoch": 0.8787326697476004, "grad_norm": 2.578125, "learning_rate": 2.4226666666666667e-06, "loss": 0.4013, "step": 39550 }, { "epoch": 0.8789548524706718, "grad_norm": 2.78125, "learning_rate": 2.4182222222222224e-06, "loss": 0.379, "step": 39560 }, { "epoch": 0.8791770351937434, "grad_norm": 2.140625, "learning_rate": 2.413777777777778e-06, "loss": 0.334, "step": 39570 }, { "epoch": 0.8793992179168147, "grad_norm": 2.234375, "learning_rate": 2.4093333333333334e-06, "loss": 0.3664, "step": 39580 }, { "epoch": 0.8796214006398863, "grad_norm": 2.65625, "learning_rate": 2.404888888888889e-06, "loss": 0.332, "step": 39590 }, { "epoch": 0.8798435833629576, "grad_norm": 3.359375, "learning_rate": 2.4004444444444447e-06, "loss": 0.3774, "step": 39600 }, { "epoch": 0.8800657660860292, "grad_norm": 2.671875, "learning_rate": 2.3960000000000004e-06, "loss": 0.4064, "step": 39610 }, { "epoch": 0.8802879488091006, "grad_norm": 2.984375, "learning_rate": 2.3915555555555557e-06, "loss": 0.3968, "step": 39620 }, { "epoch": 0.8805101315321721, "grad_norm": 2.5625, "learning_rate": 2.3871111111111114e-06, "loss": 0.366, "step": 39630 }, { "epoch": 0.8807323142552436, "grad_norm": 2.46875, "learning_rate": 2.3826666666666667e-06, "loss": 0.3739, "step": 39640 }, { "epoch": 0.880954496978315, "grad_norm": 2.875, "learning_rate": 2.3782222222222224e-06, "loss": 0.3934, "step": 39650 }, { "epoch": 0.8811766797013865, "grad_norm": 2.59375, "learning_rate": 2.373777777777778e-06, "loss": 0.3518, "step": 39660 }, { "epoch": 0.8813988624244579, "grad_norm": 2.15625, "learning_rate": 2.3693333333333333e-06, "loss": 0.3619, "step": 39670 }, { "epoch": 0.8816210451475294, "grad_norm": 2.734375, "learning_rate": 2.364888888888889e-06, "loss": 0.3641, "step": 39680 }, { "epoch": 0.8818432278706008, "grad_norm": 2.796875, "learning_rate": 2.3604444444444447e-06, "loss": 0.3966, "step": 39690 }, { "epoch": 0.8820654105936723, "grad_norm": 2.625, "learning_rate": 2.3560000000000004e-06, "loss": 0.3988, "step": 39700 }, { "epoch": 0.8822875933167437, "grad_norm": 3.328125, "learning_rate": 2.3515555555555557e-06, "loss": 0.3729, "step": 39710 }, { "epoch": 0.8825097760398152, "grad_norm": 2.53125, "learning_rate": 2.3471111111111114e-06, "loss": 0.3538, "step": 39720 }, { "epoch": 0.8827319587628866, "grad_norm": 2.4375, "learning_rate": 2.342666666666667e-06, "loss": 0.362, "step": 39730 }, { "epoch": 0.8829541414859581, "grad_norm": 2.53125, "learning_rate": 2.3382222222222223e-06, "loss": 0.4084, "step": 39740 }, { "epoch": 0.8831763242090295, "grad_norm": 2.296875, "learning_rate": 2.333777777777778e-06, "loss": 0.3524, "step": 39750 }, { "epoch": 0.883398506932101, "grad_norm": 2.34375, "learning_rate": 2.3293333333333333e-06, "loss": 0.3715, "step": 39760 }, { "epoch": 0.8836206896551724, "grad_norm": 2.78125, "learning_rate": 2.324888888888889e-06, "loss": 0.3663, "step": 39770 }, { "epoch": 0.8838428723782439, "grad_norm": 2.34375, "learning_rate": 2.3204444444444447e-06, "loss": 0.3583, "step": 39780 }, { "epoch": 0.8840650551013153, "grad_norm": 2.515625, "learning_rate": 2.3160000000000004e-06, "loss": 0.3297, "step": 39790 }, { "epoch": 0.8842872378243868, "grad_norm": 2.40625, "learning_rate": 2.3115555555555556e-06, "loss": 0.3392, "step": 39800 }, { "epoch": 0.8845094205474582, "grad_norm": 2.734375, "learning_rate": 2.3071111111111113e-06, "loss": 0.3786, "step": 39810 }, { "epoch": 0.8847316032705297, "grad_norm": 2.34375, "learning_rate": 2.302666666666667e-06, "loss": 0.4139, "step": 39820 }, { "epoch": 0.8849537859936011, "grad_norm": 2.484375, "learning_rate": 2.2982222222222227e-06, "loss": 0.4058, "step": 39830 }, { "epoch": 0.8851759687166726, "grad_norm": 2.671875, "learning_rate": 2.293777777777778e-06, "loss": 0.3488, "step": 39840 }, { "epoch": 0.8853981514397441, "grad_norm": 2.609375, "learning_rate": 2.2893333333333332e-06, "loss": 0.3846, "step": 39850 }, { "epoch": 0.8856203341628155, "grad_norm": 2.296875, "learning_rate": 2.284888888888889e-06, "loss": 0.3565, "step": 39860 }, { "epoch": 0.885842516885887, "grad_norm": 2.234375, "learning_rate": 2.2804444444444446e-06, "loss": 0.3832, "step": 39870 }, { "epoch": 0.8860646996089584, "grad_norm": 2.0, "learning_rate": 2.2760000000000003e-06, "loss": 0.389, "step": 39880 }, { "epoch": 0.8862868823320299, "grad_norm": 2.578125, "learning_rate": 2.2715555555555556e-06, "loss": 0.3651, "step": 39890 }, { "epoch": 0.8865090650551013, "grad_norm": 2.21875, "learning_rate": 2.2671111111111113e-06, "loss": 0.3626, "step": 39900 }, { "epoch": 0.8867312477781728, "grad_norm": 2.859375, "learning_rate": 2.262666666666667e-06, "loss": 0.4105, "step": 39910 }, { "epoch": 0.8869534305012442, "grad_norm": 2.484375, "learning_rate": 2.2582222222222227e-06, "loss": 0.4118, "step": 39920 }, { "epoch": 0.8871756132243157, "grad_norm": 2.328125, "learning_rate": 2.253777777777778e-06, "loss": 0.3644, "step": 39930 }, { "epoch": 0.8873977959473871, "grad_norm": 3.015625, "learning_rate": 2.2493333333333336e-06, "loss": 0.4027, "step": 39940 }, { "epoch": 0.8876199786704586, "grad_norm": 2.5, "learning_rate": 2.244888888888889e-06, "loss": 0.3823, "step": 39950 }, { "epoch": 0.88784216139353, "grad_norm": 2.71875, "learning_rate": 2.2404444444444446e-06, "loss": 0.3794, "step": 39960 }, { "epoch": 0.8880643441166015, "grad_norm": 2.25, "learning_rate": 2.2360000000000003e-06, "loss": 0.4008, "step": 39970 }, { "epoch": 0.8882865268396729, "grad_norm": 2.40625, "learning_rate": 2.2315555555555555e-06, "loss": 0.3852, "step": 39980 }, { "epoch": 0.8885087095627444, "grad_norm": 2.578125, "learning_rate": 2.2271111111111112e-06, "loss": 0.4021, "step": 39990 }, { "epoch": 0.8887308922858158, "grad_norm": 2.59375, "learning_rate": 2.222666666666667e-06, "loss": 0.3567, "step": 40000 }, { "epoch": 0.8889530750088873, "grad_norm": 2.375, "learning_rate": 2.2182222222222226e-06, "loss": 0.4155, "step": 40010 }, { "epoch": 0.8891752577319587, "grad_norm": 2.59375, "learning_rate": 2.213777777777778e-06, "loss": 0.4003, "step": 40020 }, { "epoch": 0.8893974404550302, "grad_norm": 2.921875, "learning_rate": 2.2093333333333336e-06, "loss": 0.3741, "step": 40030 }, { "epoch": 0.8896196231781017, "grad_norm": 2.28125, "learning_rate": 2.2048888888888893e-06, "loss": 0.3505, "step": 40040 }, { "epoch": 0.8898418059011731, "grad_norm": 2.765625, "learning_rate": 2.2004444444444445e-06, "loss": 0.384, "step": 40050 }, { "epoch": 0.8900639886242446, "grad_norm": 2.21875, "learning_rate": 2.1960000000000002e-06, "loss": 0.3613, "step": 40060 }, { "epoch": 0.890286171347316, "grad_norm": 2.375, "learning_rate": 2.1915555555555555e-06, "loss": 0.369, "step": 40070 }, { "epoch": 0.8905083540703875, "grad_norm": 2.421875, "learning_rate": 2.187111111111111e-06, "loss": 0.3752, "step": 40080 }, { "epoch": 0.8907305367934589, "grad_norm": 2.8125, "learning_rate": 2.182666666666667e-06, "loss": 0.3766, "step": 40090 }, { "epoch": 0.8909527195165304, "grad_norm": 2.640625, "learning_rate": 2.1782222222222226e-06, "loss": 0.3497, "step": 40100 }, { "epoch": 0.8911749022396018, "grad_norm": 2.546875, "learning_rate": 2.173777777777778e-06, "loss": 0.3781, "step": 40110 }, { "epoch": 0.8913970849626733, "grad_norm": 2.75, "learning_rate": 2.1693333333333335e-06, "loss": 0.3893, "step": 40120 }, { "epoch": 0.8916192676857447, "grad_norm": 2.46875, "learning_rate": 2.1648888888888892e-06, "loss": 0.3513, "step": 40130 }, { "epoch": 0.8918414504088162, "grad_norm": 2.796875, "learning_rate": 2.160444444444445e-06, "loss": 0.3694, "step": 40140 }, { "epoch": 0.8920636331318876, "grad_norm": 2.6875, "learning_rate": 2.156e-06, "loss": 0.4148, "step": 40150 }, { "epoch": 0.8922858158549591, "grad_norm": 2.515625, "learning_rate": 2.1515555555555555e-06, "loss": 0.3905, "step": 40160 }, { "epoch": 0.8925079985780305, "grad_norm": 2.515625, "learning_rate": 2.147111111111111e-06, "loss": 0.4334, "step": 40170 }, { "epoch": 0.892730181301102, "grad_norm": 2.28125, "learning_rate": 2.142666666666667e-06, "loss": 0.4006, "step": 40180 }, { "epoch": 0.8929523640241734, "grad_norm": 2.484375, "learning_rate": 2.1382222222222225e-06, "loss": 0.3621, "step": 40190 }, { "epoch": 0.893174546747245, "grad_norm": 2.65625, "learning_rate": 2.133777777777778e-06, "loss": 0.4158, "step": 40200 }, { "epoch": 0.8933967294703163, "grad_norm": 2.53125, "learning_rate": 2.1293333333333335e-06, "loss": 0.3865, "step": 40210 }, { "epoch": 0.8936189121933878, "grad_norm": 2.21875, "learning_rate": 2.124888888888889e-06, "loss": 0.3357, "step": 40220 }, { "epoch": 0.8938410949164592, "grad_norm": 2.53125, "learning_rate": 2.120444444444445e-06, "loss": 0.3716, "step": 40230 }, { "epoch": 0.8940632776395308, "grad_norm": 2.484375, "learning_rate": 2.116e-06, "loss": 0.3947, "step": 40240 }, { "epoch": 0.8942854603626023, "grad_norm": 2.578125, "learning_rate": 2.111555555555556e-06, "loss": 0.3916, "step": 40250 }, { "epoch": 0.8945076430856737, "grad_norm": 2.6875, "learning_rate": 2.107111111111111e-06, "loss": 0.3989, "step": 40260 }, { "epoch": 0.8947298258087452, "grad_norm": 2.703125, "learning_rate": 2.102666666666667e-06, "loss": 0.38, "step": 40270 }, { "epoch": 0.8949520085318166, "grad_norm": 2.421875, "learning_rate": 2.0982222222222225e-06, "loss": 0.3718, "step": 40280 }, { "epoch": 0.8951741912548881, "grad_norm": 2.390625, "learning_rate": 2.0937777777777778e-06, "loss": 0.4015, "step": 40290 }, { "epoch": 0.8953963739779595, "grad_norm": 2.859375, "learning_rate": 2.0893333333333335e-06, "loss": 0.3837, "step": 40300 }, { "epoch": 0.895618556701031, "grad_norm": 2.640625, "learning_rate": 2.084888888888889e-06, "loss": 0.3657, "step": 40310 }, { "epoch": 0.8958407394241024, "grad_norm": 2.59375, "learning_rate": 2.080444444444445e-06, "loss": 0.3588, "step": 40320 }, { "epoch": 0.8960629221471739, "grad_norm": 2.515625, "learning_rate": 2.076e-06, "loss": 0.3911, "step": 40330 }, { "epoch": 0.8962851048702453, "grad_norm": 2.59375, "learning_rate": 2.071555555555556e-06, "loss": 0.4072, "step": 40340 }, { "epoch": 0.8965072875933168, "grad_norm": 2.546875, "learning_rate": 2.0671111111111115e-06, "loss": 0.3658, "step": 40350 }, { "epoch": 0.8967294703163882, "grad_norm": 2.796875, "learning_rate": 2.0626666666666668e-06, "loss": 0.4061, "step": 40360 }, { "epoch": 0.8969516530394597, "grad_norm": 2.203125, "learning_rate": 2.0582222222222225e-06, "loss": 0.3724, "step": 40370 }, { "epoch": 0.8971738357625311, "grad_norm": 2.375, "learning_rate": 2.0537777777777777e-06, "loss": 0.3883, "step": 40380 }, { "epoch": 0.8973960184856026, "grad_norm": 2.90625, "learning_rate": 2.0493333333333334e-06, "loss": 0.3974, "step": 40390 }, { "epoch": 0.897618201208674, "grad_norm": 2.8125, "learning_rate": 2.044888888888889e-06, "loss": 0.3535, "step": 40400 }, { "epoch": 0.8978403839317455, "grad_norm": 2.59375, "learning_rate": 2.040444444444445e-06, "loss": 0.3663, "step": 40410 }, { "epoch": 0.8980625666548169, "grad_norm": 2.71875, "learning_rate": 2.036e-06, "loss": 0.3851, "step": 40420 }, { "epoch": 0.8982847493778884, "grad_norm": 2.75, "learning_rate": 2.0315555555555558e-06, "loss": 0.3804, "step": 40430 }, { "epoch": 0.8985069321009598, "grad_norm": 2.40625, "learning_rate": 2.0271111111111115e-06, "loss": 0.3553, "step": 40440 }, { "epoch": 0.8987291148240313, "grad_norm": 2.40625, "learning_rate": 2.022666666666667e-06, "loss": 0.3897, "step": 40450 }, { "epoch": 0.8989512975471028, "grad_norm": 2.5625, "learning_rate": 2.0182222222222224e-06, "loss": 0.3835, "step": 40460 }, { "epoch": 0.8991734802701742, "grad_norm": 2.96875, "learning_rate": 2.0137777777777777e-06, "loss": 0.3883, "step": 40470 }, { "epoch": 0.8993956629932457, "grad_norm": 2.390625, "learning_rate": 2.0093333333333334e-06, "loss": 0.3752, "step": 40480 }, { "epoch": 0.8996178457163171, "grad_norm": 3.0625, "learning_rate": 2.004888888888889e-06, "loss": 0.3865, "step": 40490 }, { "epoch": 0.8998400284393886, "grad_norm": 2.46875, "learning_rate": 2.0004444444444448e-06, "loss": 0.373, "step": 40500 }, { "epoch": 0.90006221116246, "grad_norm": 2.65625, "learning_rate": 1.996e-06, "loss": 0.3439, "step": 40510 }, { "epoch": 0.9002843938855315, "grad_norm": 2.640625, "learning_rate": 1.9915555555555557e-06, "loss": 0.3622, "step": 40520 }, { "epoch": 0.9005065766086029, "grad_norm": 3.03125, "learning_rate": 1.9871111111111114e-06, "loss": 0.3596, "step": 40530 }, { "epoch": 0.9007287593316744, "grad_norm": 2.953125, "learning_rate": 1.982666666666667e-06, "loss": 0.3888, "step": 40540 }, { "epoch": 0.9009509420547458, "grad_norm": 2.453125, "learning_rate": 1.9782222222222224e-06, "loss": 0.3853, "step": 40550 }, { "epoch": 0.9011731247778173, "grad_norm": 2.421875, "learning_rate": 1.973777777777778e-06, "loss": 0.3725, "step": 40560 }, { "epoch": 0.9013953075008887, "grad_norm": 2.375, "learning_rate": 1.9693333333333333e-06, "loss": 0.3684, "step": 40570 }, { "epoch": 0.9016174902239602, "grad_norm": 2.84375, "learning_rate": 1.964888888888889e-06, "loss": 0.4225, "step": 40580 }, { "epoch": 0.9018396729470316, "grad_norm": 2.703125, "learning_rate": 1.9604444444444447e-06, "loss": 0.3596, "step": 40590 }, { "epoch": 0.9020618556701031, "grad_norm": 2.421875, "learning_rate": 1.956e-06, "loss": 0.3713, "step": 40600 }, { "epoch": 0.9022840383931745, "grad_norm": 2.734375, "learning_rate": 1.9515555555555557e-06, "loss": 0.3995, "step": 40610 }, { "epoch": 0.902506221116246, "grad_norm": 2.09375, "learning_rate": 1.9471111111111114e-06, "loss": 0.3526, "step": 40620 }, { "epoch": 0.9027284038393174, "grad_norm": 2.5625, "learning_rate": 1.942666666666667e-06, "loss": 0.3605, "step": 40630 }, { "epoch": 0.9029505865623889, "grad_norm": 2.40625, "learning_rate": 1.9382222222222223e-06, "loss": 0.3665, "step": 40640 }, { "epoch": 0.9031727692854603, "grad_norm": 2.453125, "learning_rate": 1.933777777777778e-06, "loss": 0.3558, "step": 40650 }, { "epoch": 0.9033949520085318, "grad_norm": 2.234375, "learning_rate": 1.9293333333333337e-06, "loss": 0.3958, "step": 40660 }, { "epoch": 0.9036171347316033, "grad_norm": 2.921875, "learning_rate": 1.924888888888889e-06, "loss": 0.38, "step": 40670 }, { "epoch": 0.9038393174546747, "grad_norm": 2.59375, "learning_rate": 1.9204444444444447e-06, "loss": 0.3761, "step": 40680 }, { "epoch": 0.9040615001777462, "grad_norm": 2.234375, "learning_rate": 1.916e-06, "loss": 0.3434, "step": 40690 }, { "epoch": 0.9042836829008176, "grad_norm": 2.640625, "learning_rate": 1.9115555555555556e-06, "loss": 0.4032, "step": 40700 }, { "epoch": 0.9045058656238891, "grad_norm": 2.265625, "learning_rate": 1.9071111111111113e-06, "loss": 0.4014, "step": 40710 }, { "epoch": 0.9047280483469605, "grad_norm": 2.765625, "learning_rate": 1.9026666666666668e-06, "loss": 0.3608, "step": 40720 }, { "epoch": 0.904950231070032, "grad_norm": 2.453125, "learning_rate": 1.8982222222222225e-06, "loss": 0.3829, "step": 40730 }, { "epoch": 0.9051724137931034, "grad_norm": 2.6875, "learning_rate": 1.893777777777778e-06, "loss": 0.3839, "step": 40740 }, { "epoch": 0.9053945965161749, "grad_norm": 2.625, "learning_rate": 1.8893333333333335e-06, "loss": 0.4213, "step": 40750 }, { "epoch": 0.9056167792392463, "grad_norm": 2.609375, "learning_rate": 1.8848888888888892e-06, "loss": 0.411, "step": 40760 }, { "epoch": 0.9058389619623178, "grad_norm": 2.890625, "learning_rate": 1.8804444444444444e-06, "loss": 0.3776, "step": 40770 }, { "epoch": 0.9060611446853892, "grad_norm": 2.78125, "learning_rate": 1.8760000000000001e-06, "loss": 0.3827, "step": 40780 }, { "epoch": 0.9062833274084607, "grad_norm": 2.515625, "learning_rate": 1.8715555555555556e-06, "loss": 0.329, "step": 40790 }, { "epoch": 0.9065055101315321, "grad_norm": 2.40625, "learning_rate": 1.8671111111111113e-06, "loss": 0.3922, "step": 40800 }, { "epoch": 0.9067276928546036, "grad_norm": 2.40625, "learning_rate": 1.8626666666666668e-06, "loss": 0.3798, "step": 40810 }, { "epoch": 0.906949875577675, "grad_norm": 2.40625, "learning_rate": 1.8582222222222225e-06, "loss": 0.3537, "step": 40820 }, { "epoch": 0.9071720583007465, "grad_norm": 2.453125, "learning_rate": 1.853777777777778e-06, "loss": 0.3708, "step": 40830 }, { "epoch": 0.9073942410238179, "grad_norm": 2.578125, "learning_rate": 1.8493333333333336e-06, "loss": 0.3599, "step": 40840 }, { "epoch": 0.9076164237468894, "grad_norm": 2.390625, "learning_rate": 1.8448888888888891e-06, "loss": 0.3792, "step": 40850 }, { "epoch": 0.9078386064699608, "grad_norm": 2.515625, "learning_rate": 1.8404444444444446e-06, "loss": 0.4045, "step": 40860 }, { "epoch": 0.9080607891930323, "grad_norm": 2.203125, "learning_rate": 1.8360000000000003e-06, "loss": 0.3799, "step": 40870 }, { "epoch": 0.9082829719161039, "grad_norm": 3.484375, "learning_rate": 1.8315555555555556e-06, "loss": 0.3598, "step": 40880 }, { "epoch": 0.9085051546391752, "grad_norm": 2.65625, "learning_rate": 1.8271111111111113e-06, "loss": 0.3637, "step": 40890 }, { "epoch": 0.9087273373622468, "grad_norm": 2.484375, "learning_rate": 1.8226666666666667e-06, "loss": 0.3867, "step": 40900 }, { "epoch": 0.9089495200853182, "grad_norm": 2.15625, "learning_rate": 1.8182222222222224e-06, "loss": 0.3726, "step": 40910 }, { "epoch": 0.9091717028083897, "grad_norm": 2.703125, "learning_rate": 1.813777777777778e-06, "loss": 0.3596, "step": 40920 }, { "epoch": 0.909393885531461, "grad_norm": 2.421875, "learning_rate": 1.8093333333333336e-06, "loss": 0.368, "step": 40930 }, { "epoch": 0.9096160682545326, "grad_norm": 2.59375, "learning_rate": 1.804888888888889e-06, "loss": 0.3872, "step": 40940 }, { "epoch": 0.909838250977604, "grad_norm": 2.984375, "learning_rate": 1.8004444444444446e-06, "loss": 0.4331, "step": 40950 }, { "epoch": 0.9100604337006755, "grad_norm": 2.296875, "learning_rate": 1.7960000000000003e-06, "loss": 0.396, "step": 40960 }, { "epoch": 0.9102826164237469, "grad_norm": 2.640625, "learning_rate": 1.7915555555555557e-06, "loss": 0.4018, "step": 40970 }, { "epoch": 0.9105047991468184, "grad_norm": 3.0, "learning_rate": 1.7871111111111112e-06, "loss": 0.3579, "step": 40980 }, { "epoch": 0.9107269818698898, "grad_norm": 2.640625, "learning_rate": 1.7826666666666667e-06, "loss": 0.3482, "step": 40990 }, { "epoch": 0.9109491645929613, "grad_norm": 2.625, "learning_rate": 1.7782222222222224e-06, "loss": 0.3721, "step": 41000 }, { "epoch": 0.9111713473160327, "grad_norm": 2.390625, "learning_rate": 1.7737777777777779e-06, "loss": 0.3953, "step": 41010 }, { "epoch": 0.9113935300391042, "grad_norm": 3.078125, "learning_rate": 1.7693333333333336e-06, "loss": 0.3973, "step": 41020 }, { "epoch": 0.9116157127621756, "grad_norm": 2.203125, "learning_rate": 1.764888888888889e-06, "loss": 0.4075, "step": 41030 }, { "epoch": 0.9118378954852471, "grad_norm": 2.4375, "learning_rate": 1.7604444444444445e-06, "loss": 0.3856, "step": 41040 }, { "epoch": 0.9120600782083185, "grad_norm": 2.75, "learning_rate": 1.7560000000000002e-06, "loss": 0.4153, "step": 41050 }, { "epoch": 0.91228226093139, "grad_norm": 2.3125, "learning_rate": 1.7515555555555557e-06, "loss": 0.3414, "step": 41060 }, { "epoch": 0.9125044436544615, "grad_norm": 2.453125, "learning_rate": 1.7471111111111114e-06, "loss": 0.3496, "step": 41070 }, { "epoch": 0.9127266263775329, "grad_norm": 2.328125, "learning_rate": 1.7426666666666667e-06, "loss": 0.3977, "step": 41080 }, { "epoch": 0.9129488091006044, "grad_norm": 2.28125, "learning_rate": 1.7382222222222223e-06, "loss": 0.3732, "step": 41090 }, { "epoch": 0.9131709918236758, "grad_norm": 2.5, "learning_rate": 1.7337777777777778e-06, "loss": 0.4004, "step": 41100 }, { "epoch": 0.9133931745467473, "grad_norm": 2.65625, "learning_rate": 1.7293333333333335e-06, "loss": 0.3833, "step": 41110 }, { "epoch": 0.9136153572698187, "grad_norm": 1.9921875, "learning_rate": 1.724888888888889e-06, "loss": 0.3491, "step": 41120 }, { "epoch": 0.9138375399928902, "grad_norm": 2.40625, "learning_rate": 1.7204444444444445e-06, "loss": 0.3881, "step": 41130 }, { "epoch": 0.9140597227159616, "grad_norm": 2.8125, "learning_rate": 1.7160000000000002e-06, "loss": 0.3619, "step": 41140 }, { "epoch": 0.9142819054390331, "grad_norm": 2.5, "learning_rate": 1.7115555555555557e-06, "loss": 0.3969, "step": 41150 }, { "epoch": 0.9145040881621045, "grad_norm": 2.46875, "learning_rate": 1.7071111111111113e-06, "loss": 0.3802, "step": 41160 }, { "epoch": 0.914726270885176, "grad_norm": 2.765625, "learning_rate": 1.7026666666666668e-06, "loss": 0.4202, "step": 41170 }, { "epoch": 0.9149484536082474, "grad_norm": 2.34375, "learning_rate": 1.6982222222222225e-06, "loss": 0.354, "step": 41180 }, { "epoch": 0.9151706363313189, "grad_norm": 2.546875, "learning_rate": 1.6937777777777778e-06, "loss": 0.371, "step": 41190 }, { "epoch": 0.9153928190543903, "grad_norm": 2.703125, "learning_rate": 1.6893333333333335e-06, "loss": 0.3855, "step": 41200 }, { "epoch": 0.9156150017774618, "grad_norm": 2.625, "learning_rate": 1.684888888888889e-06, "loss": 0.4043, "step": 41210 }, { "epoch": 0.9158371845005332, "grad_norm": 2.875, "learning_rate": 1.6804444444444444e-06, "loss": 0.3803, "step": 41220 }, { "epoch": 0.9160593672236047, "grad_norm": 2.296875, "learning_rate": 1.6760000000000001e-06, "loss": 0.4139, "step": 41230 }, { "epoch": 0.9162815499466761, "grad_norm": 2.328125, "learning_rate": 1.6715555555555556e-06, "loss": 0.3653, "step": 41240 }, { "epoch": 0.9165037326697476, "grad_norm": 2.84375, "learning_rate": 1.6671111111111113e-06, "loss": 0.3962, "step": 41250 }, { "epoch": 0.916725915392819, "grad_norm": 2.421875, "learning_rate": 1.6626666666666668e-06, "loss": 0.3863, "step": 41260 }, { "epoch": 0.9169480981158905, "grad_norm": 2.75, "learning_rate": 1.6582222222222225e-06, "loss": 0.3965, "step": 41270 }, { "epoch": 0.917170280838962, "grad_norm": 2.5625, "learning_rate": 1.653777777777778e-06, "loss": 0.3669, "step": 41280 }, { "epoch": 0.9173924635620334, "grad_norm": 2.390625, "learning_rate": 1.6493333333333334e-06, "loss": 0.4242, "step": 41290 }, { "epoch": 0.9176146462851049, "grad_norm": 2.296875, "learning_rate": 1.644888888888889e-06, "loss": 0.3731, "step": 41300 }, { "epoch": 0.9178368290081763, "grad_norm": 2.5, "learning_rate": 1.6404444444444446e-06, "loss": 0.3651, "step": 41310 }, { "epoch": 0.9180590117312478, "grad_norm": 2.421875, "learning_rate": 1.636e-06, "loss": 0.3503, "step": 41320 }, { "epoch": 0.9182811944543192, "grad_norm": 2.796875, "learning_rate": 1.6315555555555556e-06, "loss": 0.3702, "step": 41330 }, { "epoch": 0.9185033771773907, "grad_norm": 2.71875, "learning_rate": 1.6271111111111113e-06, "loss": 0.3517, "step": 41340 }, { "epoch": 0.9187255599004621, "grad_norm": 2.578125, "learning_rate": 1.6226666666666667e-06, "loss": 0.3837, "step": 41350 }, { "epoch": 0.9189477426235336, "grad_norm": 2.453125, "learning_rate": 1.6182222222222224e-06, "loss": 0.3561, "step": 41360 }, { "epoch": 0.919169925346605, "grad_norm": 2.359375, "learning_rate": 1.613777777777778e-06, "loss": 0.3496, "step": 41370 }, { "epoch": 0.9193921080696765, "grad_norm": 2.78125, "learning_rate": 1.6093333333333336e-06, "loss": 0.4501, "step": 41380 }, { "epoch": 0.9196142907927479, "grad_norm": 2.640625, "learning_rate": 1.6048888888888889e-06, "loss": 0.3776, "step": 41390 }, { "epoch": 0.9198364735158194, "grad_norm": 2.859375, "learning_rate": 1.6004444444444446e-06, "loss": 0.3998, "step": 41400 }, { "epoch": 0.9200586562388908, "grad_norm": 2.859375, "learning_rate": 1.596e-06, "loss": 0.3667, "step": 41410 }, { "epoch": 0.9202808389619623, "grad_norm": 2.5, "learning_rate": 1.5915555555555555e-06, "loss": 0.4079, "step": 41420 }, { "epoch": 0.9205030216850337, "grad_norm": 2.5625, "learning_rate": 1.5871111111111112e-06, "loss": 0.3854, "step": 41430 }, { "epoch": 0.9207252044081052, "grad_norm": 2.4375, "learning_rate": 1.5826666666666667e-06, "loss": 0.383, "step": 41440 }, { "epoch": 0.9209473871311766, "grad_norm": 2.8125, "learning_rate": 1.5782222222222224e-06, "loss": 0.3772, "step": 41450 }, { "epoch": 0.9211695698542481, "grad_norm": 2.515625, "learning_rate": 1.5737777777777779e-06, "loss": 0.3157, "step": 41460 }, { "epoch": 0.9213917525773195, "grad_norm": 2.578125, "learning_rate": 1.5693333333333336e-06, "loss": 0.3859, "step": 41470 }, { "epoch": 0.921613935300391, "grad_norm": 2.953125, "learning_rate": 1.564888888888889e-06, "loss": 0.3857, "step": 41480 }, { "epoch": 0.9218361180234625, "grad_norm": 2.203125, "learning_rate": 1.5604444444444447e-06, "loss": 0.3758, "step": 41490 }, { "epoch": 0.9220583007465339, "grad_norm": 2.328125, "learning_rate": 1.556e-06, "loss": 0.361, "step": 41500 }, { "epoch": 0.9222804834696054, "grad_norm": 2.546875, "learning_rate": 1.5515555555555555e-06, "loss": 0.4005, "step": 41510 }, { "epoch": 0.9225026661926768, "grad_norm": 3.265625, "learning_rate": 1.5471111111111112e-06, "loss": 0.3538, "step": 41520 }, { "epoch": 0.9227248489157484, "grad_norm": 2.8125, "learning_rate": 1.5426666666666667e-06, "loss": 0.4144, "step": 41530 }, { "epoch": 0.9229470316388197, "grad_norm": 2.390625, "learning_rate": 1.5382222222222224e-06, "loss": 0.3523, "step": 41540 }, { "epoch": 0.9231692143618913, "grad_norm": 2.265625, "learning_rate": 1.5337777777777778e-06, "loss": 0.3618, "step": 41550 }, { "epoch": 0.9233913970849627, "grad_norm": 2.53125, "learning_rate": 1.5293333333333335e-06, "loss": 0.3997, "step": 41560 }, { "epoch": 0.9236135798080342, "grad_norm": 2.6875, "learning_rate": 1.524888888888889e-06, "loss": 0.366, "step": 41570 }, { "epoch": 0.9238357625311056, "grad_norm": 2.625, "learning_rate": 1.5204444444444447e-06, "loss": 0.4361, "step": 41580 }, { "epoch": 0.9240579452541771, "grad_norm": 2.59375, "learning_rate": 1.5160000000000002e-06, "loss": 0.392, "step": 41590 }, { "epoch": 0.9242801279772485, "grad_norm": 2.390625, "learning_rate": 1.5115555555555554e-06, "loss": 0.4082, "step": 41600 }, { "epoch": 0.92450231070032, "grad_norm": 2.34375, "learning_rate": 1.5071111111111111e-06, "loss": 0.4094, "step": 41610 }, { "epoch": 0.9247244934233914, "grad_norm": 2.359375, "learning_rate": 1.5026666666666666e-06, "loss": 0.3583, "step": 41620 }, { "epoch": 0.9249466761464629, "grad_norm": 2.875, "learning_rate": 1.4982222222222223e-06, "loss": 0.3857, "step": 41630 }, { "epoch": 0.9251688588695343, "grad_norm": 2.59375, "learning_rate": 1.4937777777777778e-06, "loss": 0.3817, "step": 41640 }, { "epoch": 0.9253910415926058, "grad_norm": 2.21875, "learning_rate": 1.4893333333333335e-06, "loss": 0.3696, "step": 41650 }, { "epoch": 0.9256132243156772, "grad_norm": 3.0, "learning_rate": 1.484888888888889e-06, "loss": 0.3652, "step": 41660 }, { "epoch": 0.9258354070387487, "grad_norm": 2.859375, "learning_rate": 1.4804444444444447e-06, "loss": 0.4169, "step": 41670 }, { "epoch": 0.9260575897618201, "grad_norm": 2.6875, "learning_rate": 1.4760000000000001e-06, "loss": 0.369, "step": 41680 }, { "epoch": 0.9262797724848916, "grad_norm": 2.46875, "learning_rate": 1.4715555555555558e-06, "loss": 0.3739, "step": 41690 }, { "epoch": 0.9265019552079631, "grad_norm": 2.8125, "learning_rate": 1.467111111111111e-06, "loss": 0.3888, "step": 41700 }, { "epoch": 0.9267241379310345, "grad_norm": 2.515625, "learning_rate": 1.4626666666666666e-06, "loss": 0.3685, "step": 41710 }, { "epoch": 0.926946320654106, "grad_norm": 2.484375, "learning_rate": 1.4582222222222223e-06, "loss": 0.4017, "step": 41720 }, { "epoch": 0.9271685033771774, "grad_norm": 2.59375, "learning_rate": 1.4537777777777778e-06, "loss": 0.358, "step": 41730 }, { "epoch": 0.9273906861002489, "grad_norm": 2.15625, "learning_rate": 1.4493333333333334e-06, "loss": 0.3853, "step": 41740 }, { "epoch": 0.9276128688233203, "grad_norm": 2.359375, "learning_rate": 1.444888888888889e-06, "loss": 0.3745, "step": 41750 }, { "epoch": 0.9278350515463918, "grad_norm": 1.90625, "learning_rate": 1.4404444444444446e-06, "loss": 0.3652, "step": 41760 }, { "epoch": 0.9280572342694632, "grad_norm": 2.5625, "learning_rate": 1.436e-06, "loss": 0.3897, "step": 41770 }, { "epoch": 0.9282794169925347, "grad_norm": 2.40625, "learning_rate": 1.4315555555555558e-06, "loss": 0.3989, "step": 41780 }, { "epoch": 0.9285015997156061, "grad_norm": 2.421875, "learning_rate": 1.4271111111111113e-06, "loss": 0.3566, "step": 41790 }, { "epoch": 0.9287237824386776, "grad_norm": 2.328125, "learning_rate": 1.422666666666667e-06, "loss": 0.4067, "step": 41800 }, { "epoch": 0.928945965161749, "grad_norm": 2.59375, "learning_rate": 1.4182222222222222e-06, "loss": 0.3799, "step": 41810 }, { "epoch": 0.9291681478848205, "grad_norm": 2.546875, "learning_rate": 1.4137777777777777e-06, "loss": 0.3472, "step": 41820 }, { "epoch": 0.9293903306078919, "grad_norm": 2.46875, "learning_rate": 1.4093333333333334e-06, "loss": 0.3585, "step": 41830 }, { "epoch": 0.9296125133309634, "grad_norm": 2.65625, "learning_rate": 1.4048888888888889e-06, "loss": 0.3858, "step": 41840 }, { "epoch": 0.9298346960540348, "grad_norm": 2.703125, "learning_rate": 1.4004444444444446e-06, "loss": 0.3849, "step": 41850 }, { "epoch": 0.9300568787771063, "grad_norm": 2.390625, "learning_rate": 1.396e-06, "loss": 0.3503, "step": 41860 }, { "epoch": 0.9302790615001777, "grad_norm": 2.34375, "learning_rate": 1.3915555555555558e-06, "loss": 0.3858, "step": 41870 }, { "epoch": 0.9305012442232492, "grad_norm": 2.4375, "learning_rate": 1.3871111111111112e-06, "loss": 0.3878, "step": 41880 }, { "epoch": 0.9307234269463207, "grad_norm": 2.6875, "learning_rate": 1.382666666666667e-06, "loss": 0.3741, "step": 41890 }, { "epoch": 0.9309456096693921, "grad_norm": 2.140625, "learning_rate": 1.3782222222222224e-06, "loss": 0.3998, "step": 41900 }, { "epoch": 0.9311677923924636, "grad_norm": 2.765625, "learning_rate": 1.3737777777777777e-06, "loss": 0.3897, "step": 41910 }, { "epoch": 0.931389975115535, "grad_norm": 2.59375, "learning_rate": 1.3693333333333334e-06, "loss": 0.3934, "step": 41920 }, { "epoch": 0.9316121578386065, "grad_norm": 1.9765625, "learning_rate": 1.3648888888888888e-06, "loss": 0.3852, "step": 41930 }, { "epoch": 0.9318343405616779, "grad_norm": 2.828125, "learning_rate": 1.3604444444444445e-06, "loss": 0.4274, "step": 41940 }, { "epoch": 0.9320565232847494, "grad_norm": 2.546875, "learning_rate": 1.356e-06, "loss": 0.3154, "step": 41950 }, { "epoch": 0.9322787060078208, "grad_norm": 2.5625, "learning_rate": 1.3515555555555557e-06, "loss": 0.4125, "step": 41960 }, { "epoch": 0.9325008887308923, "grad_norm": 2.640625, "learning_rate": 1.3471111111111112e-06, "loss": 0.3975, "step": 41970 }, { "epoch": 0.9327230714539637, "grad_norm": 2.75, "learning_rate": 1.3426666666666669e-06, "loss": 0.3655, "step": 41980 }, { "epoch": 0.9329452541770352, "grad_norm": 2.625, "learning_rate": 1.3382222222222224e-06, "loss": 0.4096, "step": 41990 }, { "epoch": 0.9331674369001066, "grad_norm": 2.5625, "learning_rate": 1.333777777777778e-06, "loss": 0.3845, "step": 42000 }, { "epoch": 0.9333896196231781, "grad_norm": 2.34375, "learning_rate": 1.3293333333333333e-06, "loss": 0.3792, "step": 42010 }, { "epoch": 0.9336118023462495, "grad_norm": 2.578125, "learning_rate": 1.3248888888888888e-06, "loss": 0.3942, "step": 42020 }, { "epoch": 0.933833985069321, "grad_norm": 2.859375, "learning_rate": 1.3204444444444445e-06, "loss": 0.39, "step": 42030 }, { "epoch": 0.9340561677923924, "grad_norm": 2.890625, "learning_rate": 1.316e-06, "loss": 0.3978, "step": 42040 }, { "epoch": 0.9342783505154639, "grad_norm": 2.5, "learning_rate": 1.3115555555555557e-06, "loss": 0.3792, "step": 42050 }, { "epoch": 0.9345005332385353, "grad_norm": 2.734375, "learning_rate": 1.3071111111111112e-06, "loss": 0.3787, "step": 42060 }, { "epoch": 0.9347227159616068, "grad_norm": 2.5625, "learning_rate": 1.3026666666666668e-06, "loss": 0.3818, "step": 42070 }, { "epoch": 0.9349448986846782, "grad_norm": 2.703125, "learning_rate": 1.2982222222222223e-06, "loss": 0.3714, "step": 42080 }, { "epoch": 0.9351670814077497, "grad_norm": 2.5, "learning_rate": 1.293777777777778e-06, "loss": 0.3789, "step": 42090 }, { "epoch": 0.9353892641308212, "grad_norm": 3.234375, "learning_rate": 1.2893333333333335e-06, "loss": 0.358, "step": 42100 }, { "epoch": 0.9356114468538926, "grad_norm": 2.390625, "learning_rate": 1.2848888888888892e-06, "loss": 0.3733, "step": 42110 }, { "epoch": 0.9358336295769641, "grad_norm": 2.3125, "learning_rate": 1.2804444444444445e-06, "loss": 0.3934, "step": 42120 }, { "epoch": 0.9360558123000355, "grad_norm": 2.59375, "learning_rate": 1.276e-06, "loss": 0.3869, "step": 42130 }, { "epoch": 0.936277995023107, "grad_norm": 2.234375, "learning_rate": 1.2715555555555556e-06, "loss": 0.4351, "step": 42140 }, { "epoch": 0.9365001777461784, "grad_norm": 2.25, "learning_rate": 1.2671111111111111e-06, "loss": 0.3776, "step": 42150 }, { "epoch": 0.93672236046925, "grad_norm": 2.34375, "learning_rate": 1.2626666666666668e-06, "loss": 0.3752, "step": 42160 }, { "epoch": 0.9369445431923213, "grad_norm": 2.796875, "learning_rate": 1.2582222222222223e-06, "loss": 0.3555, "step": 42170 }, { "epoch": 0.9371667259153929, "grad_norm": 2.328125, "learning_rate": 1.253777777777778e-06, "loss": 0.394, "step": 42180 }, { "epoch": 0.9373889086384642, "grad_norm": 2.96875, "learning_rate": 1.2493333333333335e-06, "loss": 0.4158, "step": 42190 }, { "epoch": 0.9376110913615358, "grad_norm": 2.15625, "learning_rate": 1.244888888888889e-06, "loss": 0.3608, "step": 42200 }, { "epoch": 0.9378332740846071, "grad_norm": 2.296875, "learning_rate": 1.2404444444444446e-06, "loss": 0.3904, "step": 42210 }, { "epoch": 0.9380554568076787, "grad_norm": 2.40625, "learning_rate": 1.2360000000000001e-06, "loss": 0.4005, "step": 42220 }, { "epoch": 0.93827763953075, "grad_norm": 3.078125, "learning_rate": 1.2315555555555558e-06, "loss": 0.3674, "step": 42230 }, { "epoch": 0.9384998222538216, "grad_norm": 3.015625, "learning_rate": 1.2271111111111113e-06, "loss": 0.3818, "step": 42240 }, { "epoch": 0.938722004976893, "grad_norm": 2.6875, "learning_rate": 1.2226666666666668e-06, "loss": 0.3794, "step": 42250 }, { "epoch": 0.9389441876999645, "grad_norm": 2.734375, "learning_rate": 1.2182222222222222e-06, "loss": 0.411, "step": 42260 }, { "epoch": 0.9391663704230359, "grad_norm": 2.53125, "learning_rate": 1.213777777777778e-06, "loss": 0.3834, "step": 42270 }, { "epoch": 0.9393885531461074, "grad_norm": 2.484375, "learning_rate": 1.2093333333333334e-06, "loss": 0.3438, "step": 42280 }, { "epoch": 0.9396107358691788, "grad_norm": 2.25, "learning_rate": 1.2048888888888891e-06, "loss": 0.4101, "step": 42290 }, { "epoch": 0.9398329185922503, "grad_norm": 2.53125, "learning_rate": 1.2004444444444446e-06, "loss": 0.4113, "step": 42300 }, { "epoch": 0.9400551013153218, "grad_norm": 2.0625, "learning_rate": 1.196e-06, "loss": 0.3796, "step": 42310 }, { "epoch": 0.9402772840383932, "grad_norm": 2.625, "learning_rate": 1.1915555555555558e-06, "loss": 0.3958, "step": 42320 }, { "epoch": 0.9404994667614647, "grad_norm": 2.359375, "learning_rate": 1.1871111111111112e-06, "loss": 0.4032, "step": 42330 }, { "epoch": 0.9407216494845361, "grad_norm": 2.671875, "learning_rate": 1.1826666666666667e-06, "loss": 0.3915, "step": 42340 }, { "epoch": 0.9409438322076076, "grad_norm": 2.453125, "learning_rate": 1.1782222222222222e-06, "loss": 0.4121, "step": 42350 }, { "epoch": 0.941166014930679, "grad_norm": 2.53125, "learning_rate": 1.173777777777778e-06, "loss": 0.3798, "step": 42360 }, { "epoch": 0.9413881976537505, "grad_norm": 2.453125, "learning_rate": 1.1693333333333334e-06, "loss": 0.3774, "step": 42370 }, { "epoch": 0.9416103803768219, "grad_norm": 2.515625, "learning_rate": 1.164888888888889e-06, "loss": 0.3736, "step": 42380 }, { "epoch": 0.9418325630998934, "grad_norm": 2.625, "learning_rate": 1.1604444444444445e-06, "loss": 0.3654, "step": 42390 }, { "epoch": 0.9420547458229648, "grad_norm": 2.515625, "learning_rate": 1.156e-06, "loss": 0.3836, "step": 42400 }, { "epoch": 0.9422769285460363, "grad_norm": 2.5625, "learning_rate": 1.1515555555555557e-06, "loss": 0.3899, "step": 42410 }, { "epoch": 0.9424991112691077, "grad_norm": 2.421875, "learning_rate": 1.1471111111111112e-06, "loss": 0.3751, "step": 42420 }, { "epoch": 0.9427212939921792, "grad_norm": 2.546875, "learning_rate": 1.1426666666666667e-06, "loss": 0.3956, "step": 42430 }, { "epoch": 0.9429434767152506, "grad_norm": 2.71875, "learning_rate": 1.1382222222222224e-06, "loss": 0.3709, "step": 42440 }, { "epoch": 0.9431656594383221, "grad_norm": 2.46875, "learning_rate": 1.1337777777777779e-06, "loss": 0.3564, "step": 42450 }, { "epoch": 0.9433878421613935, "grad_norm": 2.4375, "learning_rate": 1.1293333333333333e-06, "loss": 0.3594, "step": 42460 }, { "epoch": 0.943610024884465, "grad_norm": 2.546875, "learning_rate": 1.124888888888889e-06, "loss": 0.3577, "step": 42470 }, { "epoch": 0.9438322076075364, "grad_norm": 1.921875, "learning_rate": 1.1204444444444445e-06, "loss": 0.3817, "step": 42480 }, { "epoch": 0.9440543903306079, "grad_norm": 2.640625, "learning_rate": 1.1160000000000002e-06, "loss": 0.3739, "step": 42490 }, { "epoch": 0.9442765730536793, "grad_norm": 3.0625, "learning_rate": 1.1115555555555557e-06, "loss": 0.376, "step": 42500 }, { "epoch": 0.9444987557767508, "grad_norm": 2.5625, "learning_rate": 1.1071111111111112e-06, "loss": 0.4159, "step": 42510 }, { "epoch": 0.9447209384998223, "grad_norm": 2.546875, "learning_rate": 1.1026666666666666e-06, "loss": 0.3683, "step": 42520 }, { "epoch": 0.9449431212228937, "grad_norm": 2.421875, "learning_rate": 1.0982222222222223e-06, "loss": 0.3693, "step": 42530 }, { "epoch": 0.9451653039459652, "grad_norm": 2.578125, "learning_rate": 1.0937777777777778e-06, "loss": 0.4035, "step": 42540 }, { "epoch": 0.9453874866690366, "grad_norm": 2.515625, "learning_rate": 1.0893333333333333e-06, "loss": 0.3564, "step": 42550 }, { "epoch": 0.9456096693921081, "grad_norm": 2.78125, "learning_rate": 1.084888888888889e-06, "loss": 0.3892, "step": 42560 }, { "epoch": 0.9458318521151795, "grad_norm": 2.765625, "learning_rate": 1.0804444444444445e-06, "loss": 0.4029, "step": 42570 }, { "epoch": 0.946054034838251, "grad_norm": 2.71875, "learning_rate": 1.0760000000000002e-06, "loss": 0.3823, "step": 42580 }, { "epoch": 0.9462762175613224, "grad_norm": 2.1875, "learning_rate": 1.0715555555555556e-06, "loss": 0.3705, "step": 42590 }, { "epoch": 0.9464984002843939, "grad_norm": 2.765625, "learning_rate": 1.0671111111111113e-06, "loss": 0.3629, "step": 42600 }, { "epoch": 0.9467205830074653, "grad_norm": 2.46875, "learning_rate": 1.0626666666666668e-06, "loss": 0.3675, "step": 42610 }, { "epoch": 0.9469427657305368, "grad_norm": 2.46875, "learning_rate": 1.0582222222222223e-06, "loss": 0.39, "step": 42620 }, { "epoch": 0.9471649484536082, "grad_norm": 2.734375, "learning_rate": 1.0537777777777778e-06, "loss": 0.3632, "step": 42630 }, { "epoch": 0.9473871311766797, "grad_norm": 2.359375, "learning_rate": 1.0493333333333335e-06, "loss": 0.3646, "step": 42640 }, { "epoch": 0.9476093138997511, "grad_norm": 2.375, "learning_rate": 1.044888888888889e-06, "loss": 0.4065, "step": 42650 }, { "epoch": 0.9478314966228226, "grad_norm": 2.890625, "learning_rate": 1.0404444444444444e-06, "loss": 0.3758, "step": 42660 }, { "epoch": 0.948053679345894, "grad_norm": 2.53125, "learning_rate": 1.0360000000000001e-06, "loss": 0.3788, "step": 42670 }, { "epoch": 0.9482758620689655, "grad_norm": 3.046875, "learning_rate": 1.0315555555555556e-06, "loss": 0.3928, "step": 42680 }, { "epoch": 0.9484980447920369, "grad_norm": 2.453125, "learning_rate": 1.0271111111111113e-06, "loss": 0.3944, "step": 42690 }, { "epoch": 0.9487202275151084, "grad_norm": 2.546875, "learning_rate": 1.0226666666666668e-06, "loss": 0.3966, "step": 42700 }, { "epoch": 0.9489424102381799, "grad_norm": 2.125, "learning_rate": 1.0182222222222223e-06, "loss": 0.3592, "step": 42710 }, { "epoch": 0.9491645929612513, "grad_norm": 2.203125, "learning_rate": 1.0137777777777777e-06, "loss": 0.3896, "step": 42720 }, { "epoch": 0.9493867756843228, "grad_norm": 2.640625, "learning_rate": 1.0093333333333334e-06, "loss": 0.3657, "step": 42730 }, { "epoch": 0.9496089584073942, "grad_norm": 2.0625, "learning_rate": 1.004888888888889e-06, "loss": 0.3815, "step": 42740 }, { "epoch": 0.9498311411304657, "grad_norm": 3.015625, "learning_rate": 1.0004444444444446e-06, "loss": 0.3694, "step": 42750 }, { "epoch": 0.9500533238535371, "grad_norm": 2.234375, "learning_rate": 9.96e-07, "loss": 0.3747, "step": 42760 }, { "epoch": 0.9502755065766086, "grad_norm": 2.546875, "learning_rate": 9.915555555555556e-07, "loss": 0.3493, "step": 42770 }, { "epoch": 0.95049768929968, "grad_norm": 2.578125, "learning_rate": 9.871111111111113e-07, "loss": 0.419, "step": 42780 }, { "epoch": 0.9507198720227515, "grad_norm": 2.75, "learning_rate": 9.826666666666667e-07, "loss": 0.3941, "step": 42790 }, { "epoch": 0.9509420547458229, "grad_norm": 2.46875, "learning_rate": 9.782222222222224e-07, "loss": 0.3739, "step": 42800 }, { "epoch": 0.9511642374688944, "grad_norm": 2.8125, "learning_rate": 9.737777777777777e-07, "loss": 0.3549, "step": 42810 }, { "epoch": 0.9513864201919658, "grad_norm": 2.546875, "learning_rate": 9.693333333333334e-07, "loss": 0.403, "step": 42820 }, { "epoch": 0.9516086029150373, "grad_norm": 2.46875, "learning_rate": 9.648888888888889e-07, "loss": 0.3846, "step": 42830 }, { "epoch": 0.9518307856381087, "grad_norm": 2.71875, "learning_rate": 9.604444444444446e-07, "loss": 0.3849, "step": 42840 }, { "epoch": 0.9520529683611803, "grad_norm": 2.453125, "learning_rate": 9.56e-07, "loss": 0.3603, "step": 42850 }, { "epoch": 0.9522751510842516, "grad_norm": 2.546875, "learning_rate": 9.515555555555555e-07, "loss": 0.3794, "step": 42860 }, { "epoch": 0.9524973338073232, "grad_norm": 2.875, "learning_rate": 9.471111111111111e-07, "loss": 0.4247, "step": 42870 }, { "epoch": 0.9527195165303946, "grad_norm": 2.375, "learning_rate": 9.426666666666667e-07, "loss": 0.3788, "step": 42880 }, { "epoch": 0.9529416992534661, "grad_norm": 2.71875, "learning_rate": 9.382222222222223e-07, "loss": 0.3558, "step": 42890 }, { "epoch": 0.9531638819765375, "grad_norm": 2.421875, "learning_rate": 9.337777777777779e-07, "loss": 0.3491, "step": 42900 }, { "epoch": 0.953386064699609, "grad_norm": 2.5625, "learning_rate": 9.293333333333334e-07, "loss": 0.388, "step": 42910 }, { "epoch": 0.9536082474226805, "grad_norm": 2.609375, "learning_rate": 9.248888888888889e-07, "loss": 0.3641, "step": 42920 }, { "epoch": 0.9538304301457519, "grad_norm": 2.640625, "learning_rate": 9.204444444444445e-07, "loss": 0.3852, "step": 42930 }, { "epoch": 0.9540526128688234, "grad_norm": 2.140625, "learning_rate": 9.160000000000001e-07, "loss": 0.3894, "step": 42940 }, { "epoch": 0.9542747955918948, "grad_norm": 2.640625, "learning_rate": 9.115555555555557e-07, "loss": 0.39, "step": 42950 }, { "epoch": 0.9544969783149663, "grad_norm": 2.484375, "learning_rate": 9.071111111111113e-07, "loss": 0.328, "step": 42960 }, { "epoch": 0.9547191610380377, "grad_norm": 2.78125, "learning_rate": 9.026666666666666e-07, "loss": 0.3202, "step": 42970 }, { "epoch": 0.9549413437611092, "grad_norm": 2.296875, "learning_rate": 8.982222222222222e-07, "loss": 0.3493, "step": 42980 }, { "epoch": 0.9551635264841806, "grad_norm": 2.265625, "learning_rate": 8.937777777777778e-07, "loss": 0.3455, "step": 42990 }, { "epoch": 0.9553857092072521, "grad_norm": 2.609375, "learning_rate": 8.893333333333334e-07, "loss": 0.3665, "step": 43000 }, { "epoch": 0.9556078919303235, "grad_norm": 2.0625, "learning_rate": 8.84888888888889e-07, "loss": 0.3596, "step": 43010 }, { "epoch": 0.955830074653395, "grad_norm": 2.9375, "learning_rate": 8.804444444444445e-07, "loss": 0.4035, "step": 43020 }, { "epoch": 0.9560522573764664, "grad_norm": 2.875, "learning_rate": 8.760000000000001e-07, "loss": 0.4075, "step": 43030 }, { "epoch": 0.9562744400995379, "grad_norm": 2.328125, "learning_rate": 8.715555555555556e-07, "loss": 0.3831, "step": 43040 }, { "epoch": 0.9564966228226093, "grad_norm": 3.078125, "learning_rate": 8.671111111111112e-07, "loss": 0.3732, "step": 43050 }, { "epoch": 0.9567188055456808, "grad_norm": 2.234375, "learning_rate": 8.626666666666668e-07, "loss": 0.3609, "step": 43060 }, { "epoch": 0.9569409882687522, "grad_norm": 3.0, "learning_rate": 8.582222222222222e-07, "loss": 0.4022, "step": 43070 }, { "epoch": 0.9571631709918237, "grad_norm": 2.515625, "learning_rate": 8.537777777777778e-07, "loss": 0.3679, "step": 43080 }, { "epoch": 0.9573853537148951, "grad_norm": 2.296875, "learning_rate": 8.493333333333334e-07, "loss": 0.3745, "step": 43090 }, { "epoch": 0.9576075364379666, "grad_norm": 2.78125, "learning_rate": 8.44888888888889e-07, "loss": 0.4094, "step": 43100 }, { "epoch": 0.957829719161038, "grad_norm": 2.90625, "learning_rate": 8.404444444444445e-07, "loss": 0.3889, "step": 43110 }, { "epoch": 0.9580519018841095, "grad_norm": 2.453125, "learning_rate": 8.36e-07, "loss": 0.3631, "step": 43120 }, { "epoch": 0.958274084607181, "grad_norm": 2.28125, "learning_rate": 8.315555555555556e-07, "loss": 0.3851, "step": 43130 }, { "epoch": 0.9584962673302524, "grad_norm": 2.53125, "learning_rate": 8.271111111111112e-07, "loss": 0.3862, "step": 43140 }, { "epoch": 0.9587184500533239, "grad_norm": 2.703125, "learning_rate": 8.226666666666668e-07, "loss": 0.3828, "step": 43150 }, { "epoch": 0.9589406327763953, "grad_norm": 2.734375, "learning_rate": 8.182222222222224e-07, "loss": 0.3638, "step": 43160 }, { "epoch": 0.9591628154994668, "grad_norm": 2.640625, "learning_rate": 8.137777777777777e-07, "loss": 0.4227, "step": 43170 }, { "epoch": 0.9593849982225382, "grad_norm": 2.53125, "learning_rate": 8.093333333333333e-07, "loss": 0.3688, "step": 43180 }, { "epoch": 0.9596071809456097, "grad_norm": 2.625, "learning_rate": 8.048888888888889e-07, "loss": 0.3755, "step": 43190 }, { "epoch": 0.9598293636686811, "grad_norm": 2.421875, "learning_rate": 8.004444444444445e-07, "loss": 0.3626, "step": 43200 }, { "epoch": 0.9600515463917526, "grad_norm": 2.375, "learning_rate": 7.960000000000001e-07, "loss": 0.3469, "step": 43210 }, { "epoch": 0.960273729114824, "grad_norm": 2.421875, "learning_rate": 7.915555555555557e-07, "loss": 0.3896, "step": 43220 }, { "epoch": 0.9604959118378955, "grad_norm": 2.78125, "learning_rate": 7.871111111111112e-07, "loss": 0.3818, "step": 43230 }, { "epoch": 0.9607180945609669, "grad_norm": 2.796875, "learning_rate": 7.826666666666667e-07, "loss": 0.3521, "step": 43240 }, { "epoch": 0.9609402772840384, "grad_norm": 2.8125, "learning_rate": 7.782222222222223e-07, "loss": 0.3824, "step": 43250 }, { "epoch": 0.9611624600071098, "grad_norm": 2.734375, "learning_rate": 7.737777777777779e-07, "loss": 0.3383, "step": 43260 }, { "epoch": 0.9613846427301813, "grad_norm": 2.328125, "learning_rate": 7.693333333333335e-07, "loss": 0.3442, "step": 43270 }, { "epoch": 0.9616068254532527, "grad_norm": 2.375, "learning_rate": 7.648888888888889e-07, "loss": 0.3612, "step": 43280 }, { "epoch": 0.9618290081763242, "grad_norm": 2.765625, "learning_rate": 7.604444444444445e-07, "loss": 0.3635, "step": 43290 }, { "epoch": 0.9620511908993956, "grad_norm": 3.0, "learning_rate": 7.56e-07, "loss": 0.3861, "step": 43300 }, { "epoch": 0.9622733736224671, "grad_norm": 2.59375, "learning_rate": 7.515555555555556e-07, "loss": 0.3739, "step": 43310 }, { "epoch": 0.9624955563455385, "grad_norm": 1.9609375, "learning_rate": 7.471111111111112e-07, "loss": 0.336, "step": 43320 }, { "epoch": 0.96271773906861, "grad_norm": 2.4375, "learning_rate": 7.426666666666667e-07, "loss": 0.3691, "step": 43330 }, { "epoch": 0.9629399217916815, "grad_norm": 2.234375, "learning_rate": 7.382222222222223e-07, "loss": 0.3891, "step": 43340 }, { "epoch": 0.9631621045147529, "grad_norm": 2.578125, "learning_rate": 7.337777777777779e-07, "loss": 0.4046, "step": 43350 }, { "epoch": 0.9633842872378244, "grad_norm": 2.234375, "learning_rate": 7.293333333333335e-07, "loss": 0.3876, "step": 43360 }, { "epoch": 0.9636064699608958, "grad_norm": 2.9375, "learning_rate": 7.24888888888889e-07, "loss": 0.3588, "step": 43370 }, { "epoch": 0.9638286526839673, "grad_norm": 2.53125, "learning_rate": 7.204444444444444e-07, "loss": 0.3977, "step": 43380 }, { "epoch": 0.9640508354070387, "grad_norm": 2.984375, "learning_rate": 7.16e-07, "loss": 0.4028, "step": 43390 }, { "epoch": 0.9642730181301102, "grad_norm": 2.65625, "learning_rate": 7.115555555555556e-07, "loss": 0.373, "step": 43400 }, { "epoch": 0.9644952008531816, "grad_norm": 2.3125, "learning_rate": 7.071111111111112e-07, "loss": 0.3863, "step": 43410 }, { "epoch": 0.9647173835762531, "grad_norm": 2.265625, "learning_rate": 7.026666666666668e-07, "loss": 0.3439, "step": 43420 }, { "epoch": 0.9649395662993245, "grad_norm": 2.125, "learning_rate": 6.982222222222222e-07, "loss": 0.3628, "step": 43430 }, { "epoch": 0.965161749022396, "grad_norm": 2.125, "learning_rate": 6.937777777777778e-07, "loss": 0.3671, "step": 43440 }, { "epoch": 0.9653839317454674, "grad_norm": 2.25, "learning_rate": 6.893333333333334e-07, "loss": 0.3832, "step": 43450 }, { "epoch": 0.965606114468539, "grad_norm": 2.59375, "learning_rate": 6.84888888888889e-07, "loss": 0.3556, "step": 43460 }, { "epoch": 0.9658282971916103, "grad_norm": 3.140625, "learning_rate": 6.804444444444446e-07, "loss": 0.3729, "step": 43470 }, { "epoch": 0.9660504799146818, "grad_norm": 3.03125, "learning_rate": 6.76e-07, "loss": 0.4146, "step": 43480 }, { "epoch": 0.9662726626377532, "grad_norm": 2.140625, "learning_rate": 6.715555555555556e-07, "loss": 0.3773, "step": 43490 }, { "epoch": 0.9664948453608248, "grad_norm": 2.390625, "learning_rate": 6.671111111111111e-07, "loss": 0.3963, "step": 43500 }, { "epoch": 0.9667170280838961, "grad_norm": 2.5625, "learning_rate": 6.626666666666667e-07, "loss": 0.3538, "step": 43510 }, { "epoch": 0.9669392108069677, "grad_norm": 2.34375, "learning_rate": 6.582222222222223e-07, "loss": 0.3399, "step": 43520 }, { "epoch": 0.9671613935300392, "grad_norm": 2.5, "learning_rate": 6.537777777777779e-07, "loss": 0.3567, "step": 43530 }, { "epoch": 0.9673835762531106, "grad_norm": 2.859375, "learning_rate": 6.493333333333334e-07, "loss": 0.4067, "step": 43540 }, { "epoch": 0.9676057589761821, "grad_norm": 2.703125, "learning_rate": 6.44888888888889e-07, "loss": 0.3565, "step": 43550 }, { "epoch": 0.9678279416992535, "grad_norm": 2.890625, "learning_rate": 6.404444444444446e-07, "loss": 0.3918, "step": 43560 }, { "epoch": 0.968050124422325, "grad_norm": 2.546875, "learning_rate": 6.360000000000001e-07, "loss": 0.3324, "step": 43570 }, { "epoch": 0.9682723071453964, "grad_norm": 2.59375, "learning_rate": 6.315555555555557e-07, "loss": 0.3777, "step": 43580 }, { "epoch": 0.9684944898684679, "grad_norm": 2.40625, "learning_rate": 6.271111111111111e-07, "loss": 0.3734, "step": 43590 }, { "epoch": 0.9687166725915393, "grad_norm": 2.203125, "learning_rate": 6.226666666666667e-07, "loss": 0.3634, "step": 43600 }, { "epoch": 0.9689388553146108, "grad_norm": 2.515625, "learning_rate": 6.182222222222223e-07, "loss": 0.3954, "step": 43610 }, { "epoch": 0.9691610380376822, "grad_norm": 2.8125, "learning_rate": 6.137777777777779e-07, "loss": 0.4005, "step": 43620 }, { "epoch": 0.9693832207607537, "grad_norm": 2.359375, "learning_rate": 6.093333333333333e-07, "loss": 0.3846, "step": 43630 }, { "epoch": 0.9696054034838251, "grad_norm": 2.375, "learning_rate": 6.048888888888889e-07, "loss": 0.3847, "step": 43640 }, { "epoch": 0.9698275862068966, "grad_norm": 2.515625, "learning_rate": 6.004444444444445e-07, "loss": 0.3993, "step": 43650 }, { "epoch": 0.970049768929968, "grad_norm": 2.75, "learning_rate": 5.960000000000001e-07, "loss": 0.3835, "step": 43660 }, { "epoch": 0.9702719516530395, "grad_norm": 2.28125, "learning_rate": 5.915555555555557e-07, "loss": 0.3449, "step": 43670 }, { "epoch": 0.9704941343761109, "grad_norm": 2.359375, "learning_rate": 5.871111111111112e-07, "loss": 0.3962, "step": 43680 }, { "epoch": 0.9707163170991824, "grad_norm": 2.328125, "learning_rate": 5.826666666666667e-07, "loss": 0.3536, "step": 43690 }, { "epoch": 0.9709384998222538, "grad_norm": 2.46875, "learning_rate": 5.782222222222222e-07, "loss": 0.3503, "step": 43700 }, { "epoch": 0.9711606825453253, "grad_norm": 2.453125, "learning_rate": 5.737777777777778e-07, "loss": 0.3566, "step": 43710 }, { "epoch": 0.9713828652683967, "grad_norm": 2.890625, "learning_rate": 5.693333333333334e-07, "loss": 0.4052, "step": 43720 }, { "epoch": 0.9716050479914682, "grad_norm": 2.421875, "learning_rate": 5.648888888888889e-07, "loss": 0.3622, "step": 43730 }, { "epoch": 0.9718272307145397, "grad_norm": 2.765625, "learning_rate": 5.604444444444445e-07, "loss": 0.3668, "step": 43740 }, { "epoch": 0.9720494134376111, "grad_norm": 2.15625, "learning_rate": 5.560000000000001e-07, "loss": 0.3619, "step": 43750 }, { "epoch": 0.9722715961606826, "grad_norm": 2.875, "learning_rate": 5.515555555555556e-07, "loss": 0.4131, "step": 43760 }, { "epoch": 0.972493778883754, "grad_norm": 2.28125, "learning_rate": 5.471111111111112e-07, "loss": 0.3642, "step": 43770 }, { "epoch": 0.9727159616068255, "grad_norm": 2.15625, "learning_rate": 5.426666666666667e-07, "loss": 0.3549, "step": 43780 }, { "epoch": 0.9729381443298969, "grad_norm": 2.578125, "learning_rate": 5.382222222222223e-07, "loss": 0.3783, "step": 43790 }, { "epoch": 0.9731603270529684, "grad_norm": 2.3125, "learning_rate": 5.337777777777779e-07, "loss": 0.3919, "step": 43800 }, { "epoch": 0.9733825097760398, "grad_norm": 2.65625, "learning_rate": 5.293333333333334e-07, "loss": 0.3956, "step": 43810 }, { "epoch": 0.9736046924991113, "grad_norm": 3.125, "learning_rate": 5.24888888888889e-07, "loss": 0.4097, "step": 43820 }, { "epoch": 0.9738268752221827, "grad_norm": 2.421875, "learning_rate": 5.204444444444444e-07, "loss": 0.3635, "step": 43830 }, { "epoch": 0.9740490579452542, "grad_norm": 2.328125, "learning_rate": 5.16e-07, "loss": 0.4085, "step": 43840 }, { "epoch": 0.9742712406683256, "grad_norm": 2.640625, "learning_rate": 5.115555555555556e-07, "loss": 0.3866, "step": 43850 }, { "epoch": 0.9744934233913971, "grad_norm": 2.859375, "learning_rate": 5.071111111111112e-07, "loss": 0.3987, "step": 43860 }, { "epoch": 0.9747156061144685, "grad_norm": 2.046875, "learning_rate": 5.026666666666667e-07, "loss": 0.3365, "step": 43870 }, { "epoch": 0.97493778883754, "grad_norm": 2.796875, "learning_rate": 4.982222222222223e-07, "loss": 0.3901, "step": 43880 }, { "epoch": 0.9751599715606114, "grad_norm": 2.484375, "learning_rate": 4.937777777777778e-07, "loss": 0.388, "step": 43890 }, { "epoch": 0.9753821542836829, "grad_norm": 2.5, "learning_rate": 4.893333333333334e-07, "loss": 0.3724, "step": 43900 }, { "epoch": 0.9756043370067543, "grad_norm": 2.390625, "learning_rate": 4.848888888888889e-07, "loss": 0.3528, "step": 43910 }, { "epoch": 0.9758265197298258, "grad_norm": 2.4375, "learning_rate": 4.804444444444445e-07, "loss": 0.351, "step": 43920 }, { "epoch": 0.9760487024528972, "grad_norm": 2.375, "learning_rate": 4.760000000000001e-07, "loss": 0.3827, "step": 43930 }, { "epoch": 0.9762708851759687, "grad_norm": 2.6875, "learning_rate": 4.7155555555555556e-07, "loss": 0.3802, "step": 43940 }, { "epoch": 0.9764930678990402, "grad_norm": 3.015625, "learning_rate": 4.6711111111111115e-07, "loss": 0.3979, "step": 43950 }, { "epoch": 0.9767152506221116, "grad_norm": 2.734375, "learning_rate": 4.626666666666667e-07, "loss": 0.3912, "step": 43960 }, { "epoch": 0.9769374333451831, "grad_norm": 2.609375, "learning_rate": 4.5822222222222227e-07, "loss": 0.42, "step": 43970 }, { "epoch": 0.9771596160682545, "grad_norm": 2.59375, "learning_rate": 4.5377777777777785e-07, "loss": 0.3759, "step": 43980 }, { "epoch": 0.977381798791326, "grad_norm": 2.5, "learning_rate": 4.4933333333333333e-07, "loss": 0.3879, "step": 43990 }, { "epoch": 0.9776039815143974, "grad_norm": 2.40625, "learning_rate": 4.448888888888889e-07, "loss": 0.3706, "step": 44000 }, { "epoch": 0.9778261642374689, "grad_norm": 2.21875, "learning_rate": 4.4044444444444445e-07, "loss": 0.3588, "step": 44010 }, { "epoch": 0.9780483469605403, "grad_norm": 2.46875, "learning_rate": 4.3600000000000004e-07, "loss": 0.3787, "step": 44020 }, { "epoch": 0.9782705296836118, "grad_norm": 2.203125, "learning_rate": 4.315555555555556e-07, "loss": 0.3515, "step": 44030 }, { "epoch": 0.9784927124066832, "grad_norm": 2.75, "learning_rate": 4.271111111111111e-07, "loss": 0.3578, "step": 44040 }, { "epoch": 0.9787148951297547, "grad_norm": 2.8125, "learning_rate": 4.226666666666667e-07, "loss": 0.3804, "step": 44050 }, { "epoch": 0.9789370778528261, "grad_norm": 2.59375, "learning_rate": 4.182222222222222e-07, "loss": 0.4112, "step": 44060 }, { "epoch": 0.9791592605758976, "grad_norm": 2.671875, "learning_rate": 4.137777777777778e-07, "loss": 0.3923, "step": 44070 }, { "epoch": 0.979381443298969, "grad_norm": 2.171875, "learning_rate": 4.093333333333334e-07, "loss": 0.3167, "step": 44080 }, { "epoch": 0.9796036260220405, "grad_norm": 2.125, "learning_rate": 4.048888888888889e-07, "loss": 0.4019, "step": 44090 }, { "epoch": 0.9798258087451119, "grad_norm": 2.75, "learning_rate": 4.0044444444444447e-07, "loss": 0.3911, "step": 44100 }, { "epoch": 0.9800479914681834, "grad_norm": 2.875, "learning_rate": 3.9600000000000005e-07, "loss": 0.356, "step": 44110 }, { "epoch": 0.9802701741912548, "grad_norm": 2.25, "learning_rate": 3.915555555555556e-07, "loss": 0.3712, "step": 44120 }, { "epoch": 0.9804923569143263, "grad_norm": 2.828125, "learning_rate": 3.8711111111111117e-07, "loss": 0.4247, "step": 44130 }, { "epoch": 0.9807145396373977, "grad_norm": 2.609375, "learning_rate": 3.8266666666666665e-07, "loss": 0.4134, "step": 44140 }, { "epoch": 0.9809367223604692, "grad_norm": 2.78125, "learning_rate": 3.7822222222222224e-07, "loss": 0.3876, "step": 44150 }, { "epoch": 0.9811589050835408, "grad_norm": 2.90625, "learning_rate": 3.737777777777778e-07, "loss": 0.3674, "step": 44160 }, { "epoch": 0.9813810878066122, "grad_norm": 2.484375, "learning_rate": 3.6933333333333336e-07, "loss": 0.3346, "step": 44170 }, { "epoch": 0.9816032705296837, "grad_norm": 2.515625, "learning_rate": 3.6488888888888894e-07, "loss": 0.3563, "step": 44180 }, { "epoch": 0.981825453252755, "grad_norm": 2.265625, "learning_rate": 3.604444444444444e-07, "loss": 0.4051, "step": 44190 }, { "epoch": 0.9820476359758266, "grad_norm": 2.5, "learning_rate": 3.56e-07, "loss": 0.3901, "step": 44200 }, { "epoch": 0.982269818698898, "grad_norm": 3.15625, "learning_rate": 3.515555555555556e-07, "loss": 0.4045, "step": 44210 }, { "epoch": 0.9824920014219695, "grad_norm": 2.28125, "learning_rate": 3.4711111111111113e-07, "loss": 0.3926, "step": 44220 }, { "epoch": 0.9827141841450409, "grad_norm": 2.421875, "learning_rate": 3.426666666666667e-07, "loss": 0.3516, "step": 44230 }, { "epoch": 0.9829363668681124, "grad_norm": 2.75, "learning_rate": 3.382222222222222e-07, "loss": 0.393, "step": 44240 }, { "epoch": 0.9831585495911838, "grad_norm": 2.4375, "learning_rate": 3.337777777777778e-07, "loss": 0.3764, "step": 44250 }, { "epoch": 0.9833807323142553, "grad_norm": 2.953125, "learning_rate": 3.2933333333333337e-07, "loss": 0.3908, "step": 44260 }, { "epoch": 0.9836029150373267, "grad_norm": 2.765625, "learning_rate": 3.248888888888889e-07, "loss": 0.4109, "step": 44270 }, { "epoch": 0.9838250977603982, "grad_norm": 2.40625, "learning_rate": 3.204444444444445e-07, "loss": 0.3827, "step": 44280 }, { "epoch": 0.9840472804834696, "grad_norm": 2.3125, "learning_rate": 3.160000000000001e-07, "loss": 0.386, "step": 44290 }, { "epoch": 0.9842694632065411, "grad_norm": 2.8125, "learning_rate": 3.1155555555555556e-07, "loss": 0.3903, "step": 44300 }, { "epoch": 0.9844916459296125, "grad_norm": 2.734375, "learning_rate": 3.0711111111111114e-07, "loss": 0.4183, "step": 44310 }, { "epoch": 0.984713828652684, "grad_norm": 2.53125, "learning_rate": 3.026666666666667e-07, "loss": 0.39, "step": 44320 }, { "epoch": 0.9849360113757554, "grad_norm": 2.703125, "learning_rate": 2.9822222222222226e-07, "loss": 0.4059, "step": 44330 }, { "epoch": 0.9851581940988269, "grad_norm": 2.59375, "learning_rate": 2.937777777777778e-07, "loss": 0.3491, "step": 44340 }, { "epoch": 0.9853803768218983, "grad_norm": 2.671875, "learning_rate": 2.8933333333333333e-07, "loss": 0.3557, "step": 44350 }, { "epoch": 0.9856025595449698, "grad_norm": 2.265625, "learning_rate": 2.848888888888889e-07, "loss": 0.4059, "step": 44360 }, { "epoch": 0.9858247422680413, "grad_norm": 2.5625, "learning_rate": 2.8044444444444445e-07, "loss": 0.3636, "step": 44370 }, { "epoch": 0.9860469249911127, "grad_norm": 3.203125, "learning_rate": 2.7600000000000004e-07, "loss": 0.3925, "step": 44380 }, { "epoch": 0.9862691077141842, "grad_norm": 2.328125, "learning_rate": 2.7155555555555557e-07, "loss": 0.358, "step": 44390 }, { "epoch": 0.9864912904372556, "grad_norm": 2.390625, "learning_rate": 2.6711111111111116e-07, "loss": 0.3771, "step": 44400 }, { "epoch": 0.9867134731603271, "grad_norm": 2.734375, "learning_rate": 2.626666666666667e-07, "loss": 0.4057, "step": 44410 }, { "epoch": 0.9869356558833985, "grad_norm": 2.953125, "learning_rate": 2.582222222222222e-07, "loss": 0.3639, "step": 44420 }, { "epoch": 0.98715783860647, "grad_norm": 2.765625, "learning_rate": 2.537777777777778e-07, "loss": 0.397, "step": 44430 }, { "epoch": 0.9873800213295414, "grad_norm": 2.765625, "learning_rate": 2.4933333333333334e-07, "loss": 0.3519, "step": 44440 }, { "epoch": 0.9876022040526129, "grad_norm": 1.90625, "learning_rate": 2.4488888888888893e-07, "loss": 0.3831, "step": 44450 }, { "epoch": 0.9878243867756843, "grad_norm": 2.3125, "learning_rate": 2.4044444444444446e-07, "loss": 0.3542, "step": 44460 }, { "epoch": 0.9880465694987558, "grad_norm": 2.296875, "learning_rate": 2.3600000000000002e-07, "loss": 0.3645, "step": 44470 }, { "epoch": 0.9882687522218272, "grad_norm": 2.40625, "learning_rate": 2.3155555555555556e-07, "loss": 0.3595, "step": 44480 }, { "epoch": 0.9884909349448987, "grad_norm": 2.78125, "learning_rate": 2.2711111111111114e-07, "loss": 0.3782, "step": 44490 }, { "epoch": 0.9887131176679701, "grad_norm": 3.0, "learning_rate": 2.226666666666667e-07, "loss": 0.4173, "step": 44500 }, { "epoch": 0.9889353003910416, "grad_norm": 2.671875, "learning_rate": 2.1822222222222224e-07, "loss": 0.4032, "step": 44510 }, { "epoch": 0.989157483114113, "grad_norm": 2.21875, "learning_rate": 2.137777777777778e-07, "loss": 0.3444, "step": 44520 }, { "epoch": 0.9893796658371845, "grad_norm": 2.703125, "learning_rate": 2.0933333333333335e-07, "loss": 0.3868, "step": 44530 }, { "epoch": 0.9896018485602559, "grad_norm": 2.53125, "learning_rate": 2.0488888888888891e-07, "loss": 0.3836, "step": 44540 }, { "epoch": 0.9898240312833274, "grad_norm": 2.78125, "learning_rate": 2.0044444444444447e-07, "loss": 0.3686, "step": 44550 }, { "epoch": 0.9900462140063989, "grad_norm": 2.5625, "learning_rate": 1.96e-07, "loss": 0.3866, "step": 44560 }, { "epoch": 0.9902683967294703, "grad_norm": 2.84375, "learning_rate": 1.9155555555555557e-07, "loss": 0.3696, "step": 44570 }, { "epoch": 0.9904905794525418, "grad_norm": 2.125, "learning_rate": 1.8711111111111113e-07, "loss": 0.376, "step": 44580 }, { "epoch": 0.9907127621756132, "grad_norm": 2.328125, "learning_rate": 1.826666666666667e-07, "loss": 0.3585, "step": 44590 }, { "epoch": 0.9909349448986847, "grad_norm": 3.0, "learning_rate": 1.7822222222222222e-07, "loss": 0.3533, "step": 44600 }, { "epoch": 0.9911571276217561, "grad_norm": 2.75, "learning_rate": 1.7377777777777778e-07, "loss": 0.3631, "step": 44610 }, { "epoch": 0.9913793103448276, "grad_norm": 2.5625, "learning_rate": 1.6933333333333337e-07, "loss": 0.3594, "step": 44620 }, { "epoch": 0.991601493067899, "grad_norm": 2.34375, "learning_rate": 1.648888888888889e-07, "loss": 0.3878, "step": 44630 }, { "epoch": 0.9918236757909705, "grad_norm": 1.9375, "learning_rate": 1.6044444444444446e-07, "loss": 0.3635, "step": 44640 }, { "epoch": 0.9920458585140419, "grad_norm": 2.390625, "learning_rate": 1.56e-07, "loss": 0.3724, "step": 44650 }, { "epoch": 0.9922680412371134, "grad_norm": 2.421875, "learning_rate": 1.5155555555555558e-07, "loss": 0.3796, "step": 44660 }, { "epoch": 0.9924902239601848, "grad_norm": 2.46875, "learning_rate": 1.4711111111111111e-07, "loss": 0.3429, "step": 44670 }, { "epoch": 0.9927124066832563, "grad_norm": 2.734375, "learning_rate": 1.4266666666666667e-07, "loss": 0.3699, "step": 44680 }, { "epoch": 0.9929345894063277, "grad_norm": 2.5625, "learning_rate": 1.3822222222222223e-07, "loss": 0.3655, "step": 44690 }, { "epoch": 0.9931567721293992, "grad_norm": 2.125, "learning_rate": 1.337777777777778e-07, "loss": 0.392, "step": 44700 }, { "epoch": 0.9933789548524706, "grad_norm": 2.640625, "learning_rate": 1.2933333333333335e-07, "loss": 0.4055, "step": 44710 }, { "epoch": 0.9936011375755421, "grad_norm": 2.484375, "learning_rate": 1.248888888888889e-07, "loss": 0.4049, "step": 44720 }, { "epoch": 0.9938233202986135, "grad_norm": 2.984375, "learning_rate": 1.2044444444444445e-07, "loss": 0.3453, "step": 44730 }, { "epoch": 0.994045503021685, "grad_norm": 2.921875, "learning_rate": 1.16e-07, "loss": 0.3585, "step": 44740 }, { "epoch": 0.9942676857447564, "grad_norm": 2.703125, "learning_rate": 1.1155555555555557e-07, "loss": 0.3736, "step": 44750 }, { "epoch": 0.9944898684678279, "grad_norm": 2.46875, "learning_rate": 1.0711111111111111e-07, "loss": 0.3338, "step": 44760 }, { "epoch": 0.9947120511908994, "grad_norm": 2.671875, "learning_rate": 1.0266666666666667e-07, "loss": 0.3935, "step": 44770 }, { "epoch": 0.9949342339139708, "grad_norm": 2.140625, "learning_rate": 9.822222222222222e-08, "loss": 0.3645, "step": 44780 }, { "epoch": 0.9951564166370424, "grad_norm": 2.4375, "learning_rate": 9.377777777777779e-08, "loss": 0.4132, "step": 44790 }, { "epoch": 0.9953785993601137, "grad_norm": 2.265625, "learning_rate": 8.933333333333334e-08, "loss": 0.3645, "step": 44800 }, { "epoch": 0.9956007820831853, "grad_norm": 2.640625, "learning_rate": 8.48888888888889e-08, "loss": 0.3802, "step": 44810 }, { "epoch": 0.9958229648062566, "grad_norm": 2.375, "learning_rate": 8.044444444444445e-08, "loss": 0.4019, "step": 44820 }, { "epoch": 0.9960451475293282, "grad_norm": 2.59375, "learning_rate": 7.6e-08, "loss": 0.3698, "step": 44830 }, { "epoch": 0.9962673302523996, "grad_norm": 2.90625, "learning_rate": 7.155555555555557e-08, "loss": 0.3613, "step": 44840 }, { "epoch": 0.9964895129754711, "grad_norm": 2.15625, "learning_rate": 6.711111111111111e-08, "loss": 0.334, "step": 44850 }, { "epoch": 0.9967116956985425, "grad_norm": 2.9375, "learning_rate": 6.266666666666667e-08, "loss": 0.3732, "step": 44860 }, { "epoch": 0.996933878421614, "grad_norm": 2.96875, "learning_rate": 5.822222222222223e-08, "loss": 0.3744, "step": 44870 }, { "epoch": 0.9971560611446854, "grad_norm": 2.921875, "learning_rate": 5.3777777777777785e-08, "loss": 0.3746, "step": 44880 }, { "epoch": 0.9973782438677569, "grad_norm": 2.171875, "learning_rate": 4.933333333333333e-08, "loss": 0.3952, "step": 44890 }, { "epoch": 0.9976004265908283, "grad_norm": 2.109375, "learning_rate": 4.488888888888889e-08, "loss": 0.3463, "step": 44900 }, { "epoch": 0.9978226093138998, "grad_norm": 2.640625, "learning_rate": 4.0444444444444445e-08, "loss": 0.3189, "step": 44910 }, { "epoch": 0.9980447920369712, "grad_norm": 3.0, "learning_rate": 3.6000000000000005e-08, "loss": 0.4051, "step": 44920 }, { "epoch": 0.9982669747600427, "grad_norm": 2.4375, "learning_rate": 3.155555555555556e-08, "loss": 0.3898, "step": 44930 }, { "epoch": 0.9984891574831141, "grad_norm": 2.640625, "learning_rate": 2.7111111111111115e-08, "loss": 0.4055, "step": 44940 }, { "epoch": 0.9987113402061856, "grad_norm": 3.03125, "learning_rate": 2.266666666666667e-08, "loss": 0.3668, "step": 44950 }, { "epoch": 0.998933522929257, "grad_norm": 2.390625, "learning_rate": 1.8222222222222224e-08, "loss": 0.3609, "step": 44960 }, { "epoch": 0.9991557056523285, "grad_norm": 2.8125, "learning_rate": 1.3777777777777778e-08, "loss": 0.3752, "step": 44970 }, { "epoch": 0.9993778883754, "grad_norm": 2.609375, "learning_rate": 9.333333333333334e-09, "loss": 0.3807, "step": 44980 }, { "epoch": 0.9996000710984714, "grad_norm": 2.703125, "learning_rate": 4.888888888888889e-09, "loss": 0.3794, "step": 44990 }, { "epoch": 0.9998222538215429, "grad_norm": 2.75, "learning_rate": 4.444444444444445e-10, "loss": 0.3628, "step": 45000 } ], "logging_steps": 10, "max_steps": 45000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3115342813852418e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }