{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "global_step": 44500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4981273408239701e-06, "loss": 1.6359, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.9962546816479402e-06, "loss": 1.6528, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.49438202247191e-06, "loss": 1.6445, "step": 60 }, { "epoch": 0.01, "learning_rate": 5.9925093632958805e-06, "loss": 1.6479, "step": 80 }, { "epoch": 0.01, "learning_rate": 7.490636704119851e-06, "loss": 1.608, "step": 100 }, { "epoch": 0.01, "learning_rate": 8.98876404494382e-06, "loss": 1.6177, "step": 120 }, { "epoch": 0.02, "learning_rate": 1.0486891385767791e-05, "loss": 1.5548, "step": 140 }, { "epoch": 0.02, "learning_rate": 1.1985018726591761e-05, "loss": 1.527, "step": 160 }, { "epoch": 0.02, "learning_rate": 1.348314606741573e-05, "loss": 1.4841, "step": 180 }, { "epoch": 0.02, "learning_rate": 1.4981273408239702e-05, "loss": 1.4651, "step": 200 }, { "epoch": 0.02, "learning_rate": 1.647940074906367e-05, "loss": 1.4259, "step": 220 }, { "epoch": 0.03, "learning_rate": 1.797752808988764e-05, "loss": 1.4087, "step": 240 }, { "epoch": 0.03, "learning_rate": 1.9475655430711613e-05, "loss": 1.4239, "step": 260 }, { "epoch": 0.03, "learning_rate": 2.0973782771535582e-05, "loss": 1.4119, "step": 280 }, { "epoch": 0.03, "learning_rate": 2.2471910112359552e-05, "loss": 1.3844, "step": 300 }, { "epoch": 0.04, "learning_rate": 2.3970037453183522e-05, "loss": 1.3582, "step": 320 }, { "epoch": 0.04, "learning_rate": 2.546816479400749e-05, "loss": 1.3717, "step": 340 }, { "epoch": 0.04, "learning_rate": 2.696629213483146e-05, "loss": 1.3852, "step": 360 }, { "epoch": 0.04, "learning_rate": 2.846441947565543e-05, "loss": 1.3896, "step": 380 }, { "epoch": 0.04, "learning_rate": 2.9962546816479404e-05, "loss": 1.3512, "step": 400 }, { "epoch": 0.05, "learning_rate": 3.1460674157303374e-05, "loss": 1.3857, "step": 420 }, { "epoch": 0.05, "learning_rate": 3.295880149812734e-05, "loss": 1.3396, "step": 440 }, { "epoch": 0.05, "learning_rate": 3.445692883895131e-05, "loss": 1.3324, "step": 460 }, { "epoch": 0.05, "learning_rate": 3.595505617977528e-05, "loss": 1.3522, "step": 480 }, { "epoch": 0.06, "learning_rate": 3.745318352059925e-05, "loss": 1.3429, "step": 500 }, { "epoch": 0.06, "learning_rate": 3.8951310861423226e-05, "loss": 1.3368, "step": 520 }, { "epoch": 0.06, "learning_rate": 4.044943820224719e-05, "loss": 1.3165, "step": 540 }, { "epoch": 0.06, "learning_rate": 4.1947565543071165e-05, "loss": 1.3356, "step": 560 }, { "epoch": 0.07, "learning_rate": 4.344569288389513e-05, "loss": 1.2966, "step": 580 }, { "epoch": 0.07, "learning_rate": 4.4943820224719104e-05, "loss": 1.3255, "step": 600 }, { "epoch": 0.07, "learning_rate": 4.644194756554308e-05, "loss": 1.2908, "step": 620 }, { "epoch": 0.07, "learning_rate": 4.7940074906367044e-05, "loss": 1.2983, "step": 640 }, { "epoch": 0.07, "learning_rate": 4.943820224719101e-05, "loss": 1.2979, "step": 660 }, { "epoch": 0.08, "learning_rate": 5.093632958801498e-05, "loss": 1.3171, "step": 680 }, { "epoch": 0.08, "learning_rate": 5.243445692883895e-05, "loss": 1.3026, "step": 700 }, { "epoch": 0.08, "learning_rate": 5.393258426966292e-05, "loss": 1.3105, "step": 720 }, { "epoch": 0.08, "learning_rate": 5.5430711610486895e-05, "loss": 1.3146, "step": 740 }, { "epoch": 0.09, "learning_rate": 5.692883895131086e-05, "loss": 1.2834, "step": 760 }, { "epoch": 0.09, "learning_rate": 5.8426966292134835e-05, "loss": 1.2993, "step": 780 }, { "epoch": 0.09, "learning_rate": 5.992509363295881e-05, "loss": 1.287, "step": 800 }, { "epoch": 0.09, "learning_rate": 6.142322097378277e-05, "loss": 1.293, "step": 820 }, { "epoch": 0.09, "learning_rate": 6.292134831460675e-05, "loss": 1.2556, "step": 840 }, { "epoch": 0.1, "learning_rate": 6.441947565543071e-05, "loss": 1.2979, "step": 860 }, { "epoch": 0.1, "learning_rate": 6.591760299625468e-05, "loss": 1.3002, "step": 880 }, { "epoch": 0.1, "learning_rate": 6.741573033707866e-05, "loss": 1.2408, "step": 900 }, { "epoch": 0.1, "learning_rate": 6.891385767790263e-05, "loss": 1.2753, "step": 920 }, { "epoch": 0.11, "learning_rate": 7.04119850187266e-05, "loss": 1.2671, "step": 940 }, { "epoch": 0.11, "learning_rate": 7.191011235955056e-05, "loss": 1.2925, "step": 960 }, { "epoch": 0.11, "learning_rate": 7.340823970037454e-05, "loss": 1.2709, "step": 980 }, { "epoch": 0.11, "learning_rate": 7.49063670411985e-05, "loss": 1.2614, "step": 1000 }, { "epoch": 0.11, "learning_rate": 7.640449438202247e-05, "loss": 1.2979, "step": 1020 }, { "epoch": 0.12, "learning_rate": 7.790262172284645e-05, "loss": 1.2534, "step": 1040 }, { "epoch": 0.12, "learning_rate": 7.940074906367042e-05, "loss": 1.2425, "step": 1060 }, { "epoch": 0.12, "learning_rate": 8.089887640449438e-05, "loss": 1.2671, "step": 1080 }, { "epoch": 0.12, "learning_rate": 8.239700374531836e-05, "loss": 1.247, "step": 1100 }, { "epoch": 0.13, "learning_rate": 8.389513108614233e-05, "loss": 1.2785, "step": 1120 }, { "epoch": 0.13, "learning_rate": 8.53932584269663e-05, "loss": 1.2611, "step": 1140 }, { "epoch": 0.13, "learning_rate": 8.689138576779026e-05, "loss": 1.238, "step": 1160 }, { "epoch": 0.13, "learning_rate": 8.838951310861424e-05, "loss": 1.2352, "step": 1180 }, { "epoch": 0.13, "learning_rate": 8.988764044943821e-05, "loss": 1.2389, "step": 1200 }, { "epoch": 0.14, "learning_rate": 9.138576779026217e-05, "loss": 1.2519, "step": 1220 }, { "epoch": 0.14, "learning_rate": 9.288389513108615e-05, "loss": 1.2466, "step": 1240 }, { "epoch": 0.14, "learning_rate": 9.438202247191012e-05, "loss": 1.2414, "step": 1260 }, { "epoch": 0.14, "learning_rate": 9.588014981273409e-05, "loss": 1.2565, "step": 1280 }, { "epoch": 0.15, "learning_rate": 9.737827715355807e-05, "loss": 1.2661, "step": 1300 }, { "epoch": 0.15, "learning_rate": 9.887640449438202e-05, "loss": 1.2708, "step": 1320 }, { "epoch": 0.15, "learning_rate": 9.999999668932716e-05, "loss": 1.2292, "step": 1340 }, { "epoch": 0.15, "learning_rate": 9.999991723320065e-05, "loss": 1.2506, "step": 1360 }, { "epoch": 0.16, "learning_rate": 9.999973183573581e-05, "loss": 1.2434, "step": 1380 }, { "epoch": 0.16, "learning_rate": 9.999944049732545e-05, "loss": 1.2425, "step": 1400 }, { "epoch": 0.16, "learning_rate": 9.99990432185869e-05, "loss": 1.2279, "step": 1420 }, { "epoch": 0.16, "learning_rate": 9.999854000036192e-05, "loss": 1.2039, "step": 1440 }, { "epoch": 0.16, "learning_rate": 9.999793084371672e-05, "loss": 1.2298, "step": 1460 }, { "epoch": 0.17, "learning_rate": 9.999721574994201e-05, "loss": 1.2467, "step": 1480 }, { "epoch": 0.17, "learning_rate": 9.999639472055294e-05, "loss": 1.2039, "step": 1500 }, { "epoch": 0.17, "learning_rate": 9.999546775728917e-05, "loss": 1.2668, "step": 1520 }, { "epoch": 0.17, "learning_rate": 9.999443486211473e-05, "loss": 1.2045, "step": 1540 }, { "epoch": 0.18, "learning_rate": 9.99932960372182e-05, "loss": 1.2156, "step": 1560 }, { "epoch": 0.18, "learning_rate": 9.99920512850125e-05, "loss": 1.2157, "step": 1580 }, { "epoch": 0.18, "learning_rate": 9.999070060813509e-05, "loss": 1.2027, "step": 1600 }, { "epoch": 0.18, "learning_rate": 9.99892440094478e-05, "loss": 1.222, "step": 1620 }, { "epoch": 0.18, "learning_rate": 9.998768149203695e-05, "loss": 1.2139, "step": 1640 }, { "epoch": 0.19, "learning_rate": 9.998601305921322e-05, "loss": 1.2042, "step": 1660 }, { "epoch": 0.19, "learning_rate": 9.998423871451174e-05, "loss": 1.2379, "step": 1680 }, { "epoch": 0.19, "learning_rate": 9.998235846169204e-05, "loss": 1.1764, "step": 1700 }, { "epoch": 0.19, "learning_rate": 9.998037230473809e-05, "loss": 1.2254, "step": 1720 }, { "epoch": 0.2, "learning_rate": 9.997828024785817e-05, "loss": 1.1891, "step": 1740 }, { "epoch": 0.2, "learning_rate": 9.997608229548504e-05, "loss": 1.1889, "step": 1760 }, { "epoch": 0.2, "learning_rate": 9.997377845227576e-05, "loss": 1.2035, "step": 1780 }, { "epoch": 0.2, "learning_rate": 9.997136872311177e-05, "loss": 1.2186, "step": 1800 }, { "epoch": 0.2, "learning_rate": 9.996885311309891e-05, "loss": 1.1837, "step": 1820 }, { "epoch": 0.21, "learning_rate": 9.996623162756733e-05, "loss": 1.183, "step": 1840 }, { "epoch": 0.21, "learning_rate": 9.996350427207148e-05, "loss": 1.1707, "step": 1860 }, { "epoch": 0.21, "learning_rate": 9.99606710523902e-05, "loss": 1.2172, "step": 1880 }, { "epoch": 0.21, "learning_rate": 9.995773197452657e-05, "loss": 1.1877, "step": 1900 }, { "epoch": 0.22, "learning_rate": 9.995468704470802e-05, "loss": 1.2208, "step": 1920 }, { "epoch": 0.22, "learning_rate": 9.995153626938623e-05, "loss": 1.1727, "step": 1940 }, { "epoch": 0.22, "learning_rate": 9.994827965523716e-05, "loss": 1.2127, "step": 1960 }, { "epoch": 0.22, "learning_rate": 9.994491720916102e-05, "loss": 1.1912, "step": 1980 }, { "epoch": 0.22, "learning_rate": 9.994144893828226e-05, "loss": 1.1852, "step": 2000 }, { "epoch": 0.23, "learning_rate": 9.993787484994957e-05, "loss": 1.1689, "step": 2020 }, { "epoch": 0.23, "learning_rate": 9.993419495173582e-05, "loss": 1.19, "step": 2040 }, { "epoch": 0.23, "learning_rate": 9.99304092514381e-05, "loss": 1.1833, "step": 2060 }, { "epoch": 0.23, "learning_rate": 9.992651775707768e-05, "loss": 1.1944, "step": 2080 }, { "epoch": 0.24, "learning_rate": 9.992252047689997e-05, "loss": 1.1592, "step": 2100 }, { "epoch": 0.24, "learning_rate": 9.991841741937448e-05, "loss": 1.1744, "step": 2120 }, { "epoch": 0.24, "learning_rate": 9.991420859319496e-05, "loss": 1.1884, "step": 2140 }, { "epoch": 0.24, "learning_rate": 9.990989400727916e-05, "loss": 1.1372, "step": 2160 }, { "epoch": 0.24, "learning_rate": 9.990547367076896e-05, "loss": 1.1767, "step": 2180 }, { "epoch": 0.25, "learning_rate": 9.990094759303033e-05, "loss": 1.1837, "step": 2200 }, { "epoch": 0.25, "learning_rate": 9.989631578365322e-05, "loss": 1.1564, "step": 2220 }, { "epoch": 0.25, "learning_rate": 9.989157825245167e-05, "loss": 1.1807, "step": 2240 }, { "epoch": 0.25, "learning_rate": 9.98867350094637e-05, "loss": 1.1898, "step": 2260 }, { "epoch": 0.26, "learning_rate": 9.988178606495132e-05, "loss": 1.2028, "step": 2280 }, { "epoch": 0.26, "learning_rate": 9.98767314294005e-05, "loss": 1.1629, "step": 2300 }, { "epoch": 0.26, "learning_rate": 9.987157111352117e-05, "loss": 1.1963, "step": 2320 }, { "epoch": 0.26, "learning_rate": 9.986630512824715e-05, "loss": 1.1642, "step": 2340 }, { "epoch": 0.27, "learning_rate": 9.986093348473617e-05, "loss": 1.1624, "step": 2360 }, { "epoch": 0.27, "learning_rate": 9.985545619436984e-05, "loss": 1.1595, "step": 2380 }, { "epoch": 0.27, "learning_rate": 9.984987326875359e-05, "loss": 1.1985, "step": 2400 }, { "epoch": 0.27, "learning_rate": 9.984418471971671e-05, "loss": 1.1912, "step": 2420 }, { "epoch": 0.27, "learning_rate": 9.983839055931226e-05, "loss": 1.2146, "step": 2440 }, { "epoch": 0.28, "learning_rate": 9.983249079981709e-05, "loss": 1.183, "step": 2460 }, { "epoch": 0.28, "learning_rate": 9.982648545373177e-05, "loss": 1.1561, "step": 2480 }, { "epoch": 0.28, "learning_rate": 9.982037453378063e-05, "loss": 1.1517, "step": 2500 }, { "epoch": 0.28, "learning_rate": 9.981415805291168e-05, "loss": 1.1485, "step": 2520 }, { "epoch": 0.29, "learning_rate": 9.980783602429656e-05, "loss": 1.1721, "step": 2540 }, { "epoch": 0.29, "learning_rate": 9.98014084613306e-05, "loss": 1.1825, "step": 2560 }, { "epoch": 0.29, "learning_rate": 9.979487537763269e-05, "loss": 1.1338, "step": 2580 }, { "epoch": 0.29, "learning_rate": 9.978823678704533e-05, "loss": 1.193, "step": 2600 }, { "epoch": 0.29, "learning_rate": 9.978149270363462e-05, "loss": 1.1809, "step": 2620 }, { "epoch": 0.3, "learning_rate": 9.977464314169005e-05, "loss": 1.1425, "step": 2640 }, { "epoch": 0.3, "learning_rate": 9.976768811572473e-05, "loss": 1.1428, "step": 2660 }, { "epoch": 0.3, "learning_rate": 9.976062764047515e-05, "loss": 1.1511, "step": 2680 }, { "epoch": 0.3, "learning_rate": 9.975346173090128e-05, "loss": 1.161, "step": 2700 }, { "epoch": 0.31, "learning_rate": 9.974619040218644e-05, "loss": 1.168, "step": 2720 }, { "epoch": 0.31, "learning_rate": 9.973881366973738e-05, "loss": 1.1474, "step": 2740 }, { "epoch": 0.31, "learning_rate": 9.973133154918413e-05, "loss": 1.1605, "step": 2760 }, { "epoch": 0.31, "learning_rate": 9.972374405638e-05, "loss": 1.1591, "step": 2780 }, { "epoch": 0.31, "learning_rate": 9.971605120740166e-05, "loss": 1.1494, "step": 2800 }, { "epoch": 0.32, "learning_rate": 9.970825301854889e-05, "loss": 1.1596, "step": 2820 }, { "epoch": 0.32, "learning_rate": 9.970034950634478e-05, "loss": 1.1543, "step": 2840 }, { "epoch": 0.32, "learning_rate": 9.96923406875355e-05, "loss": 1.128, "step": 2860 }, { "epoch": 0.32, "learning_rate": 9.968422657909037e-05, "loss": 1.1567, "step": 2880 }, { "epoch": 0.33, "learning_rate": 9.967600719820183e-05, "loss": 1.1439, "step": 2900 }, { "epoch": 0.33, "learning_rate": 9.966768256228536e-05, "loss": 1.1156, "step": 2920 }, { "epoch": 0.33, "learning_rate": 9.965925268897942e-05, "loss": 1.1695, "step": 2940 }, { "epoch": 0.33, "learning_rate": 9.96507175961455e-05, "loss": 1.1286, "step": 2960 }, { "epoch": 0.33, "learning_rate": 9.964207730186804e-05, "loss": 1.1687, "step": 2980 }, { "epoch": 0.34, "learning_rate": 9.963333182445429e-05, "loss": 1.1401, "step": 3000 }, { "epoch": 0.34, "learning_rate": 9.962448118243451e-05, "loss": 1.1419, "step": 3020 }, { "epoch": 0.34, "learning_rate": 9.961552539456163e-05, "loss": 1.1224, "step": 3040 }, { "epoch": 0.34, "learning_rate": 9.96064644798115e-05, "loss": 1.1506, "step": 3060 }, { "epoch": 0.35, "learning_rate": 9.959729845738264e-05, "loss": 1.143, "step": 3080 }, { "epoch": 0.35, "learning_rate": 9.958802734669633e-05, "loss": 1.1684, "step": 3100 }, { "epoch": 0.35, "learning_rate": 9.957865116739641e-05, "loss": 1.1226, "step": 3120 }, { "epoch": 0.35, "learning_rate": 9.956916993934947e-05, "loss": 1.1404, "step": 3140 }, { "epoch": 0.36, "learning_rate": 9.95595836826446e-05, "loss": 1.1483, "step": 3160 }, { "epoch": 0.36, "learning_rate": 9.954989241759346e-05, "loss": 1.137, "step": 3180 }, { "epoch": 0.36, "learning_rate": 9.954009616473019e-05, "loss": 1.1019, "step": 3200 }, { "epoch": 0.36, "learning_rate": 9.95301949448114e-05, "loss": 1.1063, "step": 3220 }, { "epoch": 0.36, "learning_rate": 9.952018877881606e-05, "loss": 1.1487, "step": 3240 }, { "epoch": 0.37, "learning_rate": 9.951007768794558e-05, "loss": 1.128, "step": 3260 }, { "epoch": 0.37, "learning_rate": 9.949986169362362e-05, "loss": 1.1343, "step": 3280 }, { "epoch": 0.37, "learning_rate": 9.948954081749616e-05, "loss": 1.1342, "step": 3300 }, { "epoch": 0.37, "learning_rate": 9.947911508143135e-05, "loss": 1.1387, "step": 3320 }, { "epoch": 0.38, "learning_rate": 9.946858450751958e-05, "loss": 1.1217, "step": 3340 }, { "epoch": 0.38, "learning_rate": 9.945794911807334e-05, "loss": 1.1276, "step": 3360 }, { "epoch": 0.38, "learning_rate": 9.944720893562722e-05, "loss": 1.1567, "step": 3380 }, { "epoch": 0.38, "learning_rate": 9.943636398293785e-05, "loss": 1.1307, "step": 3400 }, { "epoch": 0.38, "learning_rate": 9.942541428298384e-05, "loss": 1.1264, "step": 3420 }, { "epoch": 0.39, "learning_rate": 9.941435985896573e-05, "loss": 1.1148, "step": 3440 }, { "epoch": 0.39, "learning_rate": 9.940320073430598e-05, "loss": 1.1417, "step": 3460 }, { "epoch": 0.39, "learning_rate": 9.93919369326489e-05, "loss": 1.1565, "step": 3480 }, { "epoch": 0.39, "learning_rate": 9.938056847786053e-05, "loss": 1.1517, "step": 3500 }, { "epoch": 0.4, "learning_rate": 9.936909539402874e-05, "loss": 1.1505, "step": 3520 }, { "epoch": 0.4, "learning_rate": 9.935751770546302e-05, "loss": 1.1276, "step": 3540 }, { "epoch": 0.4, "learning_rate": 9.934583543669453e-05, "loss": 1.1169, "step": 3560 }, { "epoch": 0.4, "learning_rate": 9.933404861247603e-05, "loss": 1.1185, "step": 3580 }, { "epoch": 0.4, "learning_rate": 9.93221572577818e-05, "loss": 1.1566, "step": 3600 }, { "epoch": 0.41, "learning_rate": 9.931016139780758e-05, "loss": 1.1288, "step": 3620 }, { "epoch": 0.41, "learning_rate": 9.929806105797058e-05, "loss": 1.137, "step": 3640 }, { "epoch": 0.41, "learning_rate": 9.928585626390935e-05, "loss": 1.1266, "step": 3660 }, { "epoch": 0.41, "learning_rate": 9.927354704148382e-05, "loss": 1.1436, "step": 3680 }, { "epoch": 0.42, "learning_rate": 9.926113341677507e-05, "loss": 1.0938, "step": 3700 }, { "epoch": 0.42, "learning_rate": 9.924861541608553e-05, "loss": 1.1159, "step": 3720 }, { "epoch": 0.42, "learning_rate": 9.92359930659387e-05, "loss": 1.0964, "step": 3740 }, { "epoch": 0.42, "learning_rate": 9.922326639307917e-05, "loss": 1.1329, "step": 3760 }, { "epoch": 0.42, "learning_rate": 9.921043542447264e-05, "loss": 1.1393, "step": 3780 }, { "epoch": 0.43, "learning_rate": 9.919750018730571e-05, "loss": 1.1251, "step": 3800 }, { "epoch": 0.43, "learning_rate": 9.918446070898601e-05, "loss": 1.1018, "step": 3820 }, { "epoch": 0.43, "learning_rate": 9.917131701714192e-05, "loss": 1.1376, "step": 3840 }, { "epoch": 0.43, "learning_rate": 9.915806913962274e-05, "loss": 1.1901, "step": 3860 }, { "epoch": 0.44, "learning_rate": 9.914471710449845e-05, "loss": 1.1236, "step": 3880 }, { "epoch": 0.44, "learning_rate": 9.913126094005976e-05, "loss": 1.1188, "step": 3900 }, { "epoch": 0.44, "learning_rate": 9.911770067481798e-05, "loss": 1.1358, "step": 3920 }, { "epoch": 0.44, "learning_rate": 9.910403633750502e-05, "loss": 1.14, "step": 3940 }, { "epoch": 0.44, "learning_rate": 9.909026795707331e-05, "loss": 1.1145, "step": 3960 }, { "epoch": 0.45, "learning_rate": 9.907639556269566e-05, "loss": 1.1162, "step": 3980 }, { "epoch": 0.45, "learning_rate": 9.906241918376537e-05, "loss": 1.1131, "step": 4000 }, { "epoch": 0.45, "learning_rate": 9.904833884989602e-05, "loss": 1.1174, "step": 4020 }, { "epoch": 0.45, "learning_rate": 9.90341545909214e-05, "loss": 1.1196, "step": 4040 }, { "epoch": 0.46, "learning_rate": 9.901986643689559e-05, "loss": 1.1053, "step": 4060 }, { "epoch": 0.46, "learning_rate": 9.900547441809272e-05, "loss": 1.1168, "step": 4080 }, { "epoch": 0.46, "learning_rate": 9.899097856500707e-05, "loss": 1.101, "step": 4100 }, { "epoch": 0.46, "learning_rate": 9.897637890835289e-05, "loss": 1.1039, "step": 4120 }, { "epoch": 0.47, "learning_rate": 9.896167547906437e-05, "loss": 1.1027, "step": 4140 }, { "epoch": 0.47, "learning_rate": 9.894686830829558e-05, "loss": 1.1028, "step": 4160 }, { "epoch": 0.47, "learning_rate": 9.89319574274204e-05, "loss": 1.0957, "step": 4180 }, { "epoch": 0.47, "learning_rate": 9.891694286803246e-05, "loss": 1.1397, "step": 4200 }, { "epoch": 0.47, "learning_rate": 9.890182466194505e-05, "loss": 1.109, "step": 4220 }, { "epoch": 0.48, "learning_rate": 9.88866028411911e-05, "loss": 1.1414, "step": 4240 }, { "epoch": 0.48, "learning_rate": 9.887127743802304e-05, "loss": 1.136, "step": 4260 }, { "epoch": 0.48, "learning_rate": 9.885584848491285e-05, "loss": 1.1458, "step": 4280 }, { "epoch": 0.48, "learning_rate": 9.884031601455179e-05, "loss": 1.1379, "step": 4300 }, { "epoch": 0.49, "learning_rate": 9.88246800598506e-05, "loss": 1.1083, "step": 4320 }, { "epoch": 0.49, "learning_rate": 9.880894065393915e-05, "loss": 1.1063, "step": 4340 }, { "epoch": 0.49, "learning_rate": 9.879309783016663e-05, "loss": 1.101, "step": 4360 }, { "epoch": 0.49, "learning_rate": 9.877715162210123e-05, "loss": 1.1023, "step": 4380 }, { "epoch": 0.49, "learning_rate": 9.876110206353033e-05, "loss": 1.1223, "step": 4400 }, { "epoch": 0.5, "learning_rate": 9.874494918846017e-05, "loss": 1.1348, "step": 4420 }, { "epoch": 0.5, "learning_rate": 9.872869303111595e-05, "loss": 1.0998, "step": 4440 }, { "epoch": 0.5, "learning_rate": 9.871233362594175e-05, "loss": 1.0983, "step": 4460 }, { "epoch": 0.5, "learning_rate": 9.869587100760034e-05, "loss": 1.1114, "step": 4480 }, { "epoch": 0.51, "learning_rate": 9.86793052109732e-05, "loss": 1.0765, "step": 4500 }, { "epoch": 0.51, "learning_rate": 9.866263627116049e-05, "loss": 1.089, "step": 4520 }, { "epoch": 0.51, "learning_rate": 9.864586422348081e-05, "loss": 1.101, "step": 4540 }, { "epoch": 0.51, "learning_rate": 9.862898910347132e-05, "loss": 1.1247, "step": 4560 }, { "epoch": 0.51, "learning_rate": 9.861201094688752e-05, "loss": 1.113, "step": 4580 }, { "epoch": 0.52, "learning_rate": 9.859492978970325e-05, "loss": 1.1354, "step": 4600 }, { "epoch": 0.52, "learning_rate": 9.857774566811058e-05, "loss": 1.095, "step": 4620 }, { "epoch": 0.52, "learning_rate": 9.856045861851975e-05, "loss": 1.1188, "step": 4640 }, { "epoch": 0.52, "learning_rate": 9.854306867755906e-05, "loss": 1.0938, "step": 4660 }, { "epoch": 0.53, "learning_rate": 9.852557588207487e-05, "loss": 1.1113, "step": 4680 }, { "epoch": 0.53, "learning_rate": 9.850798026913145e-05, "loss": 1.088, "step": 4700 }, { "epoch": 0.53, "learning_rate": 9.849028187601091e-05, "loss": 1.1256, "step": 4720 }, { "epoch": 0.53, "learning_rate": 9.847248074021312e-05, "loss": 1.0979, "step": 4740 }, { "epoch": 0.53, "learning_rate": 9.845457689945567e-05, "loss": 1.1227, "step": 4760 }, { "epoch": 0.54, "learning_rate": 9.84365703916738e-05, "loss": 1.1265, "step": 4780 }, { "epoch": 0.54, "learning_rate": 9.841846125502021e-05, "loss": 1.0997, "step": 4800 }, { "epoch": 0.54, "learning_rate": 9.840024952786508e-05, "loss": 1.0951, "step": 4820 }, { "epoch": 0.54, "learning_rate": 9.838193524879599e-05, "loss": 1.1091, "step": 4840 }, { "epoch": 0.55, "learning_rate": 9.836351845661777e-05, "loss": 1.1148, "step": 4860 }, { "epoch": 0.55, "learning_rate": 9.834499919035249e-05, "loss": 1.1211, "step": 4880 }, { "epoch": 0.55, "learning_rate": 9.832637748923934e-05, "loss": 1.1033, "step": 4900 }, { "epoch": 0.55, "learning_rate": 9.830765339273454e-05, "loss": 1.0925, "step": 4920 }, { "epoch": 0.56, "learning_rate": 9.828882694051124e-05, "loss": 1.1061, "step": 4940 }, { "epoch": 0.56, "learning_rate": 9.826989817245953e-05, "loss": 1.1083, "step": 4960 }, { "epoch": 0.56, "learning_rate": 9.825086712868625e-05, "loss": 1.0864, "step": 4980 }, { "epoch": 0.56, "learning_rate": 9.823173384951496e-05, "loss": 1.1202, "step": 5000 }, { "epoch": 0.56, "learning_rate": 9.821249837548582e-05, "loss": 1.101, "step": 5020 }, { "epoch": 0.57, "learning_rate": 9.819316074735554e-05, "loss": 1.1191, "step": 5040 }, { "epoch": 0.57, "learning_rate": 9.817372100609726e-05, "loss": 1.0535, "step": 5060 }, { "epoch": 0.57, "learning_rate": 9.81541791929005e-05, "loss": 1.1082, "step": 5080 }, { "epoch": 0.57, "learning_rate": 9.813453534917105e-05, "loss": 1.1366, "step": 5100 }, { "epoch": 0.58, "learning_rate": 9.811478951653088e-05, "loss": 1.072, "step": 5120 }, { "epoch": 0.58, "learning_rate": 9.809494173681804e-05, "loss": 1.0954, "step": 5140 }, { "epoch": 0.58, "learning_rate": 9.807499205208663e-05, "loss": 1.1014, "step": 5160 }, { "epoch": 0.58, "learning_rate": 9.805494050460666e-05, "loss": 1.0966, "step": 5180 }, { "epoch": 0.58, "learning_rate": 9.803478713686391e-05, "loss": 1.1035, "step": 5200 }, { "epoch": 0.59, "learning_rate": 9.801453199155996e-05, "loss": 1.1061, "step": 5220 }, { "epoch": 0.59, "learning_rate": 9.799417511161206e-05, "loss": 1.0943, "step": 5240 }, { "epoch": 0.59, "learning_rate": 9.797371654015296e-05, "loss": 1.0943, "step": 5260 }, { "epoch": 0.59, "learning_rate": 9.795315632053088e-05, "loss": 1.0939, "step": 5280 }, { "epoch": 0.6, "learning_rate": 9.793249449630946e-05, "loss": 1.083, "step": 5300 }, { "epoch": 0.6, "learning_rate": 9.791173111126759e-05, "loss": 1.0958, "step": 5320 }, { "epoch": 0.6, "learning_rate": 9.789086620939936e-05, "loss": 1.0671, "step": 5340 }, { "epoch": 0.6, "learning_rate": 9.786989983491397e-05, "loss": 1.0766, "step": 5360 }, { "epoch": 0.6, "learning_rate": 9.784883203223558e-05, "loss": 1.1059, "step": 5380 }, { "epoch": 0.61, "learning_rate": 9.782766284600332e-05, "loss": 1.1136, "step": 5400 }, { "epoch": 0.61, "learning_rate": 9.780639232107108e-05, "loss": 1.1049, "step": 5420 }, { "epoch": 0.61, "learning_rate": 9.778502050250749e-05, "loss": 1.0934, "step": 5440 }, { "epoch": 0.61, "learning_rate": 9.776354743559583e-05, "loss": 1.0905, "step": 5460 }, { "epoch": 0.62, "learning_rate": 9.774197316583387e-05, "loss": 1.0722, "step": 5480 }, { "epoch": 0.62, "learning_rate": 9.77202977389338e-05, "loss": 1.0761, "step": 5500 }, { "epoch": 0.62, "learning_rate": 9.769852120082222e-05, "loss": 1.0705, "step": 5520 }, { "epoch": 0.62, "learning_rate": 9.767664359763991e-05, "loss": 1.0709, "step": 5540 }, { "epoch": 0.62, "learning_rate": 9.765466497574175e-05, "loss": 1.0927, "step": 5560 }, { "epoch": 0.63, "learning_rate": 9.763258538169675e-05, "loss": 1.0942, "step": 5580 }, { "epoch": 0.63, "learning_rate": 9.761040486228783e-05, "loss": 1.1113, "step": 5600 }, { "epoch": 0.63, "learning_rate": 9.758812346451171e-05, "loss": 1.0902, "step": 5620 }, { "epoch": 0.63, "learning_rate": 9.756574123557893e-05, "loss": 1.1216, "step": 5640 }, { "epoch": 0.64, "learning_rate": 9.754325822291362e-05, "loss": 1.1029, "step": 5660 }, { "epoch": 0.64, "learning_rate": 9.752067447415342e-05, "loss": 1.1237, "step": 5680 }, { "epoch": 0.64, "learning_rate": 9.749799003714954e-05, "loss": 1.0988, "step": 5700 }, { "epoch": 0.64, "learning_rate": 9.747520495996641e-05, "loss": 1.087, "step": 5720 }, { "epoch": 0.64, "learning_rate": 9.745231929088174e-05, "loss": 1.0668, "step": 5740 }, { "epoch": 0.65, "learning_rate": 9.74293330783864e-05, "loss": 1.0756, "step": 5760 }, { "epoch": 0.65, "learning_rate": 9.740624637118425e-05, "loss": 1.1091, "step": 5780 }, { "epoch": 0.65, "learning_rate": 9.73830592181921e-05, "loss": 1.0985, "step": 5800 }, { "epoch": 0.65, "learning_rate": 9.735977166853962e-05, "loss": 1.0667, "step": 5820 }, { "epoch": 0.66, "learning_rate": 9.733638377156915e-05, "loss": 1.0753, "step": 5840 }, { "epoch": 0.66, "learning_rate": 9.731289557683567e-05, "loss": 1.0869, "step": 5860 }, { "epoch": 0.66, "learning_rate": 9.72893071341067e-05, "loss": 1.0944, "step": 5880 }, { "epoch": 0.66, "learning_rate": 9.726561849336216e-05, "loss": 1.0817, "step": 5900 }, { "epoch": 0.67, "learning_rate": 9.724182970479422e-05, "loss": 1.1044, "step": 5920 }, { "epoch": 0.67, "learning_rate": 9.72179408188073e-05, "loss": 1.1296, "step": 5940 }, { "epoch": 0.67, "learning_rate": 9.71939518860179e-05, "loss": 1.0678, "step": 5960 }, { "epoch": 0.67, "learning_rate": 9.71698629572545e-05, "loss": 1.0837, "step": 5980 }, { "epoch": 0.67, "learning_rate": 9.714567408355744e-05, "loss": 1.0911, "step": 6000 }, { "epoch": 0.68, "learning_rate": 9.712138531617883e-05, "loss": 1.0979, "step": 6020 }, { "epoch": 0.68, "learning_rate": 9.709699670658248e-05, "loss": 1.0742, "step": 6040 }, { "epoch": 0.68, "learning_rate": 9.707250830644367e-05, "loss": 1.0689, "step": 6060 }, { "epoch": 0.68, "learning_rate": 9.704792016764922e-05, "loss": 1.1154, "step": 6080 }, { "epoch": 0.69, "learning_rate": 9.702323234229717e-05, "loss": 1.089, "step": 6100 }, { "epoch": 0.69, "learning_rate": 9.699844488269687e-05, "loss": 1.0787, "step": 6120 }, { "epoch": 0.69, "learning_rate": 9.69735578413687e-05, "loss": 1.0688, "step": 6140 }, { "epoch": 0.69, "learning_rate": 9.69485712710441e-05, "loss": 1.0665, "step": 6160 }, { "epoch": 0.69, "learning_rate": 9.692348522466537e-05, "loss": 1.0686, "step": 6180 }, { "epoch": 0.7, "learning_rate": 9.689829975538559e-05, "loss": 1.0872, "step": 6200 }, { "epoch": 0.7, "learning_rate": 9.687301491656849e-05, "loss": 1.0818, "step": 6220 }, { "epoch": 0.7, "learning_rate": 9.684763076178836e-05, "loss": 1.0844, "step": 6240 }, { "epoch": 0.7, "learning_rate": 9.682214734482989e-05, "loss": 1.0977, "step": 6260 }, { "epoch": 0.71, "learning_rate": 9.679656471968814e-05, "loss": 1.072, "step": 6280 }, { "epoch": 0.71, "learning_rate": 9.677088294056833e-05, "loss": 1.0869, "step": 6300 }, { "epoch": 0.71, "learning_rate": 9.674510206188584e-05, "loss": 1.0839, "step": 6320 }, { "epoch": 0.71, "learning_rate": 9.671922213826589e-05, "loss": 1.077, "step": 6340 }, { "epoch": 0.71, "learning_rate": 9.669324322454373e-05, "loss": 1.0974, "step": 6360 }, { "epoch": 0.72, "learning_rate": 9.666716537576422e-05, "loss": 1.1057, "step": 6380 }, { "epoch": 0.72, "learning_rate": 9.664098864718191e-05, "loss": 1.0543, "step": 6400 }, { "epoch": 0.72, "learning_rate": 9.661471309426085e-05, "loss": 1.0699, "step": 6420 }, { "epoch": 0.72, "learning_rate": 9.658833877267448e-05, "loss": 1.091, "step": 6440 }, { "epoch": 0.73, "learning_rate": 9.65618657383055e-05, "loss": 1.0425, "step": 6460 }, { "epoch": 0.73, "learning_rate": 9.653529404724578e-05, "loss": 1.0519, "step": 6480 }, { "epoch": 0.73, "learning_rate": 9.650862375579622e-05, "loss": 1.1099, "step": 6500 }, { "epoch": 0.73, "learning_rate": 9.648185492046663e-05, "loss": 1.1131, "step": 6520 }, { "epoch": 0.73, "learning_rate": 9.645498759797566e-05, "loss": 1.1014, "step": 6540 }, { "epoch": 0.74, "learning_rate": 9.642802184525058e-05, "loss": 1.0645, "step": 6560 }, { "epoch": 0.74, "learning_rate": 9.640095771942725e-05, "loss": 1.0989, "step": 6580 }, { "epoch": 0.74, "learning_rate": 9.637379527784997e-05, "loss": 1.1347, "step": 6600 }, { "epoch": 0.74, "learning_rate": 9.634653457807135e-05, "loss": 1.1031, "step": 6620 }, { "epoch": 0.75, "learning_rate": 9.631917567785213e-05, "loss": 1.1107, "step": 6640 }, { "epoch": 0.75, "learning_rate": 9.629171863516126e-05, "loss": 1.0845, "step": 6660 }, { "epoch": 0.75, "learning_rate": 9.626416350817549e-05, "loss": 1.0804, "step": 6680 }, { "epoch": 0.75, "learning_rate": 9.623651035527947e-05, "loss": 1.081, "step": 6700 }, { "epoch": 0.76, "learning_rate": 9.620875923506556e-05, "loss": 1.1133, "step": 6720 }, { "epoch": 0.76, "learning_rate": 9.618091020633365e-05, "loss": 1.0605, "step": 6740 }, { "epoch": 0.76, "learning_rate": 9.615296332809112e-05, "loss": 1.034, "step": 6760 }, { "epoch": 0.76, "learning_rate": 9.612491865955265e-05, "loss": 1.0704, "step": 6780 }, { "epoch": 0.76, "learning_rate": 9.609677626014015e-05, "loss": 1.0791, "step": 6800 }, { "epoch": 0.77, "learning_rate": 9.606853618948256e-05, "loss": 1.0931, "step": 6820 }, { "epoch": 0.77, "learning_rate": 9.604019850741582e-05, "loss": 1.0579, "step": 6840 }, { "epoch": 0.77, "learning_rate": 9.601176327398267e-05, "loss": 1.0252, "step": 6860 }, { "epoch": 0.77, "learning_rate": 9.598323054943252e-05, "loss": 1.0754, "step": 6880 }, { "epoch": 0.78, "learning_rate": 9.59546003942214e-05, "loss": 1.0665, "step": 6900 }, { "epoch": 0.78, "learning_rate": 9.592587286901172e-05, "loss": 1.1004, "step": 6920 }, { "epoch": 0.78, "learning_rate": 9.589704803467225e-05, "loss": 1.1064, "step": 6940 }, { "epoch": 0.78, "learning_rate": 9.586812595227792e-05, "loss": 1.0677, "step": 6960 }, { "epoch": 0.78, "learning_rate": 9.583910668310971e-05, "loss": 1.0727, "step": 6980 }, { "epoch": 0.79, "learning_rate": 9.580999028865452e-05, "loss": 1.0799, "step": 7000 }, { "epoch": 0.79, "learning_rate": 9.578077683060507e-05, "loss": 1.0665, "step": 7020 }, { "epoch": 0.79, "learning_rate": 9.57514663708597e-05, "loss": 1.0729, "step": 7040 }, { "epoch": 0.79, "learning_rate": 9.572205897152229e-05, "loss": 1.0472, "step": 7060 }, { "epoch": 0.8, "learning_rate": 9.569255469490214e-05, "loss": 1.0633, "step": 7080 }, { "epoch": 0.8, "learning_rate": 9.566295360351383e-05, "loss": 1.0578, "step": 7100 }, { "epoch": 0.8, "learning_rate": 9.563325576007701e-05, "loss": 1.0679, "step": 7120 }, { "epoch": 0.8, "learning_rate": 9.560346122751638e-05, "loss": 1.0878, "step": 7140 }, { "epoch": 0.8, "learning_rate": 9.557357006896152e-05, "loss": 1.0521, "step": 7160 }, { "epoch": 0.81, "learning_rate": 9.554358234774669e-05, "loss": 1.0543, "step": 7180 }, { "epoch": 0.81, "learning_rate": 9.55134981274108e-05, "loss": 1.0781, "step": 7200 }, { "epoch": 0.81, "learning_rate": 9.548331747169719e-05, "loss": 1.0668, "step": 7220 }, { "epoch": 0.81, "learning_rate": 9.545304044455357e-05, "loss": 1.0908, "step": 7240 }, { "epoch": 0.82, "learning_rate": 9.542266711013182e-05, "loss": 1.0535, "step": 7260 }, { "epoch": 0.82, "learning_rate": 9.539219753278785e-05, "loss": 1.0587, "step": 7280 }, { "epoch": 0.82, "learning_rate": 9.536163177708155e-05, "loss": 1.0406, "step": 7300 }, { "epoch": 0.82, "learning_rate": 9.533096990777657e-05, "loss": 1.0645, "step": 7320 }, { "epoch": 0.82, "learning_rate": 9.530021198984019e-05, "loss": 1.0425, "step": 7340 }, { "epoch": 0.83, "learning_rate": 9.526935808844324e-05, "loss": 1.0564, "step": 7360 }, { "epoch": 0.83, "learning_rate": 9.523840826895988e-05, "loss": 1.0811, "step": 7380 }, { "epoch": 0.83, "learning_rate": 9.520736259696753e-05, "loss": 1.0727, "step": 7400 }, { "epoch": 0.83, "learning_rate": 9.51762211382467e-05, "loss": 1.0687, "step": 7420 }, { "epoch": 0.84, "learning_rate": 9.514498395878086e-05, "loss": 1.0902, "step": 7440 }, { "epoch": 0.84, "learning_rate": 9.51136511247563e-05, "loss": 1.0613, "step": 7460 }, { "epoch": 0.84, "learning_rate": 9.508222270256195e-05, "loss": 1.0809, "step": 7480 }, { "epoch": 0.84, "learning_rate": 9.505069875878934e-05, "loss": 1.0682, "step": 7500 }, { "epoch": 0.84, "learning_rate": 9.501907936023231e-05, "loss": 1.07, "step": 7520 }, { "epoch": 0.85, "learning_rate": 9.498736457388703e-05, "loss": 1.0797, "step": 7540 }, { "epoch": 0.85, "learning_rate": 9.495555446695175e-05, "loss": 1.0468, "step": 7560 }, { "epoch": 0.85, "learning_rate": 9.492364910682668e-05, "loss": 1.0903, "step": 7580 }, { "epoch": 0.85, "learning_rate": 9.489164856111387e-05, "loss": 1.0929, "step": 7600 }, { "epoch": 0.86, "learning_rate": 9.485955289761703e-05, "loss": 1.0669, "step": 7620 }, { "epoch": 0.86, "learning_rate": 9.482736218434143e-05, "loss": 1.0938, "step": 7640 }, { "epoch": 0.86, "learning_rate": 9.479507648949372e-05, "loss": 1.0582, "step": 7660 }, { "epoch": 0.86, "learning_rate": 9.476269588148186e-05, "loss": 1.0793, "step": 7680 }, { "epoch": 0.87, "learning_rate": 9.473022042891477e-05, "loss": 1.0696, "step": 7700 }, { "epoch": 0.87, "learning_rate": 9.469765020060251e-05, "loss": 1.0599, "step": 7720 }, { "epoch": 0.87, "learning_rate": 9.46649852655558e-05, "loss": 1.0667, "step": 7740 }, { "epoch": 0.87, "learning_rate": 9.463222569298615e-05, "loss": 1.0126, "step": 7760 }, { "epoch": 0.87, "learning_rate": 9.459937155230549e-05, "loss": 1.0694, "step": 7780 }, { "epoch": 0.88, "learning_rate": 9.456642291312618e-05, "loss": 1.0566, "step": 7800 }, { "epoch": 0.88, "learning_rate": 9.45333798452608e-05, "loss": 1.0664, "step": 7820 }, { "epoch": 0.88, "learning_rate": 9.450024241872201e-05, "loss": 1.0408, "step": 7840 }, { "epoch": 0.88, "learning_rate": 9.446701070372237e-05, "loss": 1.0734, "step": 7860 }, { "epoch": 0.89, "learning_rate": 9.443368477067427e-05, "loss": 1.0604, "step": 7880 }, { "epoch": 0.89, "learning_rate": 9.440026469018968e-05, "loss": 1.0746, "step": 7900 }, { "epoch": 0.89, "learning_rate": 9.43667505330801e-05, "loss": 1.0358, "step": 7920 }, { "epoch": 0.89, "learning_rate": 9.433314237035631e-05, "loss": 1.0631, "step": 7940 }, { "epoch": 0.89, "learning_rate": 9.429944027322834e-05, "loss": 1.0296, "step": 7960 }, { "epoch": 0.9, "learning_rate": 9.426564431310521e-05, "loss": 1.0412, "step": 7980 }, { "epoch": 0.9, "learning_rate": 9.42317545615948e-05, "loss": 1.0554, "step": 8000 }, { "epoch": 0.9, "learning_rate": 9.419777109050376e-05, "loss": 1.0978, "step": 8020 }, { "epoch": 0.9, "learning_rate": 9.416369397183728e-05, "loss": 1.0215, "step": 8040 }, { "epoch": 0.91, "learning_rate": 9.4129523277799e-05, "loss": 1.0424, "step": 8060 }, { "epoch": 0.91, "learning_rate": 9.409525908079082e-05, "loss": 1.0453, "step": 8080 }, { "epoch": 0.91, "learning_rate": 9.406090145341277e-05, "loss": 1.0588, "step": 8100 }, { "epoch": 0.91, "learning_rate": 9.402645046846281e-05, "loss": 1.0439, "step": 8120 }, { "epoch": 0.91, "learning_rate": 9.399190619893676e-05, "loss": 1.0501, "step": 8140 }, { "epoch": 0.92, "learning_rate": 9.395726871802804e-05, "loss": 1.0565, "step": 8160 }, { "epoch": 0.92, "learning_rate": 9.392253809912758e-05, "loss": 1.0456, "step": 8180 }, { "epoch": 0.92, "learning_rate": 9.388771441582369e-05, "loss": 1.0547, "step": 8200 }, { "epoch": 0.92, "learning_rate": 9.385279774190184e-05, "loss": 1.0344, "step": 8220 }, { "epoch": 0.93, "learning_rate": 9.381778815134455e-05, "loss": 1.0286, "step": 8240 }, { "epoch": 0.93, "learning_rate": 9.378268571833116e-05, "loss": 1.1033, "step": 8260 }, { "epoch": 0.93, "learning_rate": 9.374749051723781e-05, "loss": 1.0667, "step": 8280 }, { "epoch": 0.93, "learning_rate": 9.371220262263713e-05, "loss": 1.0416, "step": 8300 }, { "epoch": 0.93, "learning_rate": 9.36768221092982e-05, "loss": 1.0674, "step": 8320 }, { "epoch": 0.94, "learning_rate": 9.364134905218632e-05, "loss": 1.0305, "step": 8340 }, { "epoch": 0.94, "learning_rate": 9.360578352646285e-05, "loss": 1.0604, "step": 8360 }, { "epoch": 0.94, "learning_rate": 9.357012560748513e-05, "loss": 1.061, "step": 8380 }, { "epoch": 0.94, "learning_rate": 9.353437537080625e-05, "loss": 1.0678, "step": 8400 }, { "epoch": 0.95, "learning_rate": 9.349853289217485e-05, "loss": 1.0767, "step": 8420 }, { "epoch": 0.95, "learning_rate": 9.34625982475351e-05, "loss": 1.0562, "step": 8440 }, { "epoch": 0.95, "learning_rate": 9.342657151302637e-05, "loss": 1.0301, "step": 8460 }, { "epoch": 0.95, "learning_rate": 9.339045276498325e-05, "loss": 1.063, "step": 8480 }, { "epoch": 0.96, "learning_rate": 9.33542420799352e-05, "loss": 1.0157, "step": 8500 }, { "epoch": 0.96, "learning_rate": 9.331793953460653e-05, "loss": 1.0564, "step": 8520 }, { "epoch": 0.96, "learning_rate": 9.328154520591614e-05, "loss": 1.0817, "step": 8540 }, { "epoch": 0.96, "learning_rate": 9.324505917097749e-05, "loss": 1.0453, "step": 8560 }, { "epoch": 0.96, "learning_rate": 9.320848150709826e-05, "loss": 1.0442, "step": 8580 }, { "epoch": 0.97, "learning_rate": 9.317181229178031e-05, "loss": 1.0379, "step": 8600 }, { "epoch": 0.97, "learning_rate": 9.313505160271952e-05, "loss": 1.0686, "step": 8620 }, { "epoch": 0.97, "learning_rate": 9.30981995178055e-05, "loss": 1.0417, "step": 8640 }, { "epoch": 0.97, "learning_rate": 9.306125611512159e-05, "loss": 1.0569, "step": 8660 }, { "epoch": 0.98, "learning_rate": 9.302422147294458e-05, "loss": 1.0646, "step": 8680 }, { "epoch": 0.98, "learning_rate": 9.298709566974462e-05, "loss": 1.0439, "step": 8700 }, { "epoch": 0.98, "learning_rate": 9.294987878418495e-05, "loss": 1.0529, "step": 8720 }, { "epoch": 0.98, "learning_rate": 9.291257089512185e-05, "loss": 1.0369, "step": 8740 }, { "epoch": 0.98, "learning_rate": 9.287517208160439e-05, "loss": 1.0509, "step": 8760 }, { "epoch": 0.99, "learning_rate": 9.283768242287433e-05, "loss": 1.0825, "step": 8780 }, { "epoch": 0.99, "learning_rate": 9.280010199836588e-05, "loss": 1.0583, "step": 8800 }, { "epoch": 0.99, "learning_rate": 9.276243088770559e-05, "loss": 1.0528, "step": 8820 }, { "epoch": 0.99, "learning_rate": 9.272466917071216e-05, "loss": 1.0307, "step": 8840 }, { "epoch": 1.0, "learning_rate": 9.268681692739623e-05, "loss": 1.0538, "step": 8860 }, { "epoch": 1.0, "learning_rate": 9.264887423796029e-05, "loss": 1.0459, "step": 8880 }, { "epoch": 1.0, "learning_rate": 9.261084118279847e-05, "loss": 1.052, "step": 8900 }, { "epoch": 1.0, "learning_rate": 9.257271784249635e-05, "loss": 0.9985, "step": 8920 }, { "epoch": 1.0, "learning_rate": 9.253450429783081e-05, "loss": 1.0312, "step": 8940 }, { "epoch": 1.01, "learning_rate": 9.249620062976988e-05, "loss": 1.0433, "step": 8960 }, { "epoch": 1.01, "learning_rate": 9.245780691947252e-05, "loss": 1.0061, "step": 8980 }, { "epoch": 1.01, "learning_rate": 9.24193232482885e-05, "loss": 1.0237, "step": 9000 }, { "epoch": 1.01, "learning_rate": 9.238074969775818e-05, "loss": 1.0311, "step": 9020 }, { "epoch": 1.02, "learning_rate": 9.234208634961236e-05, "loss": 1.0467, "step": 9040 }, { "epoch": 1.02, "learning_rate": 9.230333328577212e-05, "loss": 1.0217, "step": 9060 }, { "epoch": 1.02, "learning_rate": 9.226449058834863e-05, "loss": 1.024, "step": 9080 }, { "epoch": 1.02, "learning_rate": 9.222555833964296e-05, "loss": 1.0373, "step": 9100 }, { "epoch": 1.02, "learning_rate": 9.218653662214593e-05, "loss": 1.0248, "step": 9120 }, { "epoch": 1.03, "learning_rate": 9.214742551853798e-05, "loss": 1.0597, "step": 9140 }, { "epoch": 1.03, "learning_rate": 9.210822511168884e-05, "loss": 1.0138, "step": 9160 }, { "epoch": 1.03, "learning_rate": 9.206893548465758e-05, "loss": 1.0406, "step": 9180 }, { "epoch": 1.03, "learning_rate": 9.20295567206922e-05, "loss": 1.0399, "step": 9200 }, { "epoch": 1.04, "learning_rate": 9.199008890322963e-05, "loss": 1.0282, "step": 9220 }, { "epoch": 1.04, "learning_rate": 9.19505321158955e-05, "loss": 1.0156, "step": 9240 }, { "epoch": 1.04, "learning_rate": 9.191088644250389e-05, "loss": 1.0146, "step": 9260 }, { "epoch": 1.04, "learning_rate": 9.187115196705731e-05, "loss": 0.9898, "step": 9280 }, { "epoch": 1.04, "learning_rate": 9.183132877374631e-05, "loss": 1.0027, "step": 9300 }, { "epoch": 1.05, "learning_rate": 9.17914169469495e-05, "loss": 1.0047, "step": 9320 }, { "epoch": 1.05, "learning_rate": 9.17514165712333e-05, "loss": 1.0628, "step": 9340 }, { "epoch": 1.05, "learning_rate": 9.171132773135165e-05, "loss": 1.041, "step": 9360 }, { "epoch": 1.05, "learning_rate": 9.167115051224606e-05, "loss": 1.0387, "step": 9380 }, { "epoch": 1.06, "learning_rate": 9.16308849990452e-05, "loss": 1.0521, "step": 9400 }, { "epoch": 1.06, "learning_rate": 9.159053127706487e-05, "loss": 0.9995, "step": 9420 }, { "epoch": 1.06, "learning_rate": 9.155008943180776e-05, "loss": 1.0068, "step": 9440 }, { "epoch": 1.06, "learning_rate": 9.150955954896327e-05, "loss": 1.0396, "step": 9460 }, { "epoch": 1.07, "learning_rate": 9.146894171440735e-05, "loss": 0.9964, "step": 9480 }, { "epoch": 1.07, "learning_rate": 9.14282360142023e-05, "loss": 0.9995, "step": 9500 }, { "epoch": 1.07, "learning_rate": 9.138744253459658e-05, "loss": 1.0396, "step": 9520 }, { "epoch": 1.07, "learning_rate": 9.134656136202466e-05, "loss": 1.0167, "step": 9540 }, { "epoch": 1.07, "learning_rate": 9.130559258310679e-05, "loss": 1.0319, "step": 9560 }, { "epoch": 1.08, "learning_rate": 9.126453628464888e-05, "loss": 1.0222, "step": 9580 }, { "epoch": 1.08, "learning_rate": 9.122339255364224e-05, "loss": 0.9881, "step": 9600 }, { "epoch": 1.08, "learning_rate": 9.118216147726347e-05, "loss": 1.0193, "step": 9620 }, { "epoch": 1.08, "learning_rate": 9.11408431428742e-05, "loss": 1.0327, "step": 9640 }, { "epoch": 1.09, "learning_rate": 9.109943763802097e-05, "loss": 1.0149, "step": 9660 }, { "epoch": 1.09, "learning_rate": 9.105794505043505e-05, "loss": 0.9916, "step": 9680 }, { "epoch": 1.09, "learning_rate": 9.101636546803218e-05, "loss": 1.0443, "step": 9700 }, { "epoch": 1.09, "learning_rate": 9.09746989789124e-05, "loss": 1.0485, "step": 9720 }, { "epoch": 1.09, "learning_rate": 9.093294567135998e-05, "loss": 1.0536, "step": 9740 }, { "epoch": 1.1, "learning_rate": 9.089110563384304e-05, "loss": 1.0167, "step": 9760 }, { "epoch": 1.1, "learning_rate": 9.084917895501357e-05, "loss": 1.0192, "step": 9780 }, { "epoch": 1.1, "learning_rate": 9.080716572370704e-05, "loss": 1.0163, "step": 9800 }, { "epoch": 1.1, "learning_rate": 9.07650660289424e-05, "loss": 1.0041, "step": 9820 }, { "epoch": 1.11, "learning_rate": 9.072287995992172e-05, "loss": 1.0364, "step": 9840 }, { "epoch": 1.11, "learning_rate": 9.068060760603014e-05, "loss": 1.0567, "step": 9860 }, { "epoch": 1.11, "learning_rate": 9.063824905683562e-05, "loss": 1.0234, "step": 9880 }, { "epoch": 1.11, "learning_rate": 9.059580440208869e-05, "loss": 1.0011, "step": 9900 }, { "epoch": 1.11, "learning_rate": 9.05532737317224e-05, "loss": 1.0375, "step": 9920 }, { "epoch": 1.12, "learning_rate": 9.051065713585203e-05, "loss": 1.0142, "step": 9940 }, { "epoch": 1.12, "learning_rate": 9.04679547047749e-05, "loss": 1.0143, "step": 9960 }, { "epoch": 1.12, "learning_rate": 9.04251665289702e-05, "loss": 1.0232, "step": 9980 }, { "epoch": 1.12, "learning_rate": 9.038229269909883e-05, "loss": 1.0121, "step": 10000 }, { "epoch": 1.13, "learning_rate": 9.033933330600316e-05, "loss": 1.0262, "step": 10020 }, { "epoch": 1.13, "learning_rate": 9.029628844070686e-05, "loss": 1.0398, "step": 10040 }, { "epoch": 1.13, "learning_rate": 9.025315819441467e-05, "loss": 1.0101, "step": 10060 }, { "epoch": 1.13, "learning_rate": 9.020994265851226e-05, "loss": 1.0367, "step": 10080 }, { "epoch": 1.13, "learning_rate": 9.016664192456604e-05, "loss": 0.9912, "step": 10100 }, { "epoch": 1.14, "learning_rate": 9.012325608432291e-05, "loss": 1.0516, "step": 10120 }, { "epoch": 1.14, "learning_rate": 9.00797852297101e-05, "loss": 1.0297, "step": 10140 }, { "epoch": 1.14, "learning_rate": 9.003622945283496e-05, "loss": 1.0401, "step": 10160 }, { "epoch": 1.14, "learning_rate": 8.99925888459848e-05, "loss": 1.0365, "step": 10180 }, { "epoch": 1.15, "learning_rate": 8.994886350162666e-05, "loss": 1.0227, "step": 10200 }, { "epoch": 1.15, "learning_rate": 8.990505351240714e-05, "loss": 1.0176, "step": 10220 }, { "epoch": 1.15, "learning_rate": 8.986115897115213e-05, "loss": 1.0419, "step": 10240 }, { "epoch": 1.15, "learning_rate": 8.981717997086674e-05, "loss": 1.0206, "step": 10260 }, { "epoch": 1.16, "learning_rate": 8.977311660473499e-05, "loss": 0.9932, "step": 10280 }, { "epoch": 1.16, "learning_rate": 8.972896896611971e-05, "loss": 1.0083, "step": 10300 }, { "epoch": 1.16, "learning_rate": 8.968473714856222e-05, "loss": 1.0006, "step": 10320 }, { "epoch": 1.16, "learning_rate": 8.964042124578224e-05, "loss": 1.0136, "step": 10340 }, { "epoch": 1.16, "learning_rate": 8.959602135167766e-05, "loss": 1.0254, "step": 10360 }, { "epoch": 1.17, "learning_rate": 8.955153756032428e-05, "loss": 0.9969, "step": 10380 }, { "epoch": 1.17, "learning_rate": 8.950696996597576e-05, "loss": 0.9852, "step": 10400 }, { "epoch": 1.17, "learning_rate": 8.946231866306325e-05, "loss": 0.9993, "step": 10420 }, { "epoch": 1.17, "learning_rate": 8.941758374619525e-05, "loss": 1.019, "step": 10440 }, { "epoch": 1.18, "learning_rate": 8.93727653101575e-05, "loss": 1.0028, "step": 10460 }, { "epoch": 1.18, "learning_rate": 8.932786344991264e-05, "loss": 1.028, "step": 10480 }, { "epoch": 1.18, "learning_rate": 8.928287826060009e-05, "loss": 1.0152, "step": 10500 }, { "epoch": 1.18, "learning_rate": 8.923780983753583e-05, "loss": 1.0209, "step": 10520 }, { "epoch": 1.18, "learning_rate": 8.919265827621218e-05, "loss": 1.0409, "step": 10540 }, { "epoch": 1.19, "learning_rate": 8.914742367229768e-05, "loss": 1.0155, "step": 10560 }, { "epoch": 1.19, "learning_rate": 8.910210612163673e-05, "loss": 0.994, "step": 10580 }, { "epoch": 1.19, "learning_rate": 8.905670572024958e-05, "loss": 1.0145, "step": 10600 }, { "epoch": 1.19, "learning_rate": 8.901122256433195e-05, "loss": 1.0211, "step": 10620 }, { "epoch": 1.2, "learning_rate": 8.89656567502549e-05, "loss": 0.9977, "step": 10640 }, { "epoch": 1.2, "learning_rate": 8.89200083745647e-05, "loss": 0.9889, "step": 10660 }, { "epoch": 1.2, "learning_rate": 8.887427753398248e-05, "loss": 1.0104, "step": 10680 }, { "epoch": 1.2, "learning_rate": 8.882846432540413e-05, "loss": 1.0535, "step": 10700 }, { "epoch": 1.2, "learning_rate": 8.87825688459001e-05, "loss": 1.0043, "step": 10720 }, { "epoch": 1.21, "learning_rate": 8.873659119271507e-05, "loss": 1.0144, "step": 10740 }, { "epoch": 1.21, "learning_rate": 8.869053146326793e-05, "loss": 1.0118, "step": 10760 }, { "epoch": 1.21, "learning_rate": 8.864438975515141e-05, "loss": 1.0366, "step": 10780 }, { "epoch": 1.21, "learning_rate": 8.859816616613194e-05, "loss": 1.0125, "step": 10800 }, { "epoch": 1.22, "learning_rate": 8.855186079414949e-05, "loss": 0.9938, "step": 10820 }, { "epoch": 1.22, "learning_rate": 8.850547373731727e-05, "loss": 1.0046, "step": 10840 }, { "epoch": 1.22, "learning_rate": 8.845900509392158e-05, "loss": 1.0199, "step": 10860 }, { "epoch": 1.22, "learning_rate": 8.841245496242157e-05, "loss": 1.0019, "step": 10880 }, { "epoch": 1.22, "learning_rate": 8.836582344144911e-05, "loss": 0.9985, "step": 10900 }, { "epoch": 1.23, "learning_rate": 8.831911062980845e-05, "loss": 1.0138, "step": 10920 }, { "epoch": 1.23, "learning_rate": 8.827231662647611e-05, "loss": 1.0053, "step": 10940 }, { "epoch": 1.23, "learning_rate": 8.822544153060064e-05, "loss": 1.012, "step": 10960 }, { "epoch": 1.23, "learning_rate": 8.817848544150243e-05, "loss": 1.0178, "step": 10980 }, { "epoch": 1.24, "learning_rate": 8.813144845867345e-05, "loss": 1.0143, "step": 11000 }, { "epoch": 1.24, "learning_rate": 8.808433068177708e-05, "loss": 1.0087, "step": 11020 }, { "epoch": 1.24, "learning_rate": 8.80371322106479e-05, "loss": 1.0242, "step": 11040 }, { "epoch": 1.24, "learning_rate": 8.798985314529146e-05, "loss": 1.0364, "step": 11060 }, { "epoch": 1.24, "learning_rate": 8.794249358588407e-05, "loss": 0.9967, "step": 11080 }, { "epoch": 1.25, "learning_rate": 8.789505363277259e-05, "loss": 1.0144, "step": 11100 }, { "epoch": 1.25, "learning_rate": 8.784753338647424e-05, "loss": 1.016, "step": 11120 }, { "epoch": 1.25, "learning_rate": 8.779993294767635e-05, "loss": 1.0061, "step": 11140 }, { "epoch": 1.25, "learning_rate": 8.77522524172362e-05, "loss": 1.0166, "step": 11160 }, { "epoch": 1.26, "learning_rate": 8.770449189618069e-05, "loss": 1.0228, "step": 11180 }, { "epoch": 1.26, "learning_rate": 8.76566514857063e-05, "loss": 1.0037, "step": 11200 }, { "epoch": 1.26, "learning_rate": 8.76087312871787e-05, "loss": 1.033, "step": 11220 }, { "epoch": 1.26, "learning_rate": 8.75607314021327e-05, "loss": 1.0347, "step": 11240 }, { "epoch": 1.27, "learning_rate": 8.751265193227189e-05, "loss": 0.9958, "step": 11260 }, { "epoch": 1.27, "learning_rate": 8.746449297946853e-05, "loss": 1.0154, "step": 11280 }, { "epoch": 1.27, "learning_rate": 8.741625464576322e-05, "loss": 1.0308, "step": 11300 }, { "epoch": 1.27, "learning_rate": 8.736793703336482e-05, "loss": 1.0114, "step": 11320 }, { "epoch": 1.27, "learning_rate": 8.731954024465017e-05, "loss": 1.0231, "step": 11340 }, { "epoch": 1.28, "learning_rate": 8.727106438216384e-05, "loss": 1.01, "step": 11360 }, { "epoch": 1.28, "learning_rate": 8.722250954861795e-05, "loss": 0.983, "step": 11380 }, { "epoch": 1.28, "learning_rate": 8.717387584689195e-05, "loss": 0.9938, "step": 11400 }, { "epoch": 1.28, "learning_rate": 8.712516338003241e-05, "loss": 0.9918, "step": 11420 }, { "epoch": 1.29, "learning_rate": 8.707637225125276e-05, "loss": 1.0061, "step": 11440 }, { "epoch": 1.29, "learning_rate": 8.702750256393316e-05, "loss": 1.0493, "step": 11460 }, { "epoch": 1.29, "learning_rate": 8.697855442162012e-05, "loss": 0.9973, "step": 11480 }, { "epoch": 1.29, "learning_rate": 8.692952792802651e-05, "loss": 1.0343, "step": 11500 }, { "epoch": 1.29, "learning_rate": 8.688042318703111e-05, "loss": 1.0333, "step": 11520 }, { "epoch": 1.3, "learning_rate": 8.683124030267855e-05, "loss": 1.0276, "step": 11540 }, { "epoch": 1.3, "learning_rate": 8.678197937917901e-05, "loss": 1.0016, "step": 11560 }, { "epoch": 1.3, "learning_rate": 8.673264052090801e-05, "loss": 0.9907, "step": 11580 }, { "epoch": 1.3, "learning_rate": 8.668322383240626e-05, "loss": 1.0153, "step": 11600 }, { "epoch": 1.31, "learning_rate": 8.663372941837929e-05, "loss": 1.0084, "step": 11620 }, { "epoch": 1.31, "learning_rate": 8.658415738369737e-05, "loss": 1.0255, "step": 11640 }, { "epoch": 1.31, "learning_rate": 8.653450783339523e-05, "loss": 0.9996, "step": 11660 }, { "epoch": 1.31, "learning_rate": 8.648478087267187e-05, "loss": 0.9922, "step": 11680 }, { "epoch": 1.31, "learning_rate": 8.643497660689024e-05, "loss": 1.021, "step": 11700 }, { "epoch": 1.32, "learning_rate": 8.638509514157715e-05, "loss": 1.0102, "step": 11720 }, { "epoch": 1.32, "learning_rate": 8.633513658242295e-05, "loss": 1.0281, "step": 11740 }, { "epoch": 1.32, "learning_rate": 8.628510103528134e-05, "loss": 1.0185, "step": 11760 }, { "epoch": 1.32, "learning_rate": 8.623498860616918e-05, "loss": 0.9624, "step": 11780 }, { "epoch": 1.33, "learning_rate": 8.618479940126617e-05, "loss": 1.0486, "step": 11800 }, { "epoch": 1.33, "learning_rate": 8.613453352691473e-05, "loss": 1.0069, "step": 11820 }, { "epoch": 1.33, "learning_rate": 8.608419108961971e-05, "loss": 1.0449, "step": 11840 }, { "epoch": 1.33, "learning_rate": 8.603377219604823e-05, "loss": 0.9983, "step": 11860 }, { "epoch": 1.33, "learning_rate": 8.59832769530293e-05, "loss": 1.0164, "step": 11880 }, { "epoch": 1.34, "learning_rate": 8.59327054675538e-05, "loss": 1.0222, "step": 11900 }, { "epoch": 1.34, "learning_rate": 8.588205784677415e-05, "loss": 1.0372, "step": 11920 }, { "epoch": 1.34, "learning_rate": 8.583133419800404e-05, "loss": 1.0078, "step": 11940 }, { "epoch": 1.34, "learning_rate": 8.578053462871827e-05, "loss": 1.0216, "step": 11960 }, { "epoch": 1.35, "learning_rate": 8.57296592465525e-05, "loss": 1.0257, "step": 11980 }, { "epoch": 1.35, "learning_rate": 8.567870815930305e-05, "loss": 0.9813, "step": 12000 }, { "epoch": 1.35, "learning_rate": 8.562768147492662e-05, "loss": 0.9851, "step": 12020 }, { "epoch": 1.35, "learning_rate": 8.557657930154007e-05, "loss": 1.0091, "step": 12040 }, { "epoch": 1.36, "learning_rate": 8.552540174742025e-05, "loss": 0.9911, "step": 12060 }, { "epoch": 1.36, "learning_rate": 8.547414892100373e-05, "loss": 1.0127, "step": 12080 }, { "epoch": 1.36, "learning_rate": 8.542282093088651e-05, "loss": 0.9931, "step": 12100 }, { "epoch": 1.36, "learning_rate": 8.537141788582393e-05, "loss": 0.9987, "step": 12120 }, { "epoch": 1.36, "learning_rate": 8.53199398947303e-05, "loss": 1.0192, "step": 12140 }, { "epoch": 1.37, "learning_rate": 8.526838706667873e-05, "loss": 0.984, "step": 12160 }, { "epoch": 1.37, "learning_rate": 8.521675951090094e-05, "loss": 0.9841, "step": 12180 }, { "epoch": 1.37, "learning_rate": 8.516505733678695e-05, "loss": 1.0117, "step": 12200 }, { "epoch": 1.37, "learning_rate": 8.511328065388488e-05, "loss": 0.9758, "step": 12220 }, { "epoch": 1.38, "learning_rate": 8.506142957190073e-05, "loss": 1.0073, "step": 12240 }, { "epoch": 1.38, "learning_rate": 8.500950420069817e-05, "loss": 1.0178, "step": 12260 }, { "epoch": 1.38, "learning_rate": 8.495750465029821e-05, "loss": 1.0222, "step": 12280 }, { "epoch": 1.38, "learning_rate": 8.490543103087912e-05, "loss": 0.9812, "step": 12300 }, { "epoch": 1.38, "learning_rate": 8.485328345277603e-05, "loss": 1.0102, "step": 12320 }, { "epoch": 1.39, "learning_rate": 8.48010620264808e-05, "loss": 1.009, "step": 12340 }, { "epoch": 1.39, "learning_rate": 8.47487668626418e-05, "loss": 1.0279, "step": 12360 }, { "epoch": 1.39, "learning_rate": 8.469639807206357e-05, "loss": 1.019, "step": 12380 }, { "epoch": 1.39, "learning_rate": 8.46439557657067e-05, "loss": 1.0064, "step": 12400 }, { "epoch": 1.4, "learning_rate": 8.459144005468756e-05, "loss": 1.0037, "step": 12420 }, { "epoch": 1.4, "learning_rate": 8.453885105027802e-05, "loss": 0.9955, "step": 12440 }, { "epoch": 1.4, "learning_rate": 8.448618886390522e-05, "loss": 0.9949, "step": 12460 }, { "epoch": 1.4, "learning_rate": 8.443345360715143e-05, "loss": 0.9902, "step": 12480 }, { "epoch": 1.4, "learning_rate": 8.43806453917537e-05, "loss": 1.026, "step": 12500 }, { "epoch": 1.41, "learning_rate": 8.432776432960366e-05, "loss": 0.983, "step": 12520 }, { "epoch": 1.41, "learning_rate": 8.427481053274734e-05, "loss": 1.0039, "step": 12540 }, { "epoch": 1.41, "learning_rate": 8.422178411338481e-05, "loss": 0.99, "step": 12560 }, { "epoch": 1.41, "learning_rate": 8.416868518387009e-05, "loss": 1.0346, "step": 12580 }, { "epoch": 1.42, "learning_rate": 8.411551385671077e-05, "loss": 1.0002, "step": 12600 }, { "epoch": 1.42, "learning_rate": 8.406227024456788e-05, "loss": 0.9916, "step": 12620 }, { "epoch": 1.42, "learning_rate": 8.400895446025558e-05, "loss": 1.0303, "step": 12640 }, { "epoch": 1.42, "learning_rate": 8.3955566616741e-05, "loss": 0.982, "step": 12660 }, { "epoch": 1.42, "learning_rate": 8.39021068271439e-05, "loss": 0.996, "step": 12680 }, { "epoch": 1.43, "learning_rate": 8.38485752047365e-05, "loss": 1.0142, "step": 12700 }, { "epoch": 1.43, "learning_rate": 8.379497186294322e-05, "loss": 1.032, "step": 12720 }, { "epoch": 1.43, "learning_rate": 8.374129691534046e-05, "loss": 1.0146, "step": 12740 }, { "epoch": 1.43, "learning_rate": 8.36875504756563e-05, "loss": 1.0217, "step": 12760 }, { "epoch": 1.44, "learning_rate": 8.363373265777034e-05, "loss": 0.9988, "step": 12780 }, { "epoch": 1.44, "learning_rate": 8.357984357571337e-05, "loss": 1.0119, "step": 12800 }, { "epoch": 1.44, "learning_rate": 8.352588334366728e-05, "loss": 0.9801, "step": 12820 }, { "epoch": 1.44, "learning_rate": 8.347185207596457e-05, "loss": 0.9835, "step": 12840 }, { "epoch": 1.44, "learning_rate": 8.341774988708837e-05, "loss": 0.9979, "step": 12860 }, { "epoch": 1.45, "learning_rate": 8.336357689167203e-05, "loss": 1.0108, "step": 12880 }, { "epoch": 1.45, "learning_rate": 8.33093332044989e-05, "loss": 0.9943, "step": 12900 }, { "epoch": 1.45, "learning_rate": 8.325501894050218e-05, "loss": 0.9962, "step": 12920 }, { "epoch": 1.45, "learning_rate": 8.320063421476454e-05, "loss": 1.0326, "step": 12940 }, { "epoch": 1.46, "learning_rate": 8.314617914251805e-05, "loss": 0.9947, "step": 12960 }, { "epoch": 1.46, "learning_rate": 8.30916538391437e-05, "loss": 1.0253, "step": 12980 }, { "epoch": 1.46, "learning_rate": 8.30370584201714e-05, "loss": 0.9905, "step": 13000 }, { "epoch": 1.46, "learning_rate": 8.298239300127954e-05, "loss": 1.0028, "step": 13020 }, { "epoch": 1.47, "learning_rate": 8.292765769829487e-05, "loss": 1.0467, "step": 13040 }, { "epoch": 1.47, "learning_rate": 8.287285262719224e-05, "loss": 1.0042, "step": 13060 }, { "epoch": 1.47, "learning_rate": 8.281797790409425e-05, "loss": 1.0102, "step": 13080 }, { "epoch": 1.47, "learning_rate": 8.276303364527116e-05, "loss": 1.0038, "step": 13100 }, { "epoch": 1.47, "learning_rate": 8.270801996714051e-05, "loss": 1.0028, "step": 13120 }, { "epoch": 1.48, "learning_rate": 8.265293698626694e-05, "loss": 0.9969, "step": 13140 }, { "epoch": 1.48, "learning_rate": 8.259778481936197e-05, "loss": 0.9955, "step": 13160 }, { "epoch": 1.48, "learning_rate": 8.254256358328365e-05, "loss": 1.0106, "step": 13180 }, { "epoch": 1.48, "learning_rate": 8.248727339503641e-05, "loss": 1.0129, "step": 13200 }, { "epoch": 1.49, "learning_rate": 8.243191437177077e-05, "loss": 0.981, "step": 13220 }, { "epoch": 1.49, "learning_rate": 8.237648663078314e-05, "loss": 1.0168, "step": 13240 }, { "epoch": 1.49, "learning_rate": 8.232099028951548e-05, "loss": 0.9942, "step": 13260 }, { "epoch": 1.49, "learning_rate": 8.22654254655551e-05, "loss": 1.0028, "step": 13280 }, { "epoch": 1.49, "learning_rate": 8.22097922766344e-05, "loss": 1.0011, "step": 13300 }, { "epoch": 1.5, "learning_rate": 8.215409084063075e-05, "loss": 1.0141, "step": 13320 }, { "epoch": 1.5, "learning_rate": 8.209832127556598e-05, "loss": 0.9799, "step": 13340 }, { "epoch": 1.5, "learning_rate": 8.204248369960634e-05, "loss": 0.9972, "step": 13360 }, { "epoch": 1.5, "learning_rate": 8.198657823106219e-05, "loss": 0.9737, "step": 13380 }, { "epoch": 1.51, "learning_rate": 8.193060498838774e-05, "loss": 1.0, "step": 13400 }, { "epoch": 1.51, "learning_rate": 8.187456409018074e-05, "loss": 1.0246, "step": 13420 }, { "epoch": 1.51, "learning_rate": 8.18184556551824e-05, "loss": 1.0043, "step": 13440 }, { "epoch": 1.51, "learning_rate": 8.176227980227694e-05, "loss": 0.9859, "step": 13460 }, { "epoch": 1.51, "learning_rate": 8.170603665049146e-05, "loss": 0.9851, "step": 13480 }, { "epoch": 1.52, "learning_rate": 8.164972631899566e-05, "loss": 1.0025, "step": 13500 }, { "epoch": 1.52, "learning_rate": 8.159334892710156e-05, "loss": 0.9988, "step": 13520 }, { "epoch": 1.52, "learning_rate": 8.15369045942633e-05, "loss": 1.0531, "step": 13540 }, { "epoch": 1.52, "learning_rate": 8.148039344007685e-05, "loss": 0.983, "step": 13560 }, { "epoch": 1.53, "learning_rate": 8.142381558427974e-05, "loss": 1.0149, "step": 13580 }, { "epoch": 1.53, "learning_rate": 8.136717114675083e-05, "loss": 1.0205, "step": 13600 }, { "epoch": 1.53, "learning_rate": 8.131046024751009e-05, "loss": 1.015, "step": 13620 }, { "epoch": 1.53, "learning_rate": 8.12536830067183e-05, "loss": 1.0131, "step": 13640 }, { "epoch": 1.53, "learning_rate": 8.119683954467677e-05, "loss": 1.026, "step": 13660 }, { "epoch": 1.54, "learning_rate": 8.113992998182715e-05, "loss": 0.9792, "step": 13680 }, { "epoch": 1.54, "learning_rate": 8.108295443875116e-05, "loss": 1.0232, "step": 13700 }, { "epoch": 1.54, "learning_rate": 8.102591303617031e-05, "loss": 1.0185, "step": 13720 }, { "epoch": 1.54, "learning_rate": 8.096880589494563e-05, "loss": 0.9907, "step": 13740 }, { "epoch": 1.55, "learning_rate": 8.091163313607749e-05, "loss": 1.0145, "step": 13760 }, { "epoch": 1.55, "learning_rate": 8.085439488070521e-05, "loss": 0.9818, "step": 13780 }, { "epoch": 1.55, "learning_rate": 8.079709125010699e-05, "loss": 0.9885, "step": 13800 }, { "epoch": 1.55, "learning_rate": 8.073972236569947e-05, "loss": 1.0026, "step": 13820 }, { "epoch": 1.56, "learning_rate": 8.06822883490376e-05, "loss": 0.9577, "step": 13840 }, { "epoch": 1.56, "learning_rate": 8.06247893218143e-05, "loss": 1.0351, "step": 13860 }, { "epoch": 1.56, "learning_rate": 8.056722540586024e-05, "loss": 0.9972, "step": 13880 }, { "epoch": 1.56, "learning_rate": 8.050959672314359e-05, "loss": 1.0099, "step": 13900 }, { "epoch": 1.56, "learning_rate": 8.045190339576978e-05, "loss": 0.9896, "step": 13920 }, { "epoch": 1.57, "learning_rate": 8.039414554598113e-05, "loss": 1.0083, "step": 13940 }, { "epoch": 1.57, "learning_rate": 8.033632329615676e-05, "loss": 0.9907, "step": 13960 }, { "epoch": 1.57, "learning_rate": 8.027843676881218e-05, "loss": 1.0125, "step": 13980 }, { "epoch": 1.57, "learning_rate": 8.022048608659913e-05, "loss": 1.0153, "step": 14000 }, { "epoch": 1.58, "learning_rate": 8.016247137230525e-05, "loss": 0.9991, "step": 14020 }, { "epoch": 1.58, "learning_rate": 8.010439274885391e-05, "loss": 0.9889, "step": 14040 }, { "epoch": 1.58, "learning_rate": 8.004625033930382e-05, "loss": 1.0172, "step": 14060 }, { "epoch": 1.58, "learning_rate": 7.998804426684889e-05, "loss": 0.9886, "step": 14080 }, { "epoch": 1.58, "learning_rate": 7.992977465481793e-05, "loss": 1.0062, "step": 14100 }, { "epoch": 1.59, "learning_rate": 7.987144162667431e-05, "loss": 0.9952, "step": 14120 }, { "epoch": 1.59, "learning_rate": 7.981304530601586e-05, "loss": 1.0364, "step": 14140 }, { "epoch": 1.59, "learning_rate": 7.975458581657446e-05, "loss": 1.021, "step": 14160 }, { "epoch": 1.59, "learning_rate": 7.969606328221583e-05, "loss": 1.0118, "step": 14180 }, { "epoch": 1.6, "learning_rate": 7.96374778269393e-05, "loss": 1.0187, "step": 14200 }, { "epoch": 1.6, "learning_rate": 7.95788295748775e-05, "loss": 1.0267, "step": 14220 }, { "epoch": 1.6, "learning_rate": 7.952011865029614e-05, "loss": 1.0121, "step": 14240 }, { "epoch": 1.6, "learning_rate": 7.946134517759368e-05, "loss": 0.971, "step": 14260 }, { "epoch": 1.6, "learning_rate": 7.940250928130116e-05, "loss": 1.0182, "step": 14280 }, { "epoch": 1.61, "learning_rate": 7.934361108608183e-05, "loss": 0.9876, "step": 14300 }, { "epoch": 1.61, "learning_rate": 7.9284650716731e-05, "loss": 0.988, "step": 14320 }, { "epoch": 1.61, "learning_rate": 7.922562829817564e-05, "loss": 1.0128, "step": 14340 }, { "epoch": 1.61, "learning_rate": 7.916654395547427e-05, "loss": 1.029, "step": 14360 }, { "epoch": 1.62, "learning_rate": 7.91073978138166e-05, "loss": 0.9795, "step": 14380 }, { "epoch": 1.62, "learning_rate": 7.904818999852323e-05, "loss": 0.9923, "step": 14400 }, { "epoch": 1.62, "learning_rate": 7.898892063504548e-05, "loss": 1.0132, "step": 14420 }, { "epoch": 1.62, "learning_rate": 7.89295898489651e-05, "loss": 0.999, "step": 14440 }, { "epoch": 1.62, "learning_rate": 7.887019776599391e-05, "loss": 0.9659, "step": 14460 }, { "epoch": 1.63, "learning_rate": 7.88107445119737e-05, "loss": 0.9859, "step": 14480 }, { "epoch": 1.63, "learning_rate": 7.875123021287579e-05, "loss": 0.9863, "step": 14500 }, { "epoch": 1.63, "learning_rate": 7.869165499480089e-05, "loss": 1.0014, "step": 14520 }, { "epoch": 1.63, "learning_rate": 7.863201898397878e-05, "loss": 0.9914, "step": 14540 }, { "epoch": 1.64, "learning_rate": 7.857232230676802e-05, "loss": 0.9893, "step": 14560 }, { "epoch": 1.64, "learning_rate": 7.851256508965577e-05, "loss": 0.9932, "step": 14580 }, { "epoch": 1.64, "learning_rate": 7.845274745925744e-05, "loss": 1.0187, "step": 14600 }, { "epoch": 1.64, "learning_rate": 7.83928695423164e-05, "loss": 1.0001, "step": 14620 }, { "epoch": 1.64, "learning_rate": 7.83329314657038e-05, "loss": 0.991, "step": 14640 }, { "epoch": 1.65, "learning_rate": 7.827293335641825e-05, "loss": 1.0057, "step": 14660 }, { "epoch": 1.65, "learning_rate": 7.82128753415856e-05, "loss": 0.9592, "step": 14680 }, { "epoch": 1.65, "learning_rate": 7.815275754845854e-05, "loss": 1.0121, "step": 14700 }, { "epoch": 1.65, "learning_rate": 7.809258010441649e-05, "loss": 0.9967, "step": 14720 }, { "epoch": 1.66, "learning_rate": 7.803234313696524e-05, "loss": 0.9933, "step": 14740 }, { "epoch": 1.66, "learning_rate": 7.79720467737367e-05, "loss": 0.9841, "step": 14760 }, { "epoch": 1.66, "learning_rate": 7.791169114248864e-05, "loss": 1.0147, "step": 14780 }, { "epoch": 1.66, "learning_rate": 7.785127637110438e-05, "loss": 1.0291, "step": 14800 }, { "epoch": 1.67, "learning_rate": 7.779080258759259e-05, "loss": 0.9922, "step": 14820 }, { "epoch": 1.67, "learning_rate": 7.773026992008692e-05, "loss": 1.0002, "step": 14840 }, { "epoch": 1.67, "learning_rate": 7.766967849684584e-05, "loss": 1.0118, "step": 14860 }, { "epoch": 1.67, "learning_rate": 7.760902844625228e-05, "loss": 1.0237, "step": 14880 }, { "epoch": 1.67, "learning_rate": 7.754831989681345e-05, "loss": 1.0038, "step": 14900 }, { "epoch": 1.68, "learning_rate": 7.74875529771604e-05, "loss": 1.0016, "step": 14920 }, { "epoch": 1.68, "learning_rate": 7.742672781604794e-05, "loss": 0.9977, "step": 14940 }, { "epoch": 1.68, "learning_rate": 7.736584454235427e-05, "loss": 0.9864, "step": 14960 }, { "epoch": 1.68, "learning_rate": 7.730490328508072e-05, "loss": 0.9771, "step": 14980 }, { "epoch": 1.69, "learning_rate": 7.724390417335144e-05, "loss": 0.983, "step": 15000 }, { "epoch": 1.69, "learning_rate": 7.718284733641323e-05, "loss": 0.968, "step": 15020 }, { "epoch": 1.69, "learning_rate": 7.712173290363514e-05, "loss": 0.9642, "step": 15040 }, { "epoch": 1.69, "learning_rate": 7.706056100450831e-05, "loss": 1.0196, "step": 15060 }, { "epoch": 1.69, "learning_rate": 7.699933176864558e-05, "loss": 0.9708, "step": 15080 }, { "epoch": 1.7, "learning_rate": 7.693804532578131e-05, "loss": 0.9916, "step": 15100 }, { "epoch": 1.7, "learning_rate": 7.687670180577109e-05, "loss": 1.0076, "step": 15120 }, { "epoch": 1.7, "learning_rate": 7.681530133859142e-05, "loss": 0.9733, "step": 15140 }, { "epoch": 1.7, "learning_rate": 7.675384405433947e-05, "loss": 0.9965, "step": 15160 }, { "epoch": 1.71, "learning_rate": 7.66923300832328e-05, "loss": 0.9941, "step": 15180 }, { "epoch": 1.71, "learning_rate": 7.663075955560906e-05, "loss": 0.9851, "step": 15200 }, { "epoch": 1.71, "learning_rate": 7.656913260192574e-05, "loss": 0.9879, "step": 15220 }, { "epoch": 1.71, "learning_rate": 7.650744935275992e-05, "loss": 1.0127, "step": 15240 }, { "epoch": 1.71, "learning_rate": 7.644570993880791e-05, "loss": 0.9718, "step": 15260 }, { "epoch": 1.72, "learning_rate": 7.63839144908851e-05, "loss": 0.9495, "step": 15280 }, { "epoch": 1.72, "learning_rate": 7.632206313992548e-05, "loss": 0.9745, "step": 15300 }, { "epoch": 1.72, "learning_rate": 7.626015601698163e-05, "loss": 0.9862, "step": 15320 }, { "epoch": 1.72, "learning_rate": 7.619819325322422e-05, "loss": 0.9923, "step": 15340 }, { "epoch": 1.73, "learning_rate": 7.613617497994178e-05, "loss": 0.9779, "step": 15360 }, { "epoch": 1.73, "learning_rate": 7.607410132854059e-05, "loss": 0.9875, "step": 15380 }, { "epoch": 1.73, "learning_rate": 7.60119724305441e-05, "loss": 1.011, "step": 15400 }, { "epoch": 1.73, "learning_rate": 7.594978841759297e-05, "loss": 0.9933, "step": 15420 }, { "epoch": 1.73, "learning_rate": 7.588754942144452e-05, "loss": 0.9842, "step": 15440 }, { "epoch": 1.74, "learning_rate": 7.582525557397264e-05, "loss": 0.9784, "step": 15460 }, { "epoch": 1.74, "learning_rate": 7.576290700716742e-05, "loss": 0.9794, "step": 15480 }, { "epoch": 1.74, "learning_rate": 7.570050385313487e-05, "loss": 1.0136, "step": 15500 }, { "epoch": 1.74, "learning_rate": 7.563804624409672e-05, "loss": 1.0115, "step": 15520 }, { "epoch": 1.75, "learning_rate": 7.557553431239002e-05, "loss": 0.9926, "step": 15540 }, { "epoch": 1.75, "learning_rate": 7.551296819046693e-05, "loss": 0.9946, "step": 15560 }, { "epoch": 1.75, "learning_rate": 7.545034801089448e-05, "loss": 0.9707, "step": 15580 }, { "epoch": 1.75, "learning_rate": 7.538767390635416e-05, "loss": 0.9644, "step": 15600 }, { "epoch": 1.76, "learning_rate": 7.53249460096418e-05, "loss": 0.9909, "step": 15620 }, { "epoch": 1.76, "learning_rate": 7.526216445366713e-05, "loss": 0.994, "step": 15640 }, { "epoch": 1.76, "learning_rate": 7.519932937145364e-05, "loss": 0.9701, "step": 15660 }, { "epoch": 1.76, "learning_rate": 7.513644089613818e-05, "loss": 0.9868, "step": 15680 }, { "epoch": 1.76, "learning_rate": 7.507349916097077e-05, "loss": 1.018, "step": 15700 }, { "epoch": 1.77, "learning_rate": 7.501050429931429e-05, "loss": 0.9759, "step": 15720 }, { "epoch": 1.77, "learning_rate": 7.49474564446441e-05, "loss": 1.0035, "step": 15740 }, { "epoch": 1.77, "learning_rate": 7.488435573054795e-05, "loss": 0.9836, "step": 15760 }, { "epoch": 1.77, "learning_rate": 7.482120229072552e-05, "loss": 0.9725, "step": 15780 }, { "epoch": 1.78, "learning_rate": 7.475799625898825e-05, "loss": 0.9832, "step": 15800 }, { "epoch": 1.78, "learning_rate": 7.469473776925897e-05, "loss": 0.9895, "step": 15820 }, { "epoch": 1.78, "learning_rate": 7.463142695557171e-05, "loss": 0.979, "step": 15840 }, { "epoch": 1.78, "learning_rate": 7.456806395207132e-05, "loss": 0.987, "step": 15860 }, { "epoch": 1.78, "learning_rate": 7.450464889301326e-05, "loss": 0.9911, "step": 15880 }, { "epoch": 1.79, "learning_rate": 7.444118191276326e-05, "loss": 0.9616, "step": 15900 }, { "epoch": 1.79, "learning_rate": 7.43776631457971e-05, "loss": 0.9772, "step": 15920 }, { "epoch": 1.79, "learning_rate": 7.431409272670027e-05, "loss": 0.9851, "step": 15940 }, { "epoch": 1.79, "learning_rate": 7.425047079016765e-05, "loss": 0.9971, "step": 15960 }, { "epoch": 1.8, "learning_rate": 7.418679747100339e-05, "loss": 0.9858, "step": 15980 }, { "epoch": 1.8, "learning_rate": 7.412307290412041e-05, "loss": 0.9759, "step": 16000 }, { "epoch": 1.8, "learning_rate": 7.405929722454026e-05, "loss": 1.0255, "step": 16020 }, { "epoch": 1.8, "learning_rate": 7.399547056739278e-05, "loss": 0.9645, "step": 16040 }, { "epoch": 1.8, "learning_rate": 7.39315930679158e-05, "loss": 0.9821, "step": 16060 }, { "epoch": 1.81, "learning_rate": 7.386766486145496e-05, "loss": 0.9783, "step": 16080 }, { "epoch": 1.81, "learning_rate": 7.380368608346322e-05, "loss": 0.9899, "step": 16100 }, { "epoch": 1.81, "learning_rate": 7.373965686950078e-05, "loss": 0.9705, "step": 16120 }, { "epoch": 1.81, "learning_rate": 7.367557735523467e-05, "loss": 0.9869, "step": 16140 }, { "epoch": 1.82, "learning_rate": 7.361144767643849e-05, "loss": 0.983, "step": 16160 }, { "epoch": 1.82, "learning_rate": 7.354726796899219e-05, "loss": 1.0142, "step": 16180 }, { "epoch": 1.82, "learning_rate": 7.348303836888163e-05, "loss": 0.9991, "step": 16200 }, { "epoch": 1.82, "learning_rate": 7.341875901219845e-05, "loss": 0.9927, "step": 16220 }, { "epoch": 1.82, "learning_rate": 7.33544300351397e-05, "loss": 1.0073, "step": 16240 }, { "epoch": 1.83, "learning_rate": 7.329005157400754e-05, "loss": 0.998, "step": 16260 }, { "epoch": 1.83, "learning_rate": 7.322562376520904e-05, "loss": 0.9921, "step": 16280 }, { "epoch": 1.83, "learning_rate": 7.316114674525578e-05, "loss": 0.9735, "step": 16300 }, { "epoch": 1.83, "learning_rate": 7.30966206507636e-05, "loss": 0.9668, "step": 16320 }, { "epoch": 1.84, "learning_rate": 7.303204561845236e-05, "loss": 0.9921, "step": 16340 }, { "epoch": 1.84, "learning_rate": 7.29674217851456e-05, "loss": 0.9809, "step": 16360 }, { "epoch": 1.84, "learning_rate": 7.290274928777024e-05, "loss": 0.9915, "step": 16380 }, { "epoch": 1.84, "learning_rate": 7.283802826335635e-05, "loss": 0.9805, "step": 16400 }, { "epoch": 1.84, "learning_rate": 7.277325884903674e-05, "loss": 0.9791, "step": 16420 }, { "epoch": 1.85, "learning_rate": 7.270844118204688e-05, "loss": 0.9702, "step": 16440 }, { "epoch": 1.85, "learning_rate": 7.264357539972434e-05, "loss": 0.9717, "step": 16460 }, { "epoch": 1.85, "learning_rate": 7.257866163950873e-05, "loss": 0.9982, "step": 16480 }, { "epoch": 1.85, "learning_rate": 7.251370003894133e-05, "loss": 0.9703, "step": 16500 }, { "epoch": 1.86, "learning_rate": 7.244869073566466e-05, "loss": 0.9482, "step": 16520 }, { "epoch": 1.86, "learning_rate": 7.238363386742249e-05, "loss": 1.0209, "step": 16540 }, { "epoch": 1.86, "learning_rate": 7.23185295720592e-05, "loss": 0.9672, "step": 16560 }, { "epoch": 1.86, "learning_rate": 7.225337798751981e-05, "loss": 1.0046, "step": 16580 }, { "epoch": 1.87, "learning_rate": 7.218817925184944e-05, "loss": 1.0102, "step": 16600 }, { "epoch": 1.87, "learning_rate": 7.212293350319313e-05, "loss": 0.9578, "step": 16620 }, { "epoch": 1.87, "learning_rate": 7.205764087979557e-05, "loss": 0.9835, "step": 16640 }, { "epoch": 1.87, "learning_rate": 7.199230152000074e-05, "loss": 1.0221, "step": 16660 }, { "epoch": 1.87, "learning_rate": 7.192691556225167e-05, "loss": 0.9698, "step": 16680 }, { "epoch": 1.88, "learning_rate": 7.186148314509008e-05, "loss": 0.9918, "step": 16700 }, { "epoch": 1.88, "learning_rate": 7.179600440715615e-05, "loss": 0.9828, "step": 16720 }, { "epoch": 1.88, "learning_rate": 7.173047948718826e-05, "loss": 1.0046, "step": 16740 }, { "epoch": 1.88, "learning_rate": 7.166490852402254e-05, "loss": 0.9624, "step": 16760 }, { "epoch": 1.89, "learning_rate": 7.159929165659277e-05, "loss": 0.9675, "step": 16780 }, { "epoch": 1.89, "learning_rate": 7.153362902392994e-05, "loss": 0.9811, "step": 16800 }, { "epoch": 1.89, "learning_rate": 7.146792076516202e-05, "loss": 0.9725, "step": 16820 }, { "epoch": 1.89, "learning_rate": 7.140216701951366e-05, "loss": 0.9751, "step": 16840 }, { "epoch": 1.89, "learning_rate": 7.13363679263059e-05, "loss": 0.9734, "step": 16860 }, { "epoch": 1.9, "learning_rate": 7.127052362495583e-05, "loss": 0.9879, "step": 16880 }, { "epoch": 1.9, "learning_rate": 7.120463425497637e-05, "loss": 0.9824, "step": 16900 }, { "epoch": 1.9, "learning_rate": 7.11386999559759e-05, "loss": 0.9833, "step": 16920 }, { "epoch": 1.9, "learning_rate": 7.1072720867658e-05, "loss": 0.9703, "step": 16940 }, { "epoch": 1.91, "learning_rate": 7.100669712982119e-05, "loss": 0.9864, "step": 16960 }, { "epoch": 1.91, "learning_rate": 7.094062888235852e-05, "loss": 0.9809, "step": 16980 }, { "epoch": 1.91, "learning_rate": 7.087451626525745e-05, "loss": 0.9887, "step": 17000 }, { "epoch": 1.91, "learning_rate": 7.080835941859932e-05, "loss": 0.9817, "step": 17020 }, { "epoch": 1.91, "learning_rate": 7.074215848255933e-05, "loss": 0.9697, "step": 17040 }, { "epoch": 1.92, "learning_rate": 7.067591359740599e-05, "loss": 0.9717, "step": 17060 }, { "epoch": 1.92, "learning_rate": 7.060962490350098e-05, "loss": 0.9435, "step": 17080 }, { "epoch": 1.92, "learning_rate": 7.05432925412988e-05, "loss": 0.9578, "step": 17100 }, { "epoch": 1.92, "learning_rate": 7.047691665134643e-05, "loss": 0.9845, "step": 17120 }, { "epoch": 1.93, "learning_rate": 7.041049737428316e-05, "loss": 0.9647, "step": 17140 }, { "epoch": 1.93, "learning_rate": 7.034403485084014e-05, "loss": 0.9781, "step": 17160 }, { "epoch": 1.93, "learning_rate": 7.027752922184017e-05, "loss": 0.9348, "step": 17180 }, { "epoch": 1.93, "learning_rate": 7.021098062819743e-05, "loss": 0.9801, "step": 17200 }, { "epoch": 1.93, "learning_rate": 7.014438921091703e-05, "loss": 0.9987, "step": 17220 }, { "epoch": 1.94, "learning_rate": 7.007775511109495e-05, "loss": 0.9797, "step": 17240 }, { "epoch": 1.94, "learning_rate": 7.001107846991751e-05, "loss": 0.9882, "step": 17260 }, { "epoch": 1.94, "learning_rate": 6.994435942866117e-05, "loss": 0.9706, "step": 17280 }, { "epoch": 1.94, "learning_rate": 6.98775981286923e-05, "loss": 0.9733, "step": 17300 }, { "epoch": 1.95, "learning_rate": 6.981079471146672e-05, "loss": 0.9788, "step": 17320 }, { "epoch": 1.95, "learning_rate": 6.974394931852956e-05, "loss": 0.9797, "step": 17340 }, { "epoch": 1.95, "learning_rate": 6.967706209151488e-05, "loss": 0.9357, "step": 17360 }, { "epoch": 1.95, "learning_rate": 6.96101331721453e-05, "loss": 0.9818, "step": 17380 }, { "epoch": 1.96, "learning_rate": 6.954316270223189e-05, "loss": 0.9728, "step": 17400 }, { "epoch": 1.96, "learning_rate": 6.94761508236737e-05, "loss": 1.0087, "step": 17420 }, { "epoch": 1.96, "learning_rate": 6.940909767845753e-05, "loss": 0.9928, "step": 17440 }, { "epoch": 1.96, "learning_rate": 6.934200340865761e-05, "loss": 0.9464, "step": 17460 }, { "epoch": 1.96, "learning_rate": 6.927486815643528e-05, "loss": 0.9691, "step": 17480 }, { "epoch": 1.97, "learning_rate": 6.920769206403881e-05, "loss": 0.9875, "step": 17500 }, { "epoch": 1.97, "learning_rate": 6.914047527380288e-05, "loss": 0.9622, "step": 17520 }, { "epoch": 1.97, "learning_rate": 6.907321792814848e-05, "loss": 0.9906, "step": 17540 }, { "epoch": 1.97, "learning_rate": 6.900592016958252e-05, "loss": 0.9778, "step": 17560 }, { "epoch": 1.98, "learning_rate": 6.89385821406975e-05, "loss": 0.9606, "step": 17580 }, { "epoch": 1.98, "learning_rate": 6.887120398417132e-05, "loss": 0.9551, "step": 17600 }, { "epoch": 1.98, "learning_rate": 6.880378584276682e-05, "loss": 0.9555, "step": 17620 }, { "epoch": 1.98, "learning_rate": 6.87363278593316e-05, "loss": 0.9735, "step": 17640 }, { "epoch": 1.98, "learning_rate": 6.866883017679772e-05, "loss": 0.9791, "step": 17660 }, { "epoch": 1.99, "learning_rate": 6.860129293818124e-05, "loss": 0.9975, "step": 17680 }, { "epoch": 1.99, "learning_rate": 6.853371628658217e-05, "loss": 0.9816, "step": 17700 }, { "epoch": 1.99, "learning_rate": 6.846610036518396e-05, "loss": 0.985, "step": 17720 }, { "epoch": 1.99, "learning_rate": 6.839844531725321e-05, "loss": 0.9667, "step": 17740 }, { "epoch": 2.0, "learning_rate": 6.833075128613955e-05, "loss": 0.95, "step": 17760 }, { "epoch": 2.0, "learning_rate": 6.826301841527512e-05, "loss": 0.9984, "step": 17780 }, { "epoch": 2.0, "learning_rate": 6.819524684817438e-05, "loss": 0.9795, "step": 17800 }, { "epoch": 2.0, "learning_rate": 6.812743672843378e-05, "loss": 0.932, "step": 17820 }, { "epoch": 2.0, "learning_rate": 6.805958819973144e-05, "loss": 0.9332, "step": 17840 }, { "epoch": 2.01, "learning_rate": 6.799170140582689e-05, "loss": 0.9567, "step": 17860 }, { "epoch": 2.01, "learning_rate": 6.792377649056071e-05, "loss": 0.9452, "step": 17880 }, { "epoch": 2.01, "learning_rate": 6.785581359785428e-05, "loss": 0.9466, "step": 17900 }, { "epoch": 2.01, "learning_rate": 6.778781287170946e-05, "loss": 0.9355, "step": 17920 }, { "epoch": 2.02, "learning_rate": 6.771977445620818e-05, "loss": 0.9449, "step": 17940 }, { "epoch": 2.02, "learning_rate": 6.765169849551235e-05, "loss": 0.9313, "step": 17960 }, { "epoch": 2.02, "learning_rate": 6.758358513386335e-05, "loss": 0.9742, "step": 17980 }, { "epoch": 2.02, "learning_rate": 6.751543451558186e-05, "loss": 0.919, "step": 18000 }, { "epoch": 2.02, "learning_rate": 6.744724678506746e-05, "loss": 0.955, "step": 18020 }, { "epoch": 2.03, "learning_rate": 6.737902208679837e-05, "loss": 0.961, "step": 18040 }, { "epoch": 2.03, "learning_rate": 6.731076056533114e-05, "loss": 0.9699, "step": 18060 }, { "epoch": 2.03, "learning_rate": 6.724246236530036e-05, "loss": 0.9497, "step": 18080 }, { "epoch": 2.03, "learning_rate": 6.717412763141832e-05, "loss": 0.9476, "step": 18100 }, { "epoch": 2.04, "learning_rate": 6.710575650847474e-05, "loss": 0.9641, "step": 18120 }, { "epoch": 2.04, "learning_rate": 6.70373491413364e-05, "loss": 0.9534, "step": 18140 }, { "epoch": 2.04, "learning_rate": 6.69689056749469e-05, "loss": 0.9416, "step": 18160 }, { "epoch": 2.04, "learning_rate": 6.690042625432635e-05, "loss": 0.9359, "step": 18180 }, { "epoch": 2.04, "learning_rate": 6.6831911024571e-05, "loss": 0.9538, "step": 18200 }, { "epoch": 2.05, "learning_rate": 6.676336013085302e-05, "loss": 0.9375, "step": 18220 }, { "epoch": 2.05, "learning_rate": 6.669477371842008e-05, "loss": 0.9479, "step": 18240 }, { "epoch": 2.05, "learning_rate": 6.662615193259519e-05, "loss": 0.9275, "step": 18260 }, { "epoch": 2.05, "learning_rate": 6.655749491877623e-05, "loss": 0.9417, "step": 18280 }, { "epoch": 2.06, "learning_rate": 6.648880282243579e-05, "loss": 0.9558, "step": 18300 }, { "epoch": 2.06, "learning_rate": 6.642007578912074e-05, "loss": 0.9539, "step": 18320 }, { "epoch": 2.06, "learning_rate": 6.635131396445199e-05, "loss": 0.955, "step": 18340 }, { "epoch": 2.06, "learning_rate": 6.628251749412421e-05, "loss": 0.9321, "step": 18360 }, { "epoch": 2.07, "learning_rate": 6.621368652390542e-05, "loss": 0.9354, "step": 18380 }, { "epoch": 2.07, "learning_rate": 6.614482119963677e-05, "loss": 0.932, "step": 18400 }, { "epoch": 2.07, "learning_rate": 6.607592166723219e-05, "loss": 0.9318, "step": 18420 }, { "epoch": 2.07, "learning_rate": 6.600698807267811e-05, "loss": 0.9294, "step": 18440 }, { "epoch": 2.07, "learning_rate": 6.59380205620331e-05, "loss": 0.946, "step": 18460 }, { "epoch": 2.08, "learning_rate": 6.586901928142761e-05, "loss": 0.9338, "step": 18480 }, { "epoch": 2.08, "learning_rate": 6.579998437706367e-05, "loss": 0.9615, "step": 18500 }, { "epoch": 2.08, "learning_rate": 6.573091599521448e-05, "loss": 0.9363, "step": 18520 }, { "epoch": 2.08, "learning_rate": 6.566181428222424e-05, "loss": 0.9891, "step": 18540 }, { "epoch": 2.09, "learning_rate": 6.559267938450778e-05, "loss": 0.9679, "step": 18560 }, { "epoch": 2.09, "learning_rate": 6.552351144855015e-05, "loss": 0.9197, "step": 18580 }, { "epoch": 2.09, "learning_rate": 6.545431062090653e-05, "loss": 0.9464, "step": 18600 }, { "epoch": 2.09, "learning_rate": 6.538507704820169e-05, "loss": 0.9829, "step": 18620 }, { "epoch": 2.09, "learning_rate": 6.531581087712984e-05, "loss": 0.9383, "step": 18640 }, { "epoch": 2.1, "learning_rate": 6.524651225445423e-05, "loss": 0.941, "step": 18660 }, { "epoch": 2.1, "learning_rate": 6.517718132700689e-05, "loss": 0.9647, "step": 18680 }, { "epoch": 2.1, "learning_rate": 6.510781824168828e-05, "loss": 0.9517, "step": 18700 }, { "epoch": 2.1, "learning_rate": 6.5038423145467e-05, "loss": 0.9526, "step": 18720 }, { "epoch": 2.11, "learning_rate": 6.496899618537947e-05, "loss": 0.943, "step": 18740 }, { "epoch": 2.11, "learning_rate": 6.489953750852966e-05, "loss": 0.9427, "step": 18760 }, { "epoch": 2.11, "learning_rate": 6.483004726208873e-05, "loss": 0.9405, "step": 18780 }, { "epoch": 2.11, "learning_rate": 6.476052559329467e-05, "loss": 0.9578, "step": 18800 }, { "epoch": 2.11, "learning_rate": 6.469097264945214e-05, "loss": 0.967, "step": 18820 }, { "epoch": 2.12, "learning_rate": 6.4621388577932e-05, "loss": 0.958, "step": 18840 }, { "epoch": 2.12, "learning_rate": 6.45517735261711e-05, "loss": 0.9582, "step": 18860 }, { "epoch": 2.12, "learning_rate": 6.448212764167191e-05, "loss": 0.9493, "step": 18880 }, { "epoch": 2.12, "learning_rate": 6.441245107200223e-05, "loss": 0.9368, "step": 18900 }, { "epoch": 2.13, "learning_rate": 6.43427439647949e-05, "loss": 0.9792, "step": 18920 }, { "epoch": 2.13, "learning_rate": 6.427300646774744e-05, "loss": 0.9427, "step": 18940 }, { "epoch": 2.13, "learning_rate": 6.420323872862179e-05, "loss": 0.9504, "step": 18960 }, { "epoch": 2.13, "learning_rate": 6.413344089524393e-05, "loss": 0.9439, "step": 18980 }, { "epoch": 2.13, "learning_rate": 6.406361311550361e-05, "loss": 0.92, "step": 19000 }, { "epoch": 2.14, "learning_rate": 6.399375553735407e-05, "loss": 0.9736, "step": 19020 }, { "epoch": 2.14, "learning_rate": 6.392386830881164e-05, "loss": 0.9712, "step": 19040 }, { "epoch": 2.14, "learning_rate": 6.385395157795552e-05, "loss": 0.9777, "step": 19060 }, { "epoch": 2.14, "learning_rate": 6.378400549292739e-05, "loss": 0.9232, "step": 19080 }, { "epoch": 2.15, "learning_rate": 6.371403020193109e-05, "loss": 0.9597, "step": 19100 }, { "epoch": 2.15, "learning_rate": 6.364402585323245e-05, "loss": 0.9131, "step": 19120 }, { "epoch": 2.15, "learning_rate": 6.357399259515877e-05, "loss": 0.9555, "step": 19140 }, { "epoch": 2.15, "learning_rate": 6.350393057609865e-05, "loss": 0.9488, "step": 19160 }, { "epoch": 2.16, "learning_rate": 6.343383994450158e-05, "loss": 0.9597, "step": 19180 }, { "epoch": 2.16, "learning_rate": 6.336372084887775e-05, "loss": 0.9153, "step": 19200 }, { "epoch": 2.16, "learning_rate": 6.329357343779763e-05, "loss": 0.9319, "step": 19220 }, { "epoch": 2.16, "learning_rate": 6.322339785989163e-05, "loss": 0.9723, "step": 19240 }, { "epoch": 2.16, "learning_rate": 6.315319426384993e-05, "loss": 0.9426, "step": 19260 }, { "epoch": 2.17, "learning_rate": 6.308296279842205e-05, "loss": 0.9569, "step": 19280 }, { "epoch": 2.17, "learning_rate": 6.301270361241649e-05, "loss": 0.9376, "step": 19300 }, { "epoch": 2.17, "learning_rate": 6.294241685470057e-05, "loss": 0.953, "step": 19320 }, { "epoch": 2.17, "learning_rate": 6.287210267420001e-05, "loss": 0.9552, "step": 19340 }, { "epoch": 2.18, "learning_rate": 6.280176121989861e-05, "loss": 0.941, "step": 19360 }, { "epoch": 2.18, "learning_rate": 6.273139264083798e-05, "loss": 0.9632, "step": 19380 }, { "epoch": 2.18, "learning_rate": 6.266099708611719e-05, "loss": 0.9531, "step": 19400 }, { "epoch": 2.18, "learning_rate": 6.259057470489246e-05, "loss": 0.9426, "step": 19420 }, { "epoch": 2.18, "learning_rate": 6.252012564637689e-05, "loss": 0.9947, "step": 19440 }, { "epoch": 2.19, "learning_rate": 6.244965005984008e-05, "loss": 0.9713, "step": 19460 }, { "epoch": 2.19, "learning_rate": 6.23791480946078e-05, "loss": 0.9208, "step": 19480 }, { "epoch": 2.19, "learning_rate": 6.23086199000618e-05, "loss": 0.9401, "step": 19500 }, { "epoch": 2.19, "learning_rate": 6.223806562563929e-05, "loss": 0.9537, "step": 19520 }, { "epoch": 2.2, "learning_rate": 6.216748542083286e-05, "loss": 0.9889, "step": 19540 }, { "epoch": 2.2, "learning_rate": 6.209687943518996e-05, "loss": 0.9211, "step": 19560 }, { "epoch": 2.2, "learning_rate": 6.202624781831268e-05, "loss": 0.9332, "step": 19580 }, { "epoch": 2.2, "learning_rate": 6.195559071985745e-05, "loss": 0.9656, "step": 19600 }, { "epoch": 2.2, "learning_rate": 6.188490828953465e-05, "loss": 0.9292, "step": 19620 }, { "epoch": 2.21, "learning_rate": 6.181420067710838e-05, "loss": 0.9479, "step": 19640 }, { "epoch": 2.21, "learning_rate": 6.174346803239604e-05, "loss": 0.9307, "step": 19660 }, { "epoch": 2.21, "learning_rate": 6.167271050526812e-05, "loss": 0.9564, "step": 19680 }, { "epoch": 2.21, "learning_rate": 6.160192824564778e-05, "loss": 0.9316, "step": 19700 }, { "epoch": 2.22, "learning_rate": 6.153112140351066e-05, "loss": 0.9171, "step": 19720 }, { "epoch": 2.22, "learning_rate": 6.14602901288844e-05, "loss": 0.9369, "step": 19740 }, { "epoch": 2.22, "learning_rate": 6.138943457184847e-05, "loss": 0.9351, "step": 19760 }, { "epoch": 2.22, "learning_rate": 6.131855488253379e-05, "loss": 0.9421, "step": 19780 }, { "epoch": 2.22, "learning_rate": 6.124765121112233e-05, "loss": 0.9732, "step": 19800 }, { "epoch": 2.23, "learning_rate": 6.1176723707847e-05, "loss": 0.933, "step": 19820 }, { "epoch": 2.23, "learning_rate": 6.110577252299108e-05, "loss": 0.9656, "step": 19840 }, { "epoch": 2.23, "learning_rate": 6.103479780688816e-05, "loss": 0.9369, "step": 19860 }, { "epoch": 2.23, "learning_rate": 6.096379970992157e-05, "loss": 0.9743, "step": 19880 }, { "epoch": 2.24, "learning_rate": 6.089277838252422e-05, "loss": 0.9686, "step": 19900 }, { "epoch": 2.24, "learning_rate": 6.0821733975178276e-05, "loss": 0.927, "step": 19920 }, { "epoch": 2.24, "learning_rate": 6.0750666638414765e-05, "loss": 0.9462, "step": 19940 }, { "epoch": 2.24, "learning_rate": 6.067957652281332e-05, "loss": 0.9591, "step": 19960 }, { "epoch": 2.24, "learning_rate": 6.060846377900182e-05, "loss": 0.9595, "step": 19980 }, { "epoch": 2.25, "learning_rate": 6.0537328557656105e-05, "loss": 0.9518, "step": 20000 }, { "epoch": 2.25, "learning_rate": 6.046617100949965e-05, "loss": 0.9485, "step": 20020 }, { "epoch": 2.25, "learning_rate": 6.0394991285303196e-05, "loss": 0.961, "step": 20040 }, { "epoch": 2.25, "learning_rate": 6.03237895358845e-05, "loss": 0.9536, "step": 20060 }, { "epoch": 2.26, "learning_rate": 6.025256591210799e-05, "loss": 0.9449, "step": 20080 }, { "epoch": 2.26, "learning_rate": 6.0181320564884444e-05, "loss": 0.947, "step": 20100 }, { "epoch": 2.26, "learning_rate": 6.011005364517068e-05, "loss": 0.9491, "step": 20120 }, { "epoch": 2.26, "learning_rate": 6.003876530396916e-05, "loss": 0.9332, "step": 20140 }, { "epoch": 2.27, "learning_rate": 5.99674556923278e-05, "loss": 0.9612, "step": 20160 }, { "epoch": 2.27, "learning_rate": 5.989612496133956e-05, "loss": 0.9536, "step": 20180 }, { "epoch": 2.27, "learning_rate": 5.9824773262142165e-05, "loss": 0.9351, "step": 20200 }, { "epoch": 2.27, "learning_rate": 5.975340074591774e-05, "loss": 0.955, "step": 20220 }, { "epoch": 2.27, "learning_rate": 5.968200756389255e-05, "loss": 0.9472, "step": 20240 }, { "epoch": 2.28, "learning_rate": 5.9610593867336614e-05, "loss": 0.9185, "step": 20260 }, { "epoch": 2.28, "learning_rate": 5.9539159807563437e-05, "loss": 0.9523, "step": 20280 }, { "epoch": 2.28, "learning_rate": 5.9467705535929686e-05, "loss": 0.9177, "step": 20300 }, { "epoch": 2.28, "learning_rate": 5.939623120383481e-05, "loss": 0.9517, "step": 20320 }, { "epoch": 2.29, "learning_rate": 5.9324736962720805e-05, "loss": 0.9449, "step": 20340 }, { "epoch": 2.29, "learning_rate": 5.925322296407181e-05, "loss": 0.9656, "step": 20360 }, { "epoch": 2.29, "learning_rate": 5.918168935941388e-05, "loss": 0.9617, "step": 20380 }, { "epoch": 2.29, "learning_rate": 5.911013630031457e-05, "loss": 0.9479, "step": 20400 }, { "epoch": 2.29, "learning_rate": 5.903856393838265e-05, "loss": 0.9431, "step": 20420 }, { "epoch": 2.3, "learning_rate": 5.896697242526785e-05, "loss": 0.9583, "step": 20440 }, { "epoch": 2.3, "learning_rate": 5.8895361912660374e-05, "loss": 0.9749, "step": 20460 }, { "epoch": 2.3, "learning_rate": 5.882373255229081e-05, "loss": 0.9783, "step": 20480 }, { "epoch": 2.3, "learning_rate": 5.875208449592957e-05, "loss": 0.9316, "step": 20500 }, { "epoch": 2.31, "learning_rate": 5.868041789538675e-05, "loss": 0.9483, "step": 20520 }, { "epoch": 2.31, "learning_rate": 5.8608732902511695e-05, "loss": 0.9384, "step": 20540 }, { "epoch": 2.31, "learning_rate": 5.853702966919275e-05, "loss": 0.9093, "step": 20560 }, { "epoch": 2.31, "learning_rate": 5.8465308347356895e-05, "loss": 0.9331, "step": 20580 }, { "epoch": 2.31, "learning_rate": 5.8393569088969425e-05, "loss": 0.9641, "step": 20600 }, { "epoch": 2.32, "learning_rate": 5.8321812046033666e-05, "loss": 0.9628, "step": 20620 }, { "epoch": 2.32, "learning_rate": 5.825003737059062e-05, "loss": 0.9644, "step": 20640 }, { "epoch": 2.32, "learning_rate": 5.81782452147186e-05, "loss": 0.9429, "step": 20660 }, { "epoch": 2.32, "learning_rate": 5.810643573053306e-05, "loss": 0.9444, "step": 20680 }, { "epoch": 2.33, "learning_rate": 5.803460907018607e-05, "loss": 0.9412, "step": 20700 }, { "epoch": 2.33, "learning_rate": 5.796276538586615e-05, "loss": 0.9411, "step": 20720 }, { "epoch": 2.33, "learning_rate": 5.7890904829797856e-05, "loss": 0.9342, "step": 20740 }, { "epoch": 2.33, "learning_rate": 5.781902755424151e-05, "loss": 0.9188, "step": 20760 }, { "epoch": 2.33, "learning_rate": 5.7747133711492895e-05, "loss": 0.9423, "step": 20780 }, { "epoch": 2.34, "learning_rate": 5.767522345388282e-05, "loss": 0.9363, "step": 20800 }, { "epoch": 2.34, "learning_rate": 5.760329693377693e-05, "loss": 0.9369, "step": 20820 }, { "epoch": 2.34, "learning_rate": 5.7531354303575324e-05, "loss": 0.9655, "step": 20840 }, { "epoch": 2.34, "learning_rate": 5.7459395715712205e-05, "loss": 0.9417, "step": 20860 }, { "epoch": 2.35, "learning_rate": 5.738742132265562e-05, "loss": 0.9504, "step": 20880 }, { "epoch": 2.35, "learning_rate": 5.731543127690709e-05, "loss": 0.9594, "step": 20900 }, { "epoch": 2.35, "learning_rate": 5.724342573100131e-05, "loss": 0.9268, "step": 20920 }, { "epoch": 2.35, "learning_rate": 5.7171404837505796e-05, "loss": 0.9299, "step": 20940 }, { "epoch": 2.36, "learning_rate": 5.709936874902061e-05, "loss": 0.9372, "step": 20960 }, { "epoch": 2.36, "learning_rate": 5.702731761817799e-05, "loss": 0.9087, "step": 20980 }, { "epoch": 2.36, "learning_rate": 5.695525159764206e-05, "loss": 0.9338, "step": 21000 }, { "epoch": 2.36, "learning_rate": 5.688317084010847e-05, "loss": 0.9435, "step": 21020 }, { "epoch": 2.36, "learning_rate": 5.681107549830414e-05, "loss": 0.9552, "step": 21040 }, { "epoch": 2.37, "learning_rate": 5.673896572498683e-05, "loss": 0.9095, "step": 21060 }, { "epoch": 2.37, "learning_rate": 5.6666841672944925e-05, "loss": 0.9398, "step": 21080 }, { "epoch": 2.37, "learning_rate": 5.659470349499707e-05, "loss": 0.949, "step": 21100 }, { "epoch": 2.37, "learning_rate": 5.652255134399178e-05, "loss": 0.9129, "step": 21120 }, { "epoch": 2.38, "learning_rate": 5.645038537280726e-05, "loss": 0.9701, "step": 21140 }, { "epoch": 2.38, "learning_rate": 5.6378205734350916e-05, "loss": 0.9328, "step": 21160 }, { "epoch": 2.38, "learning_rate": 5.630601258155917e-05, "loss": 0.9371, "step": 21180 }, { "epoch": 2.38, "learning_rate": 5.623380606739708e-05, "loss": 0.9384, "step": 21200 }, { "epoch": 2.38, "learning_rate": 5.616158634485793e-05, "loss": 0.9557, "step": 21220 }, { "epoch": 2.39, "learning_rate": 5.608935356696313e-05, "loss": 0.9288, "step": 21240 }, { "epoch": 2.39, "learning_rate": 5.6017107886761634e-05, "loss": 0.9439, "step": 21260 }, { "epoch": 2.39, "learning_rate": 5.5944849457329786e-05, "loss": 0.9502, "step": 21280 }, { "epoch": 2.39, "learning_rate": 5.5872578431770936e-05, "loss": 0.9466, "step": 21300 }, { "epoch": 2.4, "learning_rate": 5.5800294963215116e-05, "loss": 0.9338, "step": 21320 }, { "epoch": 2.4, "learning_rate": 5.5727999204818736e-05, "loss": 0.9466, "step": 21340 }, { "epoch": 2.4, "learning_rate": 5.565569130976422e-05, "loss": 0.9392, "step": 21360 }, { "epoch": 2.4, "learning_rate": 5.5583371431259745e-05, "loss": 0.9885, "step": 21380 }, { "epoch": 2.4, "learning_rate": 5.551103972253884e-05, "loss": 0.9347, "step": 21400 }, { "epoch": 2.41, "learning_rate": 5.543869633686013e-05, "loss": 0.934, "step": 21420 }, { "epoch": 2.41, "learning_rate": 5.536634142750699e-05, "loss": 0.9347, "step": 21440 }, { "epoch": 2.41, "learning_rate": 5.529397514778716e-05, "loss": 0.9379, "step": 21460 }, { "epoch": 2.41, "learning_rate": 5.522159765103251e-05, "loss": 0.9509, "step": 21480 }, { "epoch": 2.42, "learning_rate": 5.5149209090598686e-05, "loss": 0.9212, "step": 21500 }, { "epoch": 2.42, "learning_rate": 5.5076809619864754e-05, "loss": 0.9608, "step": 21520 }, { "epoch": 2.42, "learning_rate": 5.5004399392232906e-05, "loss": 0.9088, "step": 21540 }, { "epoch": 2.42, "learning_rate": 5.493197856112812e-05, "loss": 0.9442, "step": 21560 }, { "epoch": 2.42, "learning_rate": 5.485954727999785e-05, "loss": 0.9389, "step": 21580 }, { "epoch": 2.43, "learning_rate": 5.478710570231168e-05, "loss": 0.9191, "step": 21600 }, { "epoch": 2.43, "learning_rate": 5.4714653981561015e-05, "loss": 0.9181, "step": 21620 }, { "epoch": 2.43, "learning_rate": 5.464219227125877e-05, "loss": 0.9255, "step": 21640 }, { "epoch": 2.43, "learning_rate": 5.4569720724939025e-05, "loss": 0.9249, "step": 21660 }, { "epoch": 2.44, "learning_rate": 5.449723949615664e-05, "loss": 0.9217, "step": 21680 }, { "epoch": 2.44, "learning_rate": 5.442474873848706e-05, "loss": 0.9316, "step": 21700 }, { "epoch": 2.44, "learning_rate": 5.43522486055259e-05, "loss": 0.9689, "step": 21720 }, { "epoch": 2.44, "learning_rate": 5.427973925088865e-05, "loss": 0.931, "step": 21740 }, { "epoch": 2.44, "learning_rate": 5.42072208282103e-05, "loss": 0.9415, "step": 21760 }, { "epoch": 2.45, "learning_rate": 5.4134693491145085e-05, "loss": 0.9149, "step": 21780 }, { "epoch": 2.45, "learning_rate": 5.4062157393366134e-05, "loss": 0.9355, "step": 21800 }, { "epoch": 2.45, "learning_rate": 5.398961268856512e-05, "loss": 0.9458, "step": 21820 }, { "epoch": 2.45, "learning_rate": 5.391705953045195e-05, "loss": 0.9411, "step": 21840 }, { "epoch": 2.46, "learning_rate": 5.3844498072754476e-05, "loss": 0.9501, "step": 21860 }, { "epoch": 2.46, "learning_rate": 5.377192846921808e-05, "loss": 0.9204, "step": 21880 }, { "epoch": 2.46, "learning_rate": 5.369935087360547e-05, "loss": 0.9033, "step": 21900 }, { "epoch": 2.46, "learning_rate": 5.362676543969622e-05, "loss": 0.9327, "step": 21920 }, { "epoch": 2.47, "learning_rate": 5.3554172321286576e-05, "loss": 0.9579, "step": 21940 }, { "epoch": 2.47, "learning_rate": 5.348157167218901e-05, "loss": 0.9313, "step": 21960 }, { "epoch": 2.47, "learning_rate": 5.340896364623198e-05, "loss": 0.913, "step": 21980 }, { "epoch": 2.47, "learning_rate": 5.333634839725958e-05, "loss": 0.9628, "step": 22000 }, { "epoch": 2.47, "learning_rate": 5.3263726079131194e-05, "loss": 0.9603, "step": 22020 }, { "epoch": 2.48, "learning_rate": 5.319109684572118e-05, "loss": 0.9116, "step": 22040 }, { "epoch": 2.48, "learning_rate": 5.311846085091856e-05, "loss": 0.9344, "step": 22060 }, { "epoch": 2.48, "learning_rate": 5.3045818248626676e-05, "loss": 0.9263, "step": 22080 }, { "epoch": 2.48, "learning_rate": 5.29731691927629e-05, "loss": 0.9622, "step": 22100 }, { "epoch": 2.49, "learning_rate": 5.29005138372582e-05, "loss": 0.9481, "step": 22120 }, { "epoch": 2.49, "learning_rate": 5.282785233605698e-05, "loss": 0.9256, "step": 22140 }, { "epoch": 2.49, "learning_rate": 5.2755184843116635e-05, "loss": 0.9808, "step": 22160 }, { "epoch": 2.49, "learning_rate": 5.268251151240722e-05, "loss": 0.968, "step": 22180 }, { "epoch": 2.49, "learning_rate": 5.2609832497911215e-05, "loss": 0.95, "step": 22200 }, { "epoch": 2.5, "learning_rate": 5.253714795362309e-05, "loss": 0.9662, "step": 22220 }, { "epoch": 2.5, "learning_rate": 5.246445803354907e-05, "loss": 0.9352, "step": 22240 }, { "epoch": 2.5, "learning_rate": 5.2391762891706764e-05, "loss": 0.9437, "step": 22260 }, { "epoch": 2.5, "learning_rate": 5.231906268212483e-05, "loss": 0.9409, "step": 22280 }, { "epoch": 2.51, "learning_rate": 5.224635755884268e-05, "loss": 0.9487, "step": 22300 }, { "epoch": 2.51, "learning_rate": 5.217364767591014e-05, "loss": 0.9401, "step": 22320 }, { "epoch": 2.51, "learning_rate": 5.210093318738709e-05, "loss": 0.952, "step": 22340 }, { "epoch": 2.51, "learning_rate": 5.20282142473432e-05, "loss": 0.9752, "step": 22360 }, { "epoch": 2.51, "learning_rate": 5.195549100985756e-05, "loss": 0.9655, "step": 22380 }, { "epoch": 2.52, "learning_rate": 5.188276362901836e-05, "loss": 0.9752, "step": 22400 }, { "epoch": 2.52, "learning_rate": 5.1810032258922605e-05, "loss": 0.9632, "step": 22420 }, { "epoch": 2.52, "learning_rate": 5.173729705367568e-05, "loss": 0.9166, "step": 22440 }, { "epoch": 2.52, "learning_rate": 5.166455816739118e-05, "loss": 0.9433, "step": 22460 }, { "epoch": 2.53, "learning_rate": 5.159181575419043e-05, "loss": 0.9459, "step": 22480 }, { "epoch": 2.53, "learning_rate": 5.151906996820227e-05, "loss": 0.9316, "step": 22500 }, { "epoch": 2.53, "learning_rate": 5.144632096356269e-05, "loss": 0.945, "step": 22520 }, { "epoch": 2.53, "learning_rate": 5.137356889441444e-05, "loss": 0.9192, "step": 22540 }, { "epoch": 2.53, "learning_rate": 5.1300813914906853e-05, "loss": 0.9338, "step": 22560 }, { "epoch": 2.54, "learning_rate": 5.122805617919536e-05, "loss": 0.9607, "step": 22580 }, { "epoch": 2.54, "learning_rate": 5.115529584144125e-05, "loss": 0.9123, "step": 22600 }, { "epoch": 2.54, "learning_rate": 5.108253305581134e-05, "loss": 0.9547, "step": 22620 }, { "epoch": 2.54, "learning_rate": 5.100976797647761e-05, "loss": 0.9258, "step": 22640 }, { "epoch": 2.55, "learning_rate": 5.0937000757616934e-05, "loss": 0.9028, "step": 22660 }, { "epoch": 2.55, "learning_rate": 5.086423155341068e-05, "loss": 0.9422, "step": 22680 }, { "epoch": 2.55, "learning_rate": 5.079146051804444e-05, "loss": 0.9331, "step": 22700 }, { "epoch": 2.55, "learning_rate": 5.071868780570772e-05, "loss": 0.9227, "step": 22720 }, { "epoch": 2.56, "learning_rate": 5.0645913570593484e-05, "loss": 0.9301, "step": 22740 }, { "epoch": 2.56, "learning_rate": 5.057313796689804e-05, "loss": 0.9385, "step": 22760 }, { "epoch": 2.56, "learning_rate": 5.050036114882052e-05, "loss": 0.9192, "step": 22780 }, { "epoch": 2.56, "learning_rate": 5.042758327056265e-05, "loss": 0.9302, "step": 22800 }, { "epoch": 2.56, "learning_rate": 5.03548044863284e-05, "loss": 0.9305, "step": 22820 }, { "epoch": 2.57, "learning_rate": 5.028202495032366e-05, "loss": 0.9267, "step": 22840 }, { "epoch": 2.57, "learning_rate": 5.020924481675593e-05, "loss": 0.9608, "step": 22860 }, { "epoch": 2.57, "learning_rate": 5.013646423983392e-05, "loss": 0.9333, "step": 22880 }, { "epoch": 2.57, "learning_rate": 5.006368337376737e-05, "loss": 0.9643, "step": 22900 }, { "epoch": 2.58, "learning_rate": 4.999090237276657e-05, "loss": 0.9209, "step": 22920 }, { "epoch": 2.58, "learning_rate": 4.991812139104207e-05, "loss": 0.9363, "step": 22940 }, { "epoch": 2.58, "learning_rate": 4.984534058280445e-05, "loss": 0.9439, "step": 22960 }, { "epoch": 2.58, "learning_rate": 4.97725601022639e-05, "loss": 0.9514, "step": 22980 }, { "epoch": 2.58, "learning_rate": 4.969978010362989e-05, "loss": 0.9453, "step": 23000 }, { "epoch": 2.59, "learning_rate": 4.9627000741110865e-05, "loss": 0.9394, "step": 23020 }, { "epoch": 2.59, "learning_rate": 4.955422216891397e-05, "loss": 0.9316, "step": 23040 }, { "epoch": 2.59, "learning_rate": 4.9481444541244665e-05, "loss": 0.9088, "step": 23060 }, { "epoch": 2.59, "learning_rate": 4.9408668012306344e-05, "loss": 0.9212, "step": 23080 }, { "epoch": 2.6, "learning_rate": 4.933589273630013e-05, "loss": 0.9114, "step": 23100 }, { "epoch": 2.6, "learning_rate": 4.9263118867424515e-05, "loss": 0.9269, "step": 23120 }, { "epoch": 2.6, "learning_rate": 4.919034655987493e-05, "loss": 0.9383, "step": 23140 }, { "epoch": 2.6, "learning_rate": 4.911757596784357e-05, "loss": 0.9495, "step": 23160 }, { "epoch": 2.6, "learning_rate": 4.904480724551897e-05, "loss": 0.9556, "step": 23180 }, { "epoch": 2.61, "learning_rate": 4.89720405470857e-05, "loss": 0.9101, "step": 23200 }, { "epoch": 2.61, "learning_rate": 4.8899276026724034e-05, "loss": 0.9385, "step": 23220 }, { "epoch": 2.61, "learning_rate": 4.882651383860963e-05, "loss": 0.9146, "step": 23240 }, { "epoch": 2.61, "learning_rate": 4.875375413691327e-05, "loss": 0.8875, "step": 23260 }, { "epoch": 2.62, "learning_rate": 4.868099707580035e-05, "loss": 0.9435, "step": 23280 }, { "epoch": 2.62, "learning_rate": 4.8608242809430744e-05, "loss": 0.9215, "step": 23300 }, { "epoch": 2.62, "learning_rate": 4.8535491491958415e-05, "loss": 0.9206, "step": 23320 }, { "epoch": 2.62, "learning_rate": 4.846274327753107e-05, "loss": 0.9159, "step": 23340 }, { "epoch": 2.62, "learning_rate": 4.8389998320289785e-05, "loss": 0.9272, "step": 23360 }, { "epoch": 2.63, "learning_rate": 4.8317256774368815e-05, "loss": 0.9164, "step": 23380 }, { "epoch": 2.63, "learning_rate": 4.824451879389513e-05, "loss": 0.9521, "step": 23400 }, { "epoch": 2.63, "learning_rate": 4.8171784532988165e-05, "loss": 0.9555, "step": 23420 }, { "epoch": 2.63, "learning_rate": 4.809905414575947e-05, "loss": 0.9317, "step": 23440 }, { "epoch": 2.64, "learning_rate": 4.802632778631241e-05, "loss": 0.9336, "step": 23460 }, { "epoch": 2.64, "learning_rate": 4.795360560874181e-05, "loss": 0.9299, "step": 23480 }, { "epoch": 2.64, "learning_rate": 4.7880887767133565e-05, "loss": 0.9365, "step": 23500 }, { "epoch": 2.64, "learning_rate": 4.7808174415564484e-05, "loss": 0.9178, "step": 23520 }, { "epoch": 2.64, "learning_rate": 4.773546570810182e-05, "loss": 0.931, "step": 23540 }, { "epoch": 2.65, "learning_rate": 4.766276179880296e-05, "loss": 0.9326, "step": 23560 }, { "epoch": 2.65, "learning_rate": 4.759006284171515e-05, "loss": 0.9195, "step": 23580 }, { "epoch": 2.65, "learning_rate": 4.7517368990875146e-05, "loss": 0.9536, "step": 23600 }, { "epoch": 2.65, "learning_rate": 4.744468040030891e-05, "loss": 0.9326, "step": 23620 }, { "epoch": 2.66, "learning_rate": 4.737199722403117e-05, "loss": 0.9336, "step": 23640 }, { "epoch": 2.66, "learning_rate": 4.729931961604529e-05, "loss": 0.9292, "step": 23660 }, { "epoch": 2.66, "learning_rate": 4.722664773034278e-05, "loss": 0.9481, "step": 23680 }, { "epoch": 2.66, "learning_rate": 4.7153981720902997e-05, "loss": 0.9314, "step": 23700 }, { "epoch": 2.67, "learning_rate": 4.7081321741692904e-05, "loss": 0.9054, "step": 23720 }, { "epoch": 2.67, "learning_rate": 4.7008667946666674e-05, "loss": 0.946, "step": 23740 }, { "epoch": 2.67, "learning_rate": 4.693602048976537e-05, "loss": 0.9389, "step": 23760 }, { "epoch": 2.67, "learning_rate": 4.686337952491659e-05, "loss": 0.9484, "step": 23780 }, { "epoch": 2.67, "learning_rate": 4.679074520603423e-05, "loss": 0.9318, "step": 23800 }, { "epoch": 2.68, "learning_rate": 4.671811768701811e-05, "loss": 0.9421, "step": 23820 }, { "epoch": 2.68, "learning_rate": 4.6645497121753564e-05, "loss": 0.9526, "step": 23840 }, { "epoch": 2.68, "learning_rate": 4.657288366411127e-05, "loss": 0.9352, "step": 23860 }, { "epoch": 2.68, "learning_rate": 4.650027746794686e-05, "loss": 0.9146, "step": 23880 }, { "epoch": 2.69, "learning_rate": 4.642767868710045e-05, "loss": 0.9241, "step": 23900 }, { "epoch": 2.69, "learning_rate": 4.635508747539661e-05, "loss": 0.9173, "step": 23920 }, { "epoch": 2.69, "learning_rate": 4.6282503986643775e-05, "loss": 0.9367, "step": 23940 }, { "epoch": 2.69, "learning_rate": 4.6209928374634036e-05, "loss": 0.939, "step": 23960 }, { "epoch": 2.69, "learning_rate": 4.6137360793142794e-05, "loss": 0.9138, "step": 23980 }, { "epoch": 2.7, "learning_rate": 4.606480139592843e-05, "loss": 0.9526, "step": 24000 }, { "epoch": 2.7, "learning_rate": 4.599225033673203e-05, "loss": 0.9391, "step": 24020 }, { "epoch": 2.7, "learning_rate": 4.591970776927692e-05, "loss": 0.9484, "step": 24040 }, { "epoch": 2.7, "learning_rate": 4.584717384726853e-05, "loss": 0.9413, "step": 24060 }, { "epoch": 2.71, "learning_rate": 4.577464872439391e-05, "loss": 0.9497, "step": 24080 }, { "epoch": 2.71, "learning_rate": 4.57021325543215e-05, "loss": 0.9306, "step": 24100 }, { "epoch": 2.71, "learning_rate": 4.562962549070074e-05, "loss": 0.9218, "step": 24120 }, { "epoch": 2.71, "learning_rate": 4.555712768716179e-05, "loss": 0.9342, "step": 24140 }, { "epoch": 2.71, "learning_rate": 4.548463929731522e-05, "loss": 0.9354, "step": 24160 }, { "epoch": 2.72, "learning_rate": 4.5412160474751595e-05, "loss": 0.9286, "step": 24180 }, { "epoch": 2.72, "learning_rate": 4.5339691373041236e-05, "loss": 0.9458, "step": 24200 }, { "epoch": 2.72, "learning_rate": 4.526723214573389e-05, "loss": 0.956, "step": 24220 }, { "epoch": 2.72, "learning_rate": 4.519478294635837e-05, "loss": 0.9322, "step": 24240 }, { "epoch": 2.73, "learning_rate": 4.51223439284222e-05, "loss": 0.9161, "step": 24260 }, { "epoch": 2.73, "learning_rate": 4.504991524541138e-05, "loss": 0.9273, "step": 24280 }, { "epoch": 2.73, "learning_rate": 4.497749705079001e-05, "loss": 0.9667, "step": 24300 }, { "epoch": 2.73, "learning_rate": 4.490508949799993e-05, "loss": 0.9419, "step": 24320 }, { "epoch": 2.73, "learning_rate": 4.483269274046046e-05, "loss": 0.9533, "step": 24340 }, { "epoch": 2.74, "learning_rate": 4.4760306931568044e-05, "loss": 0.9396, "step": 24360 }, { "epoch": 2.74, "learning_rate": 4.468793222469596e-05, "loss": 0.917, "step": 24380 }, { "epoch": 2.74, "learning_rate": 4.461556877319385e-05, "loss": 0.9475, "step": 24400 }, { "epoch": 2.74, "learning_rate": 4.454321673038766e-05, "loss": 0.9314, "step": 24420 }, { "epoch": 2.75, "learning_rate": 4.447087624957906e-05, "loss": 0.9221, "step": 24440 }, { "epoch": 2.75, "learning_rate": 4.4398547484045245e-05, "loss": 0.9088, "step": 24460 }, { "epoch": 2.75, "learning_rate": 4.4326230587038594e-05, "loss": 0.9398, "step": 24480 }, { "epoch": 2.75, "learning_rate": 4.425392571178635e-05, "loss": 0.9181, "step": 24500 }, { "epoch": 2.76, "learning_rate": 4.418163301149027e-05, "loss": 0.9506, "step": 24520 }, { "epoch": 2.76, "learning_rate": 4.41093526393263e-05, "loss": 0.9425, "step": 24540 }, { "epoch": 2.76, "learning_rate": 4.4037084748444284e-05, "loss": 0.931, "step": 24560 }, { "epoch": 2.76, "learning_rate": 4.3964829491967655e-05, "loss": 0.9209, "step": 24580 }, { "epoch": 2.76, "learning_rate": 4.389258702299298e-05, "loss": 0.9217, "step": 24600 }, { "epoch": 2.77, "learning_rate": 4.3820357494589816e-05, "loss": 0.9193, "step": 24620 }, { "epoch": 2.77, "learning_rate": 4.3748141059800276e-05, "loss": 0.9129, "step": 24640 }, { "epoch": 2.77, "learning_rate": 4.367593787163875e-05, "loss": 0.9262, "step": 24660 }, { "epoch": 2.77, "learning_rate": 4.3603748083091495e-05, "loss": 0.9432, "step": 24680 }, { "epoch": 2.78, "learning_rate": 4.353157184711645e-05, "loss": 0.942, "step": 24700 }, { "epoch": 2.78, "learning_rate": 4.34594093166428e-05, "loss": 0.94, "step": 24720 }, { "epoch": 2.78, "learning_rate": 4.33872606445707e-05, "loss": 0.9462, "step": 24740 }, { "epoch": 2.78, "learning_rate": 4.331512598377092e-05, "loss": 0.9453, "step": 24760 }, { "epoch": 2.78, "learning_rate": 4.3243005487084595e-05, "loss": 0.9504, "step": 24780 }, { "epoch": 2.79, "learning_rate": 4.3170899307322826e-05, "loss": 0.9104, "step": 24800 }, { "epoch": 2.79, "learning_rate": 4.309880759726633e-05, "loss": 0.9403, "step": 24820 }, { "epoch": 2.79, "learning_rate": 4.302673050966523e-05, "loss": 0.9555, "step": 24840 }, { "epoch": 2.79, "learning_rate": 4.295466819723864e-05, "loss": 0.9586, "step": 24860 }, { "epoch": 2.8, "learning_rate": 4.288262081267435e-05, "loss": 0.9811, "step": 24880 }, { "epoch": 2.8, "learning_rate": 4.281058850862856e-05, "loss": 0.929, "step": 24900 }, { "epoch": 2.8, "learning_rate": 4.27385714377255e-05, "loss": 0.9444, "step": 24920 }, { "epoch": 2.8, "learning_rate": 4.266656975255709e-05, "loss": 0.9514, "step": 24940 }, { "epoch": 2.8, "learning_rate": 4.259458360568271e-05, "loss": 0.9328, "step": 24960 }, { "epoch": 2.81, "learning_rate": 4.252261314962878e-05, "loss": 0.9218, "step": 24980 }, { "epoch": 2.81, "learning_rate": 4.245065853688848e-05, "loss": 0.9279, "step": 25000 }, { "epoch": 2.81, "learning_rate": 4.237871991992142e-05, "loss": 0.9317, "step": 25020 }, { "epoch": 2.81, "learning_rate": 4.2306797451153314e-05, "loss": 0.9481, "step": 25040 }, { "epoch": 2.82, "learning_rate": 4.223489128297568e-05, "loss": 0.9363, "step": 25060 }, { "epoch": 2.82, "learning_rate": 4.216300156774548e-05, "loss": 0.9445, "step": 25080 }, { "epoch": 2.82, "learning_rate": 4.209112845778481e-05, "loss": 0.917, "step": 25100 }, { "epoch": 2.82, "learning_rate": 4.201927210538058e-05, "loss": 0.9566, "step": 25120 }, { "epoch": 2.82, "learning_rate": 4.194743266278426e-05, "loss": 0.9185, "step": 25140 }, { "epoch": 2.83, "learning_rate": 4.1875610282211364e-05, "loss": 0.9006, "step": 25160 }, { "epoch": 2.83, "learning_rate": 4.1803805115841366e-05, "loss": 0.92, "step": 25180 }, { "epoch": 2.83, "learning_rate": 4.173201731581724e-05, "loss": 0.9291, "step": 25200 }, { "epoch": 2.83, "learning_rate": 4.166024703424511e-05, "loss": 0.9299, "step": 25220 }, { "epoch": 2.84, "learning_rate": 4.1588494423194046e-05, "loss": 0.9074, "step": 25240 }, { "epoch": 2.84, "learning_rate": 4.151675963469565e-05, "loss": 0.9473, "step": 25260 }, { "epoch": 2.84, "learning_rate": 4.1445042820743764e-05, "loss": 0.9326, "step": 25280 }, { "epoch": 2.84, "learning_rate": 4.137334413329414e-05, "loss": 0.9276, "step": 25300 }, { "epoch": 2.84, "learning_rate": 4.130166372426412e-05, "loss": 0.9333, "step": 25320 }, { "epoch": 2.85, "learning_rate": 4.123000174553235e-05, "loss": 0.9269, "step": 25340 }, { "epoch": 2.85, "learning_rate": 4.1158358348938374e-05, "loss": 0.9264, "step": 25360 }, { "epoch": 2.85, "learning_rate": 4.1086733686282395e-05, "loss": 0.9503, "step": 25380 }, { "epoch": 2.85, "learning_rate": 4.1015127909324936e-05, "loss": 0.9071, "step": 25400 }, { "epoch": 2.86, "learning_rate": 4.094354116978647e-05, "loss": 0.9654, "step": 25420 }, { "epoch": 2.86, "learning_rate": 4.087197361934714e-05, "loss": 0.949, "step": 25440 }, { "epoch": 2.86, "learning_rate": 4.0800425409646456e-05, "loss": 0.9102, "step": 25460 }, { "epoch": 2.86, "learning_rate": 4.0728896692282926e-05, "loss": 0.943, "step": 25480 }, { "epoch": 2.87, "learning_rate": 4.065738761881375e-05, "loss": 0.9392, "step": 25500 }, { "epoch": 2.87, "learning_rate": 4.0585898340754506e-05, "loss": 0.9151, "step": 25520 }, { "epoch": 2.87, "learning_rate": 4.051442900957888e-05, "loss": 0.937, "step": 25540 }, { "epoch": 2.87, "learning_rate": 4.0442979776718237e-05, "loss": 0.9283, "step": 25560 }, { "epoch": 2.87, "learning_rate": 4.037155079356137e-05, "loss": 0.92, "step": 25580 }, { "epoch": 2.88, "learning_rate": 4.030014221145417e-05, "loss": 0.9313, "step": 25600 }, { "epoch": 2.88, "learning_rate": 4.022875418169931e-05, "loss": 0.9367, "step": 25620 }, { "epoch": 2.88, "learning_rate": 4.0157386855555906e-05, "loss": 0.9642, "step": 25640 }, { "epoch": 2.88, "learning_rate": 4.00860403842392e-05, "loss": 0.9027, "step": 25660 }, { "epoch": 2.89, "learning_rate": 4.001471491892026e-05, "loss": 0.9215, "step": 25680 }, { "epoch": 2.89, "learning_rate": 3.9943410610725665e-05, "loss": 0.9546, "step": 25700 }, { "epoch": 2.89, "learning_rate": 3.9872127610737095e-05, "loss": 0.909, "step": 25720 }, { "epoch": 2.89, "learning_rate": 3.9800866069991173e-05, "loss": 0.9495, "step": 25740 }, { "epoch": 2.89, "learning_rate": 3.9729626139478995e-05, "loss": 0.9311, "step": 25760 }, { "epoch": 2.9, "learning_rate": 3.965840797014586e-05, "loss": 0.9387, "step": 25780 }, { "epoch": 2.9, "learning_rate": 3.9587211712891005e-05, "loss": 0.919, "step": 25800 }, { "epoch": 2.9, "learning_rate": 3.9516037518567204e-05, "loss": 0.938, "step": 25820 }, { "epoch": 2.9, "learning_rate": 3.9444885537980526e-05, "loss": 0.929, "step": 25840 }, { "epoch": 2.91, "learning_rate": 3.9373755921889886e-05, "loss": 0.9502, "step": 25860 }, { "epoch": 2.91, "learning_rate": 3.93026488210069e-05, "loss": 0.9294, "step": 25880 }, { "epoch": 2.91, "learning_rate": 3.9231564385995476e-05, "loss": 0.9643, "step": 25900 }, { "epoch": 2.91, "learning_rate": 3.91605027674714e-05, "loss": 0.9125, "step": 25920 }, { "epoch": 2.91, "learning_rate": 3.908946411600222e-05, "loss": 0.9356, "step": 25940 }, { "epoch": 2.92, "learning_rate": 3.9018448582106795e-05, "loss": 0.9298, "step": 25960 }, { "epoch": 2.92, "learning_rate": 3.894745631625495e-05, "loss": 0.9532, "step": 25980 }, { "epoch": 2.92, "learning_rate": 3.887648746886727e-05, "loss": 0.9326, "step": 26000 }, { "epoch": 2.92, "learning_rate": 3.8805542190314705e-05, "loss": 0.9003, "step": 26020 }, { "epoch": 2.93, "learning_rate": 3.873462063091825e-05, "loss": 0.9361, "step": 26040 }, { "epoch": 2.93, "learning_rate": 3.866372294094864e-05, "loss": 0.9186, "step": 26060 }, { "epoch": 2.93, "learning_rate": 3.859284927062604e-05, "loss": 0.9442, "step": 26080 }, { "epoch": 2.93, "learning_rate": 3.8521999770119786e-05, "loss": 0.929, "step": 26100 }, { "epoch": 2.93, "learning_rate": 3.845117458954787e-05, "loss": 0.9241, "step": 26120 }, { "epoch": 2.94, "learning_rate": 3.838037387897688e-05, "loss": 0.9246, "step": 26140 }, { "epoch": 2.94, "learning_rate": 3.8309597788421474e-05, "loss": 0.9379, "step": 26160 }, { "epoch": 2.94, "learning_rate": 3.823884646784421e-05, "loss": 0.9221, "step": 26180 }, { "epoch": 2.94, "learning_rate": 3.8168120067155096e-05, "loss": 0.9069, "step": 26200 }, { "epoch": 2.95, "learning_rate": 3.809741873621138e-05, "loss": 0.9144, "step": 26220 }, { "epoch": 2.95, "learning_rate": 3.802674262481719e-05, "loss": 0.9446, "step": 26240 }, { "epoch": 2.95, "learning_rate": 3.79560918827232e-05, "loss": 0.972, "step": 26260 }, { "epoch": 2.95, "learning_rate": 3.7885466659626334e-05, "loss": 0.9395, "step": 26280 }, { "epoch": 2.96, "learning_rate": 3.781486710516948e-05, "loss": 0.9471, "step": 26300 }, { "epoch": 2.96, "learning_rate": 3.77442933689411e-05, "loss": 0.9502, "step": 26320 }, { "epoch": 2.96, "learning_rate": 3.767374560047495e-05, "loss": 0.9296, "step": 26340 }, { "epoch": 2.96, "learning_rate": 3.760322394924979e-05, "loss": 0.9643, "step": 26360 }, { "epoch": 2.96, "learning_rate": 3.753272856468903e-05, "loss": 0.929, "step": 26380 }, { "epoch": 2.97, "learning_rate": 3.746225959616042e-05, "loss": 0.9233, "step": 26400 }, { "epoch": 2.97, "learning_rate": 3.7391817192975745e-05, "loss": 0.9169, "step": 26420 }, { "epoch": 2.97, "learning_rate": 3.732140150439048e-05, "loss": 0.891, "step": 26440 }, { "epoch": 2.97, "learning_rate": 3.725101267960359e-05, "loss": 0.9535, "step": 26460 }, { "epoch": 2.98, "learning_rate": 3.718065086775695e-05, "loss": 0.9363, "step": 26480 }, { "epoch": 2.98, "learning_rate": 3.7110316217935357e-05, "loss": 0.964, "step": 26500 }, { "epoch": 2.98, "learning_rate": 3.7040008879166e-05, "loss": 0.9386, "step": 26520 }, { "epoch": 2.98, "learning_rate": 3.696972900041816e-05, "loss": 0.9268, "step": 26540 }, { "epoch": 2.98, "learning_rate": 3.6899476730603e-05, "loss": 0.9139, "step": 26560 }, { "epoch": 2.99, "learning_rate": 3.682925221857315e-05, "loss": 0.9274, "step": 26580 }, { "epoch": 2.99, "learning_rate": 3.675905561312244e-05, "loss": 0.9195, "step": 26600 }, { "epoch": 2.99, "learning_rate": 3.668888706298554e-05, "loss": 0.9289, "step": 26620 }, { "epoch": 2.99, "learning_rate": 3.66187467168377e-05, "loss": 0.9429, "step": 26640 }, { "epoch": 3.0, "learning_rate": 3.654863472329445e-05, "loss": 0.9156, "step": 26660 }, { "epoch": 3.0, "learning_rate": 3.647855123091115e-05, "loss": 0.9355, "step": 26680 }, { "epoch": 3.0, "learning_rate": 3.640849638818286e-05, "loss": 0.9313, "step": 26700 }, { "epoch": 3.0, "learning_rate": 3.633847034354389e-05, "loss": 0.86, "step": 26720 }, { "epoch": 3.0, "learning_rate": 3.626847324536755e-05, "loss": 0.9101, "step": 26740 }, { "epoch": 3.01, "learning_rate": 3.6198505241965806e-05, "loss": 0.9232, "step": 26760 }, { "epoch": 3.01, "learning_rate": 3.6128566481588977e-05, "loss": 0.9325, "step": 26780 }, { "epoch": 3.01, "learning_rate": 3.605865711242544e-05, "loss": 0.9364, "step": 26800 }, { "epoch": 3.01, "learning_rate": 3.598877728260127e-05, "loss": 0.8978, "step": 26820 }, { "epoch": 3.02, "learning_rate": 3.591892714017995e-05, "loss": 0.892, "step": 26840 }, { "epoch": 3.02, "learning_rate": 3.5849106833162124e-05, "loss": 0.8974, "step": 26860 }, { "epoch": 3.02, "learning_rate": 3.577931650948512e-05, "loss": 0.8923, "step": 26880 }, { "epoch": 3.02, "learning_rate": 3.5709556317022823e-05, "loss": 0.9018, "step": 26900 }, { "epoch": 3.02, "learning_rate": 3.563982640358523e-05, "loss": 0.9101, "step": 26920 }, { "epoch": 3.03, "learning_rate": 3.55701269169182e-05, "loss": 0.8988, "step": 26940 }, { "epoch": 3.03, "learning_rate": 3.550045800470311e-05, "loss": 0.8896, "step": 26960 }, { "epoch": 3.03, "learning_rate": 3.5430819814556544e-05, "loss": 0.9021, "step": 26980 }, { "epoch": 3.03, "learning_rate": 3.536121249403004e-05, "loss": 0.8667, "step": 27000 }, { "epoch": 3.04, "learning_rate": 3.5291636190609665e-05, "loss": 0.8931, "step": 27020 }, { "epoch": 3.04, "learning_rate": 3.52220910517158e-05, "loss": 0.8927, "step": 27040 }, { "epoch": 3.04, "learning_rate": 3.515257722470281e-05, "loss": 0.9163, "step": 27060 }, { "epoch": 3.04, "learning_rate": 3.50830948568587e-05, "loss": 0.9169, "step": 27080 }, { "epoch": 3.04, "learning_rate": 3.50136440954048e-05, "loss": 0.9031, "step": 27100 }, { "epoch": 3.05, "learning_rate": 3.494422508749547e-05, "loss": 0.9213, "step": 27120 }, { "epoch": 3.05, "learning_rate": 3.487483798021785e-05, "loss": 0.9082, "step": 27140 }, { "epoch": 3.05, "learning_rate": 3.480548292059139e-05, "loss": 0.911, "step": 27160 }, { "epoch": 3.05, "learning_rate": 3.473616005556773e-05, "loss": 0.9075, "step": 27180 }, { "epoch": 3.06, "learning_rate": 3.4666869532030224e-05, "loss": 0.9072, "step": 27200 }, { "epoch": 3.06, "learning_rate": 3.459761149679378e-05, "loss": 0.9181, "step": 27220 }, { "epoch": 3.06, "learning_rate": 3.4528386096604366e-05, "loss": 0.908, "step": 27240 }, { "epoch": 3.06, "learning_rate": 3.445919347813888e-05, "loss": 0.8838, "step": 27260 }, { "epoch": 3.07, "learning_rate": 3.439003378800475e-05, "loss": 0.8977, "step": 27280 }, { "epoch": 3.07, "learning_rate": 3.4320907172739594e-05, "loss": 0.9024, "step": 27300 }, { "epoch": 3.07, "learning_rate": 3.425181377881099e-05, "loss": 0.9174, "step": 27320 }, { "epoch": 3.07, "learning_rate": 3.4182753752616094e-05, "loss": 0.916, "step": 27340 }, { "epoch": 3.07, "learning_rate": 3.411372724048144e-05, "loss": 0.9103, "step": 27360 }, { "epoch": 3.08, "learning_rate": 3.4044734388662426e-05, "loss": 0.8922, "step": 27380 }, { "epoch": 3.08, "learning_rate": 3.3975775343343205e-05, "loss": 0.8991, "step": 27400 }, { "epoch": 3.08, "learning_rate": 3.390685025063633e-05, "loss": 0.8822, "step": 27420 }, { "epoch": 3.08, "learning_rate": 3.383795925658233e-05, "loss": 0.9007, "step": 27440 }, { "epoch": 3.09, "learning_rate": 3.376910250714955e-05, "loss": 0.9058, "step": 27460 }, { "epoch": 3.09, "learning_rate": 3.370028014823375e-05, "loss": 0.9046, "step": 27480 }, { "epoch": 3.09, "learning_rate": 3.363149232565785e-05, "loss": 0.9123, "step": 27500 }, { "epoch": 3.09, "learning_rate": 3.356273918517153e-05, "loss": 0.8856, "step": 27520 }, { "epoch": 3.09, "learning_rate": 3.349402087245104e-05, "loss": 0.9146, "step": 27540 }, { "epoch": 3.1, "learning_rate": 3.342533753309887e-05, "loss": 0.9106, "step": 27560 }, { "epoch": 3.1, "learning_rate": 3.335668931264327e-05, "loss": 0.8902, "step": 27580 }, { "epoch": 3.1, "learning_rate": 3.328807635653822e-05, "loss": 0.8881, "step": 27600 }, { "epoch": 3.1, "learning_rate": 3.321949881016293e-05, "loss": 0.9172, "step": 27620 }, { "epoch": 3.11, "learning_rate": 3.315095681882159e-05, "loss": 0.9076, "step": 27640 }, { "epoch": 3.11, "learning_rate": 3.3082450527743014e-05, "loss": 0.8837, "step": 27660 }, { "epoch": 3.11, "learning_rate": 3.301398008208042e-05, "loss": 0.8725, "step": 27680 }, { "epoch": 3.11, "learning_rate": 3.294554562691108e-05, "loss": 0.9202, "step": 27700 }, { "epoch": 3.11, "learning_rate": 3.287714730723596e-05, "loss": 0.909, "step": 27720 }, { "epoch": 3.12, "learning_rate": 3.280878526797948e-05, "loss": 0.9094, "step": 27740 }, { "epoch": 3.12, "learning_rate": 3.274045965398924e-05, "loss": 0.8797, "step": 27760 }, { "epoch": 3.12, "learning_rate": 3.267217061003562e-05, "loss": 0.8962, "step": 27780 }, { "epoch": 3.12, "learning_rate": 3.260391828081147e-05, "loss": 0.8772, "step": 27800 }, { "epoch": 3.13, "learning_rate": 3.253570281093192e-05, "loss": 0.8907, "step": 27820 }, { "epoch": 3.13, "learning_rate": 3.246752434493398e-05, "loss": 0.8898, "step": 27840 }, { "epoch": 3.13, "learning_rate": 3.239938302727622e-05, "loss": 0.8992, "step": 27860 }, { "epoch": 3.13, "learning_rate": 3.233127900233855e-05, "loss": 0.8948, "step": 27880 }, { "epoch": 3.13, "learning_rate": 3.2263212414421846e-05, "loss": 0.9386, "step": 27900 }, { "epoch": 3.14, "learning_rate": 3.219518340774763e-05, "loss": 0.915, "step": 27920 }, { "epoch": 3.14, "learning_rate": 3.2127192126457815e-05, "loss": 0.9026, "step": 27940 }, { "epoch": 3.14, "learning_rate": 3.205923871461442e-05, "loss": 0.8793, "step": 27960 }, { "epoch": 3.14, "learning_rate": 3.19913233161992e-05, "loss": 0.9182, "step": 27980 }, { "epoch": 3.15, "learning_rate": 3.192344607511329e-05, "loss": 0.8803, "step": 28000 }, { "epoch": 3.15, "learning_rate": 3.18556071351771e-05, "loss": 0.872, "step": 28020 }, { "epoch": 3.15, "learning_rate": 3.1787806640129826e-05, "loss": 0.899, "step": 28040 }, { "epoch": 3.15, "learning_rate": 3.1720044733629196e-05, "loss": 0.9047, "step": 28060 }, { "epoch": 3.16, "learning_rate": 3.165232155925118e-05, "loss": 0.8979, "step": 28080 }, { "epoch": 3.16, "learning_rate": 3.15846372604897e-05, "loss": 0.8833, "step": 28100 }, { "epoch": 3.16, "learning_rate": 3.151699198075633e-05, "loss": 0.908, "step": 28120 }, { "epoch": 3.16, "learning_rate": 3.1449385863379866e-05, "loss": 0.8998, "step": 28140 }, { "epoch": 3.16, "learning_rate": 3.138181905160625e-05, "loss": 0.8975, "step": 28160 }, { "epoch": 3.17, "learning_rate": 3.13142916885981e-05, "loss": 0.8921, "step": 28180 }, { "epoch": 3.17, "learning_rate": 3.124680391743438e-05, "loss": 0.9263, "step": 28200 }, { "epoch": 3.17, "learning_rate": 3.117935588111026e-05, "loss": 0.9153, "step": 28220 }, { "epoch": 3.17, "learning_rate": 3.111194772253668e-05, "loss": 0.9274, "step": 28240 }, { "epoch": 3.18, "learning_rate": 3.104457958454009e-05, "loss": 0.9159, "step": 28260 }, { "epoch": 3.18, "learning_rate": 3.097725160986212e-05, "loss": 0.9314, "step": 28280 }, { "epoch": 3.18, "learning_rate": 3.090996394115933e-05, "loss": 0.9059, "step": 28300 }, { "epoch": 3.18, "learning_rate": 3.0842716721002894e-05, "loss": 0.9248, "step": 28320 }, { "epoch": 3.18, "learning_rate": 3.077551009187821e-05, "loss": 0.9125, "step": 28340 }, { "epoch": 3.19, "learning_rate": 3.0708344196184756e-05, "loss": 0.9084, "step": 28360 }, { "epoch": 3.19, "learning_rate": 3.064121917623566e-05, "loss": 0.9046, "step": 28380 }, { "epoch": 3.19, "learning_rate": 3.0574135174257444e-05, "loss": 0.8961, "step": 28400 }, { "epoch": 3.19, "learning_rate": 3.050709233238972e-05, "loss": 0.9001, "step": 28420 }, { "epoch": 3.2, "learning_rate": 3.0440090792684884e-05, "loss": 0.91, "step": 28440 }, { "epoch": 3.2, "learning_rate": 3.037313069710784e-05, "loss": 0.9225, "step": 28460 }, { "epoch": 3.2, "learning_rate": 3.0306212187535653e-05, "loss": 0.8991, "step": 28480 }, { "epoch": 3.2, "learning_rate": 3.0239335405757275e-05, "loss": 0.8773, "step": 28500 }, { "epoch": 3.2, "learning_rate": 3.0172500493473294e-05, "loss": 0.8922, "step": 28520 }, { "epoch": 3.21, "learning_rate": 3.0105707592295528e-05, "loss": 0.9035, "step": 28540 }, { "epoch": 3.21, "learning_rate": 3.003895684374679e-05, "loss": 0.9027, "step": 28560 }, { "epoch": 3.21, "learning_rate": 2.9972248389260593e-05, "loss": 0.9163, "step": 28580 }, { "epoch": 3.21, "learning_rate": 2.9905582370180836e-05, "loss": 0.909, "step": 28600 }, { "epoch": 3.22, "learning_rate": 2.9838958927761477e-05, "loss": 0.8952, "step": 28620 }, { "epoch": 3.22, "learning_rate": 2.9772378203166307e-05, "loss": 0.9269, "step": 28640 }, { "epoch": 3.22, "learning_rate": 2.9705840337468554e-05, "loss": 0.8917, "step": 28660 }, { "epoch": 3.22, "learning_rate": 2.9639345471650716e-05, "loss": 0.8882, "step": 28680 }, { "epoch": 3.22, "learning_rate": 2.9572893746604052e-05, "loss": 0.9008, "step": 28700 }, { "epoch": 3.23, "learning_rate": 2.950648530312854e-05, "loss": 0.9153, "step": 28720 }, { "epoch": 3.23, "learning_rate": 2.9440120281932403e-05, "loss": 0.8977, "step": 28740 }, { "epoch": 3.23, "learning_rate": 2.937379882363183e-05, "loss": 0.9006, "step": 28760 }, { "epoch": 3.23, "learning_rate": 2.9307521068750748e-05, "loss": 0.921, "step": 28780 }, { "epoch": 3.24, "learning_rate": 2.924128715772047e-05, "loss": 0.8782, "step": 28800 }, { "epoch": 3.24, "learning_rate": 2.9175097230879423e-05, "loss": 0.8994, "step": 28820 }, { "epoch": 3.24, "learning_rate": 2.9108951428472804e-05, "loss": 0.8945, "step": 28840 }, { "epoch": 3.24, "learning_rate": 2.9042849890652352e-05, "loss": 0.8867, "step": 28860 }, { "epoch": 3.24, "learning_rate": 2.8976792757476013e-05, "loss": 0.8793, "step": 28880 }, { "epoch": 3.25, "learning_rate": 2.891078016890763e-05, "loss": 0.9037, "step": 28900 }, { "epoch": 3.25, "learning_rate": 2.8844812264816684e-05, "loss": 0.9293, "step": 28920 }, { "epoch": 3.25, "learning_rate": 2.8778889184977986e-05, "loss": 0.8962, "step": 28940 }, { "epoch": 3.25, "learning_rate": 2.8713011069071306e-05, "loss": 0.886, "step": 28960 }, { "epoch": 3.26, "learning_rate": 2.8647178056681194e-05, "loss": 0.8791, "step": 28980 }, { "epoch": 3.26, "learning_rate": 2.8581390287296672e-05, "loss": 0.9162, "step": 29000 }, { "epoch": 3.26, "learning_rate": 2.851564790031086e-05, "loss": 0.9088, "step": 29020 }, { "epoch": 3.26, "learning_rate": 2.8449951035020672e-05, "loss": 0.9208, "step": 29040 }, { "epoch": 3.27, "learning_rate": 2.8384299830626637e-05, "loss": 0.8747, "step": 29060 }, { "epoch": 3.27, "learning_rate": 2.8318694426232516e-05, "loss": 0.8721, "step": 29080 }, { "epoch": 3.27, "learning_rate": 2.825313496084503e-05, "loss": 0.9281, "step": 29100 }, { "epoch": 3.27, "learning_rate": 2.8187621573373544e-05, "loss": 0.9045, "step": 29120 }, { "epoch": 3.27, "learning_rate": 2.8122154402629818e-05, "loss": 0.8925, "step": 29140 }, { "epoch": 3.28, "learning_rate": 2.8056733587327694e-05, "loss": 0.8958, "step": 29160 }, { "epoch": 3.28, "learning_rate": 2.7991359266082717e-05, "loss": 0.9155, "step": 29180 }, { "epoch": 3.28, "learning_rate": 2.7926031577412038e-05, "loss": 0.8971, "step": 29200 }, { "epoch": 3.28, "learning_rate": 2.7860750659733938e-05, "loss": 0.9249, "step": 29220 }, { "epoch": 3.29, "learning_rate": 2.779551665136756e-05, "loss": 0.9045, "step": 29240 }, { "epoch": 3.29, "learning_rate": 2.773032969053273e-05, "loss": 0.9207, "step": 29260 }, { "epoch": 3.29, "learning_rate": 2.7665189915349533e-05, "loss": 0.8938, "step": 29280 }, { "epoch": 3.29, "learning_rate": 2.7600097463838114e-05, "loss": 0.9088, "step": 29300 }, { "epoch": 3.29, "learning_rate": 2.753505247391832e-05, "loss": 0.9085, "step": 29320 }, { "epoch": 3.3, "learning_rate": 2.7470055083409452e-05, "loss": 0.8978, "step": 29340 }, { "epoch": 3.3, "learning_rate": 2.740510543002996e-05, "loss": 0.8963, "step": 29360 }, { "epoch": 3.3, "learning_rate": 2.734020365139708e-05, "loss": 0.9295, "step": 29380 }, { "epoch": 3.3, "learning_rate": 2.727534988502673e-05, "loss": 0.9076, "step": 29400 }, { "epoch": 3.31, "learning_rate": 2.721054426833301e-05, "loss": 0.9037, "step": 29420 }, { "epoch": 3.31, "learning_rate": 2.7145786938628036e-05, "loss": 0.8821, "step": 29440 }, { "epoch": 3.31, "learning_rate": 2.7081078033121577e-05, "loss": 0.8976, "step": 29460 }, { "epoch": 3.31, "learning_rate": 2.7016417688920815e-05, "loss": 0.9083, "step": 29480 }, { "epoch": 3.31, "learning_rate": 2.695180604303007e-05, "loss": 0.9198, "step": 29500 }, { "epoch": 3.32, "learning_rate": 2.6887243232350434e-05, "loss": 0.9052, "step": 29520 }, { "epoch": 3.32, "learning_rate": 2.6822729393679558e-05, "loss": 0.9033, "step": 29540 }, { "epoch": 3.32, "learning_rate": 2.6758264663711306e-05, "loss": 0.9058, "step": 29560 }, { "epoch": 3.32, "learning_rate": 2.6693849179035513e-05, "loss": 0.889, "step": 29580 }, { "epoch": 3.33, "learning_rate": 2.662948307613764e-05, "loss": 0.9118, "step": 29600 }, { "epoch": 3.33, "learning_rate": 2.6565166491398553e-05, "loss": 0.8994, "step": 29620 }, { "epoch": 3.33, "learning_rate": 2.6500899561094184e-05, "loss": 0.9048, "step": 29640 }, { "epoch": 3.33, "learning_rate": 2.643668242139522e-05, "loss": 0.9, "step": 29660 }, { "epoch": 3.33, "learning_rate": 2.63725152083669e-05, "loss": 0.9199, "step": 29680 }, { "epoch": 3.34, "learning_rate": 2.630839805796863e-05, "loss": 0.9024, "step": 29700 }, { "epoch": 3.34, "learning_rate": 2.624433110605383e-05, "loss": 0.9102, "step": 29720 }, { "epoch": 3.34, "learning_rate": 2.6180314488369452e-05, "loss": 0.8843, "step": 29740 }, { "epoch": 3.34, "learning_rate": 2.611634834055585e-05, "loss": 0.8905, "step": 29760 }, { "epoch": 3.35, "learning_rate": 2.6052432798146436e-05, "loss": 0.8834, "step": 29780 }, { "epoch": 3.35, "learning_rate": 2.5988567996567402e-05, "loss": 0.8973, "step": 29800 }, { "epoch": 3.35, "learning_rate": 2.5924754071137415e-05, "loss": 0.887, "step": 29820 }, { "epoch": 3.35, "learning_rate": 2.5860991157067356e-05, "loss": 0.9162, "step": 29840 }, { "epoch": 3.36, "learning_rate": 2.5797279389460037e-05, "loss": 0.8948, "step": 29860 }, { "epoch": 3.36, "learning_rate": 2.5733618903309843e-05, "loss": 0.897, "step": 29880 }, { "epoch": 3.36, "learning_rate": 2.567000983350254e-05, "loss": 0.875, "step": 29900 }, { "epoch": 3.36, "learning_rate": 2.5606452314815e-05, "loss": 0.9287, "step": 29920 }, { "epoch": 3.36, "learning_rate": 2.554294648191477e-05, "loss": 0.8996, "step": 29940 }, { "epoch": 3.37, "learning_rate": 2.5479492469359944e-05, "loss": 0.8989, "step": 29960 }, { "epoch": 3.37, "learning_rate": 2.5416090411598813e-05, "loss": 0.91, "step": 29980 }, { "epoch": 3.37, "learning_rate": 2.535274044296957e-05, "loss": 0.9104, "step": 30000 }, { "epoch": 3.37, "learning_rate": 2.5289442697700043e-05, "loss": 0.8902, "step": 30020 }, { "epoch": 3.38, "learning_rate": 2.5226197309907418e-05, "loss": 0.8907, "step": 30040 }, { "epoch": 3.38, "learning_rate": 2.5163004413597955e-05, "loss": 0.9099, "step": 30060 }, { "epoch": 3.38, "learning_rate": 2.5099864142666642e-05, "loss": 0.8979, "step": 30080 }, { "epoch": 3.38, "learning_rate": 2.5036776630896985e-05, "loss": 0.9008, "step": 30100 }, { "epoch": 3.38, "learning_rate": 2.4973742011960775e-05, "loss": 0.9105, "step": 30120 }, { "epoch": 3.39, "learning_rate": 2.4910760419417616e-05, "loss": 0.9075, "step": 30140 }, { "epoch": 3.39, "learning_rate": 2.4847831986714837e-05, "loss": 0.9141, "step": 30160 }, { "epoch": 3.39, "learning_rate": 2.47849568471871e-05, "loss": 0.9281, "step": 30180 }, { "epoch": 3.39, "learning_rate": 2.472213513405615e-05, "loss": 0.9085, "step": 30200 }, { "epoch": 3.4, "learning_rate": 2.4659366980430547e-05, "loss": 0.9308, "step": 30220 }, { "epoch": 3.4, "learning_rate": 2.4596652519305346e-05, "loss": 0.9147, "step": 30240 }, { "epoch": 3.4, "learning_rate": 2.4533991883561868e-05, "loss": 0.91, "step": 30260 }, { "epoch": 3.4, "learning_rate": 2.4471385205967323e-05, "loss": 0.8888, "step": 30280 }, { "epoch": 3.4, "learning_rate": 2.4408832619174644e-05, "loss": 0.894, "step": 30300 }, { "epoch": 3.41, "learning_rate": 2.4346334255722168e-05, "loss": 0.8859, "step": 30320 }, { "epoch": 3.41, "learning_rate": 2.4283890248033337e-05, "loss": 0.9136, "step": 30340 }, { "epoch": 3.41, "learning_rate": 2.4221500728416356e-05, "loss": 0.9353, "step": 30360 }, { "epoch": 3.41, "learning_rate": 2.415916582906405e-05, "loss": 0.9154, "step": 30380 }, { "epoch": 3.42, "learning_rate": 2.409688568205349e-05, "loss": 0.9022, "step": 30400 }, { "epoch": 3.42, "learning_rate": 2.403466041934574e-05, "loss": 0.9071, "step": 30420 }, { "epoch": 3.42, "learning_rate": 2.3972490172785567e-05, "loss": 0.8839, "step": 30440 }, { "epoch": 3.42, "learning_rate": 2.3910375074101172e-05, "loss": 0.9102, "step": 30460 }, { "epoch": 3.42, "learning_rate": 2.3848315254903924e-05, "loss": 0.9196, "step": 30480 }, { "epoch": 3.43, "learning_rate": 2.3786310846688e-05, "loss": 0.8863, "step": 30500 }, { "epoch": 3.43, "learning_rate": 2.3724361980830257e-05, "loss": 0.9043, "step": 30520 }, { "epoch": 3.43, "learning_rate": 2.366246878858984e-05, "loss": 0.8813, "step": 30540 }, { "epoch": 3.43, "learning_rate": 2.3600631401107882e-05, "loss": 0.9023, "step": 30560 }, { "epoch": 3.44, "learning_rate": 2.353884994940732e-05, "loss": 0.8872, "step": 30580 }, { "epoch": 3.44, "learning_rate": 2.3477124564392572e-05, "loss": 0.9056, "step": 30600 }, { "epoch": 3.44, "learning_rate": 2.3415455376849248e-05, "loss": 0.8708, "step": 30620 }, { "epoch": 3.44, "learning_rate": 2.3353842517443898e-05, "loss": 0.8727, "step": 30640 }, { "epoch": 3.44, "learning_rate": 2.32922861167237e-05, "loss": 0.8796, "step": 30660 }, { "epoch": 3.45, "learning_rate": 2.3230786305116253e-05, "loss": 0.9369, "step": 30680 }, { "epoch": 3.45, "learning_rate": 2.316934321292915e-05, "loss": 0.9044, "step": 30700 }, { "epoch": 3.45, "learning_rate": 2.3107956970349942e-05, "loss": 0.8945, "step": 30720 }, { "epoch": 3.45, "learning_rate": 2.3046627707445635e-05, "loss": 0.9097, "step": 30740 }, { "epoch": 3.46, "learning_rate": 2.2985355554162546e-05, "loss": 0.908, "step": 30760 }, { "epoch": 3.46, "learning_rate": 2.292414064032593e-05, "loss": 0.8604, "step": 30780 }, { "epoch": 3.46, "learning_rate": 2.2862983095639823e-05, "loss": 0.8698, "step": 30800 }, { "epoch": 3.46, "learning_rate": 2.2801883049686678e-05, "loss": 0.8969, "step": 30820 }, { "epoch": 3.47, "learning_rate": 2.2740840631927118e-05, "loss": 0.8927, "step": 30840 }, { "epoch": 3.47, "learning_rate": 2.2679855971699676e-05, "loss": 0.9017, "step": 30860 }, { "epoch": 3.47, "learning_rate": 2.2618929198220513e-05, "loss": 0.924, "step": 30880 }, { "epoch": 3.47, "learning_rate": 2.2558060440583057e-05, "loss": 0.8936, "step": 30900 }, { "epoch": 3.47, "learning_rate": 2.2497249827757933e-05, "loss": 0.9073, "step": 30920 }, { "epoch": 3.48, "learning_rate": 2.2436497488592497e-05, "loss": 0.9292, "step": 30940 }, { "epoch": 3.48, "learning_rate": 2.2375803551810654e-05, "loss": 0.9278, "step": 30960 }, { "epoch": 3.48, "learning_rate": 2.2315168146012527e-05, "loss": 0.8894, "step": 30980 }, { "epoch": 3.48, "learning_rate": 2.225459139967426e-05, "loss": 0.8936, "step": 31000 }, { "epoch": 3.49, "learning_rate": 2.21940734411477e-05, "loss": 0.9102, "step": 31020 }, { "epoch": 3.49, "learning_rate": 2.213361439866013e-05, "loss": 0.8849, "step": 31040 }, { "epoch": 3.49, "learning_rate": 2.2073214400313997e-05, "loss": 0.8884, "step": 31060 }, { "epoch": 3.49, "learning_rate": 2.201287357408665e-05, "loss": 0.8864, "step": 31080 }, { "epoch": 3.49, "learning_rate": 2.1952592047830055e-05, "loss": 0.94, "step": 31100 }, { "epoch": 3.5, "learning_rate": 2.189236994927054e-05, "loss": 0.8892, "step": 31120 }, { "epoch": 3.5, "learning_rate": 2.1832207406008502e-05, "loss": 0.8934, "step": 31140 }, { "epoch": 3.5, "learning_rate": 2.1772104545518185e-05, "loss": 0.8911, "step": 31160 }, { "epoch": 3.5, "learning_rate": 2.171206149514731e-05, "loss": 0.9029, "step": 31180 }, { "epoch": 3.51, "learning_rate": 2.165207838211693e-05, "loss": 0.9103, "step": 31200 }, { "epoch": 3.51, "learning_rate": 2.159215533352106e-05, "loss": 0.9166, "step": 31220 }, { "epoch": 3.51, "learning_rate": 2.153229247632652e-05, "loss": 0.8993, "step": 31240 }, { "epoch": 3.51, "learning_rate": 2.14724899373725e-05, "loss": 0.9114, "step": 31260 }, { "epoch": 3.51, "learning_rate": 2.141274784337044e-05, "loss": 0.9053, "step": 31280 }, { "epoch": 3.52, "learning_rate": 2.1353066320903698e-05, "loss": 0.8942, "step": 31300 }, { "epoch": 3.52, "learning_rate": 2.1293445496427296e-05, "loss": 0.8935, "step": 31320 }, { "epoch": 3.52, "learning_rate": 2.1233885496267634e-05, "loss": 0.8798, "step": 31340 }, { "epoch": 3.52, "learning_rate": 2.117438644662226e-05, "loss": 0.9204, "step": 31360 }, { "epoch": 3.53, "learning_rate": 2.1114948473559554e-05, "loss": 0.8907, "step": 31380 }, { "epoch": 3.53, "learning_rate": 2.1055571703018474e-05, "loss": 0.8935, "step": 31400 }, { "epoch": 3.53, "learning_rate": 2.0996256260808316e-05, "loss": 0.8761, "step": 31420 }, { "epoch": 3.53, "learning_rate": 2.0937002272608493e-05, "loss": 0.9049, "step": 31440 }, { "epoch": 3.53, "learning_rate": 2.087780986396808e-05, "loss": 0.9202, "step": 31460 }, { "epoch": 3.54, "learning_rate": 2.0818679160305776e-05, "loss": 0.8871, "step": 31480 }, { "epoch": 3.54, "learning_rate": 2.0759610286909508e-05, "loss": 0.8833, "step": 31500 }, { "epoch": 3.54, "learning_rate": 2.0700603368936182e-05, "loss": 0.917, "step": 31520 }, { "epoch": 3.54, "learning_rate": 2.064165853141145e-05, "loss": 0.9222, "step": 31540 }, { "epoch": 3.55, "learning_rate": 2.058277589922942e-05, "loss": 0.9001, "step": 31560 }, { "epoch": 3.55, "learning_rate": 2.05239555971524e-05, "loss": 0.906, "step": 31580 }, { "epoch": 3.55, "learning_rate": 2.0465197749810604e-05, "loss": 0.9021, "step": 31600 }, { "epoch": 3.55, "learning_rate": 2.040650248170194e-05, "loss": 0.8962, "step": 31620 }, { "epoch": 3.56, "learning_rate": 2.034786991719174e-05, "loss": 0.901, "step": 31640 }, { "epoch": 3.56, "learning_rate": 2.0289300180512478e-05, "loss": 0.9213, "step": 31660 }, { "epoch": 3.56, "learning_rate": 2.0230793395763426e-05, "loss": 0.9018, "step": 31680 }, { "epoch": 3.56, "learning_rate": 2.0172349686910568e-05, "loss": 0.8924, "step": 31700 }, { "epoch": 3.56, "learning_rate": 2.011396917778619e-05, "loss": 0.8989, "step": 31720 }, { "epoch": 3.57, "learning_rate": 2.0055651992088692e-05, "loss": 0.8872, "step": 31740 }, { "epoch": 3.57, "learning_rate": 1.9997398253382287e-05, "loss": 0.9258, "step": 31760 }, { "epoch": 3.57, "learning_rate": 1.993920808509676e-05, "loss": 0.9025, "step": 31780 }, { "epoch": 3.57, "learning_rate": 1.988108161052722e-05, "loss": 0.8995, "step": 31800 }, { "epoch": 3.58, "learning_rate": 1.9823018952833748e-05, "loss": 0.9158, "step": 31820 }, { "epoch": 3.58, "learning_rate": 1.9765020235041322e-05, "loss": 0.9092, "step": 31840 }, { "epoch": 3.58, "learning_rate": 1.9707085580039365e-05, "loss": 0.9003, "step": 31860 }, { "epoch": 3.58, "learning_rate": 1.9649215110581553e-05, "loss": 0.8768, "step": 31880 }, { "epoch": 3.58, "learning_rate": 1.9591408949285605e-05, "loss": 0.9147, "step": 31900 }, { "epoch": 3.59, "learning_rate": 1.953366721863297e-05, "loss": 0.9055, "step": 31920 }, { "epoch": 3.59, "learning_rate": 1.947599004096856e-05, "loss": 0.9076, "step": 31940 }, { "epoch": 3.59, "learning_rate": 1.9418377538500543e-05, "loss": 0.8803, "step": 31960 }, { "epoch": 3.59, "learning_rate": 1.936082983330002e-05, "loss": 0.9346, "step": 31980 }, { "epoch": 3.6, "learning_rate": 1.9303347047300834e-05, "loss": 0.9033, "step": 32000 }, { "epoch": 3.6, "learning_rate": 1.9245929302299202e-05, "loss": 0.8857, "step": 32020 }, { "epoch": 3.6, "learning_rate": 1.9188576719953633e-05, "loss": 0.9229, "step": 32040 }, { "epoch": 3.6, "learning_rate": 1.913128942178451e-05, "loss": 0.9216, "step": 32060 }, { "epoch": 3.6, "learning_rate": 1.907406752917386e-05, "loss": 0.8786, "step": 32080 }, { "epoch": 3.61, "learning_rate": 1.9016911163365185e-05, "loss": 0.896, "step": 32100 }, { "epoch": 3.61, "learning_rate": 1.89598204454631e-05, "loss": 0.9214, "step": 32120 }, { "epoch": 3.61, "learning_rate": 1.89027954964332e-05, "loss": 0.8923, "step": 32140 }, { "epoch": 3.61, "learning_rate": 1.8845836437101622e-05, "loss": 0.9045, "step": 32160 }, { "epoch": 3.62, "learning_rate": 1.8788943388154962e-05, "loss": 0.8815, "step": 32180 }, { "epoch": 3.62, "learning_rate": 1.873211647013995e-05, "loss": 0.9081, "step": 32200 }, { "epoch": 3.62, "learning_rate": 1.867535580346313e-05, "loss": 0.8995, "step": 32220 }, { "epoch": 3.62, "learning_rate": 1.861866150839078e-05, "loss": 0.9083, "step": 32240 }, { "epoch": 3.62, "learning_rate": 1.856203370504846e-05, "loss": 0.8916, "step": 32260 }, { "epoch": 3.63, "learning_rate": 1.850547251342089e-05, "loss": 0.9364, "step": 32280 }, { "epoch": 3.63, "learning_rate": 1.844897805335162e-05, "loss": 0.8781, "step": 32300 }, { "epoch": 3.63, "learning_rate": 1.8392550444542793e-05, "loss": 0.9038, "step": 32320 }, { "epoch": 3.63, "learning_rate": 1.8336189806555014e-05, "loss": 0.8806, "step": 32340 }, { "epoch": 3.64, "learning_rate": 1.8279896258806844e-05, "loss": 0.9133, "step": 32360 }, { "epoch": 3.64, "learning_rate": 1.8223669920574772e-05, "loss": 0.9114, "step": 32380 }, { "epoch": 3.64, "learning_rate": 1.8167510910992875e-05, "loss": 0.8787, "step": 32400 }, { "epoch": 3.64, "learning_rate": 1.811141934905255e-05, "loss": 0.8737, "step": 32420 }, { "epoch": 3.64, "learning_rate": 1.8055395353602306e-05, "loss": 0.9233, "step": 32440 }, { "epoch": 3.65, "learning_rate": 1.7999439043347476e-05, "loss": 0.9049, "step": 32460 }, { "epoch": 3.65, "learning_rate": 1.7943550536850006e-05, "loss": 0.893, "step": 32480 }, { "epoch": 3.65, "learning_rate": 1.7887729952528117e-05, "loss": 0.9078, "step": 32500 }, { "epoch": 3.65, "learning_rate": 1.7831977408656153e-05, "loss": 0.9135, "step": 32520 }, { "epoch": 3.66, "learning_rate": 1.7776293023364325e-05, "loss": 0.8959, "step": 32540 }, { "epoch": 3.66, "learning_rate": 1.7720676914638407e-05, "loss": 0.9199, "step": 32560 }, { "epoch": 3.66, "learning_rate": 1.766512920031944e-05, "loss": 0.9057, "step": 32580 }, { "epoch": 3.66, "learning_rate": 1.7609649998103634e-05, "loss": 0.8747, "step": 32600 }, { "epoch": 3.67, "learning_rate": 1.755423942554199e-05, "loss": 0.911, "step": 32620 }, { "epoch": 3.67, "learning_rate": 1.749889760004012e-05, "loss": 0.8998, "step": 32640 }, { "epoch": 3.67, "learning_rate": 1.7443624638857954e-05, "loss": 0.9014, "step": 32660 }, { "epoch": 3.67, "learning_rate": 1.7388420659109515e-05, "loss": 0.8887, "step": 32680 }, { "epoch": 3.67, "learning_rate": 1.7333285777762682e-05, "loss": 0.9168, "step": 32700 }, { "epoch": 3.68, "learning_rate": 1.727822011163886e-05, "loss": 0.9106, "step": 32720 }, { "epoch": 3.68, "learning_rate": 1.7223223777412905e-05, "loss": 0.907, "step": 32740 }, { "epoch": 3.68, "learning_rate": 1.7168296891612707e-05, "loss": 0.906, "step": 32760 }, { "epoch": 3.68, "learning_rate": 1.711343957061899e-05, "loss": 0.8978, "step": 32780 }, { "epoch": 3.69, "learning_rate": 1.7058651930665114e-05, "loss": 0.8732, "step": 32800 }, { "epoch": 3.69, "learning_rate": 1.70039340878368e-05, "loss": 0.909, "step": 32820 }, { "epoch": 3.69, "learning_rate": 1.6949286158071858e-05, "loss": 0.8888, "step": 32840 }, { "epoch": 3.69, "learning_rate": 1.689470825715998e-05, "loss": 0.9104, "step": 32860 }, { "epoch": 3.69, "learning_rate": 1.6840200500742482e-05, "loss": 0.878, "step": 32880 }, { "epoch": 3.7, "learning_rate": 1.6785763004312055e-05, "loss": 0.9154, "step": 32900 }, { "epoch": 3.7, "learning_rate": 1.673139588321247e-05, "loss": 0.8783, "step": 32920 }, { "epoch": 3.7, "learning_rate": 1.6677099252638477e-05, "loss": 0.8909, "step": 32940 }, { "epoch": 3.7, "learning_rate": 1.6622873227635428e-05, "loss": 0.908, "step": 32960 }, { "epoch": 3.71, "learning_rate": 1.656871792309902e-05, "loss": 0.8948, "step": 32980 }, { "epoch": 3.71, "learning_rate": 1.651463345377518e-05, "loss": 0.888, "step": 33000 }, { "epoch": 3.71, "learning_rate": 1.6460619934259707e-05, "loss": 0.9002, "step": 33020 }, { "epoch": 3.71, "learning_rate": 1.6406677478998094e-05, "loss": 0.9047, "step": 33040 }, { "epoch": 3.71, "learning_rate": 1.6352806202285244e-05, "loss": 0.8803, "step": 33060 }, { "epoch": 3.72, "learning_rate": 1.6299006218265246e-05, "loss": 0.8966, "step": 33080 }, { "epoch": 3.72, "learning_rate": 1.624527764093115e-05, "loss": 0.8901, "step": 33100 }, { "epoch": 3.72, "learning_rate": 1.619162058412465e-05, "loss": 0.9045, "step": 33120 }, { "epoch": 3.72, "learning_rate": 1.6138035161535986e-05, "loss": 0.8999, "step": 33140 }, { "epoch": 3.73, "learning_rate": 1.608452148670356e-05, "loss": 0.8836, "step": 33160 }, { "epoch": 3.73, "learning_rate": 1.603107967301378e-05, "loss": 0.9097, "step": 33180 }, { "epoch": 3.73, "learning_rate": 1.597770983370075e-05, "loss": 0.9269, "step": 33200 }, { "epoch": 3.73, "learning_rate": 1.5924412081846113e-05, "loss": 0.9053, "step": 33220 }, { "epoch": 3.73, "learning_rate": 1.5871186530378763e-05, "loss": 0.8981, "step": 33240 }, { "epoch": 3.74, "learning_rate": 1.581803329207461e-05, "loss": 0.8572, "step": 33260 }, { "epoch": 3.74, "learning_rate": 1.5764952479556334e-05, "loss": 0.8934, "step": 33280 }, { "epoch": 3.74, "learning_rate": 1.5711944205293185e-05, "loss": 0.8725, "step": 33300 }, { "epoch": 3.74, "learning_rate": 1.565900858160068e-05, "loss": 0.8948, "step": 33320 }, { "epoch": 3.75, "learning_rate": 1.5606145720640442e-05, "loss": 0.8983, "step": 33340 }, { "epoch": 3.75, "learning_rate": 1.555335573441989e-05, "loss": 0.9201, "step": 33360 }, { "epoch": 3.75, "learning_rate": 1.5500638734792055e-05, "loss": 0.8854, "step": 33380 }, { "epoch": 3.75, "learning_rate": 1.5447994833455292e-05, "loss": 0.8975, "step": 33400 }, { "epoch": 3.76, "learning_rate": 1.53954241419531e-05, "loss": 0.8831, "step": 33420 }, { "epoch": 3.76, "learning_rate": 1.5342926771673842e-05, "loss": 0.8813, "step": 33440 }, { "epoch": 3.76, "learning_rate": 1.5290502833850578e-05, "loss": 0.8986, "step": 33460 }, { "epoch": 3.76, "learning_rate": 1.5238152439560693e-05, "loss": 0.8652, "step": 33480 }, { "epoch": 3.76, "learning_rate": 1.5185875699725793e-05, "loss": 0.9025, "step": 33500 }, { "epoch": 3.77, "learning_rate": 1.5133672725111425e-05, "loss": 0.8958, "step": 33520 }, { "epoch": 3.77, "learning_rate": 1.5081543626326833e-05, "loss": 0.9204, "step": 33540 }, { "epoch": 3.77, "learning_rate": 1.5029488513824724e-05, "loss": 0.8805, "step": 33560 }, { "epoch": 3.77, "learning_rate": 1.4977507497901055e-05, "loss": 0.9058, "step": 33580 }, { "epoch": 3.78, "learning_rate": 1.492560068869478e-05, "loss": 0.9166, "step": 33600 }, { "epoch": 3.78, "learning_rate": 1.4873768196187593e-05, "loss": 0.9029, "step": 33620 }, { "epoch": 3.78, "learning_rate": 1.482201013020375e-05, "loss": 0.883, "step": 33640 }, { "epoch": 3.78, "learning_rate": 1.4770326600409851e-05, "loss": 0.9034, "step": 33660 }, { "epoch": 3.78, "learning_rate": 1.471871771631448e-05, "loss": 0.8594, "step": 33680 }, { "epoch": 3.79, "learning_rate": 1.4667183587268118e-05, "loss": 0.9064, "step": 33700 }, { "epoch": 3.79, "learning_rate": 1.4615724322462838e-05, "loss": 0.9083, "step": 33720 }, { "epoch": 3.79, "learning_rate": 1.4564340030932083e-05, "loss": 0.8579, "step": 33740 }, { "epoch": 3.79, "learning_rate": 1.4513030821550449e-05, "loss": 0.8899, "step": 33760 }, { "epoch": 3.8, "learning_rate": 1.4461796803033445e-05, "loss": 0.9189, "step": 33780 }, { "epoch": 3.8, "learning_rate": 1.4410638083937272e-05, "loss": 0.9012, "step": 33800 }, { "epoch": 3.8, "learning_rate": 1.4359554772658552e-05, "loss": 0.9, "step": 33820 }, { "epoch": 3.8, "learning_rate": 1.4308546977434135e-05, "loss": 0.957, "step": 33840 }, { "epoch": 3.8, "learning_rate": 1.4257614806340919e-05, "loss": 0.8913, "step": 33860 }, { "epoch": 3.81, "learning_rate": 1.4206758367295537e-05, "loss": 0.9182, "step": 33880 }, { "epoch": 3.81, "learning_rate": 1.4155977768054113e-05, "loss": 0.9013, "step": 33900 }, { "epoch": 3.81, "learning_rate": 1.4105273116212136e-05, "loss": 0.9113, "step": 33920 }, { "epoch": 3.81, "learning_rate": 1.4054644519204157e-05, "loss": 0.8801, "step": 33940 }, { "epoch": 3.82, "learning_rate": 1.4004092084303583e-05, "loss": 0.9287, "step": 33960 }, { "epoch": 3.82, "learning_rate": 1.3953615918622443e-05, "loss": 0.9068, "step": 33980 }, { "epoch": 3.82, "learning_rate": 1.3903216129111174e-05, "loss": 0.8831, "step": 34000 }, { "epoch": 3.82, "learning_rate": 1.385289282255835e-05, "loss": 0.9099, "step": 34020 }, { "epoch": 3.82, "learning_rate": 1.380264610559051e-05, "loss": 0.9004, "step": 34040 }, { "epoch": 3.83, "learning_rate": 1.3752476084671962e-05, "loss": 0.8964, "step": 34060 }, { "epoch": 3.83, "learning_rate": 1.3702382866104457e-05, "loss": 0.8801, "step": 34080 }, { "epoch": 3.83, "learning_rate": 1.3652366556026996e-05, "loss": 0.919, "step": 34100 }, { "epoch": 3.83, "learning_rate": 1.3602427260415663e-05, "loss": 0.8887, "step": 34120 }, { "epoch": 3.84, "learning_rate": 1.3552565085083352e-05, "loss": 0.9103, "step": 34140 }, { "epoch": 3.84, "learning_rate": 1.350278013567955e-05, "loss": 0.9001, "step": 34160 }, { "epoch": 3.84, "learning_rate": 1.3453072517690107e-05, "loss": 0.8922, "step": 34180 }, { "epoch": 3.84, "learning_rate": 1.3403442336437039e-05, "loss": 0.8959, "step": 34200 }, { "epoch": 3.84, "learning_rate": 1.3353889697078287e-05, "loss": 0.8825, "step": 34220 }, { "epoch": 3.85, "learning_rate": 1.3304414704607443e-05, "loss": 0.9026, "step": 34240 }, { "epoch": 3.85, "learning_rate": 1.3255017463853659e-05, "loss": 0.8999, "step": 34260 }, { "epoch": 3.85, "learning_rate": 1.3205698079481298e-05, "loss": 0.8748, "step": 34280 }, { "epoch": 3.85, "learning_rate": 1.3156456655989746e-05, "loss": 0.882, "step": 34300 }, { "epoch": 3.86, "learning_rate": 1.3107293297713236e-05, "loss": 0.8574, "step": 34320 }, { "epoch": 3.86, "learning_rate": 1.3058208108820574e-05, "loss": 0.9052, "step": 34340 }, { "epoch": 3.86, "learning_rate": 1.3009201193314947e-05, "loss": 0.9069, "step": 34360 }, { "epoch": 3.86, "learning_rate": 1.2960272655033689e-05, "loss": 0.8821, "step": 34380 }, { "epoch": 3.87, "learning_rate": 1.2911422597648077e-05, "loss": 0.8953, "step": 34400 }, { "epoch": 3.87, "learning_rate": 1.2862651124663095e-05, "loss": 0.8736, "step": 34420 }, { "epoch": 3.87, "learning_rate": 1.2813958339417176e-05, "loss": 0.9102, "step": 34440 }, { "epoch": 3.87, "learning_rate": 1.2765344345082114e-05, "loss": 0.8673, "step": 34460 }, { "epoch": 3.87, "learning_rate": 1.2716809244662691e-05, "loss": 0.9082, "step": 34480 }, { "epoch": 3.88, "learning_rate": 1.266835314099657e-05, "loss": 0.8893, "step": 34500 }, { "epoch": 3.88, "learning_rate": 1.261997613675398e-05, "loss": 0.8961, "step": 34520 }, { "epoch": 3.88, "learning_rate": 1.2571678334437591e-05, "loss": 0.8572, "step": 34540 }, { "epoch": 3.88, "learning_rate": 1.252345983638225e-05, "loss": 0.9225, "step": 34560 }, { "epoch": 3.89, "learning_rate": 1.2475320744754776e-05, "loss": 0.9017, "step": 34580 }, { "epoch": 3.89, "learning_rate": 1.2427261161553732e-05, "loss": 0.9022, "step": 34600 }, { "epoch": 3.89, "learning_rate": 1.2379281188609209e-05, "loss": 0.9, "step": 34620 }, { "epoch": 3.89, "learning_rate": 1.2331380927582642e-05, "loss": 0.8776, "step": 34640 }, { "epoch": 3.89, "learning_rate": 1.2283560479966538e-05, "loss": 0.8804, "step": 34660 }, { "epoch": 3.9, "learning_rate": 1.223581994708432e-05, "loss": 0.8881, "step": 34680 }, { "epoch": 3.9, "learning_rate": 1.2188159430090085e-05, "loss": 0.8949, "step": 34700 }, { "epoch": 3.9, "learning_rate": 1.2140579029968352e-05, "loss": 0.8953, "step": 34720 }, { "epoch": 3.9, "learning_rate": 1.2093078847533922e-05, "loss": 0.8937, "step": 34740 }, { "epoch": 3.91, "learning_rate": 1.2045658983431612e-05, "loss": 0.9329, "step": 34760 }, { "epoch": 3.91, "learning_rate": 1.199831953813611e-05, "loss": 0.8943, "step": 34780 }, { "epoch": 3.91, "learning_rate": 1.1951060611951615e-05, "loss": 0.8774, "step": 34800 }, { "epoch": 3.91, "learning_rate": 1.1903882305011793e-05, "loss": 0.9075, "step": 34820 }, { "epoch": 3.91, "learning_rate": 1.1856784717279462e-05, "loss": 0.8714, "step": 34840 }, { "epoch": 3.92, "learning_rate": 1.1809767948546419e-05, "loss": 0.8841, "step": 34860 }, { "epoch": 3.92, "learning_rate": 1.1762832098433219e-05, "loss": 0.8719, "step": 34880 }, { "epoch": 3.92, "learning_rate": 1.1715977266388961e-05, "loss": 0.8972, "step": 34900 }, { "epoch": 3.92, "learning_rate": 1.1669203551691093e-05, "loss": 0.8943, "step": 34920 }, { "epoch": 3.93, "learning_rate": 1.1622511053445156e-05, "loss": 0.8861, "step": 34940 }, { "epoch": 3.93, "learning_rate": 1.1575899870584621e-05, "loss": 0.9284, "step": 34960 }, { "epoch": 3.93, "learning_rate": 1.1529370101870723e-05, "loss": 0.8943, "step": 34980 }, { "epoch": 3.93, "learning_rate": 1.1482921845892098e-05, "loss": 0.8904, "step": 35000 }, { "epoch": 3.93, "learning_rate": 1.143655520106473e-05, "loss": 0.8703, "step": 35020 }, { "epoch": 3.94, "learning_rate": 1.1390270265631675e-05, "loss": 0.9096, "step": 35040 }, { "epoch": 3.94, "learning_rate": 1.134406713766285e-05, "loss": 0.9049, "step": 35060 }, { "epoch": 3.94, "learning_rate": 1.1297945915054842e-05, "loss": 0.8983, "step": 35080 }, { "epoch": 3.94, "learning_rate": 1.1251906695530701e-05, "loss": 0.9089, "step": 35100 }, { "epoch": 3.95, "learning_rate": 1.1205949576639723e-05, "loss": 0.8768, "step": 35120 }, { "epoch": 3.95, "learning_rate": 1.116007465575722e-05, "loss": 0.9167, "step": 35140 }, { "epoch": 3.95, "learning_rate": 1.1114282030084361e-05, "loss": 0.9169, "step": 35160 }, { "epoch": 3.95, "learning_rate": 1.1068571796647992e-05, "loss": 0.903, "step": 35180 }, { "epoch": 3.96, "learning_rate": 1.1022944052300293e-05, "loss": 0.8746, "step": 35200 }, { "epoch": 3.96, "learning_rate": 1.0977398893718732e-05, "loss": 0.9006, "step": 35220 }, { "epoch": 3.96, "learning_rate": 1.0931936417405764e-05, "loss": 0.8895, "step": 35240 }, { "epoch": 3.96, "learning_rate": 1.0886556719688662e-05, "loss": 0.8928, "step": 35260 }, { "epoch": 3.96, "learning_rate": 1.0841259896719297e-05, "loss": 0.9004, "step": 35280 }, { "epoch": 3.97, "learning_rate": 1.0796046044473962e-05, "loss": 0.9078, "step": 35300 }, { "epoch": 3.97, "learning_rate": 1.0750915258753141e-05, "loss": 0.8804, "step": 35320 }, { "epoch": 3.97, "learning_rate": 1.0705867635181278e-05, "loss": 0.902, "step": 35340 }, { "epoch": 3.97, "learning_rate": 1.0660903269206652e-05, "loss": 0.898, "step": 35360 }, { "epoch": 3.98, "learning_rate": 1.0616022256101143e-05, "loss": 0.8605, "step": 35380 }, { "epoch": 3.98, "learning_rate": 1.0571224690960002e-05, "loss": 0.8795, "step": 35400 }, { "epoch": 3.98, "learning_rate": 1.0526510668701633e-05, "loss": 0.8985, "step": 35420 }, { "epoch": 3.98, "learning_rate": 1.0481880284067485e-05, "loss": 0.9146, "step": 35440 }, { "epoch": 3.98, "learning_rate": 1.0437333631621765e-05, "loss": 0.8778, "step": 35460 }, { "epoch": 3.99, "learning_rate": 1.0392870805751265e-05, "loss": 0.9129, "step": 35480 }, { "epoch": 3.99, "learning_rate": 1.0348491900665164e-05, "loss": 0.9134, "step": 35500 }, { "epoch": 3.99, "learning_rate": 1.030419701039484e-05, "loss": 0.9043, "step": 35520 }, { "epoch": 3.99, "learning_rate": 1.025998622879365e-05, "loss": 0.8896, "step": 35540 }, { "epoch": 4.0, "learning_rate": 1.0215859649536702e-05, "loss": 0.9067, "step": 35560 }, { "epoch": 4.0, "learning_rate": 1.0171817366120767e-05, "loss": 0.9166, "step": 35580 }, { "epoch": 4.0, "learning_rate": 1.012785947186397e-05, "loss": 0.8699, "step": 35600 }, { "epoch": 4.0, "learning_rate": 1.0083986059905598e-05, "loss": 0.8923, "step": 35620 }, { "epoch": 4.0, "learning_rate": 1.0040197223205978e-05, "loss": 0.8432, "step": 35640 }, { "epoch": 4.01, "learning_rate": 9.996493054546214e-06, "loss": 0.8849, "step": 35660 }, { "epoch": 4.01, "learning_rate": 9.95287364652806e-06, "loss": 0.8686, "step": 35680 }, { "epoch": 4.01, "learning_rate": 9.909339091573594e-06, "loss": 0.8646, "step": 35700 }, { "epoch": 4.01, "learning_rate": 9.865889481925167e-06, "loss": 0.8976, "step": 35720 }, { "epoch": 4.02, "learning_rate": 9.822524909645137e-06, "loss": 0.8732, "step": 35740 }, { "epoch": 4.02, "learning_rate": 9.779245466615639e-06, "loss": 0.8954, "step": 35760 }, { "epoch": 4.02, "learning_rate": 9.736051244538497e-06, "loss": 0.912, "step": 35780 }, { "epoch": 4.02, "learning_rate": 9.692942334934935e-06, "loss": 0.891, "step": 35800 }, { "epoch": 4.02, "learning_rate": 9.649918829145415e-06, "loss": 0.8954, "step": 35820 }, { "epoch": 4.03, "learning_rate": 9.60698081832943e-06, "loss": 0.9018, "step": 35840 }, { "epoch": 4.03, "learning_rate": 9.564128393465332e-06, "loss": 0.8627, "step": 35860 }, { "epoch": 4.03, "learning_rate": 9.52136164535018e-06, "loss": 0.9076, "step": 35880 }, { "epoch": 4.03, "learning_rate": 9.478680664599404e-06, "loss": 0.93, "step": 35900 }, { "epoch": 4.04, "learning_rate": 9.436085541646783e-06, "loss": 0.8731, "step": 35920 }, { "epoch": 4.04, "learning_rate": 9.393576366744162e-06, "loss": 0.8885, "step": 35940 }, { "epoch": 4.04, "learning_rate": 9.351153229961223e-06, "loss": 0.8715, "step": 35960 }, { "epoch": 4.04, "learning_rate": 9.308816221185441e-06, "loss": 0.8789, "step": 35980 }, { "epoch": 4.04, "learning_rate": 9.266565430121733e-06, "loss": 0.886, "step": 36000 }, { "epoch": 4.05, "learning_rate": 9.224400946292367e-06, "loss": 0.8862, "step": 36020 }, { "epoch": 4.05, "learning_rate": 9.182322859036702e-06, "loss": 0.9107, "step": 36040 }, { "epoch": 4.05, "learning_rate": 9.14033125751107e-06, "loss": 0.877, "step": 36060 }, { "epoch": 4.05, "learning_rate": 9.098426230688578e-06, "loss": 0.8937, "step": 36080 }, { "epoch": 4.06, "learning_rate": 9.056607867358829e-06, "loss": 0.8663, "step": 36100 }, { "epoch": 4.06, "learning_rate": 9.014876256127852e-06, "loss": 0.8781, "step": 36120 }, { "epoch": 4.06, "learning_rate": 8.973231485417849e-06, "loss": 0.866, "step": 36140 }, { "epoch": 4.06, "learning_rate": 8.931673643467014e-06, "loss": 0.8693, "step": 36160 }, { "epoch": 4.07, "learning_rate": 8.890202818329368e-06, "loss": 0.8629, "step": 36180 }, { "epoch": 4.07, "learning_rate": 8.84881909787455e-06, "loss": 0.8731, "step": 36200 }, { "epoch": 4.07, "learning_rate": 8.807522569787653e-06, "loss": 0.8921, "step": 36220 }, { "epoch": 4.07, "learning_rate": 8.76631332156898e-06, "loss": 0.8864, "step": 36240 }, { "epoch": 4.07, "learning_rate": 8.725191440533936e-06, "loss": 0.8866, "step": 36260 }, { "epoch": 4.08, "learning_rate": 8.684157013812839e-06, "loss": 0.8669, "step": 36280 }, { "epoch": 4.08, "learning_rate": 8.643210128350665e-06, "loss": 0.8555, "step": 36300 }, { "epoch": 4.08, "learning_rate": 8.602350870906895e-06, "loss": 0.8809, "step": 36320 }, { "epoch": 4.08, "learning_rate": 8.561579328055375e-06, "loss": 0.865, "step": 36340 }, { "epoch": 4.09, "learning_rate": 8.52089558618408e-06, "loss": 0.888, "step": 36360 }, { "epoch": 4.09, "learning_rate": 8.480299731494956e-06, "loss": 0.8966, "step": 36380 }, { "epoch": 4.09, "learning_rate": 8.439791850003726e-06, "loss": 0.8947, "step": 36400 }, { "epoch": 4.09, "learning_rate": 8.39937202753972e-06, "loss": 0.8662, "step": 36420 }, { "epoch": 4.09, "learning_rate": 8.35904034974569e-06, "loss": 0.8828, "step": 36440 }, { "epoch": 4.1, "learning_rate": 8.31879690207758e-06, "loss": 0.8679, "step": 36460 }, { "epoch": 4.1, "learning_rate": 8.278641769804469e-06, "loss": 0.8961, "step": 36480 }, { "epoch": 4.1, "learning_rate": 8.23857503800825e-06, "loss": 0.8836, "step": 36500 }, { "epoch": 4.1, "learning_rate": 8.198596791583523e-06, "loss": 0.8672, "step": 36520 }, { "epoch": 4.11, "learning_rate": 8.158707115237407e-06, "loss": 0.8916, "step": 36540 }, { "epoch": 4.11, "learning_rate": 8.118906093489358e-06, "loss": 0.9051, "step": 36560 }, { "epoch": 4.11, "learning_rate": 8.079193810670988e-06, "loss": 0.8754, "step": 36580 }, { "epoch": 4.11, "learning_rate": 8.039570350925878e-06, "loss": 0.895, "step": 36600 }, { "epoch": 4.11, "learning_rate": 8.000035798209421e-06, "loss": 0.8794, "step": 36620 }, { "epoch": 4.12, "learning_rate": 7.960590236288633e-06, "loss": 0.8761, "step": 36640 }, { "epoch": 4.12, "learning_rate": 7.921233748741934e-06, "loss": 0.8677, "step": 36660 }, { "epoch": 4.12, "learning_rate": 7.88196641895907e-06, "loss": 0.8771, "step": 36680 }, { "epoch": 4.12, "learning_rate": 7.842788330140838e-06, "loss": 0.8851, "step": 36700 }, { "epoch": 4.13, "learning_rate": 7.803699565298972e-06, "loss": 0.9103, "step": 36720 }, { "epoch": 4.13, "learning_rate": 7.764700207255903e-06, "loss": 0.8621, "step": 36740 }, { "epoch": 4.13, "learning_rate": 7.725790338644673e-06, "loss": 0.8558, "step": 36760 }, { "epoch": 4.13, "learning_rate": 7.686970041908675e-06, "loss": 0.8793, "step": 36780 }, { "epoch": 4.13, "learning_rate": 7.648239399301544e-06, "loss": 0.9105, "step": 36800 }, { "epoch": 4.14, "learning_rate": 7.6095984928869265e-06, "loss": 0.879, "step": 36820 }, { "epoch": 4.14, "learning_rate": 7.571047404538351e-06, "loss": 0.8657, "step": 36840 }, { "epoch": 4.14, "learning_rate": 7.532586215939025e-06, "loss": 0.8624, "step": 36860 }, { "epoch": 4.14, "learning_rate": 7.49421500858169e-06, "loss": 0.8568, "step": 36880 }, { "epoch": 4.15, "learning_rate": 7.45593386376841e-06, "loss": 0.8805, "step": 36900 }, { "epoch": 4.15, "learning_rate": 7.41774286261045e-06, "loss": 0.8731, "step": 36920 }, { "epoch": 4.15, "learning_rate": 7.379642086028038e-06, "loss": 0.9025, "step": 36940 }, { "epoch": 4.15, "learning_rate": 7.341631614750266e-06, "loss": 0.867, "step": 36960 }, { "epoch": 4.16, "learning_rate": 7.303711529314861e-06, "loss": 0.877, "step": 36980 }, { "epoch": 4.16, "learning_rate": 7.265881910068062e-06, "loss": 0.8611, "step": 37000 }, { "epoch": 4.16, "learning_rate": 7.228142837164404e-06, "loss": 0.8895, "step": 37020 }, { "epoch": 4.16, "learning_rate": 7.190494390566571e-06, "loss": 0.9216, "step": 37040 }, { "epoch": 4.16, "learning_rate": 7.152936650045245e-06, "loss": 0.8817, "step": 37060 }, { "epoch": 4.17, "learning_rate": 7.115469695178895e-06, "loss": 0.8688, "step": 37080 }, { "epoch": 4.17, "learning_rate": 7.078093605353642e-06, "loss": 0.8903, "step": 37100 }, { "epoch": 4.17, "learning_rate": 7.040808459763082e-06, "loss": 0.8687, "step": 37120 }, { "epoch": 4.17, "learning_rate": 7.003614337408099e-06, "loss": 0.8761, "step": 37140 }, { "epoch": 4.18, "learning_rate": 6.96651131709673e-06, "loss": 0.88, "step": 37160 }, { "epoch": 4.18, "learning_rate": 6.929499477443962e-06, "loss": 0.8919, "step": 37180 }, { "epoch": 4.18, "learning_rate": 6.892578896871643e-06, "loss": 0.9064, "step": 37200 }, { "epoch": 4.18, "learning_rate": 6.855749653608179e-06, "loss": 0.8838, "step": 37220 }, { "epoch": 4.18, "learning_rate": 6.819011825688498e-06, "loss": 0.8945, "step": 37240 }, { "epoch": 4.19, "learning_rate": 6.782365490953824e-06, "loss": 0.8609, "step": 37260 }, { "epoch": 4.19, "learning_rate": 6.745810727051521e-06, "loss": 0.8978, "step": 37280 }, { "epoch": 4.19, "learning_rate": 6.709347611434924e-06, "loss": 0.8814, "step": 37300 }, { "epoch": 4.19, "learning_rate": 6.672976221363186e-06, "loss": 0.8896, "step": 37320 }, { "epoch": 4.2, "learning_rate": 6.636696633901124e-06, "loss": 0.9108, "step": 37340 }, { "epoch": 4.2, "learning_rate": 6.600508925919008e-06, "loss": 0.9018, "step": 37360 }, { "epoch": 4.2, "learning_rate": 6.564413174092443e-06, "loss": 0.9047, "step": 37380 }, { "epoch": 4.2, "learning_rate": 6.528409454902235e-06, "loss": 0.8608, "step": 37400 }, { "epoch": 4.2, "learning_rate": 6.492497844634121e-06, "loss": 0.8941, "step": 37420 }, { "epoch": 4.21, "learning_rate": 6.4566784193787255e-06, "loss": 0.8743, "step": 37440 }, { "epoch": 4.21, "learning_rate": 6.4209512550313215e-06, "loss": 0.8991, "step": 37460 }, { "epoch": 4.21, "learning_rate": 6.38531642729171e-06, "loss": 0.8968, "step": 37480 }, { "epoch": 4.21, "learning_rate": 6.3497740116640396e-06, "loss": 0.8719, "step": 37500 }, { "epoch": 4.22, "learning_rate": 6.314324083456663e-06, "loss": 0.9034, "step": 37520 }, { "epoch": 4.22, "learning_rate": 6.2789667177819755e-06, "loss": 0.8603, "step": 37540 }, { "epoch": 4.22, "learning_rate": 6.2437019895561995e-06, "loss": 0.9047, "step": 37560 }, { "epoch": 4.22, "learning_rate": 6.208529973499316e-06, "loss": 0.9057, "step": 37580 }, { "epoch": 4.22, "learning_rate": 6.1734507441348785e-06, "loss": 0.8938, "step": 37600 }, { "epoch": 4.23, "learning_rate": 6.138464375789821e-06, "loss": 0.8755, "step": 37620 }, { "epoch": 4.23, "learning_rate": 6.1035709425943e-06, "loss": 0.8896, "step": 37640 }, { "epoch": 4.23, "learning_rate": 6.068770518481582e-06, "loss": 0.8586, "step": 37660 }, { "epoch": 4.23, "learning_rate": 6.034063177187865e-06, "loss": 0.8803, "step": 37680 }, { "epoch": 4.24, "learning_rate": 5.9994489922521155e-06, "loss": 0.9121, "step": 37700 }, { "epoch": 4.24, "learning_rate": 5.96492803701591e-06, "loss": 0.8782, "step": 37720 }, { "epoch": 4.24, "learning_rate": 5.9305003846233e-06, "loss": 0.9016, "step": 37740 }, { "epoch": 4.24, "learning_rate": 5.8961661080206454e-06, "loss": 0.8687, "step": 37760 }, { "epoch": 4.24, "learning_rate": 5.861925279956415e-06, "loss": 0.885, "step": 37780 }, { "epoch": 4.25, "learning_rate": 5.827777972981152e-06, "loss": 0.853, "step": 37800 }, { "epoch": 4.25, "learning_rate": 5.793724259447203e-06, "loss": 0.8716, "step": 37820 }, { "epoch": 4.25, "learning_rate": 5.759764211508578e-06, "loss": 0.868, "step": 37840 }, { "epoch": 4.25, "learning_rate": 5.7258979011208746e-06, "loss": 0.8453, "step": 37860 }, { "epoch": 4.26, "learning_rate": 5.692125400041049e-06, "loss": 0.8788, "step": 37880 }, { "epoch": 4.26, "learning_rate": 5.658446779827309e-06, "loss": 0.8741, "step": 37900 }, { "epoch": 4.26, "learning_rate": 5.624862111838919e-06, "loss": 0.8998, "step": 37920 }, { "epoch": 4.26, "learning_rate": 5.5913714672361065e-06, "loss": 0.8717, "step": 37940 }, { "epoch": 4.27, "learning_rate": 5.557974916979863e-06, "loss": 0.8747, "step": 37960 }, { "epoch": 4.27, "learning_rate": 5.5246725318317815e-06, "loss": 0.8667, "step": 37980 }, { "epoch": 4.27, "learning_rate": 5.491464382354e-06, "loss": 0.8788, "step": 38000 }, { "epoch": 4.27, "learning_rate": 5.458350538908946e-06, "loss": 0.8948, "step": 38020 }, { "epoch": 4.27, "learning_rate": 5.425331071659212e-06, "loss": 0.8609, "step": 38040 }, { "epoch": 4.28, "learning_rate": 5.392406050567455e-06, "loss": 0.8659, "step": 38060 }, { "epoch": 4.28, "learning_rate": 5.3595755453962115e-06, "loss": 0.8705, "step": 38080 }, { "epoch": 4.28, "learning_rate": 5.3268396257077465e-06, "loss": 0.8849, "step": 38100 }, { "epoch": 4.28, "learning_rate": 5.294198360863917e-06, "loss": 0.8659, "step": 38120 }, { "epoch": 4.29, "learning_rate": 5.26165182002602e-06, "loss": 0.8674, "step": 38140 }, { "epoch": 4.29, "learning_rate": 5.229200072154672e-06, "loss": 0.8786, "step": 38160 }, { "epoch": 4.29, "learning_rate": 5.196843186009581e-06, "loss": 0.8893, "step": 38180 }, { "epoch": 4.29, "learning_rate": 5.164581230149529e-06, "loss": 0.899, "step": 38200 }, { "epoch": 4.29, "learning_rate": 5.132414272932107e-06, "loss": 0.8991, "step": 38220 }, { "epoch": 4.3, "learning_rate": 5.100342382513662e-06, "loss": 0.867, "step": 38240 }, { "epoch": 4.3, "learning_rate": 5.068365626849058e-06, "loss": 0.8965, "step": 38260 }, { "epoch": 4.3, "learning_rate": 5.036484073691622e-06, "loss": 0.9204, "step": 38280 }, { "epoch": 4.3, "learning_rate": 5.004697790592961e-06, "loss": 0.9037, "step": 38300 }, { "epoch": 4.31, "learning_rate": 4.9730068449028e-06, "loss": 0.8833, "step": 38320 }, { "epoch": 4.31, "learning_rate": 4.941411303768889e-06, "loss": 0.88, "step": 38340 }, { "epoch": 4.31, "learning_rate": 4.90991123413681e-06, "loss": 0.8873, "step": 38360 }, { "epoch": 4.31, "learning_rate": 4.878506702749869e-06, "loss": 0.8802, "step": 38380 }, { "epoch": 4.31, "learning_rate": 4.847197776148932e-06, "loss": 0.8771, "step": 38400 }, { "epoch": 4.32, "learning_rate": 4.815984520672301e-06, "loss": 0.8883, "step": 38420 }, { "epoch": 4.32, "learning_rate": 4.784867002455584e-06, "loss": 0.8629, "step": 38440 }, { "epoch": 4.32, "learning_rate": 4.753845287431491e-06, "loss": 0.8824, "step": 38460 }, { "epoch": 4.32, "learning_rate": 4.722919441329782e-06, "loss": 0.882, "step": 38480 }, { "epoch": 4.33, "learning_rate": 4.692089529677074e-06, "loss": 0.8704, "step": 38500 }, { "epoch": 4.33, "learning_rate": 4.661355617796742e-06, "loss": 0.8956, "step": 38520 }, { "epoch": 4.33, "learning_rate": 4.630717770808696e-06, "loss": 0.8867, "step": 38540 }, { "epoch": 4.33, "learning_rate": 4.600176053629346e-06, "loss": 0.8825, "step": 38560 }, { "epoch": 4.33, "learning_rate": 4.569730530971411e-06, "loss": 0.8755, "step": 38580 }, { "epoch": 4.34, "learning_rate": 4.5393812673437844e-06, "loss": 0.8778, "step": 38600 }, { "epoch": 4.34, "learning_rate": 4.5091283270513985e-06, "loss": 0.8921, "step": 38620 }, { "epoch": 4.34, "learning_rate": 4.4789717741951065e-06, "loss": 0.8814, "step": 38640 }, { "epoch": 4.34, "learning_rate": 4.448911672671535e-06, "loss": 0.8655, "step": 38660 }, { "epoch": 4.35, "learning_rate": 4.418948086172914e-06, "loss": 0.9148, "step": 38680 }, { "epoch": 4.35, "learning_rate": 4.389081078186996e-06, "loss": 0.8551, "step": 38700 }, { "epoch": 4.35, "learning_rate": 4.359310711996939e-06, "loss": 0.8897, "step": 38720 }, { "epoch": 4.35, "learning_rate": 4.329637050681057e-06, "loss": 0.859, "step": 38740 }, { "epoch": 4.36, "learning_rate": 4.300060157112817e-06, "loss": 0.8528, "step": 38760 }, { "epoch": 4.36, "learning_rate": 4.270580093960641e-06, "loss": 0.8661, "step": 38780 }, { "epoch": 4.36, "learning_rate": 4.241196923687774e-06, "loss": 0.8779, "step": 38800 }, { "epoch": 4.36, "learning_rate": 4.2119107085521636e-06, "loss": 0.8813, "step": 38820 }, { "epoch": 4.36, "learning_rate": 4.18272151060633e-06, "loss": 0.865, "step": 38840 }, { "epoch": 4.37, "learning_rate": 4.153629391697244e-06, "loss": 0.8753, "step": 38860 }, { "epoch": 4.37, "learning_rate": 4.12463441346615e-06, "loss": 0.9042, "step": 38880 }, { "epoch": 4.37, "learning_rate": 4.09573663734848e-06, "loss": 0.8962, "step": 38900 }, { "epoch": 4.37, "learning_rate": 4.066936124573734e-06, "loss": 0.8484, "step": 38920 }, { "epoch": 4.38, "learning_rate": 4.03823293616532e-06, "loss": 0.8848, "step": 38940 }, { "epoch": 4.38, "learning_rate": 4.009627132940397e-06, "loss": 0.8626, "step": 38960 }, { "epoch": 4.38, "learning_rate": 3.981118775509812e-06, "loss": 0.8792, "step": 38980 }, { "epoch": 4.38, "learning_rate": 3.952707924277949e-06, "loss": 0.8841, "step": 39000 }, { "epoch": 4.38, "learning_rate": 3.9243946394425635e-06, "loss": 0.8447, "step": 39020 }, { "epoch": 4.39, "learning_rate": 3.896178980994714e-06, "loss": 0.8927, "step": 39040 }, { "epoch": 4.39, "learning_rate": 3.868061008718593e-06, "loss": 0.8913, "step": 39060 }, { "epoch": 4.39, "learning_rate": 3.840040782191401e-06, "loss": 0.889, "step": 39080 }, { "epoch": 4.39, "learning_rate": 3.8121183607832344e-06, "loss": 0.869, "step": 39100 }, { "epoch": 4.4, "learning_rate": 3.7842938036569854e-06, "loss": 0.9043, "step": 39120 }, { "epoch": 4.4, "learning_rate": 3.756567169768166e-06, "loss": 0.8772, "step": 39140 }, { "epoch": 4.4, "learning_rate": 3.728938517864794e-06, "loss": 0.8899, "step": 39160 }, { "epoch": 4.4, "learning_rate": 3.701407906487303e-06, "loss": 0.8797, "step": 39180 }, { "epoch": 4.4, "learning_rate": 3.6739753939683817e-06, "loss": 0.8753, "step": 39200 }, { "epoch": 4.41, "learning_rate": 3.6466410384328685e-06, "loss": 0.8659, "step": 39220 }, { "epoch": 4.41, "learning_rate": 3.6194048977976237e-06, "loss": 0.875, "step": 39240 }, { "epoch": 4.41, "learning_rate": 3.5922670297714124e-06, "loss": 0.8816, "step": 39260 }, { "epoch": 4.41, "learning_rate": 3.5652274918547724e-06, "loss": 0.8792, "step": 39280 }, { "epoch": 4.42, "learning_rate": 3.5382863413398694e-06, "loss": 0.872, "step": 39300 }, { "epoch": 4.42, "learning_rate": 3.5114436353104574e-06, "loss": 0.8681, "step": 39320 }, { "epoch": 4.42, "learning_rate": 3.4846994306416746e-06, "loss": 0.855, "step": 39340 }, { "epoch": 4.42, "learning_rate": 3.4580537839999374e-06, "loss": 0.8619, "step": 39360 }, { "epoch": 4.42, "learning_rate": 3.43150675184285e-06, "loss": 0.8799, "step": 39380 }, { "epoch": 4.43, "learning_rate": 3.405058390419069e-06, "loss": 0.8626, "step": 39400 }, { "epoch": 4.43, "learning_rate": 3.3787087557681895e-06, "loss": 0.8733, "step": 39420 }, { "epoch": 4.43, "learning_rate": 3.352457903720613e-06, "loss": 0.8642, "step": 39440 }, { "epoch": 4.43, "learning_rate": 3.326305889897435e-06, "loss": 0.8841, "step": 39460 }, { "epoch": 4.44, "learning_rate": 3.3002527697103435e-06, "loss": 0.8708, "step": 39480 }, { "epoch": 4.44, "learning_rate": 3.274298598361458e-06, "loss": 0.896, "step": 39500 }, { "epoch": 4.44, "learning_rate": 3.2484434308432843e-06, "loss": 0.8654, "step": 39520 }, { "epoch": 4.44, "learning_rate": 3.2226873219385224e-06, "loss": 0.8616, "step": 39540 }, { "epoch": 4.44, "learning_rate": 3.197030326220013e-06, "loss": 0.8735, "step": 39560 }, { "epoch": 4.45, "learning_rate": 3.1714724980505484e-06, "loss": 0.8782, "step": 39580 }, { "epoch": 4.45, "learning_rate": 3.146013891582844e-06, "loss": 0.8788, "step": 39600 }, { "epoch": 4.45, "learning_rate": 3.1206545607593616e-06, "loss": 0.8564, "step": 39620 }, { "epoch": 4.45, "learning_rate": 3.095394559312226e-06, "loss": 0.8777, "step": 39640 }, { "epoch": 4.46, "learning_rate": 3.070233940763084e-06, "loss": 0.8988, "step": 39660 }, { "epoch": 4.46, "learning_rate": 3.0451727584230207e-06, "loss": 0.871, "step": 39680 }, { "epoch": 4.46, "learning_rate": 3.020211065392431e-06, "loss": 0.885, "step": 39700 }, { "epoch": 4.46, "learning_rate": 2.995348914560897e-06, "loss": 0.8604, "step": 39720 }, { "epoch": 4.47, "learning_rate": 2.9705863586071027e-06, "loss": 0.8535, "step": 39740 }, { "epoch": 4.47, "learning_rate": 2.945923449998711e-06, "loss": 0.9172, "step": 39760 }, { "epoch": 4.47, "learning_rate": 2.921360240992216e-06, "loss": 0.8873, "step": 39780 }, { "epoch": 4.47, "learning_rate": 2.8968967836329077e-06, "loss": 0.8798, "step": 39800 }, { "epoch": 4.47, "learning_rate": 2.8725331297546786e-06, "loss": 0.8649, "step": 39820 }, { "epoch": 4.48, "learning_rate": 2.8482693309800136e-06, "loss": 0.8986, "step": 39840 }, { "epoch": 4.48, "learning_rate": 2.8241054387197487e-06, "loss": 0.92, "step": 39860 }, { "epoch": 4.48, "learning_rate": 2.8000415041730845e-06, "loss": 0.8747, "step": 39880 }, { "epoch": 4.48, "learning_rate": 2.7760775783274127e-06, "loss": 0.8853, "step": 39900 }, { "epoch": 4.49, "learning_rate": 2.7522137119582238e-06, "loss": 0.8646, "step": 39920 }, { "epoch": 4.49, "learning_rate": 2.7284499556290033e-06, "loss": 0.8972, "step": 39940 }, { "epoch": 4.49, "learning_rate": 2.7047863596911248e-06, "loss": 0.8619, "step": 39960 }, { "epoch": 4.49, "learning_rate": 2.681222974283715e-06, "loss": 0.8512, "step": 39980 }, { "epoch": 4.49, "learning_rate": 2.657759849333591e-06, "loss": 0.868, "step": 40000 }, { "epoch": 4.5, "learning_rate": 2.6343970345551363e-06, "loss": 0.8721, "step": 40020 }, { "epoch": 4.5, "learning_rate": 2.6111345794502027e-06, "loss": 0.8967, "step": 40040 }, { "epoch": 4.5, "learning_rate": 2.587972533307964e-06, "loss": 0.8751, "step": 40060 }, { "epoch": 4.5, "learning_rate": 2.5649109452048735e-06, "loss": 0.8797, "step": 40080 }, { "epoch": 4.51, "learning_rate": 2.541949864004528e-06, "loss": 0.8924, "step": 40100 }, { "epoch": 4.51, "learning_rate": 2.5190893383575498e-06, "loss": 0.855, "step": 40120 }, { "epoch": 4.51, "learning_rate": 2.4963294167015204e-06, "loss": 0.8679, "step": 40140 }, { "epoch": 4.51, "learning_rate": 2.473670147260848e-06, "loss": 0.8751, "step": 40160 }, { "epoch": 4.51, "learning_rate": 2.451111578046689e-06, "loss": 0.8775, "step": 40180 }, { "epoch": 4.52, "learning_rate": 2.4286537568568023e-06, "loss": 0.8797, "step": 40200 }, { "epoch": 4.52, "learning_rate": 2.4062967312755037e-06, "loss": 0.8527, "step": 40220 }, { "epoch": 4.52, "learning_rate": 2.3840405486735607e-06, "loss": 0.8736, "step": 40240 }, { "epoch": 4.52, "learning_rate": 2.3618852562080187e-06, "loss": 0.8782, "step": 40260 }, { "epoch": 4.53, "learning_rate": 2.339830900822193e-06, "loss": 0.9045, "step": 40280 }, { "epoch": 4.53, "learning_rate": 2.3178775292455214e-06, "loss": 0.8947, "step": 40300 }, { "epoch": 4.53, "learning_rate": 2.296025187993467e-06, "loss": 0.8586, "step": 40320 }, { "epoch": 4.53, "learning_rate": 2.274273923367437e-06, "loss": 0.8814, "step": 40340 }, { "epoch": 4.53, "learning_rate": 2.25262378145466e-06, "loss": 0.8635, "step": 40360 }, { "epoch": 4.54, "learning_rate": 2.231074808128131e-06, "loss": 0.8544, "step": 40380 }, { "epoch": 4.54, "learning_rate": 2.2096270490464476e-06, "loss": 0.8556, "step": 40400 }, { "epoch": 4.54, "learning_rate": 2.1882805496537705e-06, "loss": 0.9008, "step": 40420 }, { "epoch": 4.54, "learning_rate": 2.167035355179725e-06, "loss": 0.8816, "step": 40440 }, { "epoch": 4.55, "learning_rate": 2.1458915106392697e-06, "loss": 0.8555, "step": 40460 }, { "epoch": 4.55, "learning_rate": 2.1248490608326123e-06, "loss": 0.8593, "step": 40480 }, { "epoch": 4.55, "learning_rate": 2.1039080503451325e-06, "loss": 0.8759, "step": 40500 }, { "epoch": 4.55, "learning_rate": 2.083068523547288e-06, "loss": 0.8855, "step": 40520 }, { "epoch": 4.56, "learning_rate": 2.0623305245944913e-06, "loss": 0.8577, "step": 40540 }, { "epoch": 4.56, "learning_rate": 2.0416940974270384e-06, "loss": 0.8959, "step": 40560 }, { "epoch": 4.56, "learning_rate": 2.0211592857700303e-06, "loss": 0.8824, "step": 40580 }, { "epoch": 4.56, "learning_rate": 2.0007261331332404e-06, "loss": 0.9085, "step": 40600 }, { "epoch": 4.56, "learning_rate": 1.9803946828110375e-06, "loss": 0.8982, "step": 40620 }, { "epoch": 4.57, "learning_rate": 1.9601649778823337e-06, "loss": 0.863, "step": 40640 }, { "epoch": 4.57, "learning_rate": 1.940037061210437e-06, "loss": 0.8765, "step": 40660 }, { "epoch": 4.57, "learning_rate": 1.920010975442976e-06, "loss": 0.8977, "step": 40680 }, { "epoch": 4.57, "learning_rate": 1.9000867630118324e-06, "loss": 0.8776, "step": 40700 }, { "epoch": 4.58, "learning_rate": 1.8802644661330304e-06, "loss": 0.862, "step": 40720 }, { "epoch": 4.58, "learning_rate": 1.8605441268066625e-06, "loss": 0.8683, "step": 40740 }, { "epoch": 4.58, "learning_rate": 1.8409257868167718e-06, "loss": 0.8883, "step": 40760 }, { "epoch": 4.58, "learning_rate": 1.8214094877312849e-06, "loss": 0.8897, "step": 40780 }, { "epoch": 4.58, "learning_rate": 1.8019952709019404e-06, "loss": 0.8771, "step": 40800 }, { "epoch": 4.59, "learning_rate": 1.7826831774641617e-06, "loss": 0.8785, "step": 40820 }, { "epoch": 4.59, "learning_rate": 1.7634732483369943e-06, "loss": 0.8497, "step": 40840 }, { "epoch": 4.59, "learning_rate": 1.7443655242230238e-06, "loss": 0.8961, "step": 40860 }, { "epoch": 4.59, "learning_rate": 1.7253600456082764e-06, "loss": 0.8995, "step": 40880 }, { "epoch": 4.6, "learning_rate": 1.7064568527621228e-06, "loss": 0.8858, "step": 40900 }, { "epoch": 4.6, "learning_rate": 1.6876559857372243e-06, "loss": 0.8618, "step": 40920 }, { "epoch": 4.6, "learning_rate": 1.6689574843694433e-06, "loss": 0.8773, "step": 40940 }, { "epoch": 4.6, "learning_rate": 1.6503613882777101e-06, "loss": 0.8614, "step": 40960 }, { "epoch": 4.6, "learning_rate": 1.6318677368640067e-06, "loss": 0.9059, "step": 40980 }, { "epoch": 4.61, "learning_rate": 1.6134765693132382e-06, "loss": 0.9023, "step": 41000 }, { "epoch": 4.61, "learning_rate": 1.5951879245931723e-06, "loss": 0.8583, "step": 41020 }, { "epoch": 4.61, "learning_rate": 1.5770018414543342e-06, "loss": 0.8845, "step": 41040 }, { "epoch": 4.61, "learning_rate": 1.5589183584299504e-06, "loss": 0.8818, "step": 41060 }, { "epoch": 4.62, "learning_rate": 1.5409375138358663e-06, "loss": 0.8754, "step": 41080 }, { "epoch": 4.62, "learning_rate": 1.5230593457704168e-06, "loss": 0.8513, "step": 41100 }, { "epoch": 4.62, "learning_rate": 1.505283892114412e-06, "loss": 0.8949, "step": 41120 }, { "epoch": 4.62, "learning_rate": 1.4876111905310408e-06, "loss": 0.8962, "step": 41140 }, { "epoch": 4.62, "learning_rate": 1.4700412784657336e-06, "loss": 0.8671, "step": 41160 }, { "epoch": 4.63, "learning_rate": 1.4525741931461612e-06, "loss": 0.8822, "step": 41180 }, { "epoch": 4.63, "learning_rate": 1.4352099715821133e-06, "loss": 0.8698, "step": 41200 }, { "epoch": 4.63, "learning_rate": 1.4179486505654316e-06, "loss": 0.8912, "step": 41220 }, { "epoch": 4.63, "learning_rate": 1.4007902666699157e-06, "loss": 0.9107, "step": 41240 }, { "epoch": 4.64, "learning_rate": 1.3837348562512842e-06, "loss": 0.8674, "step": 41260 }, { "epoch": 4.64, "learning_rate": 1.3667824554470466e-06, "loss": 0.8724, "step": 41280 }, { "epoch": 4.64, "learning_rate": 1.3499331001764592e-06, "loss": 0.8747, "step": 41300 }, { "epoch": 4.64, "learning_rate": 1.3331868261404479e-06, "loss": 0.8595, "step": 41320 }, { "epoch": 4.64, "learning_rate": 1.316543668821535e-06, "loss": 0.8836, "step": 41340 }, { "epoch": 4.65, "learning_rate": 1.3000036634837398e-06, "loss": 0.8632, "step": 41360 }, { "epoch": 4.65, "learning_rate": 1.2835668451725292e-06, "loss": 0.8968, "step": 41380 }, { "epoch": 4.65, "learning_rate": 1.267233248714722e-06, "loss": 0.8771, "step": 41400 }, { "epoch": 4.65, "learning_rate": 1.251002908718446e-06, "loss": 0.8829, "step": 41420 }, { "epoch": 4.66, "learning_rate": 1.2348758595730314e-06, "loss": 0.8628, "step": 41440 }, { "epoch": 4.66, "learning_rate": 1.2188521354489613e-06, "loss": 0.862, "step": 41460 }, { "epoch": 4.66, "learning_rate": 1.2029317702977882e-06, "loss": 0.8919, "step": 41480 }, { "epoch": 4.66, "learning_rate": 1.187114797852068e-06, "loss": 0.8733, "step": 41500 }, { "epoch": 4.67, "learning_rate": 1.1714012516252648e-06, "loss": 0.8723, "step": 41520 }, { "epoch": 4.67, "learning_rate": 1.1557911649117293e-06, "loss": 0.8721, "step": 41540 }, { "epoch": 4.67, "learning_rate": 1.1402845707865928e-06, "loss": 0.8917, "step": 41560 }, { "epoch": 4.67, "learning_rate": 1.124881502105679e-06, "loss": 0.871, "step": 41580 }, { "epoch": 4.67, "learning_rate": 1.1095819915054872e-06, "loss": 0.8686, "step": 41600 }, { "epoch": 4.68, "learning_rate": 1.094386071403075e-06, "loss": 0.8621, "step": 41620 }, { "epoch": 4.68, "learning_rate": 1.0792937739960262e-06, "loss": 0.8884, "step": 41640 }, { "epoch": 4.68, "learning_rate": 1.0643051312623553e-06, "loss": 0.8675, "step": 41660 }, { "epoch": 4.68, "learning_rate": 1.0494201749604525e-06, "loss": 0.8882, "step": 41680 }, { "epoch": 4.69, "learning_rate": 1.0346389366290122e-06, "loss": 0.8953, "step": 41700 }, { "epoch": 4.69, "learning_rate": 1.0199614475869646e-06, "loss": 0.8781, "step": 41720 }, { "epoch": 4.69, "learning_rate": 1.0053877389334277e-06, "loss": 0.8719, "step": 41740 }, { "epoch": 4.69, "learning_rate": 9.909178415476116e-07, "loss": 0.8847, "step": 41760 }, { "epoch": 4.69, "learning_rate": 9.765517860887808e-07, "loss": 0.8803, "step": 41780 }, { "epoch": 4.7, "learning_rate": 9.62289602996158e-07, "loss": 0.8687, "step": 41800 }, { "epoch": 4.7, "learning_rate": 9.481313224888877e-07, "loss": 0.8566, "step": 41820 }, { "epoch": 4.7, "learning_rate": 9.340769745659672e-07, "loss": 0.9047, "step": 41840 }, { "epoch": 4.7, "learning_rate": 9.201265890061816e-07, "loss": 0.8534, "step": 41860 }, { "epoch": 4.71, "learning_rate": 9.062801953680145e-07, "loss": 0.9076, "step": 41880 }, { "epoch": 4.71, "learning_rate": 8.925378229896364e-07, "loss": 0.9027, "step": 41900 }, { "epoch": 4.71, "learning_rate": 8.788995009888002e-07, "loss": 0.8772, "step": 41920 }, { "epoch": 4.71, "learning_rate": 8.653652582627958e-07, "loss": 0.8791, "step": 41940 }, { "epoch": 4.71, "learning_rate": 8.519351234883787e-07, "loss": 0.8871, "step": 41960 }, { "epoch": 4.72, "learning_rate": 8.386091251217365e-07, "loss": 0.8808, "step": 41980 }, { "epoch": 4.72, "learning_rate": 8.253872913983884e-07, "loss": 0.9046, "step": 42000 }, { "epoch": 4.72, "learning_rate": 8.122696503331583e-07, "loss": 0.9211, "step": 42020 }, { "epoch": 4.72, "learning_rate": 7.992562297201023e-07, "loss": 0.8927, "step": 42040 }, { "epoch": 4.73, "learning_rate": 7.863470571324527e-07, "loss": 0.8639, "step": 42060 }, { "epoch": 4.73, "learning_rate": 7.735421599225467e-07, "loss": 0.9165, "step": 42080 }, { "epoch": 4.73, "learning_rate": 7.608415652217982e-07, "loss": 0.8829, "step": 42100 }, { "epoch": 4.73, "learning_rate": 7.482452999406087e-07, "loss": 0.8938, "step": 42120 }, { "epoch": 4.73, "learning_rate": 7.357533907683234e-07, "loss": 0.8722, "step": 42140 }, { "epoch": 4.74, "learning_rate": 7.23365864173181e-07, "loss": 0.8738, "step": 42160 }, { "epoch": 4.74, "learning_rate": 7.110827464022474e-07, "loss": 0.8595, "step": 42180 }, { "epoch": 4.74, "learning_rate": 6.989040634813648e-07, "loss": 0.8597, "step": 42200 }, { "epoch": 4.74, "learning_rate": 6.868298412150864e-07, "loss": 0.9116, "step": 42220 }, { "epoch": 4.75, "learning_rate": 6.748601051866532e-07, "loss": 0.8424, "step": 42240 }, { "epoch": 4.75, "learning_rate": 6.629948807579001e-07, "loss": 0.8709, "step": 42260 }, { "epoch": 4.75, "learning_rate": 6.512341930692167e-07, "loss": 0.9012, "step": 42280 }, { "epoch": 4.75, "learning_rate": 6.395780670395147e-07, "loss": 0.869, "step": 42300 }, { "epoch": 4.76, "learning_rate": 6.280265273661379e-07, "loss": 0.8905, "step": 42320 }, { "epoch": 4.76, "learning_rate": 6.165795985248413e-07, "loss": 0.8933, "step": 42340 }, { "epoch": 4.76, "learning_rate": 6.052373047697236e-07, "loss": 0.8983, "step": 42360 }, { "epoch": 4.76, "learning_rate": 5.939996701331884e-07, "loss": 0.9021, "step": 42380 }, { "epoch": 4.76, "learning_rate": 5.828667184258673e-07, "loss": 0.889, "step": 42400 }, { "epoch": 4.77, "learning_rate": 5.718384732365967e-07, "loss": 0.8627, "step": 42420 }, { "epoch": 4.77, "learning_rate": 5.609149579323513e-07, "loss": 0.8778, "step": 42440 }, { "epoch": 4.77, "learning_rate": 5.50096195658223e-07, "loss": 0.8989, "step": 42460 }, { "epoch": 4.77, "learning_rate": 5.393822093373135e-07, "loss": 0.8895, "step": 42480 }, { "epoch": 4.78, "learning_rate": 5.287730216707532e-07, "loss": 0.8777, "step": 42500 }, { "epoch": 4.78, "learning_rate": 5.182686551376048e-07, "loss": 0.895, "step": 42520 }, { "epoch": 4.78, "learning_rate": 5.07869131994837e-07, "loss": 0.8736, "step": 42540 }, { "epoch": 4.78, "learning_rate": 4.975744742772848e-07, "loss": 0.8999, "step": 42560 }, { "epoch": 4.78, "learning_rate": 4.873847037975665e-07, "loss": 0.8983, "step": 42580 }, { "epoch": 4.79, "learning_rate": 4.772998421460895e-07, "loss": 0.8648, "step": 42600 }, { "epoch": 4.79, "learning_rate": 4.6731991069094984e-07, "loss": 0.8712, "step": 42620 }, { "epoch": 4.79, "learning_rate": 4.574449305779327e-07, "loss": 0.8866, "step": 42640 }, { "epoch": 4.79, "learning_rate": 4.4767492273045665e-07, "loss": 0.9028, "step": 42660 }, { "epoch": 4.8, "learning_rate": 4.380099078495015e-07, "loss": 0.88, "step": 42680 }, { "epoch": 4.8, "learning_rate": 4.284499064135916e-07, "loss": 0.8929, "step": 42700 }, { "epoch": 4.8, "learning_rate": 4.189949386787462e-07, "loss": 0.8876, "step": 42720 }, { "epoch": 4.8, "learning_rate": 4.0964502467844e-07, "loss": 0.9048, "step": 42740 }, { "epoch": 4.8, "learning_rate": 4.0040018422355385e-07, "loss": 0.8726, "step": 42760 }, { "epoch": 4.81, "learning_rate": 3.9126043690234093e-07, "loss": 0.8974, "step": 42780 }, { "epoch": 4.81, "learning_rate": 3.82225802080366e-07, "loss": 0.8804, "step": 42800 }, { "epoch": 4.81, "learning_rate": 3.7329629890048846e-07, "loss": 0.8947, "step": 42820 }, { "epoch": 4.81, "learning_rate": 3.6447194628281276e-07, "loss": 0.8923, "step": 42840 }, { "epoch": 4.82, "learning_rate": 3.557527629246438e-07, "loss": 0.8837, "step": 42860 }, { "epoch": 4.82, "learning_rate": 3.471387673004534e-07, "loss": 0.8695, "step": 42880 }, { "epoch": 4.82, "learning_rate": 3.3862997766182515e-07, "loss": 0.8827, "step": 42900 }, { "epoch": 4.82, "learning_rate": 3.302264120374543e-07, "loss": 0.8632, "step": 42920 }, { "epoch": 4.82, "learning_rate": 3.219280882330644e-07, "loss": 0.8822, "step": 42940 }, { "epoch": 4.83, "learning_rate": 3.137350238313963e-07, "loss": 0.9121, "step": 42960 }, { "epoch": 4.83, "learning_rate": 3.0564723619215807e-07, "loss": 0.9014, "step": 42980 }, { "epoch": 4.83, "learning_rate": 2.9766474245200847e-07, "loss": 0.8743, "step": 43000 }, { "epoch": 4.83, "learning_rate": 2.8978755952448475e-07, "loss": 0.8561, "step": 43020 }, { "epoch": 4.84, "learning_rate": 2.8201570410000824e-07, "loss": 0.8845, "step": 43040 }, { "epoch": 4.84, "learning_rate": 2.74349192645823e-07, "loss": 0.8676, "step": 43060 }, { "epoch": 4.84, "learning_rate": 2.6678804140596315e-07, "loss": 0.8449, "step": 43080 }, { "epoch": 4.84, "learning_rate": 2.5933226640121875e-07, "loss": 0.8725, "step": 43100 }, { "epoch": 4.84, "learning_rate": 2.5198188342912543e-07, "loss": 0.876, "step": 43120 }, { "epoch": 4.85, "learning_rate": 2.447369080638806e-07, "loss": 0.8781, "step": 43140 }, { "epoch": 4.85, "learning_rate": 2.3759735565637155e-07, "loss": 0.8812, "step": 43160 }, { "epoch": 4.85, "learning_rate": 2.305632413340919e-07, "loss": 0.8981, "step": 43180 }, { "epoch": 4.85, "learning_rate": 2.236345800011308e-07, "loss": 0.886, "step": 43200 }, { "epoch": 4.86, "learning_rate": 2.1681138633816156e-07, "loss": 0.8873, "step": 43220 }, { "epoch": 4.86, "learning_rate": 2.1009367480235298e-07, "loss": 0.8964, "step": 43240 }, { "epoch": 4.86, "learning_rate": 2.0348145962740817e-07, "loss": 0.8663, "step": 43260 }, { "epoch": 4.86, "learning_rate": 1.9697475482349238e-07, "loss": 0.868, "step": 43280 }, { "epoch": 4.87, "learning_rate": 1.9057357417719968e-07, "loss": 0.881, "step": 43300 }, { "epoch": 4.87, "learning_rate": 1.8427793125154747e-07, "loss": 0.8902, "step": 43320 }, { "epoch": 4.87, "learning_rate": 1.7808783938593198e-07, "loss": 0.8796, "step": 43340 }, { "epoch": 4.87, "learning_rate": 1.7200331169611727e-07, "loss": 0.8813, "step": 43360 }, { "epoch": 4.87, "learning_rate": 1.6602436107417408e-07, "loss": 0.8988, "step": 43380 }, { "epoch": 4.88, "learning_rate": 1.6015100018849095e-07, "loss": 0.8534, "step": 43400 }, { "epoch": 4.88, "learning_rate": 1.5438324148371875e-07, "loss": 0.8964, "step": 43420 }, { "epoch": 4.88, "learning_rate": 1.487210971807651e-07, "loss": 0.8934, "step": 43440 }, { "epoch": 4.88, "learning_rate": 1.4316457927674444e-07, "loss": 0.8765, "step": 43460 }, { "epoch": 4.89, "learning_rate": 1.3771369954497793e-07, "loss": 0.8458, "step": 43480 }, { "epoch": 4.89, "learning_rate": 1.323684695349603e-07, "loss": 0.8656, "step": 43500 }, { "epoch": 4.89, "learning_rate": 1.2712890057232085e-07, "loss": 0.8792, "step": 43520 }, { "epoch": 4.89, "learning_rate": 1.2199500375881247e-07, "loss": 0.8566, "step": 43540 }, { "epoch": 4.89, "learning_rate": 1.1696678997230038e-07, "loss": 0.8954, "step": 43560 }, { "epoch": 4.9, "learning_rate": 1.1204426986671235e-07, "loss": 0.8871, "step": 43580 }, { "epoch": 4.9, "learning_rate": 1.0722745387203859e-07, "loss": 0.875, "step": 43600 }, { "epoch": 4.9, "learning_rate": 1.025163521942818e-07, "loss": 0.8808, "step": 43620 }, { "epoch": 4.9, "learning_rate": 9.791097481547939e-08, "loss": 0.8818, "step": 43640 }, { "epoch": 4.91, "learning_rate": 9.34113314936369e-08, "loss": 0.884, "step": 43660 }, { "epoch": 4.91, "learning_rate": 8.901743176273902e-08, "loss": 0.8735, "step": 43680 }, { "epoch": 4.91, "learning_rate": 8.472928493271082e-08, "loss": 0.883, "step": 43700 }, { "epoch": 4.91, "learning_rate": 8.054690008940657e-08, "loss": 0.8736, "step": 43720 }, { "epoch": 4.91, "learning_rate": 7.647028609459317e-08, "loss": 0.8778, "step": 43740 }, { "epoch": 4.92, "learning_rate": 7.249945158592231e-08, "loss": 0.8444, "step": 43760 }, { "epoch": 4.92, "learning_rate": 6.863440497691942e-08, "loss": 0.8991, "step": 43780 }, { "epoch": 4.92, "learning_rate": 6.487515445696146e-08, "loss": 0.8613, "step": 43800 }, { "epoch": 4.92, "learning_rate": 6.122170799126581e-08, "loss": 0.8667, "step": 43820 }, { "epoch": 4.93, "learning_rate": 5.7674073320856945e-08, "loss": 0.886, "step": 43840 }, { "epoch": 4.93, "learning_rate": 5.423225796257758e-08, "loss": 0.9095, "step": 43860 }, { "epoch": 4.93, "learning_rate": 5.089626920904422e-08, "loss": 0.8674, "step": 43880 }, { "epoch": 4.93, "learning_rate": 4.766611412865829e-08, "loss": 0.9072, "step": 43900 }, { "epoch": 4.93, "learning_rate": 4.4541799565567255e-08, "loss": 0.8989, "step": 43920 }, { "epoch": 4.94, "learning_rate": 4.1523332139664636e-08, "loss": 0.885, "step": 43940 }, { "epoch": 4.94, "learning_rate": 3.861071824656226e-08, "loss": 0.8705, "step": 43960 }, { "epoch": 4.94, "learning_rate": 3.5803964057606885e-08, "loss": 0.8665, "step": 43980 }, { "epoch": 4.94, "learning_rate": 3.310307551983027e-08, "loss": 0.8746, "step": 44000 }, { "epoch": 4.95, "learning_rate": 3.050805835594917e-08, "loss": 0.8846, "step": 44020 }, { "epoch": 4.95, "learning_rate": 2.8018918064376398e-08, "loss": 0.8902, "step": 44040 }, { "epoch": 4.95, "learning_rate": 2.563565991916539e-08, "loss": 0.8422, "step": 44060 }, { "epoch": 4.95, "learning_rate": 2.335828897004344e-08, "loss": 0.8935, "step": 44080 }, { "epoch": 4.96, "learning_rate": 2.1186810042372885e-08, "loss": 0.8708, "step": 44100 }, { "epoch": 4.96, "learning_rate": 1.912122773715108e-08, "loss": 0.8663, "step": 44120 }, { "epoch": 4.96, "learning_rate": 1.7161546430988217e-08, "loss": 0.8972, "step": 44140 }, { "epoch": 4.96, "learning_rate": 1.5307770276123956e-08, "loss": 0.8774, "step": 44160 }, { "epoch": 4.96, "learning_rate": 1.3559903200394131e-08, "loss": 0.9105, "step": 44180 }, { "epoch": 4.97, "learning_rate": 1.1917948907225196e-08, "loss": 0.8849, "step": 44200 }, { "epoch": 4.97, "learning_rate": 1.038191087565088e-08, "loss": 0.8716, "step": 44220 }, { "epoch": 4.97, "learning_rate": 8.951792360267775e-09, "loss": 0.8811, "step": 44240 }, { "epoch": 4.97, "learning_rate": 7.62759639125199e-09, "loss": 0.8778, "step": 44260 }, { "epoch": 4.98, "learning_rate": 6.409325774359154e-09, "loss": 0.8776, "step": 44280 }, { "epoch": 4.98, "learning_rate": 5.296983090891106e-09, "loss": 0.8683, "step": 44300 }, { "epoch": 4.98, "learning_rate": 4.2905706977181e-09, "loss": 0.8893, "step": 44320 }, { "epoch": 4.98, "learning_rate": 3.3900907272510497e-09, "loss": 0.8707, "step": 44340 }, { "epoch": 4.98, "learning_rate": 2.5955450874581845e-09, "loss": 0.879, "step": 44360 }, { "epoch": 4.99, "learning_rate": 1.90693546184284e-09, "loss": 0.857, "step": 44380 }, { "epoch": 4.99, "learning_rate": 1.3242633094545654e-09, "loss": 0.8935, "step": 44400 }, { "epoch": 4.99, "learning_rate": 8.475298648835672e-10, "loss": 0.9124, "step": 44420 }, { "epoch": 4.99, "learning_rate": 4.767361382329583e-10, "loss": 0.9007, "step": 44440 }, { "epoch": 5.0, "learning_rate": 2.1188291516316404e-10, "loss": 0.8689, "step": 44460 }, { "epoch": 5.0, "learning_rate": 5.297075684751463e-11, "loss": 0.8644, "step": 44480 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 0.8527, "step": 44500 }, { "epoch": 5.0, "step": 44500, "total_flos": 1.4745575028791706e+19, "train_loss": 0.9734652058462079, "train_runtime": 41271.4588, "train_samples_per_second": 17.251, "train_steps_per_second": 1.078 } ], "max_steps": 44500, "num_train_epochs": 5, "total_flos": 1.4745575028791706e+19, "trial_name": null, "trial_params": null }