{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.013256206314774645,
  "eval_steps": 500,
  "global_step": 110,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00012051096649795132,
      "grad_norm": 0.7307866811752319,
      "learning_rate": 4e-05,
      "loss": 1.2502,
      "step": 1
    },
    {
      "epoch": 0.00024102193299590263,
      "grad_norm": 0.7944597601890564,
      "learning_rate": 8e-05,
      "loss": 1.0923,
      "step": 2
    },
    {
      "epoch": 0.00036153289949385393,
      "grad_norm": 0.8116863965988159,
      "learning_rate": 0.00012,
      "loss": 1.4372,
      "step": 3
    },
    {
      "epoch": 0.00048204386599180526,
      "grad_norm": 0.6883746981620789,
      "learning_rate": 0.00016,
      "loss": 1.2503,
      "step": 4
    },
    {
      "epoch": 0.0006025548324897565,
      "grad_norm": 0.6956741809844971,
      "learning_rate": 0.0002,
      "loss": 1.135,
      "step": 5
    },
    {
      "epoch": 0.0007230657989877079,
      "grad_norm": 0.7852187752723694,
      "learning_rate": 0.0001980952380952381,
      "loss": 1.0132,
      "step": 6
    },
    {
      "epoch": 0.0008435767654856592,
      "grad_norm": 0.4692592918872833,
      "learning_rate": 0.0001961904761904762,
      "loss": 0.7826,
      "step": 7
    },
    {
      "epoch": 0.0009640877319836105,
      "grad_norm": 0.27623867988586426,
      "learning_rate": 0.0001942857142857143,
      "loss": 0.664,
      "step": 8
    },
    {
      "epoch": 0.0010845986984815619,
      "grad_norm": 0.21396474540233612,
      "learning_rate": 0.0001923809523809524,
      "loss": 0.9179,
      "step": 9
    },
    {
      "epoch": 0.001205109664979513,
      "grad_norm": 0.1967506855726242,
      "learning_rate": 0.00019047619047619048,
      "loss": 0.6711,
      "step": 10
    },
    {
      "epoch": 0.0013256206314774645,
      "grad_norm": 0.20955657958984375,
      "learning_rate": 0.00018857142857142857,
      "loss": 0.8331,
      "step": 11
    },
    {
      "epoch": 0.0014461315979754157,
      "grad_norm": 0.2680826485157013,
      "learning_rate": 0.0001866666666666667,
      "loss": 0.8829,
      "step": 12
    },
    {
      "epoch": 0.0015666425644733672,
      "grad_norm": 0.25052550435066223,
      "learning_rate": 0.00018476190476190478,
      "loss": 0.7536,
      "step": 13
    },
    {
      "epoch": 0.0016871535309713184,
      "grad_norm": 0.27972114086151123,
      "learning_rate": 0.00018285714285714286,
      "loss": 0.8129,
      "step": 14
    },
    {
      "epoch": 0.0018076644974692696,
      "grad_norm": 0.23484091460704803,
      "learning_rate": 0.00018095238095238095,
      "loss": 0.8715,
      "step": 15
    },
    {
      "epoch": 0.001928175463967221,
      "grad_norm": 0.2122180461883545,
      "learning_rate": 0.00017904761904761907,
      "loss": 0.9421,
      "step": 16
    },
    {
      "epoch": 0.0020486864304651723,
      "grad_norm": 0.19645242393016815,
      "learning_rate": 0.00017714285714285713,
      "loss": 0.6596,
      "step": 17
    },
    {
      "epoch": 0.0021691973969631237,
      "grad_norm": 0.21645572781562805,
      "learning_rate": 0.00017523809523809525,
      "loss": 0.764,
      "step": 18
    },
    {
      "epoch": 0.002289708363461075,
      "grad_norm": 0.15910537540912628,
      "learning_rate": 0.00017333333333333334,
      "loss": 0.7156,
      "step": 19
    },
    {
      "epoch": 0.002410219329959026,
      "grad_norm": 0.1565140336751938,
      "learning_rate": 0.00017142857142857143,
      "loss": 0.6023,
      "step": 20
    },
    {
      "epoch": 0.0025307302964569776,
      "grad_norm": 0.17277204990386963,
      "learning_rate": 0.00016952380952380954,
      "loss": 0.5594,
      "step": 21
    },
    {
      "epoch": 0.002651241262954929,
      "grad_norm": 0.17323294281959534,
      "learning_rate": 0.00016761904761904763,
      "loss": 0.681,
      "step": 22
    },
    {
      "epoch": 0.00277175222945288,
      "grad_norm": 0.1539444774389267,
      "learning_rate": 0.00016571428571428575,
      "loss": 0.7535,
      "step": 23
    },
    {
      "epoch": 0.0028922631959508315,
      "grad_norm": 0.16936075687408447,
      "learning_rate": 0.0001638095238095238,
      "loss": 0.5524,
      "step": 24
    },
    {
      "epoch": 0.003012774162448783,
      "grad_norm": 0.1893339455127716,
      "learning_rate": 0.00016190476190476192,
      "loss": 0.802,
      "step": 25
    },
    {
      "epoch": 0.0031332851289467343,
      "grad_norm": 0.17078277468681335,
      "learning_rate": 0.00016,
      "loss": 0.677,
      "step": 26
    },
    {
      "epoch": 0.0032537960954446853,
      "grad_norm": 0.1889839768409729,
      "learning_rate": 0.0001580952380952381,
      "loss": 0.5551,
      "step": 27
    },
    {
      "epoch": 0.003374307061942637,
      "grad_norm": 0.2148876190185547,
      "learning_rate": 0.0001561904761904762,
      "loss": 0.6161,
      "step": 28
    },
    {
      "epoch": 0.0034948180284405882,
      "grad_norm": 0.1392691731452942,
      "learning_rate": 0.0001542857142857143,
      "loss": 0.5348,
      "step": 29
    },
    {
      "epoch": 0.0036153289949385392,
      "grad_norm": 0.17458081245422363,
      "learning_rate": 0.00015238095238095237,
      "loss": 0.7913,
      "step": 30
    },
    {
      "epoch": 0.0037358399614364907,
      "grad_norm": 0.1562052071094513,
      "learning_rate": 0.00015047619047619048,
      "loss": 0.8158,
      "step": 31
    },
    {
      "epoch": 0.003856350927934442,
      "grad_norm": 0.1435224562883377,
      "learning_rate": 0.00014857142857142857,
      "loss": 0.7528,
      "step": 32
    },
    {
      "epoch": 0.0039768618944323935,
      "grad_norm": 0.14048519730567932,
      "learning_rate": 0.00014666666666666666,
      "loss": 0.6955,
      "step": 33
    },
    {
      "epoch": 0.0040973728609303445,
      "grad_norm": 0.16571789979934692,
      "learning_rate": 0.00014476190476190475,
      "loss": 0.5537,
      "step": 34
    },
    {
      "epoch": 0.0042178838274282955,
      "grad_norm": 0.165692538022995,
      "learning_rate": 0.00014285714285714287,
      "loss": 0.7134,
      "step": 35
    },
    {
      "epoch": 0.004338394793926247,
      "grad_norm": 0.1822883039712906,
      "learning_rate": 0.00014095238095238096,
      "loss": 0.5432,
      "step": 36
    },
    {
      "epoch": 0.004458905760424198,
      "grad_norm": 0.1414850652217865,
      "learning_rate": 0.00013904761904761905,
      "loss": 0.6703,
      "step": 37
    },
    {
      "epoch": 0.00457941672692215,
      "grad_norm": 0.15394528210163116,
      "learning_rate": 0.00013714285714285716,
      "loss": 0.6561,
      "step": 38
    },
    {
      "epoch": 0.004699927693420101,
      "grad_norm": 0.1435491144657135,
      "learning_rate": 0.00013523809523809525,
      "loss": 0.5644,
      "step": 39
    },
    {
      "epoch": 0.004820438659918052,
      "grad_norm": 0.16691423952579498,
      "learning_rate": 0.00013333333333333334,
      "loss": 0.7856,
      "step": 40
    },
    {
      "epoch": 0.004940949626416004,
      "grad_norm": 0.14211532473564148,
      "learning_rate": 0.00013142857142857143,
      "loss": 0.6399,
      "step": 41
    },
    {
      "epoch": 0.005061460592913955,
      "grad_norm": 0.18083994090557098,
      "learning_rate": 0.00012952380952380954,
      "loss": 0.715,
      "step": 42
    },
    {
      "epoch": 0.005181971559411906,
      "grad_norm": 0.15873770415782928,
      "learning_rate": 0.0001276190476190476,
      "loss": 0.7614,
      "step": 43
    },
    {
      "epoch": 0.005302482525909858,
      "grad_norm": 0.14993314445018768,
      "learning_rate": 0.00012571428571428572,
      "loss": 0.6105,
      "step": 44
    },
    {
      "epoch": 0.005422993492407809,
      "grad_norm": 0.18779931962490082,
      "learning_rate": 0.0001238095238095238,
      "loss": 1.0716,
      "step": 45
    },
    {
      "epoch": 0.00554350445890576,
      "grad_norm": 0.15650784969329834,
      "learning_rate": 0.00012190476190476193,
      "loss": 0.738,
      "step": 46
    },
    {
      "epoch": 0.005664015425403712,
      "grad_norm": 0.1431063711643219,
      "learning_rate": 0.00012,
      "loss": 0.5219,
      "step": 47
    },
    {
      "epoch": 0.005784526391901663,
      "grad_norm": 0.1359708309173584,
      "learning_rate": 0.0001180952380952381,
      "loss": 0.5886,
      "step": 48
    },
    {
      "epoch": 0.005905037358399614,
      "grad_norm": 0.16217978298664093,
      "learning_rate": 0.00011619047619047621,
      "loss": 0.7634,
      "step": 49
    },
    {
      "epoch": 0.006025548324897566,
      "grad_norm": 0.16889767348766327,
      "learning_rate": 0.00011428571428571428,
      "loss": 0.7717,
      "step": 50
    },
    {
      "epoch": 0.006146059291395517,
      "grad_norm": 0.21841812133789062,
      "learning_rate": 0.00011238095238095239,
      "loss": 0.937,
      "step": 51
    },
    {
      "epoch": 0.006266570257893469,
      "grad_norm": 0.17994704842567444,
      "learning_rate": 0.00011047619047619049,
      "loss": 0.8443,
      "step": 52
    },
    {
      "epoch": 0.00638708122439142,
      "grad_norm": 0.15717928111553192,
      "learning_rate": 0.00010857142857142856,
      "loss": 0.7624,
      "step": 53
    },
    {
      "epoch": 0.006507592190889371,
      "grad_norm": 0.16110721230506897,
      "learning_rate": 0.00010666666666666667,
      "loss": 0.7228,
      "step": 54
    },
    {
      "epoch": 0.0066281031573873226,
      "grad_norm": 0.14764989912509918,
      "learning_rate": 0.00010476190476190477,
      "loss": 0.6782,
      "step": 55
    },
    {
      "epoch": 0.006748614123885274,
      "grad_norm": 0.1577727496623993,
      "learning_rate": 0.00010285714285714286,
      "loss": 0.7367,
      "step": 56
    },
    {
      "epoch": 0.006869125090383225,
      "grad_norm": 0.17438825964927673,
      "learning_rate": 0.00010095238095238096,
      "loss": 0.65,
      "step": 57
    },
    {
      "epoch": 0.0069896360568811764,
      "grad_norm": 0.1775740683078766,
      "learning_rate": 9.904761904761905e-05,
      "loss": 0.7797,
      "step": 58
    },
    {
      "epoch": 0.0071101470233791274,
      "grad_norm": 0.18453216552734375,
      "learning_rate": 9.714285714285715e-05,
      "loss": 0.9153,
      "step": 59
    },
    {
      "epoch": 0.0072306579898770785,
      "grad_norm": 0.16022688150405884,
      "learning_rate": 9.523809523809524e-05,
      "loss": 0.7798,
      "step": 60
    },
    {
      "epoch": 0.00735116895637503,
      "grad_norm": 0.16944445669651031,
      "learning_rate": 9.333333333333334e-05,
      "loss": 0.8193,
      "step": 61
    },
    {
      "epoch": 0.007471679922872981,
      "grad_norm": 0.14207735657691956,
      "learning_rate": 9.142857142857143e-05,
      "loss": 0.5361,
      "step": 62
    },
    {
      "epoch": 0.007592190889370932,
      "grad_norm": 0.16854678094387054,
      "learning_rate": 8.952380952380953e-05,
      "loss": 0.7976,
      "step": 63
    },
    {
      "epoch": 0.007712701855868884,
      "grad_norm": 0.17764142155647278,
      "learning_rate": 8.761904761904762e-05,
      "loss": 0.6938,
      "step": 64
    },
    {
      "epoch": 0.007833212822366835,
      "grad_norm": 0.21041354537010193,
      "learning_rate": 8.571428571428571e-05,
      "loss": 0.8384,
      "step": 65
    },
    {
      "epoch": 0.007953723788864787,
      "grad_norm": 0.18576891720294952,
      "learning_rate": 8.380952380952382e-05,
      "loss": 0.6401,
      "step": 66
    },
    {
      "epoch": 0.008074234755362737,
      "grad_norm": 0.20624496042728424,
      "learning_rate": 8.19047619047619e-05,
      "loss": 0.7563,
      "step": 67
    },
    {
      "epoch": 0.008194745721860689,
      "grad_norm": 0.18236589431762695,
      "learning_rate": 8e-05,
      "loss": 0.748,
      "step": 68
    },
    {
      "epoch": 0.008315256688358641,
      "grad_norm": 0.15884153544902802,
      "learning_rate": 7.80952380952381e-05,
      "loss": 0.649,
      "step": 69
    },
    {
      "epoch": 0.008435767654856591,
      "grad_norm": 0.18527762591838837,
      "learning_rate": 7.619047619047618e-05,
      "loss": 0.5163,
      "step": 70
    },
    {
      "epoch": 0.008556278621354543,
      "grad_norm": 0.166184663772583,
      "learning_rate": 7.428571428571429e-05,
      "loss": 0.7672,
      "step": 71
    },
    {
      "epoch": 0.008676789587852495,
      "grad_norm": 0.19784916937351227,
      "learning_rate": 7.238095238095238e-05,
      "loss": 0.7482,
      "step": 72
    },
    {
      "epoch": 0.008797300554350447,
      "grad_norm": 0.16908536851406097,
      "learning_rate": 7.047619047619048e-05,
      "loss": 0.7461,
      "step": 73
    },
    {
      "epoch": 0.008917811520848397,
      "grad_norm": 0.18411517143249512,
      "learning_rate": 6.857142857142858e-05,
      "loss": 0.5697,
      "step": 74
    },
    {
      "epoch": 0.009038322487346349,
      "grad_norm": 0.15351906418800354,
      "learning_rate": 6.666666666666667e-05,
      "loss": 0.6597,
      "step": 75
    },
    {
      "epoch": 0.0091588334538443,
      "grad_norm": 0.17720364034175873,
      "learning_rate": 6.476190476190477e-05,
      "loss": 0.808,
      "step": 76
    },
    {
      "epoch": 0.00927934442034225,
      "grad_norm": 0.18325303494930267,
      "learning_rate": 6.285714285714286e-05,
      "loss": 0.7917,
      "step": 77
    },
    {
      "epoch": 0.009399855386840203,
      "grad_norm": 0.1679506152868271,
      "learning_rate": 6.0952380952380964e-05,
      "loss": 0.6326,
      "step": 78
    },
    {
      "epoch": 0.009520366353338154,
      "grad_norm": 0.19260190427303314,
      "learning_rate": 5.904761904761905e-05,
      "loss": 0.5601,
      "step": 79
    },
    {
      "epoch": 0.009640877319836105,
      "grad_norm": 0.15009605884552002,
      "learning_rate": 5.714285714285714e-05,
      "loss": 0.6072,
      "step": 80
    },
    {
      "epoch": 0.009761388286334056,
      "grad_norm": 0.15776121616363525,
      "learning_rate": 5.5238095238095244e-05,
      "loss": 0.6753,
      "step": 81
    },
    {
      "epoch": 0.009881899252832008,
      "grad_norm": 0.18575388193130493,
      "learning_rate": 5.333333333333333e-05,
      "loss": 0.6219,
      "step": 82
    },
    {
      "epoch": 0.010002410219329958,
      "grad_norm": 0.21978633105754852,
      "learning_rate": 5.142857142857143e-05,
      "loss": 0.8581,
      "step": 83
    },
    {
      "epoch": 0.01012292118582791,
      "grad_norm": 0.1704164743423462,
      "learning_rate": 4.9523809523809525e-05,
      "loss": 0.6461,
      "step": 84
    },
    {
      "epoch": 0.010243432152325862,
      "grad_norm": 0.18057820200920105,
      "learning_rate": 4.761904761904762e-05,
      "loss": 0.7416,
      "step": 85
    },
    {
      "epoch": 0.010363943118823812,
      "grad_norm": 0.15225447714328766,
      "learning_rate": 4.5714285714285716e-05,
      "loss": 0.4868,
      "step": 86
    },
    {
      "epoch": 0.010484454085321764,
      "grad_norm": 0.17193946242332458,
      "learning_rate": 4.380952380952381e-05,
      "loss": 0.8092,
      "step": 87
    },
    {
      "epoch": 0.010604965051819716,
      "grad_norm": 0.194380983710289,
      "learning_rate": 4.190476190476191e-05,
      "loss": 0.8461,
      "step": 88
    },
    {
      "epoch": 0.010725476018317666,
      "grad_norm": 0.2139783650636673,
      "learning_rate": 4e-05,
      "loss": 0.6548,
      "step": 89
    },
    {
      "epoch": 0.010845986984815618,
      "grad_norm": 0.16700893640518188,
      "learning_rate": 3.809523809523809e-05,
      "loss": 0.5584,
      "step": 90
    },
    {
      "epoch": 0.01096649795131357,
      "grad_norm": 0.1971975564956665,
      "learning_rate": 3.619047619047619e-05,
      "loss": 0.8535,
      "step": 91
    },
    {
      "epoch": 0.01108700891781152,
      "grad_norm": 0.19667109847068787,
      "learning_rate": 3.428571428571429e-05,
      "loss": 0.8635,
      "step": 92
    },
    {
      "epoch": 0.011207519884309472,
      "grad_norm": 0.18818983435630798,
      "learning_rate": 3.2380952380952386e-05,
      "loss": 0.8435,
      "step": 93
    },
    {
      "epoch": 0.011328030850807424,
      "grad_norm": 0.16365501284599304,
      "learning_rate": 3.0476190476190482e-05,
      "loss": 0.6243,
      "step": 94
    },
    {
      "epoch": 0.011448541817305374,
      "grad_norm": 0.20358283817768097,
      "learning_rate": 2.857142857142857e-05,
      "loss": 0.6483,
      "step": 95
    },
    {
      "epoch": 0.011569052783803326,
      "grad_norm": 0.17696398496627808,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 0.6057,
      "step": 96
    },
    {
      "epoch": 0.011689563750301278,
      "grad_norm": 0.15508583188056946,
      "learning_rate": 2.4761904761904762e-05,
      "loss": 0.524,
      "step": 97
    },
    {
      "epoch": 0.011810074716799228,
      "grad_norm": 0.18458549678325653,
      "learning_rate": 2.2857142857142858e-05,
      "loss": 0.8364,
      "step": 98
    },
    {
      "epoch": 0.01193058568329718,
      "grad_norm": 0.1944003403186798,
      "learning_rate": 2.0952380952380954e-05,
      "loss": 0.5383,
      "step": 99
    },
    {
      "epoch": 0.012051096649795132,
      "grad_norm": 0.4217074513435364,
      "learning_rate": 1.9047619047619046e-05,
      "loss": 0.6774,
      "step": 100
    },
    {
      "epoch": 0.012171607616293083,
      "grad_norm": 0.20350486040115356,
      "learning_rate": 1.7142857142857145e-05,
      "loss": 0.6871,
      "step": 101
    },
    {
      "epoch": 0.012292118582791034,
      "grad_norm": 0.19154471158981323,
      "learning_rate": 1.5238095238095241e-05,
      "loss": 0.7226,
      "step": 102
    },
    {
      "epoch": 0.012412629549288986,
      "grad_norm": 0.17253194749355316,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 0.7514,
      "step": 103
    },
    {
      "epoch": 0.012533140515786937,
      "grad_norm": 0.14699283242225647,
      "learning_rate": 1.1428571428571429e-05,
      "loss": 0.5358,
      "step": 104
    },
    {
      "epoch": 0.012653651482284888,
      "grad_norm": 0.19192050397396088,
      "learning_rate": 9.523809523809523e-06,
      "loss": 0.9153,
      "step": 105
    },
    {
      "epoch": 0.01277416244878284,
      "grad_norm": 0.15646027028560638,
      "learning_rate": 7.6190476190476205e-06,
      "loss": 0.5182,
      "step": 106
    },
    {
      "epoch": 0.012894673415280791,
      "grad_norm": 0.18160918354988098,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 0.5822,
      "step": 107
    },
    {
      "epoch": 0.013015184381778741,
      "grad_norm": 0.19203059375286102,
      "learning_rate": 3.8095238095238102e-06,
      "loss": 0.7678,
      "step": 108
    },
    {
      "epoch": 0.013135695348276693,
      "grad_norm": 0.20908264815807343,
      "learning_rate": 1.9047619047619051e-06,
      "loss": 0.8563,
      "step": 109
    },
    {
      "epoch": 0.013256206314774645,
      "grad_norm": 0.16366459429264069,
      "learning_rate": 0.0,
      "loss": 0.4258,
      "step": 110
    }
  ],
  "logging_steps": 1,
  "max_steps": 110,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 55,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.103938402981235e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}