|
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 15.0,
  "eval_steps": 10,
  "global_step": 30,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.7314285714285714,
      "grad_norm": 9.340608453115205,
      "learning_rate": 5.000000000000001e-07,
      "loss": 0.7205065488815308,
      "memory(GiB)": 63.44,
      "step": 1,
      "token_acc": 0.8149569178102063,
      "train_speed(iter/s)": 0.002817
    },
    {
      "epoch": 1.0,
      "grad_norm": 9.340608453115205,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.7595161199569702,
      "memory(GiB)": 67.12,
      "step": 2,
      "token_acc": 0.8210892726643333,
      "train_speed(iter/s)": 0.004132
    },
    {
      "epoch": 1.7314285714285713,
      "grad_norm": 14.990541756622823,
      "learning_rate": 1.5e-06,
      "loss": 0.7181279063224792,
      "memory(GiB)": 67.12,
      "step": 3,
      "token_acc": 0.8385703913117363,
      "train_speed(iter/s)": 0.003617
    },
    {
      "epoch": 2.0,
      "grad_norm": 14.990541756622823,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.7464776039123535,
      "memory(GiB)": 67.12,
      "step": 4,
      "token_acc": 0.8302431930811472,
      "train_speed(iter/s)": 0.004181
    },
    {
      "epoch": 2.7314285714285713,
      "grad_norm": 10.51872439314909,
      "learning_rate": 2.5e-06,
      "loss": 0.7061335444450378,
      "memory(GiB)": 67.12,
      "step": 5,
      "token_acc": 0.8495207453582679,
      "train_speed(iter/s)": 0.003827
    },
    {
      "epoch": 3.0,
      "grad_norm": 7.8322768971767545,
      "learning_rate": 3e-06,
      "loss": 0.7670756578445435,
      "memory(GiB)": 76.32,
      "step": 6,
      "token_acc": 0.8034914159988437,
      "train_speed(iter/s)": 0.00417
    },
    {
      "epoch": 3.7314285714285713,
      "grad_norm": 9.640548264571718,
      "learning_rate": 3.5e-06,
      "loss": 0.7187657952308655,
      "memory(GiB)": 76.32,
      "step": 7,
      "token_acc": 0.8349510177213882,
      "train_speed(iter/s)": 0.003914
    },
    {
      "epoch": 4.0,
      "grad_norm": 9.640548264571718,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.6732273101806641,
      "memory(GiB)": 76.32,
      "step": 8,
      "token_acc": 0.8445810474364907,
      "train_speed(iter/s)": 0.004178
    },
    {
      "epoch": 4.731428571428571,
      "grad_norm": 18.16921963489396,
      "learning_rate": 4.5e-06,
      "loss": 0.6695082187652588,
      "memory(GiB)": 76.32,
      "step": 9,
      "token_acc": 0.8329077399296925,
      "train_speed(iter/s)": 0.003975
    },
    {
      "epoch": 5.0,
      "grad_norm": 18.16921963489396,
      "learning_rate": 5e-06,
      "loss": 0.7095792293548584,
      "memory(GiB)": 76.32,
      "step": 10,
      "token_acc": 0.8494555815610054,
      "train_speed(iter/s)": 0.00417
    },
    {
      "epoch": 5.731428571428571,
      "grad_norm": 36.47852854409781,
      "learning_rate": 4.99847706754774e-06,
      "loss": 0.6005474328994751,
      "memory(GiB)": 76.32,
      "step": 11,
      "token_acc": 0.844029136120312,
      "train_speed(iter/s)": 0.003971
    },
    {
      "epoch": 6.0,
      "grad_norm": 8.15921427581055,
      "learning_rate": 4.993910125649561e-06,
      "loss": 0.6586757898330688,
      "memory(GiB)": 76.32,
      "step": 12,
      "token_acc": 0.8276634855085236,
      "train_speed(iter/s)": 0.004131
    },
    {
      "epoch": 6.731428571428571,
      "grad_norm": 9.940528313438021,
      "learning_rate": 4.986304738420684e-06,
      "loss": 0.5984752178192139,
      "memory(GiB)": 76.32,
      "step": 13,
      "token_acc": 0.8406243779216233,
      "train_speed(iter/s)": 0.004005
    },
    {
      "epoch": 7.0,
      "grad_norm": 9.940528313438021,
      "learning_rate": 4.975670171853926e-06,
      "loss": 0.5597701668739319,
      "memory(GiB)": 76.32,
      "step": 14,
      "token_acc": 0.8693022202084277,
      "train_speed(iter/s)": 0.004144
    },
    {
      "epoch": 7.731428571428571,
      "grad_norm": 4.5395059579286015,
      "learning_rate": 4.962019382530521e-06,
      "loss": 0.5627670884132385,
      "memory(GiB)": 76.32,
      "step": 15,
      "token_acc": 0.8462122810719561,
      "train_speed(iter/s)": 0.004014
    },
    {
      "epoch": 8.0,
      "grad_norm": 4.5395059579286015,
      "learning_rate": 4.9453690018345144e-06,
      "loss": 0.5592302680015564,
      "memory(GiB)": 76.32,
      "step": 16,
      "token_acc": 0.8405773019678591,
      "train_speed(iter/s)": 0.004144
    },
    {
      "epoch": 8.731428571428571,
      "grad_norm": 6.584180692482273,
      "learning_rate": 4.925739315689991e-06,
      "loss": 0.49423277378082275,
      "memory(GiB)": 76.32,
      "step": 17,
      "token_acc": 0.8678222664079662,
      "train_speed(iter/s)": 0.004034
    },
    {
      "epoch": 9.0,
      "grad_norm": 2.39165864781694,
      "learning_rate": 4.903154239845798e-06,
      "loss": 0.4701133668422699,
      "memory(GiB)": 76.32,
      "step": 18,
      "token_acc": 0.8865700144198917,
      "train_speed(iter/s)": 0.004143
    },
    {
      "epoch": 9.731428571428571,
      "grad_norm": 5.595725551844695,
      "learning_rate": 4.8776412907378845e-06,
      "loss": 0.45911359786987305,
      "memory(GiB)": 76.32,
      "step": 19,
      "token_acc": 0.8713313357013871,
      "train_speed(iter/s)": 0.004052
    },
    {
      "epoch": 10.0,
      "grad_norm": 5.595725551844695,
      "learning_rate": 4.849231551964771e-06,
      "loss": 0.4861743152141571,
      "memory(GiB)": 76.32,
      "step": 20,
      "token_acc": 0.854624478442281,
      "train_speed(iter/s)": 0.00415
    },
    {
      "epoch": 10.731428571428571,
      "grad_norm": 2.9321408358661594,
      "learning_rate": 4.817959636416969e-06,
      "loss": 0.45954838395118713,
      "memory(GiB)": 76.32,
      "step": 21,
      "token_acc": 0.8712813064578979,
      "train_speed(iter/s)": 0.004039
    },
    {
      "epoch": 11.0,
      "grad_norm": 11.05893897846086,
      "learning_rate": 4.783863644106502e-06,
      "loss": 0.42236876487731934,
      "memory(GiB)": 76.32,
      "step": 22,
      "token_acc": 0.8768863403778366,
      "train_speed(iter/s)": 0.004131
    },
    {
      "epoch": 11.731428571428571,
      "grad_norm": 3.349470799070857,
      "learning_rate": 4.746985115747918e-06,
      "loss": 0.4331884980201721,
      "memory(GiB)": 76.32,
      "step": 23,
      "token_acc": 0.8758490488600685,
      "train_speed(iter/s)": 0.004053
    },
    {
      "epoch": 12.0,
      "grad_norm": 3.349470799070857,
      "learning_rate": 4.707368982147318e-06,
      "loss": 0.4267829954624176,
      "memory(GiB)": 76.32,
      "step": 24,
      "token_acc": 0.8883371910699619,
      "train_speed(iter/s)": 0.004139
    },
    {
      "epoch": 12.731428571428571,
      "grad_norm": 4.894666598752148,
      "learning_rate": 4.665063509461098e-06,
      "loss": 0.4266759753227234,
      "memory(GiB)": 76.32,
      "step": 25,
      "token_acc": 0.879163815519365,
      "train_speed(iter/s)": 0.004064
    },
    {
      "epoch": 13.0,
      "grad_norm": 4.894666598752148,
      "learning_rate": 4.620120240391065e-06,
      "loss": 0.3838977813720703,
      "memory(GiB)": 76.32,
      "step": 26,
      "token_acc": 0.8920969339679625,
      "train_speed(iter/s)": 0.004143
    },
    {
      "epoch": 13.731428571428571,
      "grad_norm": 2.1762524149258344,
      "learning_rate": 4.572593931387604e-06,
      "loss": 0.3890763521194458,
      "memory(GiB)": 76.32,
      "step": 27,
      "token_acc": 0.8864340359319397,
      "train_speed(iter/s)": 0.004074
    },
    {
      "epoch": 14.0,
      "grad_norm": 1.8853256148082378,
      "learning_rate": 4.522542485937369e-06,
      "loss": 0.36992955207824707,
      "memory(GiB)": 76.32,
      "step": 28,
      "token_acc": 0.8742953776775648,
      "train_speed(iter/s)": 0.004147
    },
    {
      "epoch": 14.731428571428571,
      "grad_norm": 1.3310226706467028,
      "learning_rate": 4.470026884016805e-06,
      "loss": 0.3653033375740051,
      "memory(GiB)": 76.32,
      "step": 29,
      "token_acc": 0.8962616884718178,
      "train_speed(iter/s)": 0.004085
    },
    {
      "epoch": 15.0,
      "grad_norm": 1.3310226706467028,
      "learning_rate": 4.415111107797445e-06,
      "loss": 0.3536423146724701,
      "memory(GiB)": 76.32,
      "step": 30,
      "token_acc": 0.8946604521852897,
      "train_speed(iter/s)": 0.004149
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 57394211913728.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
|
|