|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.8654970760233918, |
|
"eval_steps": 10, |
|
"global_step": 160, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11695906432748537, |
|
"eval_loss": 0.291916161775589, |
|
"eval_runtime": 14.114, |
|
"eval_samples_per_second": 53.635, |
|
"eval_steps_per_second": 6.731, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.23391812865497075, |
|
"grad_norm": 0.9206290389893743, |
|
"learning_rate": 7.692307692307694e-06, |
|
"loss": 0.3086, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.23391812865497075, |
|
"eval_loss": 0.2310570627450943, |
|
"eval_runtime": 13.833, |
|
"eval_samples_per_second": 54.724, |
|
"eval_steps_per_second": 6.868, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3508771929824561, |
|
"eval_loss": 0.2036609798669815, |
|
"eval_runtime": 13.8307, |
|
"eval_samples_per_second": 54.733, |
|
"eval_steps_per_second": 6.869, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4678362573099415, |
|
"grad_norm": 0.6671501778795791, |
|
"learning_rate": 9.908063190205739e-06, |
|
"loss": 0.2113, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4678362573099415, |
|
"eval_loss": 0.1889752596616745, |
|
"eval_runtime": 13.8386, |
|
"eval_samples_per_second": 54.702, |
|
"eval_steps_per_second": 6.865, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5847953216374269, |
|
"eval_loss": 0.1784025877714157, |
|
"eval_runtime": 13.7963, |
|
"eval_samples_per_second": 54.87, |
|
"eval_steps_per_second": 6.886, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"grad_norm": 0.6133391984537656, |
|
"learning_rate": 9.465880538792519e-06, |
|
"loss": 0.1774, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"eval_loss": 0.17270329594612122, |
|
"eval_runtime": 13.8427, |
|
"eval_samples_per_second": 54.686, |
|
"eval_steps_per_second": 6.863, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.8187134502923976, |
|
"eval_loss": 0.16722090542316437, |
|
"eval_runtime": 13.8528, |
|
"eval_samples_per_second": 54.646, |
|
"eval_steps_per_second": 6.858, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.935672514619883, |
|
"grad_norm": 0.5734119079085649, |
|
"learning_rate": 8.689603152981262e-06, |
|
"loss": 0.1663, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.935672514619883, |
|
"eval_loss": 0.16241328418254852, |
|
"eval_runtime": 13.8277, |
|
"eval_samples_per_second": 54.745, |
|
"eval_steps_per_second": 6.87, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.0467836257309941, |
|
"eval_loss": 0.16258972883224487, |
|
"eval_runtime": 13.8228, |
|
"eval_samples_per_second": 54.764, |
|
"eval_steps_per_second": 6.873, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.1637426900584795, |
|
"grad_norm": 0.5466652845884847, |
|
"learning_rate": 7.637304728380036e-06, |
|
"loss": 0.1251, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1637426900584795, |
|
"eval_loss": 0.16274206340312958, |
|
"eval_runtime": 13.8628, |
|
"eval_samples_per_second": 54.607, |
|
"eval_steps_per_second": 6.853, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.280701754385965, |
|
"eval_loss": 0.16260136663913727, |
|
"eval_runtime": 13.8176, |
|
"eval_samples_per_second": 54.785, |
|
"eval_steps_per_second": 6.875, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.3976608187134503, |
|
"grad_norm": 0.6270786548860793, |
|
"learning_rate": 6.387708231181229e-06, |
|
"loss": 0.1108, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.3976608187134503, |
|
"eval_loss": 0.1583063155412674, |
|
"eval_runtime": 13.8218, |
|
"eval_samples_per_second": 54.769, |
|
"eval_steps_per_second": 6.873, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.5146198830409356, |
|
"eval_loss": 0.15549179911613464, |
|
"eval_runtime": 13.7868, |
|
"eval_samples_per_second": 54.907, |
|
"eval_steps_per_second": 6.891, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.631578947368421, |
|
"grad_norm": 0.5175376687666863, |
|
"learning_rate": 5.034296594080849e-06, |
|
"loss": 0.1079, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.631578947368421, |
|
"eval_loss": 0.15371112525463104, |
|
"eval_runtime": 13.8138, |
|
"eval_samples_per_second": 54.8, |
|
"eval_steps_per_second": 6.877, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.7485380116959064, |
|
"eval_loss": 0.15189678966999054, |
|
"eval_runtime": 13.8351, |
|
"eval_samples_per_second": 54.716, |
|
"eval_steps_per_second": 6.867, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.8654970760233918, |
|
"grad_norm": 0.5236246557796638, |
|
"learning_rate": 3.6783192117952427e-06, |
|
"loss": 0.1057, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.8654970760233918, |
|
"eval_loss": 0.15082819759845734, |
|
"eval_runtime": 13.8117, |
|
"eval_samples_per_second": 54.808, |
|
"eval_steps_per_second": 6.878, |
|
"step": 160 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 255, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 20, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 40369975721984.0, |
|
"train_batch_size": 5, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|