Spaces:
Paused
Paused
{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 20.0, | |
"global_step": 220, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.91, | |
"learning_rate": 0.0009949107209404665, | |
"loss": 4.8778, | |
"step": 10 | |
}, | |
{ | |
"epoch": 1.82, | |
"learning_rate": 0.0009797464868072487, | |
"loss": 4.3468, | |
"step": 20 | |
}, | |
{ | |
"epoch": 2.73, | |
"learning_rate": 0.0009548159976772592, | |
"loss": 4.0995, | |
"step": 30 | |
}, | |
{ | |
"epoch": 3.64, | |
"learning_rate": 0.0009206267664155906, | |
"loss": 3.6941, | |
"step": 40 | |
}, | |
{ | |
"epoch": 4.55, | |
"learning_rate": 0.0008825117959999116, | |
"loss": 3.3691, | |
"step": 50 | |
}, | |
{ | |
"epoch": 5.45, | |
"learning_rate": 0.0008327928391111841, | |
"loss": 2.969, | |
"step": 60 | |
}, | |
{ | |
"epoch": 6.36, | |
"learning_rate": 0.0007762991797134513, | |
"loss": 2.4856, | |
"step": 70 | |
}, | |
{ | |
"epoch": 7.27, | |
"learning_rate": 0.0007141808657977907, | |
"loss": 2.285, | |
"step": 80 | |
}, | |
{ | |
"epoch": 8.18, | |
"learning_rate": 0.0006477024471011001, | |
"loss": 1.8686, | |
"step": 90 | |
}, | |
{ | |
"epoch": 9.09, | |
"learning_rate": 0.0005782172325201155, | |
"loss": 1.5859, | |
"step": 100 | |
}, | |
{ | |
"epoch": 10.0, | |
"learning_rate": 0.0005071397406448937, | |
"loss": 1.2427, | |
"step": 110 | |
}, | |
{ | |
"epoch": 10.91, | |
"learning_rate": 0.0004359169042394536, | |
"loss": 1.0274, | |
"step": 120 | |
}, | |
{ | |
"epoch": 11.82, | |
"learning_rate": 0.0003659986148633107, | |
"loss": 0.8537, | |
"step": 130 | |
}, | |
{ | |
"epoch": 12.73, | |
"learning_rate": 0.0002988082072604661, | |
"loss": 0.6993, | |
"step": 140 | |
}, | |
{ | |
"epoch": 13.64, | |
"learning_rate": 0.00023571348436857904, | |
"loss": 0.4831, | |
"step": 150 | |
}, | |
{ | |
"epoch": 14.55, | |
"learning_rate": 0.00017799887279557237, | |
"loss": 0.4156, | |
"step": 160 | |
}, | |
{ | |
"epoch": 15.45, | |
"learning_rate": 0.00012683927559787656, | |
"loss": 0.3635, | |
"step": 170 | |
}, | |
{ | |
"epoch": 16.36, | |
"learning_rate": 8.327615464234128e-05, | |
"loss": 0.3198, | |
"step": 180 | |
}, | |
{ | |
"epoch": 17.27, | |
"learning_rate": 4.819632944595414e-05, | |
"loss": 0.2574, | |
"step": 190 | |
}, | |
{ | |
"epoch": 18.18, | |
"learning_rate": 2.2313924087851657e-05, | |
"loss": 0.2336, | |
"step": 200 | |
}, | |
{ | |
"epoch": 19.09, | |
"learning_rate": 6.15582970243117e-06, | |
"loss": 0.2314, | |
"step": 210 | |
}, | |
{ | |
"epoch": 20.0, | |
"learning_rate": 5.0978495283349194e-08, | |
"loss": 0.215, | |
"step": 220 | |
}, | |
{ | |
"epoch": 20.0, | |
"step": 220, | |
"total_flos": 5.476535779894886e+16, | |
"train_loss": 1.7238198778846048, | |
"train_runtime": 2069.8284, | |
"train_samples_per_second": 1.672, | |
"train_steps_per_second": 0.106 | |
} | |
], | |
"max_steps": 220, | |
"num_train_epochs": 20, | |
"total_flos": 5.476535779894886e+16, | |
"trial_name": null, | |
"trial_params": null | |
} | |