|
{ |
|
"best_global_step": 885, |
|
"best_metric": 1.009942650794983, |
|
"best_model_checkpoint": "./fine-tuned-model\\checkpoint-885", |
|
"epoch": 19.0, |
|
"eval_steps": 500, |
|
"global_step": 1121, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.847457627118644, |
|
"grad_norm": 0.939425528049469, |
|
"learning_rate": 5.8983050847457634e-05, |
|
"loss": 7.3825, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.3358267545700073, |
|
"eval_runtime": 7.2445, |
|
"eval_samples_per_second": 14.494, |
|
"eval_steps_per_second": 0.966, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 1.694915254237288, |
|
"grad_norm": 2.5237913131713867, |
|
"learning_rate": 5.796610169491525e-05, |
|
"loss": 1.6245, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.1412484645843506, |
|
"eval_runtime": 7.1915, |
|
"eval_samples_per_second": 14.601, |
|
"eval_steps_per_second": 0.973, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 2.542372881355932, |
|
"grad_norm": 7.8459625244140625, |
|
"learning_rate": 5.6949152542372884e-05, |
|
"loss": 1.4469, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.0957719087600708, |
|
"eval_runtime": 7.3705, |
|
"eval_samples_per_second": 14.246, |
|
"eval_steps_per_second": 0.95, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 3.389830508474576, |
|
"grad_norm": 1.5296450853347778, |
|
"learning_rate": 5.593220338983051e-05, |
|
"loss": 1.3912, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.072253704071045, |
|
"eval_runtime": 7.3695, |
|
"eval_samples_per_second": 14.248, |
|
"eval_steps_per_second": 0.95, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 4.237288135593221, |
|
"grad_norm": 1.9591976404190063, |
|
"learning_rate": 5.4915254237288135e-05, |
|
"loss": 1.2829, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.1338605880737305, |
|
"eval_runtime": 7.1975, |
|
"eval_samples_per_second": 14.588, |
|
"eval_steps_per_second": 0.973, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 5.084745762711864, |
|
"grad_norm": 1.4545025825500488, |
|
"learning_rate": 5.389830508474577e-05, |
|
"loss": 1.3583, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.932203389830509, |
|
"grad_norm": 1.4759844541549683, |
|
"learning_rate": 5.288135593220339e-05, |
|
"loss": 1.295, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.043823480606079, |
|
"eval_runtime": 7.226, |
|
"eval_samples_per_second": 14.531, |
|
"eval_steps_per_second": 0.969, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 6.779661016949152, |
|
"grad_norm": 1.5640958547592163, |
|
"learning_rate": 5.186440677966102e-05, |
|
"loss": 1.2581, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.0363339185714722, |
|
"eval_runtime": 7.3055, |
|
"eval_samples_per_second": 14.373, |
|
"eval_steps_per_second": 0.958, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 7.627118644067797, |
|
"grad_norm": 2.0497965812683105, |
|
"learning_rate": 5.0847457627118643e-05, |
|
"loss": 1.2544, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.067766785621643, |
|
"eval_runtime": 7.3717, |
|
"eval_samples_per_second": 14.244, |
|
"eval_steps_per_second": 0.95, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 8.474576271186441, |
|
"grad_norm": 1.2606173753738403, |
|
"learning_rate": 4.9830508474576276e-05, |
|
"loss": 1.3097, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.02413809299469, |
|
"eval_runtime": 7.1818, |
|
"eval_samples_per_second": 14.62, |
|
"eval_steps_per_second": 0.975, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 9.322033898305085, |
|
"grad_norm": 2.3118815422058105, |
|
"learning_rate": 4.88135593220339e-05, |
|
"loss": 1.2787, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.013655424118042, |
|
"eval_runtime": 7.1577, |
|
"eval_samples_per_second": 14.67, |
|
"eval_steps_per_second": 0.978, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 10.169491525423728, |
|
"grad_norm": 1.2859658002853394, |
|
"learning_rate": 4.7796610169491526e-05, |
|
"loss": 1.2354, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 1.0547661781311035, |
|
"eval_runtime": 7.1926, |
|
"eval_samples_per_second": 14.598, |
|
"eval_steps_per_second": 0.973, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 11.016949152542374, |
|
"grad_norm": 2.121445417404175, |
|
"learning_rate": 4.677966101694916e-05, |
|
"loss": 1.2596, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 11.864406779661017, |
|
"grad_norm": 3.0464370250701904, |
|
"learning_rate": 4.576271186440678e-05, |
|
"loss": 1.2646, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 1.0133599042892456, |
|
"eval_runtime": 10.6666, |
|
"eval_samples_per_second": 9.844, |
|
"eval_steps_per_second": 0.656, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 12.711864406779661, |
|
"grad_norm": 1.1342540979385376, |
|
"learning_rate": 4.474576271186441e-05, |
|
"loss": 1.2068, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 1.0467838048934937, |
|
"eval_runtime": 11.4351, |
|
"eval_samples_per_second": 9.182, |
|
"eval_steps_per_second": 0.612, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 13.559322033898304, |
|
"grad_norm": 2.094381093978882, |
|
"learning_rate": 4.3728813559322035e-05, |
|
"loss": 1.2955, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 1.0249124765396118, |
|
"eval_runtime": 13.2701, |
|
"eval_samples_per_second": 7.913, |
|
"eval_steps_per_second": 0.528, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 14.40677966101695, |
|
"grad_norm": 1.0174381732940674, |
|
"learning_rate": 4.271186440677966e-05, |
|
"loss": 1.2215, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 1.009942650794983, |
|
"eval_runtime": 10.124, |
|
"eval_samples_per_second": 10.371, |
|
"eval_steps_per_second": 0.691, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 15.254237288135593, |
|
"grad_norm": 1.1202493906021118, |
|
"learning_rate": 4.169491525423729e-05, |
|
"loss": 1.2365, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 1.0121246576309204, |
|
"eval_runtime": 9.974, |
|
"eval_samples_per_second": 10.527, |
|
"eval_steps_per_second": 0.702, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 16.10169491525424, |
|
"grad_norm": 1.1021959781646729, |
|
"learning_rate": 4.067796610169492e-05, |
|
"loss": 1.2412, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 16.949152542372882, |
|
"grad_norm": 0.9624550938606262, |
|
"learning_rate": 3.966101694915254e-05, |
|
"loss": 1.2348, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 1.0155479907989502, |
|
"eval_runtime": 8.9635, |
|
"eval_samples_per_second": 11.714, |
|
"eval_steps_per_second": 0.781, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 17.796610169491526, |
|
"grad_norm": 0.9586867094039917, |
|
"learning_rate": 3.864406779661017e-05, |
|
"loss": 1.2455, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 1.0335369110107422, |
|
"eval_runtime": 9.0555, |
|
"eval_samples_per_second": 11.595, |
|
"eval_steps_per_second": 0.773, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 18.64406779661017, |
|
"grad_norm": 1.7303390502929688, |
|
"learning_rate": 3.76271186440678e-05, |
|
"loss": 1.2238, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 1.020735263824463, |
|
"eval_runtime": 9.6479, |
|
"eval_samples_per_second": 10.883, |
|
"eval_steps_per_second": 0.726, |
|
"step": 1121 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 2950, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 4, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 4 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.5260523640520704e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|