{ "best_global_step": 885, "best_metric": 1.009942650794983, "best_model_checkpoint": "./fine-tuned-model\\checkpoint-885", "epoch": 19.0, "eval_steps": 500, "global_step": 1121, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.847457627118644, "grad_norm": 0.939425528049469, "learning_rate": 5.8983050847457634e-05, "loss": 7.3825, "step": 50 }, { "epoch": 1.0, "eval_loss": 1.3358267545700073, "eval_runtime": 7.2445, "eval_samples_per_second": 14.494, "eval_steps_per_second": 0.966, "step": 59 }, { "epoch": 1.694915254237288, "grad_norm": 2.5237913131713867, "learning_rate": 5.796610169491525e-05, "loss": 1.6245, "step": 100 }, { "epoch": 2.0, "eval_loss": 1.1412484645843506, "eval_runtime": 7.1915, "eval_samples_per_second": 14.601, "eval_steps_per_second": 0.973, "step": 118 }, { "epoch": 2.542372881355932, "grad_norm": 7.8459625244140625, "learning_rate": 5.6949152542372884e-05, "loss": 1.4469, "step": 150 }, { "epoch": 3.0, "eval_loss": 1.0957719087600708, "eval_runtime": 7.3705, "eval_samples_per_second": 14.246, "eval_steps_per_second": 0.95, "step": 177 }, { "epoch": 3.389830508474576, "grad_norm": 1.5296450853347778, "learning_rate": 5.593220338983051e-05, "loss": 1.3912, "step": 200 }, { "epoch": 4.0, "eval_loss": 1.072253704071045, "eval_runtime": 7.3695, "eval_samples_per_second": 14.248, "eval_steps_per_second": 0.95, "step": 236 }, { "epoch": 4.237288135593221, "grad_norm": 1.9591976404190063, "learning_rate": 5.4915254237288135e-05, "loss": 1.2829, "step": 250 }, { "epoch": 5.0, "eval_loss": 1.1338605880737305, "eval_runtime": 7.1975, "eval_samples_per_second": 14.588, "eval_steps_per_second": 0.973, "step": 295 }, { "epoch": 5.084745762711864, "grad_norm": 1.4545025825500488, "learning_rate": 5.389830508474577e-05, "loss": 1.3583, "step": 300 }, { "epoch": 5.932203389830509, "grad_norm": 1.4759844541549683, "learning_rate": 5.288135593220339e-05, "loss": 1.295, "step": 350 }, { "epoch": 6.0, "eval_loss": 1.043823480606079, "eval_runtime": 7.226, "eval_samples_per_second": 14.531, "eval_steps_per_second": 0.969, "step": 354 }, { "epoch": 6.779661016949152, "grad_norm": 1.5640958547592163, "learning_rate": 5.186440677966102e-05, "loss": 1.2581, "step": 400 }, { "epoch": 7.0, "eval_loss": 1.0363339185714722, "eval_runtime": 7.3055, "eval_samples_per_second": 14.373, "eval_steps_per_second": 0.958, "step": 413 }, { "epoch": 7.627118644067797, "grad_norm": 2.0497965812683105, "learning_rate": 5.0847457627118643e-05, "loss": 1.2544, "step": 450 }, { "epoch": 8.0, "eval_loss": 1.067766785621643, "eval_runtime": 7.3717, "eval_samples_per_second": 14.244, "eval_steps_per_second": 0.95, "step": 472 }, { "epoch": 8.474576271186441, "grad_norm": 1.2606173753738403, "learning_rate": 4.9830508474576276e-05, "loss": 1.3097, "step": 500 }, { "epoch": 9.0, "eval_loss": 1.02413809299469, "eval_runtime": 7.1818, "eval_samples_per_second": 14.62, "eval_steps_per_second": 0.975, "step": 531 }, { "epoch": 9.322033898305085, "grad_norm": 2.3118815422058105, "learning_rate": 4.88135593220339e-05, "loss": 1.2787, "step": 550 }, { "epoch": 10.0, "eval_loss": 1.013655424118042, "eval_runtime": 7.1577, "eval_samples_per_second": 14.67, "eval_steps_per_second": 0.978, "step": 590 }, { "epoch": 10.169491525423728, "grad_norm": 1.2859658002853394, "learning_rate": 4.7796610169491526e-05, "loss": 1.2354, "step": 600 }, { "epoch": 11.0, "eval_loss": 1.0547661781311035, "eval_runtime": 7.1926, "eval_samples_per_second": 14.598, "eval_steps_per_second": 0.973, "step": 649 }, { "epoch": 11.016949152542374, "grad_norm": 2.121445417404175, "learning_rate": 4.677966101694916e-05, "loss": 1.2596, "step": 650 }, { "epoch": 11.864406779661017, "grad_norm": 3.0464370250701904, "learning_rate": 4.576271186440678e-05, "loss": 1.2646, "step": 700 }, { "epoch": 12.0, "eval_loss": 1.0133599042892456, "eval_runtime": 10.6666, "eval_samples_per_second": 9.844, "eval_steps_per_second": 0.656, "step": 708 }, { "epoch": 12.711864406779661, "grad_norm": 1.1342540979385376, "learning_rate": 4.474576271186441e-05, "loss": 1.2068, "step": 750 }, { "epoch": 13.0, "eval_loss": 1.0467838048934937, "eval_runtime": 11.4351, "eval_samples_per_second": 9.182, "eval_steps_per_second": 0.612, "step": 767 }, { "epoch": 13.559322033898304, "grad_norm": 2.094381093978882, "learning_rate": 4.3728813559322035e-05, "loss": 1.2955, "step": 800 }, { "epoch": 14.0, "eval_loss": 1.0249124765396118, "eval_runtime": 13.2701, "eval_samples_per_second": 7.913, "eval_steps_per_second": 0.528, "step": 826 }, { "epoch": 14.40677966101695, "grad_norm": 1.0174381732940674, "learning_rate": 4.271186440677966e-05, "loss": 1.2215, "step": 850 }, { "epoch": 15.0, "eval_loss": 1.009942650794983, "eval_runtime": 10.124, "eval_samples_per_second": 10.371, "eval_steps_per_second": 0.691, "step": 885 }, { "epoch": 15.254237288135593, "grad_norm": 1.1202493906021118, "learning_rate": 4.169491525423729e-05, "loss": 1.2365, "step": 900 }, { "epoch": 16.0, "eval_loss": 1.0121246576309204, "eval_runtime": 9.974, "eval_samples_per_second": 10.527, "eval_steps_per_second": 0.702, "step": 944 }, { "epoch": 16.10169491525424, "grad_norm": 1.1021959781646729, "learning_rate": 4.067796610169492e-05, "loss": 1.2412, "step": 950 }, { "epoch": 16.949152542372882, "grad_norm": 0.9624550938606262, "learning_rate": 3.966101694915254e-05, "loss": 1.2348, "step": 1000 }, { "epoch": 17.0, "eval_loss": 1.0155479907989502, "eval_runtime": 8.9635, "eval_samples_per_second": 11.714, "eval_steps_per_second": 0.781, "step": 1003 }, { "epoch": 17.796610169491526, "grad_norm": 0.9586867094039917, "learning_rate": 3.864406779661017e-05, "loss": 1.2455, "step": 1050 }, { "epoch": 18.0, "eval_loss": 1.0335369110107422, "eval_runtime": 9.0555, "eval_samples_per_second": 11.595, "eval_steps_per_second": 0.773, "step": 1062 }, { "epoch": 18.64406779661017, "grad_norm": 1.7303390502929688, "learning_rate": 3.76271186440678e-05, "loss": 1.2238, "step": 1100 }, { "epoch": 19.0, "eval_loss": 1.020735263824463, "eval_runtime": 9.6479, "eval_samples_per_second": 10.883, "eval_steps_per_second": 0.726, "step": 1121 } ], "logging_steps": 50, "max_steps": 2950, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 4 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.5260523640520704e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }