DeanGumas's picture
Storing 16 and 32 rank LoRA finetuned models
48d9ca0
raw
history blame
8.61 kB
{
"best_global_step": 885,
"best_metric": 1.009942650794983,
"best_model_checkpoint": "./fine-tuned-model\\checkpoint-885",
"epoch": 19.0,
"eval_steps": 500,
"global_step": 1121,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.847457627118644,
"grad_norm": 0.939425528049469,
"learning_rate": 5.8983050847457634e-05,
"loss": 7.3825,
"step": 50
},
{
"epoch": 1.0,
"eval_loss": 1.3358267545700073,
"eval_runtime": 7.2445,
"eval_samples_per_second": 14.494,
"eval_steps_per_second": 0.966,
"step": 59
},
{
"epoch": 1.694915254237288,
"grad_norm": 2.5237913131713867,
"learning_rate": 5.796610169491525e-05,
"loss": 1.6245,
"step": 100
},
{
"epoch": 2.0,
"eval_loss": 1.1412484645843506,
"eval_runtime": 7.1915,
"eval_samples_per_second": 14.601,
"eval_steps_per_second": 0.973,
"step": 118
},
{
"epoch": 2.542372881355932,
"grad_norm": 7.8459625244140625,
"learning_rate": 5.6949152542372884e-05,
"loss": 1.4469,
"step": 150
},
{
"epoch": 3.0,
"eval_loss": 1.0957719087600708,
"eval_runtime": 7.3705,
"eval_samples_per_second": 14.246,
"eval_steps_per_second": 0.95,
"step": 177
},
{
"epoch": 3.389830508474576,
"grad_norm": 1.5296450853347778,
"learning_rate": 5.593220338983051e-05,
"loss": 1.3912,
"step": 200
},
{
"epoch": 4.0,
"eval_loss": 1.072253704071045,
"eval_runtime": 7.3695,
"eval_samples_per_second": 14.248,
"eval_steps_per_second": 0.95,
"step": 236
},
{
"epoch": 4.237288135593221,
"grad_norm": 1.9591976404190063,
"learning_rate": 5.4915254237288135e-05,
"loss": 1.2829,
"step": 250
},
{
"epoch": 5.0,
"eval_loss": 1.1338605880737305,
"eval_runtime": 7.1975,
"eval_samples_per_second": 14.588,
"eval_steps_per_second": 0.973,
"step": 295
},
{
"epoch": 5.084745762711864,
"grad_norm": 1.4545025825500488,
"learning_rate": 5.389830508474577e-05,
"loss": 1.3583,
"step": 300
},
{
"epoch": 5.932203389830509,
"grad_norm": 1.4759844541549683,
"learning_rate": 5.288135593220339e-05,
"loss": 1.295,
"step": 350
},
{
"epoch": 6.0,
"eval_loss": 1.043823480606079,
"eval_runtime": 7.226,
"eval_samples_per_second": 14.531,
"eval_steps_per_second": 0.969,
"step": 354
},
{
"epoch": 6.779661016949152,
"grad_norm": 1.5640958547592163,
"learning_rate": 5.186440677966102e-05,
"loss": 1.2581,
"step": 400
},
{
"epoch": 7.0,
"eval_loss": 1.0363339185714722,
"eval_runtime": 7.3055,
"eval_samples_per_second": 14.373,
"eval_steps_per_second": 0.958,
"step": 413
},
{
"epoch": 7.627118644067797,
"grad_norm": 2.0497965812683105,
"learning_rate": 5.0847457627118643e-05,
"loss": 1.2544,
"step": 450
},
{
"epoch": 8.0,
"eval_loss": 1.067766785621643,
"eval_runtime": 7.3717,
"eval_samples_per_second": 14.244,
"eval_steps_per_second": 0.95,
"step": 472
},
{
"epoch": 8.474576271186441,
"grad_norm": 1.2606173753738403,
"learning_rate": 4.9830508474576276e-05,
"loss": 1.3097,
"step": 500
},
{
"epoch": 9.0,
"eval_loss": 1.02413809299469,
"eval_runtime": 7.1818,
"eval_samples_per_second": 14.62,
"eval_steps_per_second": 0.975,
"step": 531
},
{
"epoch": 9.322033898305085,
"grad_norm": 2.3118815422058105,
"learning_rate": 4.88135593220339e-05,
"loss": 1.2787,
"step": 550
},
{
"epoch": 10.0,
"eval_loss": 1.013655424118042,
"eval_runtime": 7.1577,
"eval_samples_per_second": 14.67,
"eval_steps_per_second": 0.978,
"step": 590
},
{
"epoch": 10.169491525423728,
"grad_norm": 1.2859658002853394,
"learning_rate": 4.7796610169491526e-05,
"loss": 1.2354,
"step": 600
},
{
"epoch": 11.0,
"eval_loss": 1.0547661781311035,
"eval_runtime": 7.1926,
"eval_samples_per_second": 14.598,
"eval_steps_per_second": 0.973,
"step": 649
},
{
"epoch": 11.016949152542374,
"grad_norm": 2.121445417404175,
"learning_rate": 4.677966101694916e-05,
"loss": 1.2596,
"step": 650
},
{
"epoch": 11.864406779661017,
"grad_norm": 3.0464370250701904,
"learning_rate": 4.576271186440678e-05,
"loss": 1.2646,
"step": 700
},
{
"epoch": 12.0,
"eval_loss": 1.0133599042892456,
"eval_runtime": 10.6666,
"eval_samples_per_second": 9.844,
"eval_steps_per_second": 0.656,
"step": 708
},
{
"epoch": 12.711864406779661,
"grad_norm": 1.1342540979385376,
"learning_rate": 4.474576271186441e-05,
"loss": 1.2068,
"step": 750
},
{
"epoch": 13.0,
"eval_loss": 1.0467838048934937,
"eval_runtime": 11.4351,
"eval_samples_per_second": 9.182,
"eval_steps_per_second": 0.612,
"step": 767
},
{
"epoch": 13.559322033898304,
"grad_norm": 2.094381093978882,
"learning_rate": 4.3728813559322035e-05,
"loss": 1.2955,
"step": 800
},
{
"epoch": 14.0,
"eval_loss": 1.0249124765396118,
"eval_runtime": 13.2701,
"eval_samples_per_second": 7.913,
"eval_steps_per_second": 0.528,
"step": 826
},
{
"epoch": 14.40677966101695,
"grad_norm": 1.0174381732940674,
"learning_rate": 4.271186440677966e-05,
"loss": 1.2215,
"step": 850
},
{
"epoch": 15.0,
"eval_loss": 1.009942650794983,
"eval_runtime": 10.124,
"eval_samples_per_second": 10.371,
"eval_steps_per_second": 0.691,
"step": 885
},
{
"epoch": 15.254237288135593,
"grad_norm": 1.1202493906021118,
"learning_rate": 4.169491525423729e-05,
"loss": 1.2365,
"step": 900
},
{
"epoch": 16.0,
"eval_loss": 1.0121246576309204,
"eval_runtime": 9.974,
"eval_samples_per_second": 10.527,
"eval_steps_per_second": 0.702,
"step": 944
},
{
"epoch": 16.10169491525424,
"grad_norm": 1.1021959781646729,
"learning_rate": 4.067796610169492e-05,
"loss": 1.2412,
"step": 950
},
{
"epoch": 16.949152542372882,
"grad_norm": 0.9624550938606262,
"learning_rate": 3.966101694915254e-05,
"loss": 1.2348,
"step": 1000
},
{
"epoch": 17.0,
"eval_loss": 1.0155479907989502,
"eval_runtime": 8.9635,
"eval_samples_per_second": 11.714,
"eval_steps_per_second": 0.781,
"step": 1003
},
{
"epoch": 17.796610169491526,
"grad_norm": 0.9586867094039917,
"learning_rate": 3.864406779661017e-05,
"loss": 1.2455,
"step": 1050
},
{
"epoch": 18.0,
"eval_loss": 1.0335369110107422,
"eval_runtime": 9.0555,
"eval_samples_per_second": 11.595,
"eval_steps_per_second": 0.773,
"step": 1062
},
{
"epoch": 18.64406779661017,
"grad_norm": 1.7303390502929688,
"learning_rate": 3.76271186440678e-05,
"loss": 1.2238,
"step": 1100
},
{
"epoch": 19.0,
"eval_loss": 1.020735263824463,
"eval_runtime": 9.6479,
"eval_samples_per_second": 10.883,
"eval_steps_per_second": 0.726,
"step": 1121
}
],
"logging_steps": 50,
"max_steps": 2950,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 4,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 4
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.5260523640520704e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}