{
"best_global_step": 590,
"best_metric": 0.9585100412368774,
"best_model_checkpoint": "./fine-tuned-model\\checkpoint-590",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 590,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.847457627118644,
"grad_norm": 2.3572731018066406,
"learning_rate": 3.932203389830509e-05,
"loss": 9.5716,
"step": 50
},
{
"epoch": 1.0,
"eval_loss": 1.5832620859146118,
"eval_runtime": 5.4855,
"eval_samples_per_second": 19.141,
"eval_steps_per_second": 1.276,
"step": 59
},
{
"epoch": 1.694915254237288,
"grad_norm": 3.8656413555145264,
"learning_rate": 3.8644067796610175e-05,
"loss": 1.7466,
"step": 100
},
{
"epoch": 2.0,
"eval_loss": 1.1679714918136597,
"eval_runtime": 5.364,
"eval_samples_per_second": 19.575,
"eval_steps_per_second": 1.305,
"step": 118
},
{
"epoch": 2.542372881355932,
"grad_norm": 2.582818031311035,
"learning_rate": 3.796610169491526e-05,
"loss": 1.5173,
"step": 150
},
{
"epoch": 3.0,
"eval_loss": 1.0937272310256958,
"eval_runtime": 5.4735,
"eval_samples_per_second": 19.183,
"eval_steps_per_second": 1.279,
"step": 177
},
{
"epoch": 3.389830508474576,
"grad_norm": 0.9783422350883484,
"learning_rate": 3.728813559322034e-05,
"loss": 1.4233,
"step": 200
},
{
"epoch": 4.0,
"eval_loss": 1.0387905836105347,
"eval_runtime": 5.354,
"eval_samples_per_second": 19.612,
"eval_steps_per_second": 1.307,
"step": 236
},
{
"epoch": 4.237288135593221,
"grad_norm": 0.6458675265312195,
"learning_rate": 3.6610169491525426e-05,
"loss": 1.3044,
"step": 250
},
{
"epoch": 5.0,
"eval_loss": 1.0661542415618896,
"eval_runtime": 5.4235,
"eval_samples_per_second": 19.36,
"eval_steps_per_second": 1.291,
"step": 295
},
{
"epoch": 5.084745762711864,
"grad_norm": 1.3425668478012085,
"learning_rate": 3.593220338983051e-05,
"loss": 1.3608,
"step": 300
},
{
"epoch": 5.932203389830509,
"grad_norm": 1.331030011177063,
"learning_rate": 3.52542372881356e-05,
"loss": 1.2839,
"step": 350
},
{
"epoch": 6.0,
"eval_loss": 0.9894506335258484,
"eval_runtime": 5.406,
"eval_samples_per_second": 19.423,
"eval_steps_per_second": 1.295,
"step": 354
},
{
"epoch": 6.779661016949152,
"grad_norm": 1.02914297580719,
"learning_rate": 3.457627118644068e-05,
"loss": 1.2485,
"step": 400
},
{
"epoch": 7.0,
"eval_loss": 0.9816469550132751,
"eval_runtime": 5.4105,
"eval_samples_per_second": 19.407,
"eval_steps_per_second": 1.294,
"step": 413
},
{
"epoch": 7.627118644067797,
"grad_norm": 1.8862000703811646,
"learning_rate": 3.389830508474576e-05,
"loss": 1.2426,
"step": 450
},
{
"epoch": 8.0,
"eval_loss": 1.0074799060821533,
"eval_runtime": 5.423,
"eval_samples_per_second": 19.362,
"eval_steps_per_second": 1.291,
"step": 472
},
{
"epoch": 8.474576271186441,
"grad_norm": 0.9509351849555969,
"learning_rate": 3.322033898305085e-05,
"loss": 1.2903,
"step": 500
},
{
"epoch": 9.0,
"eval_loss": 0.9700178503990173,
"eval_runtime": 5.388,
"eval_samples_per_second": 19.488,
"eval_steps_per_second": 1.299,
"step": 531
},
{
"epoch": 9.322033898305085,
"grad_norm": 1.861725926399231,
"learning_rate": 3.2542372881355934e-05,
"loss": 1.2588,
"step": 550
},
{
"epoch": 10.0,
"eval_loss": 0.9585100412368774,
"eval_runtime": 5.409,
"eval_samples_per_second": 19.412,
"eval_steps_per_second": 1.294,
"step": 590
}
],
"logging_steps": 50,
"max_steps": 2950,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.86489122586624e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}