{
  "best_global_step": 413,
  "best_metric": 0.1363692432641983,
  "best_model_checkpoint": "./fine-tuned-model-16\\checkpoint-413",
  "epoch": 9.0,
  "eval_steps": 500,
  "global_step": 531,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.8519701810436635,
      "grad_norm": 0.2258640080690384,
      "learning_rate": 4.5689655172413794e-05,
      "loss": 0.5163,
      "step": 50
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.17305970191955566,
      "eval_runtime": 188.3457,
      "eval_samples_per_second": 0.557,
      "eval_steps_per_second": 0.557,
      "step": 59
    },
    {
      "epoch": 1.698615548455804,
      "grad_norm": 0.16990907490253448,
      "learning_rate": 4.1379310344827587e-05,
      "loss": 0.1656,
      "step": 100
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.15038533508777618,
      "eval_runtime": 188.0181,
      "eval_samples_per_second": 0.558,
      "eval_steps_per_second": 0.558,
      "step": 118
    },
    {
      "epoch": 2.545260915867945,
      "grad_norm": 0.28422054648399353,
      "learning_rate": 3.7068965517241385e-05,
      "loss": 0.133,
      "step": 150
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.14531958103179932,
      "eval_runtime": 187.6033,
      "eval_samples_per_second": 0.56,
      "eval_steps_per_second": 0.56,
      "step": 177
    },
    {
      "epoch": 3.3919062832800853,
      "grad_norm": 0.25157108902931213,
      "learning_rate": 3.275862068965517e-05,
      "loss": 0.1179,
      "step": 200
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.14057818055152893,
      "eval_runtime": 188.9752,
      "eval_samples_per_second": 0.556,
      "eval_steps_per_second": 0.556,
      "step": 236
    },
    {
      "epoch": 4.238551650692226,
      "grad_norm": 0.224246546626091,
      "learning_rate": 2.844827586206897e-05,
      "loss": 0.1041,
      "step": 250
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.1371779888868332,
      "eval_runtime": 187.9394,
      "eval_samples_per_second": 0.559,
      "eval_steps_per_second": 0.559,
      "step": 295
    },
    {
      "epoch": 5.085197018104366,
      "grad_norm": 0.23708942532539368,
      "learning_rate": 2.413793103448276e-05,
      "loss": 0.0963,
      "step": 300
    },
    {
      "epoch": 5.93716719914803,
      "grad_norm": 0.3290882408618927,
      "learning_rate": 1.9827586206896554e-05,
      "loss": 0.0877,
      "step": 350
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.13712403178215027,
      "eval_runtime": 188.2543,
      "eval_samples_per_second": 0.558,
      "eval_steps_per_second": 0.558,
      "step": 354
    },
    {
      "epoch": 6.7838125665601705,
      "grad_norm": 0.21974337100982666,
      "learning_rate": 1.5517241379310346e-05,
      "loss": 0.0787,
      "step": 400
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.1363692432641983,
      "eval_runtime": 188.8614,
      "eval_samples_per_second": 0.556,
      "eval_steps_per_second": 0.556,
      "step": 413
    },
    {
      "epoch": 7.630457933972311,
      "grad_norm": 0.30215010046958923,
      "learning_rate": 1.1206896551724138e-05,
      "loss": 0.0773,
      "step": 450
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.13983061909675598,
      "eval_runtime": 188.1378,
      "eval_samples_per_second": 0.558,
      "eval_steps_per_second": 0.558,
      "step": 472
    },
    {
      "epoch": 8.477103301384451,
      "grad_norm": 0.31231704354286194,
      "learning_rate": 6.896551724137932e-06,
      "loss": 0.0729,
      "step": 500
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.14362066984176636,
      "eval_runtime": 188.0244,
      "eval_samples_per_second": 0.558,
      "eval_steps_per_second": 0.558,
      "step": 531
    }
  ],
  "logging_steps": 50,
  "max_steps": 580,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 2,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 2
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.0722456587298406e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}