|
{ |
|
"best_global_step": 171, |
|
"best_metric": 1.0352519750595093, |
|
"best_model_checkpoint": "./gemma-patent-assistant/checkpoint-171", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 171, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17777777777777778, |
|
"grad_norm": 12.329893112182617, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 12.9427, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.35555555555555557, |
|
"grad_norm": 8.226090431213379, |
|
"learning_rate": 1.7777777777777777e-05, |
|
"loss": 12.1671, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 2.704000473022461, |
|
"learning_rate": 1.9865386046236597e-05, |
|
"loss": 10.8122, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 1.6485083103179932, |
|
"learning_rate": 1.932472229404356e-05, |
|
"loss": 9.8163, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 1.5979063510894775, |
|
"learning_rate": 1.839229287286327e-05, |
|
"loss": 9.4013, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.1528561115264893, |
|
"eval_runtime": 680.397, |
|
"eval_samples_per_second": 0.147, |
|
"eval_steps_per_second": 0.073, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 1.0533333333333332, |
|
"grad_norm": 1.6079875230789185, |
|
"learning_rate": 1.710727255106447e-05, |
|
"loss": 8.2906, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.231111111111111, |
|
"grad_norm": 2.0471384525299072, |
|
"learning_rate": 1.5693639270213138e-05, |
|
"loss": 8.8615, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.4088888888888889, |
|
"grad_norm": 1.443812370300293, |
|
"learning_rate": 1.3897858732926794e-05, |
|
"loss": 8.4692, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.5866666666666667, |
|
"grad_norm": 1.348875880241394, |
|
"learning_rate": 1.1938314902110701e-05, |
|
"loss": 8.3483, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.7644444444444445, |
|
"grad_norm": 1.4459165334701538, |
|
"learning_rate": 9.897335376977104e-06, |
|
"loss": 8.1664, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.942222222222222, |
|
"grad_norm": 1.4485529661178589, |
|
"learning_rate": 7.860669167935028e-06, |
|
"loss": 8.2337, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.046842098236084, |
|
"eval_runtime": 457.2398, |
|
"eval_samples_per_second": 0.219, |
|
"eval_steps_per_second": 0.109, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 2.1066666666666665, |
|
"grad_norm": 1.3392361402511597, |
|
"learning_rate": 5.913884067217686e-06, |
|
"loss": 7.5023, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.2844444444444445, |
|
"grad_norm": 1.6693141460418701, |
|
"learning_rate": 4.138771633147856e-06, |
|
"loss": 8.154, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.462222222222222, |
|
"grad_norm": 1.43593168258667, |
|
"learning_rate": 2.6099108277934105e-06, |
|
"loss": 8.0723, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 1.5561100244522095, |
|
"learning_rate": 1.3915346821563235e-06, |
|
"loss": 7.9664, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.8177777777777777, |
|
"grad_norm": 1.4849770069122314, |
|
"learning_rate": 5.348316317440549e-07, |
|
"loss": 7.8758, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.9955555555555557, |
|
"grad_norm": 1.4685490131378174, |
|
"learning_rate": 7.579490328064265e-08, |
|
"loss": 8.2081, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.0352519750595093, |
|
"eval_runtime": 449.063, |
|
"eval_samples_per_second": 0.223, |
|
"eval_steps_per_second": 0.111, |
|
"step": 171 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 171, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.438388691861504e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|