URL-TITLE-classifier / trainer_state.json
tshasan's picture
Upload folder using huggingface_hub
c90a07f verified
raw
history blame
4.71 kB
{
"best_global_step": 8337,
"best_metric": 0.677,
"best_model_checkpoint": "URL-TITLE-classifier/checkpoint-8337",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 8337,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 2.487246036529541,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.2453,
"step": 2779
},
{
"epoch": 1.0,
"eval_exact_match": 0.409,
"eval_f1_Chat": 0.858,
"eval_f1_Education": 0.743,
"eval_f1_Entertainment": 0.715,
"eval_f1_Government": 0.623,
"eval_f1_Health": 0.561,
"eval_f1_News": 0.518,
"eval_f1_Shop": 0.688,
"eval_f1_Technology": 0.709,
"eval_f1_Travel": 0.582,
"eval_f1_Uncategorized": 0.652,
"eval_f1_Work": 0.464,
"eval_f1_macro": 0.647,
"eval_f1_micro": 0.912,
"eval_f1_weighted": 0.681,
"eval_hamming_loss": 0.088,
"eval_jaccard_macro": 0.487,
"eval_jaccard_micro": 0.838,
"eval_loss": 0.2192692905664444,
"eval_pr_auc_macro": 0.744,
"eval_pr_auc_micro": 0.794,
"eval_precision_macro": 0.799,
"eval_precision_micro": 0.912,
"eval_precision_weighted": 0.804,
"eval_recall_macro": 0.555,
"eval_recall_micro": 0.912,
"eval_recall_weighted": 0.599,
"eval_roc_auc_macro": 0.919,
"eval_roc_auc_micro": 0.932,
"eval_runtime": 11.0104,
"eval_samples_per_second": 448.669,
"eval_steps_per_second": 28.064,
"step": 2779
},
{
"epoch": 2.0,
"grad_norm": 2.526982307434082,
"learning_rate": 6.666666666666667e-06,
"loss": 0.2086,
"step": 5558
},
{
"epoch": 2.0,
"eval_exact_match": 0.434,
"eval_f1_Chat": 0.874,
"eval_f1_Education": 0.757,
"eval_f1_Entertainment": 0.751,
"eval_f1_Government": 0.663,
"eval_f1_Health": 0.57,
"eval_f1_News": 0.591,
"eval_f1_Shop": 0.7,
"eval_f1_Technology": 0.742,
"eval_f1_Travel": 0.577,
"eval_f1_Uncategorized": 0.63,
"eval_f1_Work": 0.541,
"eval_f1_macro": 0.672,
"eval_f1_micro": 0.916,
"eval_f1_weighted": 0.704,
"eval_hamming_loss": 0.084,
"eval_jaccard_macro": 0.515,
"eval_jaccard_micro": 0.845,
"eval_loss": 0.2088017612695694,
"eval_pr_auc_macro": 0.762,
"eval_pr_auc_micro": 0.811,
"eval_precision_macro": 0.789,
"eval_precision_micro": 0.916,
"eval_precision_weighted": 0.796,
"eval_recall_macro": 0.592,
"eval_recall_micro": 0.916,
"eval_recall_weighted": 0.636,
"eval_roc_auc_macro": 0.926,
"eval_roc_auc_micro": 0.939,
"eval_runtime": 11.0839,
"eval_samples_per_second": 445.69,
"eval_steps_per_second": 27.878,
"step": 5558
},
{
"epoch": 3.0,
"grad_norm": 2.597599506378174,
"learning_rate": 0.0,
"loss": 0.1917,
"step": 8337
},
{
"epoch": 3.0,
"eval_exact_match": 0.445,
"eval_f1_Chat": 0.875,
"eval_f1_Education": 0.763,
"eval_f1_Entertainment": 0.764,
"eval_f1_Government": 0.667,
"eval_f1_Health": 0.574,
"eval_f1_News": 0.605,
"eval_f1_Shop": 0.704,
"eval_f1_Technology": 0.738,
"eval_f1_Travel": 0.571,
"eval_f1_Uncategorized": 0.657,
"eval_f1_Work": 0.527,
"eval_f1_macro": 0.677,
"eval_f1_micro": 0.917,
"eval_f1_weighted": 0.711,
"eval_hamming_loss": 0.083,
"eval_jaccard_macro": 0.52,
"eval_jaccard_micro": 0.848,
"eval_loss": 0.2073148488998413,
"eval_pr_auc_macro": 0.765,
"eval_pr_auc_micro": 0.815,
"eval_precision_macro": 0.795,
"eval_precision_micro": 0.917,
"eval_precision_weighted": 0.798,
"eval_recall_macro": 0.598,
"eval_recall_micro": 0.917,
"eval_recall_weighted": 0.647,
"eval_roc_auc_macro": 0.928,
"eval_roc_auc_micro": 0.941,
"eval_runtime": 11.0867,
"eval_samples_per_second": 445.58,
"eval_steps_per_second": 27.871,
"step": 8337
}
],
"logging_steps": 500,
"max_steps": 8337,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.4145838586061798e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}