{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"eval_steps": 10,
"global_step": 30,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.7314285714285714,
"grad_norm": 9.340608453115205,
"learning_rate": 5.000000000000001e-07,
"loss": 0.7205065488815308,
"memory(GiB)": 63.44,
"step": 1,
"token_acc": 0.8149569178102063,
"train_speed(iter/s)": 0.002817
},
{
"epoch": 1.0,
"grad_norm": 9.340608453115205,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.7595161199569702,
"memory(GiB)": 67.12,
"step": 2,
"token_acc": 0.8210892726643333,
"train_speed(iter/s)": 0.004132
},
{
"epoch": 1.7314285714285713,
"grad_norm": 14.990541756622823,
"learning_rate": 1.5e-06,
"loss": 0.7181279063224792,
"memory(GiB)": 67.12,
"step": 3,
"token_acc": 0.8385703913117363,
"train_speed(iter/s)": 0.003617
},
{
"epoch": 2.0,
"grad_norm": 14.990541756622823,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.7464776039123535,
"memory(GiB)": 67.12,
"step": 4,
"token_acc": 0.8302431930811472,
"train_speed(iter/s)": 0.004181
},
{
"epoch": 2.7314285714285713,
"grad_norm": 10.51872439314909,
"learning_rate": 2.5e-06,
"loss": 0.7061335444450378,
"memory(GiB)": 67.12,
"step": 5,
"token_acc": 0.8495207453582679,
"train_speed(iter/s)": 0.003827
},
{
"epoch": 3.0,
"grad_norm": 7.8322768971767545,
"learning_rate": 3e-06,
"loss": 0.7670756578445435,
"memory(GiB)": 76.32,
"step": 6,
"token_acc": 0.8034914159988437,
"train_speed(iter/s)": 0.00417
},
{
"epoch": 3.7314285714285713,
"grad_norm": 9.640548264571718,
"learning_rate": 3.5e-06,
"loss": 0.7187657952308655,
"memory(GiB)": 76.32,
"step": 7,
"token_acc": 0.8349510177213882,
"train_speed(iter/s)": 0.003914
},
{
"epoch": 4.0,
"grad_norm": 9.640548264571718,
"learning_rate": 4.000000000000001e-06,
"loss": 0.6732273101806641,
"memory(GiB)": 76.32,
"step": 8,
"token_acc": 0.8445810474364907,
"train_speed(iter/s)": 0.004178
},
{
"epoch": 4.731428571428571,
"grad_norm": 18.16921963489396,
"learning_rate": 4.5e-06,
"loss": 0.6695082187652588,
"memory(GiB)": 76.32,
"step": 9,
"token_acc": 0.8329077399296925,
"train_speed(iter/s)": 0.003975
},
{
"epoch": 5.0,
"grad_norm": 18.16921963489396,
"learning_rate": 5e-06,
"loss": 0.7095792293548584,
"memory(GiB)": 76.32,
"step": 10,
"token_acc": 0.8494555815610054,
"train_speed(iter/s)": 0.00417
},
{
"epoch": 5.731428571428571,
"grad_norm": 36.47852854409781,
"learning_rate": 4.99847706754774e-06,
"loss": 0.6005474328994751,
"memory(GiB)": 76.32,
"step": 11,
"token_acc": 0.844029136120312,
"train_speed(iter/s)": 0.003971
},
{
"epoch": 6.0,
"grad_norm": 8.15921427581055,
"learning_rate": 4.993910125649561e-06,
"loss": 0.6586757898330688,
"memory(GiB)": 76.32,
"step": 12,
"token_acc": 0.8276634855085236,
"train_speed(iter/s)": 0.004131
},
{
"epoch": 6.731428571428571,
"grad_norm": 9.940528313438021,
"learning_rate": 4.986304738420684e-06,
"loss": 0.5984752178192139,
"memory(GiB)": 76.32,
"step": 13,
"token_acc": 0.8406243779216233,
"train_speed(iter/s)": 0.004005
},
{
"epoch": 7.0,
"grad_norm": 9.940528313438021,
"learning_rate": 4.975670171853926e-06,
"loss": 0.5597701668739319,
"memory(GiB)": 76.32,
"step": 14,
"token_acc": 0.8693022202084277,
"train_speed(iter/s)": 0.004144
},
{
"epoch": 7.731428571428571,
"grad_norm": 4.5395059579286015,
"learning_rate": 4.962019382530521e-06,
"loss": 0.5627670884132385,
"memory(GiB)": 76.32,
"step": 15,
"token_acc": 0.8462122810719561,
"train_speed(iter/s)": 0.004014
},
{
"epoch": 8.0,
"grad_norm": 4.5395059579286015,
"learning_rate": 4.9453690018345144e-06,
"loss": 0.5592302680015564,
"memory(GiB)": 76.32,
"step": 16,
"token_acc": 0.8405773019678591,
"train_speed(iter/s)": 0.004144
},
{
"epoch": 8.731428571428571,
"grad_norm": 6.584180692482273,
"learning_rate": 4.925739315689991e-06,
"loss": 0.49423277378082275,
"memory(GiB)": 76.32,
"step": 17,
"token_acc": 0.8678222664079662,
"train_speed(iter/s)": 0.004034
},
{
"epoch": 9.0,
"grad_norm": 2.39165864781694,
"learning_rate": 4.903154239845798e-06,
"loss": 0.4701133668422699,
"memory(GiB)": 76.32,
"step": 18,
"token_acc": 0.8865700144198917,
"train_speed(iter/s)": 0.004143
},
{
"epoch": 9.731428571428571,
"grad_norm": 5.595725551844695,
"learning_rate": 4.8776412907378845e-06,
"loss": 0.45911359786987305,
"memory(GiB)": 76.32,
"step": 19,
"token_acc": 0.8713313357013871,
"train_speed(iter/s)": 0.004052
},
{
"epoch": 10.0,
"grad_norm": 5.595725551844695,
"learning_rate": 4.849231551964771e-06,
"loss": 0.4861743152141571,
"memory(GiB)": 76.32,
"step": 20,
"token_acc": 0.854624478442281,
"train_speed(iter/s)": 0.00415
},
{
"epoch": 10.731428571428571,
"grad_norm": 2.9321408358661594,
"learning_rate": 4.817959636416969e-06,
"loss": 0.45954838395118713,
"memory(GiB)": 76.32,
"step": 21,
"token_acc": 0.8712813064578979,
"train_speed(iter/s)": 0.004039
},
{
"epoch": 11.0,
"grad_norm": 11.05893897846086,
"learning_rate": 4.783863644106502e-06,
"loss": 0.42236876487731934,
"memory(GiB)": 76.32,
"step": 22,
"token_acc": 0.8768863403778366,
"train_speed(iter/s)": 0.004131
},
{
"epoch": 11.731428571428571,
"grad_norm": 3.349470799070857,
"learning_rate": 4.746985115747918e-06,
"loss": 0.4331884980201721,
"memory(GiB)": 76.32,
"step": 23,
"token_acc": 0.8758490488600685,
"train_speed(iter/s)": 0.004053
},
{
"epoch": 12.0,
"grad_norm": 3.349470799070857,
"learning_rate": 4.707368982147318e-06,
"loss": 0.4267829954624176,
"memory(GiB)": 76.32,
"step": 24,
"token_acc": 0.8883371910699619,
"train_speed(iter/s)": 0.004139
},
{
"epoch": 12.731428571428571,
"grad_norm": 4.894666598752148,
"learning_rate": 4.665063509461098e-06,
"loss": 0.4266759753227234,
"memory(GiB)": 76.32,
"step": 25,
"token_acc": 0.879163815519365,
"train_speed(iter/s)": 0.004064
},
{
"epoch": 13.0,
"grad_norm": 4.894666598752148,
"learning_rate": 4.620120240391065e-06,
"loss": 0.3838977813720703,
"memory(GiB)": 76.32,
"step": 26,
"token_acc": 0.8920969339679625,
"train_speed(iter/s)": 0.004143
},
{
"epoch": 13.731428571428571,
"grad_norm": 2.1762524149258344,
"learning_rate": 4.572593931387604e-06,
"loss": 0.3890763521194458,
"memory(GiB)": 76.32,
"step": 27,
"token_acc": 0.8864340359319397,
"train_speed(iter/s)": 0.004074
},
{
"epoch": 14.0,
"grad_norm": 1.8853256148082378,
"learning_rate": 4.522542485937369e-06,
"loss": 0.36992955207824707,
"memory(GiB)": 76.32,
"step": 28,
"token_acc": 0.8742953776775648,
"train_speed(iter/s)": 0.004147
},
{
"epoch": 14.731428571428571,
"grad_norm": 1.3310226706467028,
"learning_rate": 4.470026884016805e-06,
"loss": 0.3653033375740051,
"memory(GiB)": 76.32,
"step": 29,
"token_acc": 0.8962616884718178,
"train_speed(iter/s)": 0.004085
},
{
"epoch": 15.0,
"grad_norm": 1.3310226706467028,
"learning_rate": 4.415111107797445e-06,
"loss": 0.3536423146724701,
"memory(GiB)": 76.32,
"step": 30,
"token_acc": 0.8946604521852897,
"train_speed(iter/s)": 0.004149
}
],
"logging_steps": 1,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 57394211913728.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}