CodCodingCode's picture
Upload folder using huggingface_hub
27a72dd verified
raw
history blame
91.5 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.009290658243136527,
"eval_steps": 500,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 622.0,
"completions/max_terminated_length": 622.0,
"completions/mean_length": 551.0,
"completions/mean_terminated_length": 551.0,
"completions/min_length": 515.0,
"completions/min_terminated_length": 515.0,
"epoch": 9.290658243136526e-05,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.375,
"learning_rate": 5e-07,
"loss": 0.0,
"num_tokens": 4252.0,
"reward": -8.15000057220459,
"reward_std": 3.0405590534210205,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 4.306971073150635,
"step": 1
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 559.0,
"completions/max_terminated_length": 559.0,
"completions/mean_length": 538.5,
"completions/mean_terminated_length": 538.5,
"completions/min_length": 513.0,
"completions/min_terminated_length": 513.0,
"epoch": 0.00018581316486273051,
"frac_reward_zero_std": 0.5,
"grad_norm": 4.875,
"learning_rate": 4.95e-07,
"loss": -0.0058,
"num_tokens": 8340.0,
"reward": -3.500124931335449,
"reward_std": 1.414036750793457,
"rewards/chatgpt_combined_reward/mean": -3.500124931335449,
"rewards/chatgpt_combined_reward/std": 7.6809611320495605,
"step": 2
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 730.0,
"completions/max_terminated_length": 730.0,
"completions/mean_length": 598.75,
"completions/mean_terminated_length": 598.75,
"completions/min_length": 523.0,
"completions/min_terminated_length": 523.0,
"epoch": 0.00027871974729409577,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.4375,
"learning_rate": 4.9e-07,
"loss": -0.0225,
"num_tokens": 12655.0,
"reward": -1.5003752708435059,
"reward_std": 1.414036750793457,
"rewards/chatgpt_combined_reward/mean": -1.5003752708435059,
"rewards/chatgpt_combined_reward/std": 9.949413299560547,
"step": 3
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 648.0,
"completions/mean_length": 803.5,
"completions/mean_terminated_length": 583.0,
"completions/min_length": 518.0,
"completions/min_terminated_length": 518.0,
"epoch": 0.00037162632972546103,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.9375,
"learning_rate": 4.85e-07,
"loss": -0.0365,
"num_tokens": 17917.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 4
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 674.0,
"completions/max_terminated_length": 674.0,
"completions/mean_length": 566.25,
"completions/mean_terminated_length": 566.25,
"completions/min_length": 516.0,
"completions/min_terminated_length": 516.0,
"epoch": 0.0004645329121568263,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.34375,
"learning_rate": 4.8e-07,
"loss": -0.0466,
"num_tokens": 21406.0,
"reward": -1.7503750324249268,
"reward_std": 2.4746968746185303,
"rewards/chatgpt_combined_reward/mean": -1.7503750324249268,
"rewards/chatgpt_combined_reward/std": 9.945212364196777,
"step": 5
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 778.0,
"completions/max_terminated_length": 778.0,
"completions/mean_length": 599.0,
"completions/mean_terminated_length": 599.0,
"completions/min_length": 513.0,
"completions/min_terminated_length": 513.0,
"epoch": 0.0005574394945881915,
"frac_reward_zero_std": 0.0,
"grad_norm": 10.0625,
"learning_rate": 4.7499999999999995e-07,
"loss": -0.0,
"num_tokens": 25850.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 6
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 600.0,
"completions/max_terminated_length": 600.0,
"completions/mean_length": 550.5,
"completions/mean_terminated_length": 550.5,
"completions/min_length": 527.0,
"completions/min_terminated_length": 527.0,
"epoch": 0.0006503460770195569,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.5625,
"learning_rate": 4.6999999999999995e-07,
"loss": -0.009,
"num_tokens": 29262.0,
"reward": -2.9171252250671387,
"reward_std": 4.124730587005615,
"rewards/chatgpt_combined_reward/mean": -2.9171252250671387,
"rewards/chatgpt_combined_reward/std": 9.46435546875,
"step": 7
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 607.0,
"completions/max_terminated_length": 607.0,
"completions/mean_length": 544.25,
"completions/mean_terminated_length": 544.25,
"completions/min_length": 515.0,
"completions/min_terminated_length": 515.0,
"epoch": 0.0007432526594509221,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.4375,
"learning_rate": 4.65e-07,
"loss": -0.0106,
"num_tokens": 32691.0,
"reward": -2.583625078201294,
"reward_std": 1.296303391456604,
"rewards/chatgpt_combined_reward/mean": -2.583625078201294,
"rewards/chatgpt_combined_reward/std": 8.693524360656738,
"step": 8
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 543.0,
"completions/mean_length": 776.5,
"completions/mean_terminated_length": 529.0,
"completions/min_length": 515.0,
"completions/min_terminated_length": 515.0,
"epoch": 0.0008361592418822874,
"frac_reward_zero_std": 0.0,
"grad_norm": 9.25,
"learning_rate": 4.6e-07,
"loss": 0.2254,
"num_tokens": 37845.0,
"reward": -6.249625205993652,
"reward_std": 2.9463372230529785,
"rewards/chatgpt_combined_reward/mean": -6.249625205993652,
"rewards/chatgpt_combined_reward/std": 2.846529960632324,
"step": 9
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 537.0,
"completions/max_terminated_length": 537.0,
"completions/mean_length": 528.75,
"completions/mean_terminated_length": 528.75,
"completions/min_length": 518.0,
"completions/min_terminated_length": 518.0,
"epoch": 0.0009290658243136526,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.1875,
"learning_rate": 4.55e-07,
"loss": 0.0017,
"num_tokens": 41190.0,
"reward": 0.33299994468688965,
"reward_std": 8.249107360839844,
"rewards/chatgpt_combined_reward/mean": 0.33299994468688965,
"rewards/chatgpt_combined_reward/std": 7.683218955993652,
"step": 10
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 554.0,
"completions/mean_length": 655.75,
"completions/mean_terminated_length": 533.0,
"completions/min_length": 516.0,
"completions/min_terminated_length": 516.0,
"epoch": 0.001021972406745018,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.875,
"learning_rate": 4.5e-07,
"loss": 0.1097,
"num_tokens": 45861.0,
"reward": -8.89987564086914,
"reward_std": 1.7679438591003418,
"rewards/chatgpt_combined_reward/mean": -8.89987564086914,
"rewards/chatgpt_combined_reward/std": 2.62703275680542,
"step": 11
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 561.0,
"completions/max_terminated_length": 561.0,
"completions/mean_length": 529.5,
"completions/mean_terminated_length": 529.5,
"completions/min_length": 513.0,
"completions/min_terminated_length": 513.0,
"epoch": 0.001114878989176383,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.59375,
"learning_rate": 4.45e-07,
"loss": -0.0048,
"num_tokens": 49193.0,
"reward": -2.2503747940063477,
"reward_std": 2.003410577774048,
"rewards/chatgpt_combined_reward/mean": -2.2503747940063477,
"rewards/chatgpt_combined_reward/std": 9.242679595947266,
"step": 12
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 591.0,
"completions/max_terminated_length": 591.0,
"completions/mean_length": 558.5,
"completions/mean_terminated_length": 558.5,
"completions/min_length": 515.0,
"completions/min_terminated_length": 515.0,
"epoch": 0.0012077855716077484,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.90625,
"learning_rate": 4.3999999999999997e-07,
"loss": 0.0078,
"num_tokens": 53475.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 13
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1008.0,
"completions/max_terminated_length": 1008.0,
"completions/mean_length": 680.75,
"completions/mean_terminated_length": 680.75,
"completions/min_length": 523.0,
"completions/min_terminated_length": 523.0,
"epoch": 0.0013006921540391137,
"frac_reward_zero_std": 0.0,
"grad_norm": 6.9375,
"learning_rate": 4.3499999999999996e-07,
"loss": 0.0722,
"num_tokens": 58246.0,
"reward": -8.89987564086914,
"reward_std": 1.7679438591003418,
"rewards/chatgpt_combined_reward/mean": -8.89987564086914,
"rewards/chatgpt_combined_reward/std": 2.62703275680542,
"step": 14
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 567.0,
"completions/max_terminated_length": 567.0,
"completions/mean_length": 533.5,
"completions/mean_terminated_length": 533.5,
"completions/min_length": 514.0,
"completions/min_terminated_length": 514.0,
"epoch": 0.001393598736470479,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.53125,
"learning_rate": 4.2999999999999996e-07,
"loss": 0.0044,
"num_tokens": 62196.0,
"reward": -2.083625078201294,
"reward_std": 0.5891967415809631,
"rewards/chatgpt_combined_reward/mean": -2.083625078201294,
"rewards/chatgpt_combined_reward/std": 9.166325569152832,
"step": 15
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 729.0,
"completions/mean_length": 842.0,
"completions/mean_terminated_length": 660.0,
"completions/min_length": 591.0,
"completions/min_terminated_length": 591.0,
"epoch": 0.0014865053189018441,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.75,
"learning_rate": 4.2499999999999995e-07,
"loss": -0.1542,
"num_tokens": 67612.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 16
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 682.0,
"completions/max_terminated_length": 682.0,
"completions/mean_length": 560.0,
"completions/mean_terminated_length": 560.0,
"completions/min_length": 514.0,
"completions/min_terminated_length": 514.0,
"epoch": 0.0015794119013332094,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.375,
"learning_rate": 4.1999999999999995e-07,
"loss": 0.0252,
"num_tokens": 71066.0,
"reward": -1.6670000553131104,
"reward_std": 2.3567867279052734,
"rewards/chatgpt_combined_reward/mean": -1.6670000553131104,
"rewards/chatgpt_combined_reward/std": 9.999555587768555,
"step": 17
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 699.0,
"completions/max_terminated_length": 699.0,
"completions/mean_length": 607.5,
"completions/mean_terminated_length": 607.5,
"completions/min_length": 521.0,
"completions/min_terminated_length": 521.0,
"epoch": 0.0016723184837645747,
"frac_reward_zero_std": 0.0,
"grad_norm": 9.125,
"learning_rate": 4.1499999999999994e-07,
"loss": -0.0983,
"num_tokens": 75544.0,
"reward": -8.89987564086914,
"reward_std": 1.7679438591003418,
"rewards/chatgpt_combined_reward/mean": -8.89987564086914,
"rewards/chatgpt_combined_reward/std": 2.2138428688049316,
"step": 18
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 754.0,
"completions/max_terminated_length": 754.0,
"completions/mean_length": 596.75,
"completions/mean_terminated_length": 596.75,
"completions/min_length": 529.0,
"completions/min_terminated_length": 529.0,
"epoch": 0.00176522506619594,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.8125,
"learning_rate": 4.0999999999999994e-07,
"loss": 0.0,
"num_tokens": 79979.0,
"reward": -8.48324966430664,
"reward_std": 2.357140302658081,
"rewards/chatgpt_combined_reward/mean": -8.48324966430664,
"rewards/chatgpt_combined_reward/std": 3.0433735847473145,
"step": 19
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 593.0,
"completions/max_terminated_length": 593.0,
"completions/mean_length": 545.25,
"completions/mean_terminated_length": 545.25,
"completions/min_length": 513.0,
"completions/min_terminated_length": 513.0,
"epoch": 0.0018581316486273051,
"frac_reward_zero_std": 0.5,
"grad_norm": 6.03125,
"learning_rate": 4.05e-07,
"loss": -0.0202,
"num_tokens": 83912.0,
"reward": -2.333625078201294,
"reward_std": 1.414036750793457,
"rewards/chatgpt_combined_reward/mean": -2.333625078201294,
"rewards/chatgpt_combined_reward/std": 9.001688957214355,
"step": 20
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 832.0,
"completions/max_terminated_length": 832.0,
"completions/mean_length": 612.0,
"completions/mean_terminated_length": 612.0,
"completions/min_length": 524.0,
"completions/min_terminated_length": 524.0,
"epoch": 0.0019510382310586705,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.5,
"learning_rate": 4e-07,
"loss": 0.0457,
"num_tokens": 88190.0,
"reward": -2.1670000553131104,
"reward_std": 0.47128671407699585,
"rewards/chatgpt_combined_reward/mean": -2.1670000553131104,
"rewards/chatgpt_combined_reward/std": 9.061125755310059,
"step": 21
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 632.0,
"completions/mean_length": 709.5,
"completions/mean_terminated_length": 604.6666870117188,
"completions/min_length": 582.0,
"completions/min_terminated_length": 582.0,
"epoch": 0.002043944813490036,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.75,
"learning_rate": 3.95e-07,
"loss": 0.089,
"num_tokens": 93076.0,
"reward": -6.816500663757324,
"reward_std": 0.21213209629058838,
"rewards/chatgpt_combined_reward/mean": -6.816500663757324,
"rewards/chatgpt_combined_reward/std": 4.199178218841553,
"step": 22
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 547.0,
"completions/max_terminated_length": 547.0,
"completions/mean_length": 533.0,
"completions/mean_terminated_length": 533.0,
"completions/min_length": 517.0,
"completions/min_terminated_length": 517.0,
"epoch": 0.002136851395921401,
"frac_reward_zero_std": 0.5,
"grad_norm": 6.6875,
"learning_rate": 3.8999999999999997e-07,
"loss": 0.0049,
"num_tokens": 96414.0,
"reward": -2.5003750324249268,
"reward_std": 1.6498569250106812,
"rewards/chatgpt_combined_reward/mean": -2.5003750324249268,
"rewards/chatgpt_combined_reward/std": 8.866897583007812,
"step": 23
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 992.0,
"completions/max_terminated_length": 992.0,
"completions/mean_length": 722.25,
"completions/mean_terminated_length": 722.25,
"completions/min_length": 521.0,
"completions/min_terminated_length": 521.0,
"epoch": 0.002229757978352766,
"frac_reward_zero_std": 0.5,
"grad_norm": 4.3125,
"learning_rate": 3.8499999999999997e-07,
"loss": 0.0319,
"num_tokens": 100539.0,
"reward": -2.083624839782715,
"reward_std": 1.7675902843475342,
"rewards/chatgpt_combined_reward/mean": -2.083624839782715,
"rewards/chatgpt_combined_reward/std": 9.366135597229004,
"step": 24
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 716.0,
"completions/max_terminated_length": 716.0,
"completions/mean_length": 616.5,
"completions/mean_terminated_length": 616.5,
"completions/min_length": 517.0,
"completions/min_terminated_length": 517.0,
"epoch": 0.0023226645607841317,
"frac_reward_zero_std": 0.5,
"grad_norm": 4.90625,
"learning_rate": 3.7999999999999996e-07,
"loss": 0.0023,
"num_tokens": 104789.0,
"reward": -2.333750009536743,
"reward_std": 0.942926824092865,
"rewards/chatgpt_combined_reward/mean": -2.333750009536743,
"rewards/chatgpt_combined_reward/std": 8.91893196105957,
"step": 25
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 801.0,
"completions/max_terminated_length": 801.0,
"completions/mean_length": 624.0,
"completions/mean_terminated_length": 624.0,
"completions/min_length": 518.0,
"completions/min_terminated_length": 518.0,
"epoch": 0.002415571143215497,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.0625,
"learning_rate": 3.75e-07,
"loss": -0.0,
"num_tokens": 109333.0,
"reward": -7.916375160217285,
"reward_std": 2.9466910362243652,
"rewards/chatgpt_combined_reward/mean": -7.916375160217285,
"rewards/chatgpt_combined_reward/std": 2.500305652618408,
"step": 26
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 602.0,
"completions/max_terminated_length": 602.0,
"completions/mean_length": 579.5,
"completions/mean_terminated_length": 579.5,
"completions/min_length": 539.0,
"completions/min_terminated_length": 539.0,
"epoch": 0.002508477725646862,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.21875,
"learning_rate": 3.7e-07,
"loss": 0.015,
"num_tokens": 112875.0,
"reward": 2.384185791015625e-07,
"reward_std": 2.3567869663238525,
"rewards/chatgpt_combined_reward/mean": 2.384185791015625e-07,
"rewards/chatgpt_combined_reward/std": 7.070361137390137,
"step": 27
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 695.0,
"completions/mean_length": 694.0,
"completions/mean_terminated_length": 584.0,
"completions/min_length": 516.0,
"completions/min_terminated_length": 516.0,
"epoch": 0.0026013843080782274,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.5625,
"learning_rate": 3.65e-07,
"loss": -0.076,
"num_tokens": 117699.0,
"reward": -5.416375160217285,
"reward_std": 2.9463372230529785,
"rewards/chatgpt_combined_reward/mean": -5.416375160217285,
"rewards/chatgpt_combined_reward/std": 3.4360225200653076,
"step": 28
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 571.0,
"completions/mean_length": 910.75,
"completions/mean_terminated_length": 571.0,
"completions/min_length": 571.0,
"completions/min_terminated_length": 571.0,
"epoch": 0.0026942908905095925,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.4375,
"learning_rate": 3.6e-07,
"loss": 0.1004,
"num_tokens": 123390.0,
"reward": -8.48324966430664,
"reward_std": 2.357140302658081,
"rewards/chatgpt_combined_reward/mean": -8.48324966430664,
"rewards/chatgpt_combined_reward/std": 3.0433735847473145,
"step": 29
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 636.0,
"completions/mean_length": 700.75,
"completions/mean_terminated_length": 593.0,
"completions/min_length": 517.0,
"completions/min_terminated_length": 517.0,
"epoch": 0.002787197472940958,
"frac_reward_zero_std": 0.5,
"grad_norm": 4.5,
"learning_rate": 3.55e-07,
"loss": 0.0609,
"num_tokens": 128241.0,
"reward": -7.499750137329102,
"reward_std": 1.1783934831619263,
"rewards/chatgpt_combined_reward/mean": -7.499750137329102,
"rewards/chatgpt_combined_reward/std": 3.191626787185669,
"step": 30
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 733.0,
"completions/max_terminated_length": 733.0,
"completions/mean_length": 613.0,
"completions/mean_terminated_length": 613.0,
"completions/min_length": 513.0,
"completions/min_terminated_length": 513.0,
"epoch": 0.002880104055372323,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.21875,
"learning_rate": 3.5e-07,
"loss": 0.0078,
"num_tokens": 132735.0,
"reward": -2.5002501010894775,
"reward_std": 1.1783934831619263,
"rewards/chatgpt_combined_reward/mean": -2.5002501010894775,
"rewards/chatgpt_combined_reward/std": 8.766212463378906,
"step": 31
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 995.0,
"completions/max_terminated_length": 995.0,
"completions/mean_length": 712.0,
"completions/mean_terminated_length": 712.0,
"completions/min_length": 534.0,
"completions/min_terminated_length": 534.0,
"epoch": 0.0029730106378036882,
"frac_reward_zero_std": 0.5,
"grad_norm": 4.46875,
"learning_rate": 3.45e-07,
"loss": 0.0859,
"num_tokens": 137571.0,
"reward": -1.7503750324249268,
"reward_std": 2.4746968746185303,
"rewards/chatgpt_combined_reward/mean": -1.7503750324249268,
"rewards/chatgpt_combined_reward/std": 9.945212364196777,
"step": 32
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 570.0,
"completions/max_terminated_length": 570.0,
"completions/mean_length": 531.0,
"completions/mean_terminated_length": 531.0,
"completions/min_length": 513.0,
"completions/min_terminated_length": 513.0,
"epoch": 0.0030659172202350538,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.46875,
"learning_rate": 3.4000000000000003e-07,
"loss": 0.0149,
"num_tokens": 141743.0,
"reward": -9.316499710083008,
"reward_std": 1.1787471771240234,
"rewards/chatgpt_combined_reward/mean": -9.316499710083008,
"rewards/chatgpt_combined_reward/std": 1.3887726068496704,
"step": 33
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 721.0,
"completions/mean_length": 750.75,
"completions/mean_terminated_length": 659.6666870117188,
"completions/min_length": 605.0,
"completions/min_terminated_length": 605.0,
"epoch": 0.003158823802666419,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.34375,
"learning_rate": 3.35e-07,
"loss": -0.0175,
"num_tokens": 146734.0,
"reward": -2.083624839782715,
"reward_std": 1.7675902843475342,
"rewards/chatgpt_combined_reward/mean": -2.083624839782715,
"rewards/chatgpt_combined_reward/std": 9.366135597229004,
"step": 34
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 643.0,
"completions/max_terminated_length": 643.0,
"completions/mean_length": 552.5,
"completions/mean_terminated_length": 552.5,
"completions/min_length": 514.0,
"completions/min_terminated_length": 514.0,
"epoch": 0.0032517303850977844,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.09375,
"learning_rate": 3.3e-07,
"loss": -0.0381,
"num_tokens": 150992.0,
"reward": -6.666500091552734,
"reward_std": 1.1783933639526367,
"rewards/chatgpt_combined_reward/mean": -6.666500091552734,
"rewards/chatgpt_combined_reward/std": 4.0826191902160645,
"step": 35
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 738.0,
"completions/max_terminated_length": 738.0,
"completions/mean_length": 577.25,
"completions/mean_terminated_length": 577.25,
"completions/min_length": 522.0,
"completions/min_terminated_length": 522.0,
"epoch": 0.0033446369675291495,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.5625,
"learning_rate": 3.25e-07,
"loss": -0.059,
"num_tokens": 155349.0,
"reward": -6.249875068664551,
"reward_std": 5.3034772872924805,
"rewards/chatgpt_combined_reward/mean": -6.249875068664551,
"rewards/chatgpt_combined_reward/std": 4.383391380310059,
"step": 36
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 801.0,
"completions/max_terminated_length": 801.0,
"completions/mean_length": 589.5,
"completions/mean_terminated_length": 589.5,
"completions/min_length": 517.0,
"completions/min_terminated_length": 517.0,
"epoch": 0.0034375435499605146,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.125,
"learning_rate": 3.2e-07,
"loss": -0.0,
"num_tokens": 159755.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 37
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 713.0,
"completions/max_terminated_length": 713.0,
"completions/mean_length": 602.5,
"completions/mean_terminated_length": 602.5,
"completions/min_length": 548.0,
"completions/min_terminated_length": 548.0,
"epoch": 0.00353045013239188,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.25,
"learning_rate": 3.15e-07,
"loss": -0.0297,
"num_tokens": 164213.0,
"reward": -8.066625595092773,
"reward_std": 2.9463369846343994,
"rewards/chatgpt_combined_reward/mean": -8.066625595092773,
"rewards/chatgpt_combined_reward/std": 4.283124923706055,
"step": 38
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 810.0,
"completions/max_terminated_length": 810.0,
"completions/mean_length": 696.5,
"completions/mean_terminated_length": 696.5,
"completions/min_length": 536.0,
"completions/min_terminated_length": 536.0,
"epoch": 0.003623356714823245,
"frac_reward_zero_std": 0.0,
"grad_norm": 6.875,
"learning_rate": 3.1e-07,
"loss": -0.0857,
"num_tokens": 169047.0,
"reward": -8.483250617980957,
"reward_std": 2.357140302658081,
"rewards/chatgpt_combined_reward/mean": -8.483250617980957,
"rewards/chatgpt_combined_reward/std": 3.453827142715454,
"step": 39
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 575.0,
"completions/mean_length": 666.0,
"completions/mean_terminated_length": 546.6666870117188,
"completions/min_length": 530.0,
"completions/min_terminated_length": 530.0,
"epoch": 0.0037162632972546103,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.25,
"learning_rate": 3.05e-07,
"loss": -0.0,
"num_tokens": 172953.0,
"reward": -0.25024986267089844,
"reward_std": 3.889087200164795,
"rewards/chatgpt_combined_reward/mean": -0.25024986267089844,
"rewards/chatgpt_combined_reward/std": 8.088777542114258,
"step": 40
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 575.0,
"completions/max_terminated_length": 575.0,
"completions/mean_length": 537.25,
"completions/mean_terminated_length": 537.25,
"completions/min_length": 522.0,
"completions/min_terminated_length": 522.0,
"epoch": 0.003809169879685976,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.71875,
"learning_rate": 3e-07,
"loss": -0.0072,
"num_tokens": 176344.0,
"reward": -3.666874885559082,
"reward_std": 0.9427501559257507,
"rewards/chatgpt_combined_reward/mean": -3.666874885559082,
"rewards/chatgpt_combined_reward/std": 7.393443584442139,
"step": 41
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 670.0,
"completions/max_terminated_length": 670.0,
"completions/mean_length": 577.0,
"completions/mean_terminated_length": 577.0,
"completions/min_length": 538.0,
"completions/min_terminated_length": 538.0,
"epoch": 0.003902076462117341,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.0,
"learning_rate": 2.95e-07,
"loss": -0.0,
"num_tokens": 180700.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 42
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 741.0,
"completions/max_terminated_length": 741.0,
"completions/mean_length": 606.0,
"completions/mean_terminated_length": 606.0,
"completions/min_length": 522.0,
"completions/min_terminated_length": 522.0,
"epoch": 0.003994983044548706,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.625,
"learning_rate": 2.9e-07,
"loss": 0.033,
"num_tokens": 185098.0,
"reward": -1.9167499542236328,
"reward_std": 5.06748104095459,
"rewards/chatgpt_combined_reward/mean": -1.9167499542236328,
"rewards/chatgpt_combined_reward/std": 5.698338031768799,
"step": 43
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 575.0,
"completions/max_terminated_length": 575.0,
"completions/mean_length": 560.0,
"completions/mean_terminated_length": 560.0,
"completions/min_length": 544.0,
"completions/min_terminated_length": 544.0,
"epoch": 0.004087889626980072,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.4375,
"learning_rate": 2.8499999999999997e-07,
"loss": 0.0152,
"num_tokens": 188550.0,
"reward": -1.6668751239776611,
"reward_std": 1.8856770992279053,
"rewards/chatgpt_combined_reward/mean": -1.6668751239776611,
"rewards/chatgpt_combined_reward/std": 7.039121150970459,
"step": 44
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 573.0,
"completions/mean_length": 785.0,
"completions/mean_terminated_length": 546.0,
"completions/min_length": 519.0,
"completions/min_terminated_length": 519.0,
"epoch": 0.004180796209411437,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.75,
"learning_rate": 2.8e-07,
"loss": -0.0,
"num_tokens": 193738.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 45
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 534.0,
"completions/mean_length": 651.5,
"completions/mean_terminated_length": 527.3333740234375,
"completions/min_length": 515.0,
"completions/min_terminated_length": 515.0,
"epoch": 0.004273702791842802,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.6875,
"learning_rate": 2.75e-07,
"loss": 0.1173,
"num_tokens": 198392.0,
"reward": -8.066625595092773,
"reward_std": 3.1584692001342773,
"rewards/chatgpt_combined_reward/mean": -8.066625595092773,
"rewards/chatgpt_combined_reward/std": 4.083926677703857,
"step": 46
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 761.0,
"completions/max_terminated_length": 761.0,
"completions/mean_length": 585.5,
"completions/mean_terminated_length": 585.5,
"completions/min_length": 518.0,
"completions/min_terminated_length": 518.0,
"epoch": 0.004366609374274168,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.21875,
"learning_rate": 2.7e-07,
"loss": 0.0291,
"num_tokens": 201948.0,
"reward": -1.333749771118164,
"reward_std": 0.7071069478988647,
"rewards/chatgpt_combined_reward/mean": -1.333749771118164,
"rewards/chatgpt_combined_reward/std": 10.040179252624512,
"step": 47
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 619.0,
"completions/max_terminated_length": 619.0,
"completions/mean_length": 579.0,
"completions/mean_terminated_length": 579.0,
"completions/min_length": 526.0,
"completions/min_terminated_length": 526.0,
"epoch": 0.004459515956705532,
"frac_reward_zero_std": 0.0,
"grad_norm": 9.25,
"learning_rate": 2.65e-07,
"loss": -0.0357,
"num_tokens": 205824.0,
"reward": 2.082624673843384,
"reward_std": 2.2388768196105957,
"rewards/chatgpt_combined_reward/mean": 2.082624673843384,
"rewards/chatgpt_combined_reward/std": 5.983651638031006,
"step": 48
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 664.0,
"completions/max_terminated_length": 664.0,
"completions/mean_length": 567.25,
"completions/mean_terminated_length": 567.25,
"completions/min_length": 524.0,
"completions/min_terminated_length": 524.0,
"epoch": 0.004552422539136898,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.125,
"learning_rate": 2.6e-07,
"loss": 0.0394,
"num_tokens": 210141.0,
"reward": -9.316500663757324,
"reward_std": 1.3908790349960327,
"rewards/chatgpt_combined_reward/mean": -9.316500663757324,
"rewards/chatgpt_combined_reward/std": 1.982193112373352,
"step": 49
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 739.0,
"completions/max_terminated_length": 739.0,
"completions/mean_length": 640.0,
"completions/mean_terminated_length": 640.0,
"completions/min_length": 538.0,
"completions/min_terminated_length": 538.0,
"epoch": 0.004645329121568263,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.75,
"learning_rate": 2.55e-07,
"loss": 0.0381,
"num_tokens": 213923.0,
"reward": -4.583374977111816,
"reward_std": 2.945983409881592,
"rewards/chatgpt_combined_reward/mean": -4.583374977111816,
"rewards/chatgpt_combined_reward/std": 7.119798183441162,
"step": 50
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 559.0,
"completions/mean_length": 661.0,
"completions/mean_terminated_length": 540.0,
"completions/min_length": 530.0,
"completions/min_terminated_length": 530.0,
"epoch": 0.004738235703999628,
"frac_reward_zero_std": 0.0,
"grad_norm": 9.125,
"learning_rate": 2.5e-07,
"loss": -0.0,
"num_tokens": 218615.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 51
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 829.0,
"completions/max_terminated_length": 829.0,
"completions/mean_length": 595.25,
"completions/mean_terminated_length": 595.25,
"completions/min_length": 515.0,
"completions/min_terminated_length": 515.0,
"epoch": 0.004831142286430994,
"frac_reward_zero_std": 0.5,
"grad_norm": 4.65625,
"learning_rate": 2.45e-07,
"loss": 0.0813,
"num_tokens": 222836.0,
"reward": -3.750124931335449,
"reward_std": 4.124377250671387,
"rewards/chatgpt_combined_reward/mean": -3.750124931335449,
"rewards/chatgpt_combined_reward/std": 8.646496772766113,
"step": 52
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1000.0,
"completions/max_terminated_length": 1000.0,
"completions/mean_length": 675.5,
"completions/mean_terminated_length": 675.5,
"completions/min_length": 517.0,
"completions/min_terminated_length": 517.0,
"epoch": 0.004924048868862359,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.53125,
"learning_rate": 2.4e-07,
"loss": -0.0,
"num_tokens": 227586.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 53
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 697.0,
"completions/max_terminated_length": 697.0,
"completions/mean_length": 603.25,
"completions/mean_terminated_length": 603.25,
"completions/min_length": 522.0,
"completions/min_terminated_length": 522.0,
"epoch": 0.005016955451293724,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.25,
"learning_rate": 2.3499999999999997e-07,
"loss": 0.0037,
"num_tokens": 231885.0,
"reward": -0.25025010108947754,
"reward_std": 5.067480087280273,
"rewards/chatgpt_combined_reward/mean": -0.25025010108947754,
"rewards/chatgpt_combined_reward/std": 8.490804672241211,
"step": 54
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 601.0,
"completions/mean_length": 667.75,
"completions/mean_terminated_length": 549.0,
"completions/min_length": 521.0,
"completions/min_terminated_length": 521.0,
"epoch": 0.005109862033725089,
"frac_reward_zero_std": 0.5,
"grad_norm": 4.78125,
"learning_rate": 2.3e-07,
"loss": -0.0456,
"num_tokens": 236320.0,
"reward": -2.833625078201294,
"reward_std": 0.7069300413131714,
"rewards/chatgpt_combined_reward/mean": -2.833625078201294,
"rewards/chatgpt_combined_reward/std": 8.315181732177734,
"step": 55
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 650.0,
"completions/max_terminated_length": 650.0,
"completions/mean_length": 571.25,
"completions/mean_terminated_length": 571.25,
"completions/min_length": 522.0,
"completions/min_terminated_length": 522.0,
"epoch": 0.005202768616156455,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.40625,
"learning_rate": 2.25e-07,
"loss": 0.0162,
"num_tokens": 239589.0,
"reward": 7.665875434875488,
"reward_std": 0.9427504539489746,
"rewards/chatgpt_combined_reward/mean": 7.665875434875488,
"rewards/chatgpt_combined_reward/std": 0.8164288401603699,
"step": 56
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 585.0,
"completions/max_terminated_length": 585.0,
"completions/mean_length": 555.75,
"completions/mean_terminated_length": 555.75,
"completions/min_length": 524.0,
"completions/min_terminated_length": 524.0,
"epoch": 0.00529567519858782,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.3125,
"learning_rate": 2.1999999999999998e-07,
"loss": 0.0021,
"num_tokens": 243860.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 57
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 724.0,
"completions/max_terminated_length": 724.0,
"completions/mean_length": 616.0,
"completions/mean_terminated_length": 616.0,
"completions/min_length": 562.0,
"completions/min_terminated_length": 562.0,
"epoch": 0.005388581781019185,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.53125,
"learning_rate": 2.1499999999999998e-07,
"loss": 0.0464,
"num_tokens": 248372.0,
"reward": -7.499750137329102,
"reward_std": 3.5358872413635254,
"rewards/chatgpt_combined_reward/mean": -7.499750137329102,
"rewards/chatgpt_combined_reward/std": 3.191626787185669,
"step": 58
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 835.0,
"completions/mean_length": 726.5,
"completions/mean_terminated_length": 627.3333740234375,
"completions/min_length": 517.0,
"completions/min_terminated_length": 517.0,
"epoch": 0.0054814883634505505,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.875,
"learning_rate": 2.0999999999999997e-07,
"loss": 0.0292,
"num_tokens": 253326.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 59
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 665.0,
"completions/max_terminated_length": 665.0,
"completions/mean_length": 558.0,
"completions/mean_terminated_length": 558.0,
"completions/min_length": 515.0,
"completions/min_terminated_length": 515.0,
"epoch": 0.005574394945881916,
"frac_reward_zero_std": 0.0,
"grad_norm": 10.375,
"learning_rate": 2.0499999999999997e-07,
"loss": 0.0376,
"num_tokens": 257606.0,
"reward": -7.083125114440918,
"reward_std": 4.125083923339844,
"rewards/chatgpt_combined_reward/mean": -7.083125114440918,
"rewards/chatgpt_combined_reward/std": 3.436143636703491,
"step": 60
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 692.0,
"completions/max_terminated_length": 692.0,
"completions/mean_length": 613.25,
"completions/mean_terminated_length": 613.25,
"completions/min_length": 514.0,
"completions/min_terminated_length": 514.0,
"epoch": 0.005667301528313281,
"frac_reward_zero_std": 0.0,
"grad_norm": 9.75,
"learning_rate": 2e-07,
"loss": -0.0,
"num_tokens": 262107.0,
"reward": -5.833000183105469,
"reward_std": 3.535533905029297,
"rewards/chatgpt_combined_reward/mean": -5.833000183105469,
"rewards/chatgpt_combined_reward/std": 3.469496726989746,
"step": 61
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 572.0,
"completions/mean_length": 672.0,
"completions/mean_terminated_length": 554.6666870117188,
"completions/min_length": 537.0,
"completions/min_terminated_length": 537.0,
"epoch": 0.005760208110744646,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.5625,
"learning_rate": 1.9499999999999999e-07,
"loss": 0.1156,
"num_tokens": 266843.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 62
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 884.0,
"completions/max_terminated_length": 884.0,
"completions/mean_length": 666.5,
"completions/mean_terminated_length": 666.5,
"completions/min_length": 543.0,
"completions/min_terminated_length": 543.0,
"epoch": 0.005853114693176012,
"frac_reward_zero_std": 0.0,
"grad_norm": 9.8125,
"learning_rate": 1.8999999999999998e-07,
"loss": 0.0609,
"num_tokens": 271557.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 63
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 667.0,
"completions/mean_length": 683.0,
"completions/mean_terminated_length": 569.3333740234375,
"completions/min_length": 518.0,
"completions/min_terminated_length": 518.0,
"epoch": 0.0059460212756073764,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.8125,
"learning_rate": 1.85e-07,
"loss": -0.0,
"num_tokens": 276127.0,
"reward": -0.0002503395080566406,
"reward_std": 3.535533905029297,
"rewards/chatgpt_combined_reward/mean": -0.0002503395080566406,
"rewards/chatgpt_combined_reward/std": 9.128161430358887,
"step": 64
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 990.0,
"completions/mean_length": 838.0,
"completions/mean_terminated_length": 776.0,
"completions/min_length": 549.0,
"completions/min_terminated_length": 549.0,
"epoch": 0.006038927858038742,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.8125,
"learning_rate": 1.8e-07,
"loss": 0.0668,
"num_tokens": 281527.0,
"reward": -9.316499710083008,
"reward_std": 1.3908792734146118,
"rewards/chatgpt_combined_reward/mean": -9.316499710083008,
"rewards/chatgpt_combined_reward/std": 1.61105215549469,
"step": 65
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 803.0,
"completions/mean_length": 881.75,
"completions/mean_terminated_length": 739.5,
"completions/min_length": 676.0,
"completions/min_terminated_length": 676.0,
"epoch": 0.0061318344404701075,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.875,
"learning_rate": 1.75e-07,
"loss": 0.1151,
"num_tokens": 287102.0,
"reward": -8.89987564086914,
"reward_std": 1.7679438591003418,
"rewards/chatgpt_combined_reward/mean": -8.89987564086914,
"rewards/chatgpt_combined_reward/std": 2.2138428688049316,
"step": 66
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 665.0,
"completions/max_terminated_length": 665.0,
"completions/mean_length": 606.5,
"completions/mean_terminated_length": 606.5,
"completions/min_length": 550.0,
"completions/min_terminated_length": 550.0,
"epoch": 0.006224741022901472,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.28125,
"learning_rate": 1.7000000000000001e-07,
"loss": 0.0032,
"num_tokens": 291296.0,
"reward": -2.333750009536743,
"reward_std": 1.4142135381698608,
"rewards/chatgpt_combined_reward/mean": -2.333750009536743,
"rewards/chatgpt_combined_reward/std": 9.001585006713867,
"step": 67
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 658.0,
"completions/max_terminated_length": 658.0,
"completions/mean_length": 580.5,
"completions/mean_terminated_length": 580.5,
"completions/min_length": 544.0,
"completions/min_terminated_length": 544.0,
"epoch": 0.006317647605332838,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.625,
"learning_rate": 1.65e-07,
"loss": 0.026,
"num_tokens": 295666.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 68
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 621.0,
"completions/mean_length": 673.0,
"completions/mean_terminated_length": 556.0,
"completions/min_length": 515.0,
"completions/min_terminated_length": 515.0,
"epoch": 0.006410554187764203,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.09375,
"learning_rate": 1.6e-07,
"loss": 0.1442,
"num_tokens": 300406.0,
"reward": -7.499750137329102,
"reward_std": 3.5358872413635254,
"rewards/chatgpt_combined_reward/mean": -7.499750137329102,
"rewards/chatgpt_combined_reward/std": 3.191626787185669,
"step": 69
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 692.0,
"completions/max_terminated_length": 692.0,
"completions/mean_length": 593.25,
"completions/mean_terminated_length": 593.25,
"completions/min_length": 520.0,
"completions/min_terminated_length": 520.0,
"epoch": 0.006503460770195569,
"frac_reward_zero_std": 0.5,
"grad_norm": 4.53125,
"learning_rate": 1.55e-07,
"loss": 0.031,
"num_tokens": 304811.0,
"reward": -0.41687512397766113,
"reward_std": 1.7679438591003418,
"rewards/chatgpt_combined_reward/mean": -0.41687512397766113,
"rewards/chatgpt_combined_reward/std": 8.42953109741211,
"step": 70
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 681.0,
"completions/max_terminated_length": 681.0,
"completions/mean_length": 581.25,
"completions/mean_terminated_length": 581.25,
"completions/min_length": 514.0,
"completions/min_terminated_length": 514.0,
"epoch": 0.006596367352626933,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.375,
"learning_rate": 1.5e-07,
"loss": 0.0708,
"num_tokens": 308360.0,
"reward": -2.0002501010894775,
"reward_std": 5.42103385925293,
"rewards/chatgpt_combined_reward/mean": -2.0002501010894775,
"rewards/chatgpt_combined_reward/std": 6.996899127960205,
"step": 71
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 852.0,
"completions/max_terminated_length": 852.0,
"completions/mean_length": 611.75,
"completions/mean_terminated_length": 611.75,
"completions/min_length": 522.0,
"completions/min_terminated_length": 522.0,
"epoch": 0.006689273935058299,
"frac_reward_zero_std": 0.0,
"grad_norm": 9.1875,
"learning_rate": 1.45e-07,
"loss": 0.0753,
"num_tokens": 312855.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 72
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 683.0,
"completions/max_terminated_length": 683.0,
"completions/mean_length": 599.75,
"completions/mean_terminated_length": 599.75,
"completions/min_length": 556.0,
"completions/min_terminated_length": 556.0,
"epoch": 0.0067821805174896645,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.625,
"learning_rate": 1.4e-07,
"loss": -0.0233,
"num_tokens": 317276.0,
"reward": -1.2503752708435059,
"reward_std": 1.7675901651382446,
"rewards/chatgpt_combined_reward/mean": -1.2503752708435059,
"rewards/chatgpt_combined_reward/std": 10.30729866027832,
"step": 73
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 685.0,
"completions/mean_length": 707.5,
"completions/mean_terminated_length": 602.0,
"completions/min_length": 517.0,
"completions/min_terminated_length": 517.0,
"epoch": 0.006875087099921029,
"frac_reward_zero_std": 0.5,
"grad_norm": 3.984375,
"learning_rate": 1.35e-07,
"loss": 0.0693,
"num_tokens": 322154.0,
"reward": -7.499750137329102,
"reward_std": 1.1783934831619263,
"rewards/chatgpt_combined_reward/mean": -7.499750137329102,
"rewards/chatgpt_combined_reward/std": 3.191626787185669,
"step": 74
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 723.0,
"completions/max_terminated_length": 723.0,
"completions/mean_length": 623.0,
"completions/mean_terminated_length": 623.0,
"completions/min_length": 515.0,
"completions/min_terminated_length": 515.0,
"epoch": 0.006967993682352395,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.6875,
"learning_rate": 1.3e-07,
"loss": 0.0071,
"num_tokens": 325852.0,
"reward": -2.5002501010894775,
"reward_std": 1.1783934831619263,
"rewards/chatgpt_combined_reward/mean": -2.5002501010894775,
"rewards/chatgpt_combined_reward/std": 8.766212463378906,
"step": 75
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 588.0,
"completions/max_terminated_length": 588.0,
"completions/mean_length": 543.0,
"completions/mean_terminated_length": 543.0,
"completions/min_length": 515.0,
"completions/min_terminated_length": 515.0,
"epoch": 0.00706090026478376,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.875,
"learning_rate": 1.25e-07,
"loss": -0.0118,
"num_tokens": 330072.0,
"reward": -4.583125114440918,
"reward_std": 1.7679438591003418,
"rewards/chatgpt_combined_reward/mean": -4.583125114440918,
"rewards/chatgpt_combined_reward/std": 4.589576721191406,
"step": 76
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 540.0,
"completions/mean_length": 656.0,
"completions/mean_terminated_length": 533.3333740234375,
"completions/min_length": 527.0,
"completions/min_terminated_length": 527.0,
"epoch": 0.007153806847215125,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.5,
"learning_rate": 1.2e-07,
"loss": 0.1072,
"num_tokens": 334744.0,
"reward": -7.90000057220459,
"reward_std": 3.3941125869750977,
"rewards/chatgpt_combined_reward/mean": -7.90000057220459,
"rewards/chatgpt_combined_reward/std": 4.806246280670166,
"step": 77
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 814.0,
"completions/max_terminated_length": 814.0,
"completions/mean_length": 618.25,
"completions/mean_terminated_length": 618.25,
"completions/min_length": 519.0,
"completions/min_terminated_length": 519.0,
"epoch": 0.00724671342964649,
"frac_reward_zero_std": 0.5,
"grad_norm": 4.4375,
"learning_rate": 1.15e-07,
"loss": -0.0338,
"num_tokens": 339217.0,
"reward": -2.0003750324249268,
"reward_std": 0.4714634120464325,
"rewards/chatgpt_combined_reward/mean": -2.0003750324249268,
"rewards/chatgpt_combined_reward/std": 9.253199577331543,
"step": 78
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 664.0,
"completions/max_terminated_length": 664.0,
"completions/mean_length": 561.75,
"completions/mean_terminated_length": 561.75,
"completions/min_length": 520.0,
"completions/min_terminated_length": 520.0,
"epoch": 0.007339620012077856,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.6875,
"learning_rate": 1.0999999999999999e-07,
"loss": -0.0368,
"num_tokens": 342692.0,
"reward": -1.2501251697540283,
"reward_std": 2.9463372230529785,
"rewards/chatgpt_combined_reward/mean": -1.2501251697540283,
"rewards/chatgpt_combined_reward/std": 7.622077465057373,
"step": 79
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 818.0,
"completions/max_terminated_length": 818.0,
"completions/mean_length": 644.25,
"completions/mean_terminated_length": 644.25,
"completions/min_length": 532.0,
"completions/min_terminated_length": 532.0,
"epoch": 0.0074325265945092206,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.8125,
"learning_rate": 1.0499999999999999e-07,
"loss": -0.1178,
"num_tokens": 346483.0,
"reward": -1.5832499265670776,
"reward_std": 2.23905348777771,
"rewards/chatgpt_combined_reward/mean": -1.5832499265670776,
"rewards/chatgpt_combined_reward/std": 5.307532787322998,
"step": 80
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 518.0,
"completions/mean_length": 770.5,
"completions/mean_terminated_length": 517.0,
"completions/min_length": 516.0,
"completions/min_terminated_length": 516.0,
"epoch": 0.007525433176940586,
"frac_reward_zero_std": 0.0,
"grad_norm": 9.3125,
"learning_rate": 1e-07,
"loss": -0.0007,
"num_tokens": 351613.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 81
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 636.0,
"completions/max_terminated_length": 636.0,
"completions/mean_length": 553.5,
"completions/mean_terminated_length": 553.5,
"completions/min_length": 525.0,
"completions/min_terminated_length": 525.0,
"epoch": 0.007618339759371952,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.625,
"learning_rate": 9.499999999999999e-08,
"loss": 0.0164,
"num_tokens": 355029.0,
"reward": -2.7502501010894775,
"reward_std": 3.8887336254119873,
"rewards/chatgpt_combined_reward/mean": -2.7502501010894775,
"rewards/chatgpt_combined_reward/std": 9.499552726745605,
"step": 82
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 626.0,
"completions/max_terminated_length": 626.0,
"completions/mean_length": 580.75,
"completions/mean_terminated_length": 580.75,
"completions/min_length": 532.0,
"completions/min_terminated_length": 532.0,
"epoch": 0.007711246341803317,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.6875,
"learning_rate": 9e-08,
"loss": 0.0014,
"num_tokens": 358566.0,
"reward": -0.2505002021789551,
"reward_std": 0.3535533845424652,
"rewards/chatgpt_combined_reward/mean": -0.2505002021789551,
"rewards/chatgpt_combined_reward/std": 11.265152931213379,
"step": 83
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 693.0,
"completions/mean_length": 705.5,
"completions/mean_terminated_length": 599.3333740234375,
"completions/min_length": 515.0,
"completions/min_terminated_length": 515.0,
"epoch": 0.007804152924234682,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.8125,
"learning_rate": 8.500000000000001e-08,
"loss": -0.1453,
"num_tokens": 363436.0,
"reward": -3.749875068664551,
"reward_std": 4.124730587005615,
"rewards/chatgpt_combined_reward/mean": -3.749875068664551,
"rewards/chatgpt_combined_reward/std": 4.383296489715576,
"step": 84
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 890.0,
"completions/max_terminated_length": 890.0,
"completions/mean_length": 618.75,
"completions/mean_terminated_length": 618.75,
"completions/min_length": 522.0,
"completions/min_terminated_length": 522.0,
"epoch": 0.007897059506666047,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.125,
"learning_rate": 8e-08,
"loss": -0.086,
"num_tokens": 367959.0,
"reward": -8.89987564086914,
"reward_std": 1.9800759553909302,
"rewards/chatgpt_combined_reward/mean": -8.89987564086914,
"rewards/chatgpt_combined_reward/std": 2.4292385578155518,
"step": 85
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 519.0,
"completions/mean_length": 644.25,
"completions/mean_terminated_length": 517.6666870117188,
"completions/min_length": 517.0,
"completions/min_terminated_length": 517.0,
"epoch": 0.007989966089097413,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.0625,
"learning_rate": 7.5e-08,
"loss": 0.1156,
"num_tokens": 370960.0,
"reward": 3.832624912261963,
"reward_std": 3.771176815032959,
"rewards/chatgpt_combined_reward/mean": 3.832624912261963,
"rewards/chatgpt_combined_reward/std": 4.7957444190979,
"step": 86
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 731.0,
"completions/max_terminated_length": 731.0,
"completions/mean_length": 585.75,
"completions/mean_terminated_length": 585.75,
"completions/min_length": 520.0,
"completions/min_terminated_length": 520.0,
"epoch": 0.008082872671528778,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.9375,
"learning_rate": 7e-08,
"loss": 0.0622,
"num_tokens": 375351.0,
"reward": -5.983250141143799,
"reward_std": 5.892674446105957,
"rewards/chatgpt_combined_reward/mean": -5.983250141143799,
"rewards/chatgpt_combined_reward/std": 4.826911449432373,
"step": 87
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1002.0,
"completions/max_terminated_length": 1002.0,
"completions/mean_length": 724.25,
"completions/mean_terminated_length": 724.25,
"completions/min_length": 562.0,
"completions/min_terminated_length": 562.0,
"epoch": 0.008175779253960144,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.625,
"learning_rate": 6.5e-08,
"loss": -0.0,
"num_tokens": 380296.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 88
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 746.0,
"completions/max_terminated_length": 746.0,
"completions/mean_length": 592.25,
"completions/mean_terminated_length": 592.25,
"completions/min_length": 517.0,
"completions/min_terminated_length": 517.0,
"epoch": 0.008268685836391508,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.75,
"learning_rate": 6e-08,
"loss": -0.0,
"num_tokens": 384329.0,
"reward": 4.832625389099121,
"reward_std": 3.771177291870117,
"rewards/chatgpt_combined_reward/mean": 4.832625389099121,
"rewards/chatgpt_combined_reward/std": 3.911078929901123,
"step": 89
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 698.0,
"completions/max_terminated_length": 698.0,
"completions/mean_length": 633.0,
"completions/mean_terminated_length": 633.0,
"completions/min_length": 598.0,
"completions/min_terminated_length": 598.0,
"epoch": 0.008361592418822873,
"frac_reward_zero_std": 0.0,
"grad_norm": 7.46875,
"learning_rate": 5.4999999999999996e-08,
"loss": -0.0157,
"num_tokens": 388909.0,
"reward": -5.983250141143799,
"reward_std": 0.21213209629058838,
"rewards/chatgpt_combined_reward/mean": -5.983250141143799,
"rewards/chatgpt_combined_reward/std": 4.468296527862549,
"step": 90
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 750.0,
"completions/max_terminated_length": 750.0,
"completions/mean_length": 584.5,
"completions/mean_terminated_length": 584.5,
"completions/min_length": 523.0,
"completions/min_terminated_length": 523.0,
"epoch": 0.008454499001254239,
"frac_reward_zero_std": 0.5,
"grad_norm": 4.78125,
"learning_rate": 5e-08,
"loss": 0.0284,
"num_tokens": 392953.0,
"reward": -2.083624839782715,
"reward_std": 1.7675902843475342,
"rewards/chatgpt_combined_reward/mean": -2.083624839782715,
"rewards/chatgpt_combined_reward/std": 9.366135597229004,
"step": 91
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 686.0,
"completions/mean_length": 714.25,
"completions/mean_terminated_length": 611.0,
"completions/min_length": 522.0,
"completions/min_terminated_length": 522.0,
"epoch": 0.008547405583685604,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.0,
"learning_rate": 4.5e-08,
"loss": 0.1335,
"num_tokens": 397858.0,
"reward": -9.316499710083008,
"reward_std": 1.3908792734146118,
"rewards/chatgpt_combined_reward/mean": -9.316499710083008,
"rewards/chatgpt_combined_reward/std": 1.61105215549469,
"step": 92
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 848.0,
"completions/max_terminated_length": 848.0,
"completions/mean_length": 737.75,
"completions/mean_terminated_length": 737.75,
"completions/min_length": 639.0,
"completions/min_terminated_length": 639.0,
"epoch": 0.00864031216611697,
"frac_reward_zero_std": 0.0,
"grad_norm": 6.6875,
"learning_rate": 4e-08,
"loss": -0.0,
"num_tokens": 402857.0,
"reward": -8.15000057220459,
"reward_std": 0.21213217079639435,
"rewards/chatgpt_combined_reward/mean": -8.15000057220459,
"rewards/chatgpt_combined_reward/std": 0.387298583984375,
"step": 93
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 958.0,
"completions/max_terminated_length": 958.0,
"completions/mean_length": 649.0,
"completions/mean_terminated_length": 649.0,
"completions/min_length": 526.0,
"completions/min_terminated_length": 526.0,
"epoch": 0.008733218748548335,
"frac_reward_zero_std": 0.0,
"grad_norm": 9.5,
"learning_rate": 3.5e-08,
"loss": 0.0,
"num_tokens": 407501.0,
"reward": -9.316499710083008,
"reward_std": 1.1787470579147339,
"rewards/chatgpt_combined_reward/mean": -9.316499710083008,
"rewards/chatgpt_combined_reward/std": 1.8061809539794922,
"step": 94
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 795.0,
"completions/max_terminated_length": 795.0,
"completions/mean_length": 645.0,
"completions/mean_terminated_length": 645.0,
"completions/min_length": 530.0,
"completions/min_terminated_length": 530.0,
"epoch": 0.0088261253309797,
"frac_reward_zero_std": 0.0,
"grad_norm": 8.75,
"learning_rate": 3e-08,
"loss": -0.0538,
"num_tokens": 412129.0,
"reward": -2.583124876022339,
"reward_std": 1.2963035106658936,
"rewards/chatgpt_combined_reward/mean": -2.583124876022339,
"rewards/chatgpt_combined_reward/std": 2.114532947540283,
"step": 95
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 806.0,
"completions/mean_length": 732.25,
"completions/mean_terminated_length": 635.0,
"completions/min_length": 525.0,
"completions/min_terminated_length": 525.0,
"epoch": 0.008919031913411065,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.0,
"learning_rate": 2.5e-08,
"loss": 0.0132,
"num_tokens": 416252.0,
"reward": -3.1670000553131104,
"reward_std": 0.2354665994644165,
"rewards/chatgpt_combined_reward/mean": -3.1670000553131104,
"rewards/chatgpt_combined_reward/std": 7.894752502441406,
"step": 96
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 772.0,
"completions/max_terminated_length": 772.0,
"completions/mean_length": 650.0,
"completions/mean_terminated_length": 650.0,
"completions/min_length": 518.0,
"completions/min_terminated_length": 518.0,
"epoch": 0.00901193849584243,
"frac_reward_zero_std": 0.5,
"grad_norm": 4.4375,
"learning_rate": 2e-08,
"loss": -0.0286,
"num_tokens": 420900.0,
"reward": -5.916625022888184,
"reward_std": 0.11791006475687027,
"rewards/chatgpt_combined_reward/mean": -5.916625022888184,
"rewards/chatgpt_combined_reward/std": 4.717040538787842,
"step": 97
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 652.0,
"completions/mean_length": 683.25,
"completions/mean_terminated_length": 569.6666870117188,
"completions/min_length": 516.0,
"completions/min_terminated_length": 516.0,
"epoch": 0.009104845078273796,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.40625,
"learning_rate": 1.5e-08,
"loss": 0.0434,
"num_tokens": 425291.0,
"reward": -0.8337502479553223,
"reward_std": 1.1783933639526367,
"rewards/chatgpt_combined_reward/mean": -0.8337502479553223,
"rewards/chatgpt_combined_reward/std": 10.671379089355469,
"step": 98
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 622.0,
"completions/max_terminated_length": 622.0,
"completions/mean_length": 573.5,
"completions/mean_terminated_length": 573.5,
"completions/min_length": 519.0,
"completions/min_terminated_length": 519.0,
"epoch": 0.009197751660705161,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.0625,
"learning_rate": 1e-08,
"loss": -0.0229,
"num_tokens": 429633.0,
"reward": -7.749750137329102,
"reward_std": 0.8248399496078491,
"rewards/chatgpt_combined_reward/mean": -7.749750137329102,
"rewards/chatgpt_combined_reward/std": 2.7674262523651123,
"step": 99
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 757.0,
"completions/max_terminated_length": 757.0,
"completions/mean_length": 595.25,
"completions/mean_terminated_length": 595.25,
"completions/min_length": 531.0,
"completions/min_terminated_length": 531.0,
"epoch": 0.009290658243136527,
"frac_reward_zero_std": 0.5,
"grad_norm": 5.25,
"learning_rate": 5e-09,
"loss": 0.0081,
"num_tokens": 433854.0,
"reward": -2.0003747940063477,
"reward_std": 1.6498571634292603,
"rewards/chatgpt_combined_reward/mean": -2.0003747940063477,
"rewards/chatgpt_combined_reward/std": 9.431580543518066,
"step": 100
}
],
"logging_steps": 1,
"max_steps": 100,
"num_input_tokens_seen": 433854,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}