|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.009290658243136527, |
|
"eval_steps": 500, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 622.0, |
|
"completions/max_terminated_length": 622.0, |
|
"completions/mean_length": 551.0, |
|
"completions/mean_terminated_length": 551.0, |
|
"completions/min_length": 515.0, |
|
"completions/min_terminated_length": 515.0, |
|
"epoch": 9.290658243136526e-05, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.375, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0, |
|
"num_tokens": 4252.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 3.0405590534210205, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 4.306971073150635, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 559.0, |
|
"completions/max_terminated_length": 559.0, |
|
"completions/mean_length": 538.5, |
|
"completions/mean_terminated_length": 538.5, |
|
"completions/min_length": 513.0, |
|
"completions/min_terminated_length": 513.0, |
|
"epoch": 0.00018581316486273051, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 4.875, |
|
"learning_rate": 4.95e-07, |
|
"loss": -0.0058, |
|
"num_tokens": 8340.0, |
|
"reward": -3.500124931335449, |
|
"reward_std": 1.414036750793457, |
|
"rewards/chatgpt_combined_reward/mean": -3.500124931335449, |
|
"rewards/chatgpt_combined_reward/std": 7.6809611320495605, |
|
"step": 2 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 730.0, |
|
"completions/max_terminated_length": 730.0, |
|
"completions/mean_length": 598.75, |
|
"completions/mean_terminated_length": 598.75, |
|
"completions/min_length": 523.0, |
|
"completions/min_terminated_length": 523.0, |
|
"epoch": 0.00027871974729409577, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 4.9e-07, |
|
"loss": -0.0225, |
|
"num_tokens": 12655.0, |
|
"reward": -1.5003752708435059, |
|
"reward_std": 1.414036750793457, |
|
"rewards/chatgpt_combined_reward/mean": -1.5003752708435059, |
|
"rewards/chatgpt_combined_reward/std": 9.949413299560547, |
|
"step": 3 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.5, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 648.0, |
|
"completions/mean_length": 803.5, |
|
"completions/mean_terminated_length": 583.0, |
|
"completions/min_length": 518.0, |
|
"completions/min_terminated_length": 518.0, |
|
"epoch": 0.00037162632972546103, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.9375, |
|
"learning_rate": 4.85e-07, |
|
"loss": -0.0365, |
|
"num_tokens": 17917.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 4 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 674.0, |
|
"completions/max_terminated_length": 674.0, |
|
"completions/mean_length": 566.25, |
|
"completions/mean_terminated_length": 566.25, |
|
"completions/min_length": 516.0, |
|
"completions/min_terminated_length": 516.0, |
|
"epoch": 0.0004645329121568263, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.34375, |
|
"learning_rate": 4.8e-07, |
|
"loss": -0.0466, |
|
"num_tokens": 21406.0, |
|
"reward": -1.7503750324249268, |
|
"reward_std": 2.4746968746185303, |
|
"rewards/chatgpt_combined_reward/mean": -1.7503750324249268, |
|
"rewards/chatgpt_combined_reward/std": 9.945212364196777, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 778.0, |
|
"completions/max_terminated_length": 778.0, |
|
"completions/mean_length": 599.0, |
|
"completions/mean_terminated_length": 599.0, |
|
"completions/min_length": 513.0, |
|
"completions/min_terminated_length": 513.0, |
|
"epoch": 0.0005574394945881915, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 4.7499999999999995e-07, |
|
"loss": -0.0, |
|
"num_tokens": 25850.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 6 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 600.0, |
|
"completions/max_terminated_length": 600.0, |
|
"completions/mean_length": 550.5, |
|
"completions/mean_terminated_length": 550.5, |
|
"completions/min_length": 527.0, |
|
"completions/min_terminated_length": 527.0, |
|
"epoch": 0.0006503460770195569, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.5625, |
|
"learning_rate": 4.6999999999999995e-07, |
|
"loss": -0.009, |
|
"num_tokens": 29262.0, |
|
"reward": -2.9171252250671387, |
|
"reward_std": 4.124730587005615, |
|
"rewards/chatgpt_combined_reward/mean": -2.9171252250671387, |
|
"rewards/chatgpt_combined_reward/std": 9.46435546875, |
|
"step": 7 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 607.0, |
|
"completions/max_terminated_length": 607.0, |
|
"completions/mean_length": 544.25, |
|
"completions/mean_terminated_length": 544.25, |
|
"completions/min_length": 515.0, |
|
"completions/min_terminated_length": 515.0, |
|
"epoch": 0.0007432526594509221, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 4.65e-07, |
|
"loss": -0.0106, |
|
"num_tokens": 32691.0, |
|
"reward": -2.583625078201294, |
|
"reward_std": 1.296303391456604, |
|
"rewards/chatgpt_combined_reward/mean": -2.583625078201294, |
|
"rewards/chatgpt_combined_reward/std": 8.693524360656738, |
|
"step": 8 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.5, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 543.0, |
|
"completions/mean_length": 776.5, |
|
"completions/mean_terminated_length": 529.0, |
|
"completions/min_length": 515.0, |
|
"completions/min_terminated_length": 515.0, |
|
"epoch": 0.0008361592418822874, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 9.25, |
|
"learning_rate": 4.6e-07, |
|
"loss": 0.2254, |
|
"num_tokens": 37845.0, |
|
"reward": -6.249625205993652, |
|
"reward_std": 2.9463372230529785, |
|
"rewards/chatgpt_combined_reward/mean": -6.249625205993652, |
|
"rewards/chatgpt_combined_reward/std": 2.846529960632324, |
|
"step": 9 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 537.0, |
|
"completions/max_terminated_length": 537.0, |
|
"completions/mean_length": 528.75, |
|
"completions/mean_terminated_length": 528.75, |
|
"completions/min_length": 518.0, |
|
"completions/min_terminated_length": 518.0, |
|
"epoch": 0.0009290658243136526, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 4.55e-07, |
|
"loss": 0.0017, |
|
"num_tokens": 41190.0, |
|
"reward": 0.33299994468688965, |
|
"reward_std": 8.249107360839844, |
|
"rewards/chatgpt_combined_reward/mean": 0.33299994468688965, |
|
"rewards/chatgpt_combined_reward/std": 7.683218955993652, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 554.0, |
|
"completions/mean_length": 655.75, |
|
"completions/mean_terminated_length": 533.0, |
|
"completions/min_length": 516.0, |
|
"completions/min_terminated_length": 516.0, |
|
"epoch": 0.001021972406745018, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.875, |
|
"learning_rate": 4.5e-07, |
|
"loss": 0.1097, |
|
"num_tokens": 45861.0, |
|
"reward": -8.89987564086914, |
|
"reward_std": 1.7679438591003418, |
|
"rewards/chatgpt_combined_reward/mean": -8.89987564086914, |
|
"rewards/chatgpt_combined_reward/std": 2.62703275680542, |
|
"step": 11 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 561.0, |
|
"completions/max_terminated_length": 561.0, |
|
"completions/mean_length": 529.5, |
|
"completions/mean_terminated_length": 529.5, |
|
"completions/min_length": 513.0, |
|
"completions/min_terminated_length": 513.0, |
|
"epoch": 0.001114878989176383, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.59375, |
|
"learning_rate": 4.45e-07, |
|
"loss": -0.0048, |
|
"num_tokens": 49193.0, |
|
"reward": -2.2503747940063477, |
|
"reward_std": 2.003410577774048, |
|
"rewards/chatgpt_combined_reward/mean": -2.2503747940063477, |
|
"rewards/chatgpt_combined_reward/std": 9.242679595947266, |
|
"step": 12 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 591.0, |
|
"completions/max_terminated_length": 591.0, |
|
"completions/mean_length": 558.5, |
|
"completions/mean_terminated_length": 558.5, |
|
"completions/min_length": 515.0, |
|
"completions/min_terminated_length": 515.0, |
|
"epoch": 0.0012077855716077484, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.90625, |
|
"learning_rate": 4.3999999999999997e-07, |
|
"loss": 0.0078, |
|
"num_tokens": 53475.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 13 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1008.0, |
|
"completions/max_terminated_length": 1008.0, |
|
"completions/mean_length": 680.75, |
|
"completions/mean_terminated_length": 680.75, |
|
"completions/min_length": 523.0, |
|
"completions/min_terminated_length": 523.0, |
|
"epoch": 0.0013006921540391137, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 6.9375, |
|
"learning_rate": 4.3499999999999996e-07, |
|
"loss": 0.0722, |
|
"num_tokens": 58246.0, |
|
"reward": -8.89987564086914, |
|
"reward_std": 1.7679438591003418, |
|
"rewards/chatgpt_combined_reward/mean": -8.89987564086914, |
|
"rewards/chatgpt_combined_reward/std": 2.62703275680542, |
|
"step": 14 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 567.0, |
|
"completions/max_terminated_length": 567.0, |
|
"completions/mean_length": 533.5, |
|
"completions/mean_terminated_length": 533.5, |
|
"completions/min_length": 514.0, |
|
"completions/min_terminated_length": 514.0, |
|
"epoch": 0.001393598736470479, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.53125, |
|
"learning_rate": 4.2999999999999996e-07, |
|
"loss": 0.0044, |
|
"num_tokens": 62196.0, |
|
"reward": -2.083625078201294, |
|
"reward_std": 0.5891967415809631, |
|
"rewards/chatgpt_combined_reward/mean": -2.083625078201294, |
|
"rewards/chatgpt_combined_reward/std": 9.166325569152832, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.5, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 729.0, |
|
"completions/mean_length": 842.0, |
|
"completions/mean_terminated_length": 660.0, |
|
"completions/min_length": 591.0, |
|
"completions/min_terminated_length": 591.0, |
|
"epoch": 0.0014865053189018441, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.75, |
|
"learning_rate": 4.2499999999999995e-07, |
|
"loss": -0.1542, |
|
"num_tokens": 67612.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 16 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 682.0, |
|
"completions/max_terminated_length": 682.0, |
|
"completions/mean_length": 560.0, |
|
"completions/mean_terminated_length": 560.0, |
|
"completions/min_length": 514.0, |
|
"completions/min_terminated_length": 514.0, |
|
"epoch": 0.0015794119013332094, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.375, |
|
"learning_rate": 4.1999999999999995e-07, |
|
"loss": 0.0252, |
|
"num_tokens": 71066.0, |
|
"reward": -1.6670000553131104, |
|
"reward_std": 2.3567867279052734, |
|
"rewards/chatgpt_combined_reward/mean": -1.6670000553131104, |
|
"rewards/chatgpt_combined_reward/std": 9.999555587768555, |
|
"step": 17 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 699.0, |
|
"completions/max_terminated_length": 699.0, |
|
"completions/mean_length": 607.5, |
|
"completions/mean_terminated_length": 607.5, |
|
"completions/min_length": 521.0, |
|
"completions/min_terminated_length": 521.0, |
|
"epoch": 0.0016723184837645747, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 9.125, |
|
"learning_rate": 4.1499999999999994e-07, |
|
"loss": -0.0983, |
|
"num_tokens": 75544.0, |
|
"reward": -8.89987564086914, |
|
"reward_std": 1.7679438591003418, |
|
"rewards/chatgpt_combined_reward/mean": -8.89987564086914, |
|
"rewards/chatgpt_combined_reward/std": 2.2138428688049316, |
|
"step": 18 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 754.0, |
|
"completions/max_terminated_length": 754.0, |
|
"completions/mean_length": 596.75, |
|
"completions/mean_terminated_length": 596.75, |
|
"completions/min_length": 529.0, |
|
"completions/min_terminated_length": 529.0, |
|
"epoch": 0.00176522506619594, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 4.0999999999999994e-07, |
|
"loss": 0.0, |
|
"num_tokens": 79979.0, |
|
"reward": -8.48324966430664, |
|
"reward_std": 2.357140302658081, |
|
"rewards/chatgpt_combined_reward/mean": -8.48324966430664, |
|
"rewards/chatgpt_combined_reward/std": 3.0433735847473145, |
|
"step": 19 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 593.0, |
|
"completions/max_terminated_length": 593.0, |
|
"completions/mean_length": 545.25, |
|
"completions/mean_terminated_length": 545.25, |
|
"completions/min_length": 513.0, |
|
"completions/min_terminated_length": 513.0, |
|
"epoch": 0.0018581316486273051, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 6.03125, |
|
"learning_rate": 4.05e-07, |
|
"loss": -0.0202, |
|
"num_tokens": 83912.0, |
|
"reward": -2.333625078201294, |
|
"reward_std": 1.414036750793457, |
|
"rewards/chatgpt_combined_reward/mean": -2.333625078201294, |
|
"rewards/chatgpt_combined_reward/std": 9.001688957214355, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 832.0, |
|
"completions/max_terminated_length": 832.0, |
|
"completions/mean_length": 612.0, |
|
"completions/mean_terminated_length": 612.0, |
|
"completions/min_length": 524.0, |
|
"completions/min_terminated_length": 524.0, |
|
"epoch": 0.0019510382310586705, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.5, |
|
"learning_rate": 4e-07, |
|
"loss": 0.0457, |
|
"num_tokens": 88190.0, |
|
"reward": -2.1670000553131104, |
|
"reward_std": 0.47128671407699585, |
|
"rewards/chatgpt_combined_reward/mean": -2.1670000553131104, |
|
"rewards/chatgpt_combined_reward/std": 9.061125755310059, |
|
"step": 21 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 632.0, |
|
"completions/mean_length": 709.5, |
|
"completions/mean_terminated_length": 604.6666870117188, |
|
"completions/min_length": 582.0, |
|
"completions/min_terminated_length": 582.0, |
|
"epoch": 0.002043944813490036, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.75, |
|
"learning_rate": 3.95e-07, |
|
"loss": 0.089, |
|
"num_tokens": 93076.0, |
|
"reward": -6.816500663757324, |
|
"reward_std": 0.21213209629058838, |
|
"rewards/chatgpt_combined_reward/mean": -6.816500663757324, |
|
"rewards/chatgpt_combined_reward/std": 4.199178218841553, |
|
"step": 22 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 547.0, |
|
"completions/max_terminated_length": 547.0, |
|
"completions/mean_length": 533.0, |
|
"completions/mean_terminated_length": 533.0, |
|
"completions/min_length": 517.0, |
|
"completions/min_terminated_length": 517.0, |
|
"epoch": 0.002136851395921401, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 6.6875, |
|
"learning_rate": 3.8999999999999997e-07, |
|
"loss": 0.0049, |
|
"num_tokens": 96414.0, |
|
"reward": -2.5003750324249268, |
|
"reward_std": 1.6498569250106812, |
|
"rewards/chatgpt_combined_reward/mean": -2.5003750324249268, |
|
"rewards/chatgpt_combined_reward/std": 8.866897583007812, |
|
"step": 23 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 992.0, |
|
"completions/max_terminated_length": 992.0, |
|
"completions/mean_length": 722.25, |
|
"completions/mean_terminated_length": 722.25, |
|
"completions/min_length": 521.0, |
|
"completions/min_terminated_length": 521.0, |
|
"epoch": 0.002229757978352766, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 4.3125, |
|
"learning_rate": 3.8499999999999997e-07, |
|
"loss": 0.0319, |
|
"num_tokens": 100539.0, |
|
"reward": -2.083624839782715, |
|
"reward_std": 1.7675902843475342, |
|
"rewards/chatgpt_combined_reward/mean": -2.083624839782715, |
|
"rewards/chatgpt_combined_reward/std": 9.366135597229004, |
|
"step": 24 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 716.0, |
|
"completions/max_terminated_length": 716.0, |
|
"completions/mean_length": 616.5, |
|
"completions/mean_terminated_length": 616.5, |
|
"completions/min_length": 517.0, |
|
"completions/min_terminated_length": 517.0, |
|
"epoch": 0.0023226645607841317, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 4.90625, |
|
"learning_rate": 3.7999999999999996e-07, |
|
"loss": 0.0023, |
|
"num_tokens": 104789.0, |
|
"reward": -2.333750009536743, |
|
"reward_std": 0.942926824092865, |
|
"rewards/chatgpt_combined_reward/mean": -2.333750009536743, |
|
"rewards/chatgpt_combined_reward/std": 8.91893196105957, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 801.0, |
|
"completions/max_terminated_length": 801.0, |
|
"completions/mean_length": 624.0, |
|
"completions/mean_terminated_length": 624.0, |
|
"completions/min_length": 518.0, |
|
"completions/min_terminated_length": 518.0, |
|
"epoch": 0.002415571143215497, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 3.75e-07, |
|
"loss": -0.0, |
|
"num_tokens": 109333.0, |
|
"reward": -7.916375160217285, |
|
"reward_std": 2.9466910362243652, |
|
"rewards/chatgpt_combined_reward/mean": -7.916375160217285, |
|
"rewards/chatgpt_combined_reward/std": 2.500305652618408, |
|
"step": 26 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 602.0, |
|
"completions/max_terminated_length": 602.0, |
|
"completions/mean_length": 579.5, |
|
"completions/mean_terminated_length": 579.5, |
|
"completions/min_length": 539.0, |
|
"completions/min_terminated_length": 539.0, |
|
"epoch": 0.002508477725646862, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.21875, |
|
"learning_rate": 3.7e-07, |
|
"loss": 0.015, |
|
"num_tokens": 112875.0, |
|
"reward": 2.384185791015625e-07, |
|
"reward_std": 2.3567869663238525, |
|
"rewards/chatgpt_combined_reward/mean": 2.384185791015625e-07, |
|
"rewards/chatgpt_combined_reward/std": 7.070361137390137, |
|
"step": 27 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 695.0, |
|
"completions/mean_length": 694.0, |
|
"completions/mean_terminated_length": 584.0, |
|
"completions/min_length": 516.0, |
|
"completions/min_terminated_length": 516.0, |
|
"epoch": 0.0026013843080782274, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 3.65e-07, |
|
"loss": -0.076, |
|
"num_tokens": 117699.0, |
|
"reward": -5.416375160217285, |
|
"reward_std": 2.9463372230529785, |
|
"rewards/chatgpt_combined_reward/mean": -5.416375160217285, |
|
"rewards/chatgpt_combined_reward/std": 3.4360225200653076, |
|
"step": 28 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.75, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 571.0, |
|
"completions/mean_length": 910.75, |
|
"completions/mean_terminated_length": 571.0, |
|
"completions/min_length": 571.0, |
|
"completions/min_terminated_length": 571.0, |
|
"epoch": 0.0026942908905095925, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 3.6e-07, |
|
"loss": 0.1004, |
|
"num_tokens": 123390.0, |
|
"reward": -8.48324966430664, |
|
"reward_std": 2.357140302658081, |
|
"rewards/chatgpt_combined_reward/mean": -8.48324966430664, |
|
"rewards/chatgpt_combined_reward/std": 3.0433735847473145, |
|
"step": 29 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 636.0, |
|
"completions/mean_length": 700.75, |
|
"completions/mean_terminated_length": 593.0, |
|
"completions/min_length": 517.0, |
|
"completions/min_terminated_length": 517.0, |
|
"epoch": 0.002787197472940958, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 4.5, |
|
"learning_rate": 3.55e-07, |
|
"loss": 0.0609, |
|
"num_tokens": 128241.0, |
|
"reward": -7.499750137329102, |
|
"reward_std": 1.1783934831619263, |
|
"rewards/chatgpt_combined_reward/mean": -7.499750137329102, |
|
"rewards/chatgpt_combined_reward/std": 3.191626787185669, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 733.0, |
|
"completions/max_terminated_length": 733.0, |
|
"completions/mean_length": 613.0, |
|
"completions/mean_terminated_length": 613.0, |
|
"completions/min_length": 513.0, |
|
"completions/min_terminated_length": 513.0, |
|
"epoch": 0.002880104055372323, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.21875, |
|
"learning_rate": 3.5e-07, |
|
"loss": 0.0078, |
|
"num_tokens": 132735.0, |
|
"reward": -2.5002501010894775, |
|
"reward_std": 1.1783934831619263, |
|
"rewards/chatgpt_combined_reward/mean": -2.5002501010894775, |
|
"rewards/chatgpt_combined_reward/std": 8.766212463378906, |
|
"step": 31 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 995.0, |
|
"completions/max_terminated_length": 995.0, |
|
"completions/mean_length": 712.0, |
|
"completions/mean_terminated_length": 712.0, |
|
"completions/min_length": 534.0, |
|
"completions/min_terminated_length": 534.0, |
|
"epoch": 0.0029730106378036882, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 4.46875, |
|
"learning_rate": 3.45e-07, |
|
"loss": 0.0859, |
|
"num_tokens": 137571.0, |
|
"reward": -1.7503750324249268, |
|
"reward_std": 2.4746968746185303, |
|
"rewards/chatgpt_combined_reward/mean": -1.7503750324249268, |
|
"rewards/chatgpt_combined_reward/std": 9.945212364196777, |
|
"step": 32 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 570.0, |
|
"completions/max_terminated_length": 570.0, |
|
"completions/mean_length": 531.0, |
|
"completions/mean_terminated_length": 531.0, |
|
"completions/min_length": 513.0, |
|
"completions/min_terminated_length": 513.0, |
|
"epoch": 0.0030659172202350538, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.46875, |
|
"learning_rate": 3.4000000000000003e-07, |
|
"loss": 0.0149, |
|
"num_tokens": 141743.0, |
|
"reward": -9.316499710083008, |
|
"reward_std": 1.1787471771240234, |
|
"rewards/chatgpt_combined_reward/mean": -9.316499710083008, |
|
"rewards/chatgpt_combined_reward/std": 1.3887726068496704, |
|
"step": 33 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 721.0, |
|
"completions/mean_length": 750.75, |
|
"completions/mean_terminated_length": 659.6666870117188, |
|
"completions/min_length": 605.0, |
|
"completions/min_terminated_length": 605.0, |
|
"epoch": 0.003158823802666419, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.34375, |
|
"learning_rate": 3.35e-07, |
|
"loss": -0.0175, |
|
"num_tokens": 146734.0, |
|
"reward": -2.083624839782715, |
|
"reward_std": 1.7675902843475342, |
|
"rewards/chatgpt_combined_reward/mean": -2.083624839782715, |
|
"rewards/chatgpt_combined_reward/std": 9.366135597229004, |
|
"step": 34 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 643.0, |
|
"completions/max_terminated_length": 643.0, |
|
"completions/mean_length": 552.5, |
|
"completions/mean_terminated_length": 552.5, |
|
"completions/min_length": 514.0, |
|
"completions/min_terminated_length": 514.0, |
|
"epoch": 0.0032517303850977844, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.09375, |
|
"learning_rate": 3.3e-07, |
|
"loss": -0.0381, |
|
"num_tokens": 150992.0, |
|
"reward": -6.666500091552734, |
|
"reward_std": 1.1783933639526367, |
|
"rewards/chatgpt_combined_reward/mean": -6.666500091552734, |
|
"rewards/chatgpt_combined_reward/std": 4.0826191902160645, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 738.0, |
|
"completions/max_terminated_length": 738.0, |
|
"completions/mean_length": 577.25, |
|
"completions/mean_terminated_length": 577.25, |
|
"completions/min_length": 522.0, |
|
"completions/min_terminated_length": 522.0, |
|
"epoch": 0.0033446369675291495, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 3.25e-07, |
|
"loss": -0.059, |
|
"num_tokens": 155349.0, |
|
"reward": -6.249875068664551, |
|
"reward_std": 5.3034772872924805, |
|
"rewards/chatgpt_combined_reward/mean": -6.249875068664551, |
|
"rewards/chatgpt_combined_reward/std": 4.383391380310059, |
|
"step": 36 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 801.0, |
|
"completions/max_terminated_length": 801.0, |
|
"completions/mean_length": 589.5, |
|
"completions/mean_terminated_length": 589.5, |
|
"completions/min_length": 517.0, |
|
"completions/min_terminated_length": 517.0, |
|
"epoch": 0.0034375435499605146, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.125, |
|
"learning_rate": 3.2e-07, |
|
"loss": -0.0, |
|
"num_tokens": 159755.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 37 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 713.0, |
|
"completions/max_terminated_length": 713.0, |
|
"completions/mean_length": 602.5, |
|
"completions/mean_terminated_length": 602.5, |
|
"completions/min_length": 548.0, |
|
"completions/min_terminated_length": 548.0, |
|
"epoch": 0.00353045013239188, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.25, |
|
"learning_rate": 3.15e-07, |
|
"loss": -0.0297, |
|
"num_tokens": 164213.0, |
|
"reward": -8.066625595092773, |
|
"reward_std": 2.9463369846343994, |
|
"rewards/chatgpt_combined_reward/mean": -8.066625595092773, |
|
"rewards/chatgpt_combined_reward/std": 4.283124923706055, |
|
"step": 38 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 810.0, |
|
"completions/max_terminated_length": 810.0, |
|
"completions/mean_length": 696.5, |
|
"completions/mean_terminated_length": 696.5, |
|
"completions/min_length": 536.0, |
|
"completions/min_terminated_length": 536.0, |
|
"epoch": 0.003623356714823245, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 6.875, |
|
"learning_rate": 3.1e-07, |
|
"loss": -0.0857, |
|
"num_tokens": 169047.0, |
|
"reward": -8.483250617980957, |
|
"reward_std": 2.357140302658081, |
|
"rewards/chatgpt_combined_reward/mean": -8.483250617980957, |
|
"rewards/chatgpt_combined_reward/std": 3.453827142715454, |
|
"step": 39 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 575.0, |
|
"completions/mean_length": 666.0, |
|
"completions/mean_terminated_length": 546.6666870117188, |
|
"completions/min_length": 530.0, |
|
"completions/min_terminated_length": 530.0, |
|
"epoch": 0.0037162632972546103, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.25, |
|
"learning_rate": 3.05e-07, |
|
"loss": -0.0, |
|
"num_tokens": 172953.0, |
|
"reward": -0.25024986267089844, |
|
"reward_std": 3.889087200164795, |
|
"rewards/chatgpt_combined_reward/mean": -0.25024986267089844, |
|
"rewards/chatgpt_combined_reward/std": 8.088777542114258, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 575.0, |
|
"completions/max_terminated_length": 575.0, |
|
"completions/mean_length": 537.25, |
|
"completions/mean_terminated_length": 537.25, |
|
"completions/min_length": 522.0, |
|
"completions/min_terminated_length": 522.0, |
|
"epoch": 0.003809169879685976, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 3e-07, |
|
"loss": -0.0072, |
|
"num_tokens": 176344.0, |
|
"reward": -3.666874885559082, |
|
"reward_std": 0.9427501559257507, |
|
"rewards/chatgpt_combined_reward/mean": -3.666874885559082, |
|
"rewards/chatgpt_combined_reward/std": 7.393443584442139, |
|
"step": 41 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 670.0, |
|
"completions/max_terminated_length": 670.0, |
|
"completions/mean_length": 577.0, |
|
"completions/mean_terminated_length": 577.0, |
|
"completions/min_length": 538.0, |
|
"completions/min_terminated_length": 538.0, |
|
"epoch": 0.003902076462117341, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.0, |
|
"learning_rate": 2.95e-07, |
|
"loss": -0.0, |
|
"num_tokens": 180700.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 42 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 741.0, |
|
"completions/max_terminated_length": 741.0, |
|
"completions/mean_length": 606.0, |
|
"completions/mean_terminated_length": 606.0, |
|
"completions/min_length": 522.0, |
|
"completions/min_terminated_length": 522.0, |
|
"epoch": 0.003994983044548706, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.625, |
|
"learning_rate": 2.9e-07, |
|
"loss": 0.033, |
|
"num_tokens": 185098.0, |
|
"reward": -1.9167499542236328, |
|
"reward_std": 5.06748104095459, |
|
"rewards/chatgpt_combined_reward/mean": -1.9167499542236328, |
|
"rewards/chatgpt_combined_reward/std": 5.698338031768799, |
|
"step": 43 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 575.0, |
|
"completions/max_terminated_length": 575.0, |
|
"completions/mean_length": 560.0, |
|
"completions/mean_terminated_length": 560.0, |
|
"completions/min_length": 544.0, |
|
"completions/min_terminated_length": 544.0, |
|
"epoch": 0.004087889626980072, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 2.8499999999999997e-07, |
|
"loss": 0.0152, |
|
"num_tokens": 188550.0, |
|
"reward": -1.6668751239776611, |
|
"reward_std": 1.8856770992279053, |
|
"rewards/chatgpt_combined_reward/mean": -1.6668751239776611, |
|
"rewards/chatgpt_combined_reward/std": 7.039121150970459, |
|
"step": 44 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.5, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 573.0, |
|
"completions/mean_length": 785.0, |
|
"completions/mean_terminated_length": 546.0, |
|
"completions/min_length": 519.0, |
|
"completions/min_terminated_length": 519.0, |
|
"epoch": 0.004180796209411437, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.75, |
|
"learning_rate": 2.8e-07, |
|
"loss": -0.0, |
|
"num_tokens": 193738.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 45 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 534.0, |
|
"completions/mean_length": 651.5, |
|
"completions/mean_terminated_length": 527.3333740234375, |
|
"completions/min_length": 515.0, |
|
"completions/min_terminated_length": 515.0, |
|
"epoch": 0.004273702791842802, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.6875, |
|
"learning_rate": 2.75e-07, |
|
"loss": 0.1173, |
|
"num_tokens": 198392.0, |
|
"reward": -8.066625595092773, |
|
"reward_std": 3.1584692001342773, |
|
"rewards/chatgpt_combined_reward/mean": -8.066625595092773, |
|
"rewards/chatgpt_combined_reward/std": 4.083926677703857, |
|
"step": 46 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 761.0, |
|
"completions/max_terminated_length": 761.0, |
|
"completions/mean_length": 585.5, |
|
"completions/mean_terminated_length": 585.5, |
|
"completions/min_length": 518.0, |
|
"completions/min_terminated_length": 518.0, |
|
"epoch": 0.004366609374274168, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.21875, |
|
"learning_rate": 2.7e-07, |
|
"loss": 0.0291, |
|
"num_tokens": 201948.0, |
|
"reward": -1.333749771118164, |
|
"reward_std": 0.7071069478988647, |
|
"rewards/chatgpt_combined_reward/mean": -1.333749771118164, |
|
"rewards/chatgpt_combined_reward/std": 10.040179252624512, |
|
"step": 47 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 619.0, |
|
"completions/max_terminated_length": 619.0, |
|
"completions/mean_length": 579.0, |
|
"completions/mean_terminated_length": 579.0, |
|
"completions/min_length": 526.0, |
|
"completions/min_terminated_length": 526.0, |
|
"epoch": 0.004459515956705532, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 9.25, |
|
"learning_rate": 2.65e-07, |
|
"loss": -0.0357, |
|
"num_tokens": 205824.0, |
|
"reward": 2.082624673843384, |
|
"reward_std": 2.2388768196105957, |
|
"rewards/chatgpt_combined_reward/mean": 2.082624673843384, |
|
"rewards/chatgpt_combined_reward/std": 5.983651638031006, |
|
"step": 48 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 664.0, |
|
"completions/max_terminated_length": 664.0, |
|
"completions/mean_length": 567.25, |
|
"completions/mean_terminated_length": 567.25, |
|
"completions/min_length": 524.0, |
|
"completions/min_terminated_length": 524.0, |
|
"epoch": 0.004552422539136898, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.125, |
|
"learning_rate": 2.6e-07, |
|
"loss": 0.0394, |
|
"num_tokens": 210141.0, |
|
"reward": -9.316500663757324, |
|
"reward_std": 1.3908790349960327, |
|
"rewards/chatgpt_combined_reward/mean": -9.316500663757324, |
|
"rewards/chatgpt_combined_reward/std": 1.982193112373352, |
|
"step": 49 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 739.0, |
|
"completions/max_terminated_length": 739.0, |
|
"completions/mean_length": 640.0, |
|
"completions/mean_terminated_length": 640.0, |
|
"completions/min_length": 538.0, |
|
"completions/min_terminated_length": 538.0, |
|
"epoch": 0.004645329121568263, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.75, |
|
"learning_rate": 2.55e-07, |
|
"loss": 0.0381, |
|
"num_tokens": 213923.0, |
|
"reward": -4.583374977111816, |
|
"reward_std": 2.945983409881592, |
|
"rewards/chatgpt_combined_reward/mean": -4.583374977111816, |
|
"rewards/chatgpt_combined_reward/std": 7.119798183441162, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 559.0, |
|
"completions/mean_length": 661.0, |
|
"completions/mean_terminated_length": 540.0, |
|
"completions/min_length": 530.0, |
|
"completions/min_terminated_length": 530.0, |
|
"epoch": 0.004738235703999628, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 9.125, |
|
"learning_rate": 2.5e-07, |
|
"loss": -0.0, |
|
"num_tokens": 218615.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 51 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 829.0, |
|
"completions/max_terminated_length": 829.0, |
|
"completions/mean_length": 595.25, |
|
"completions/mean_terminated_length": 595.25, |
|
"completions/min_length": 515.0, |
|
"completions/min_terminated_length": 515.0, |
|
"epoch": 0.004831142286430994, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 4.65625, |
|
"learning_rate": 2.45e-07, |
|
"loss": 0.0813, |
|
"num_tokens": 222836.0, |
|
"reward": -3.750124931335449, |
|
"reward_std": 4.124377250671387, |
|
"rewards/chatgpt_combined_reward/mean": -3.750124931335449, |
|
"rewards/chatgpt_combined_reward/std": 8.646496772766113, |
|
"step": 52 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1000.0, |
|
"completions/max_terminated_length": 1000.0, |
|
"completions/mean_length": 675.5, |
|
"completions/mean_terminated_length": 675.5, |
|
"completions/min_length": 517.0, |
|
"completions/min_terminated_length": 517.0, |
|
"epoch": 0.004924048868862359, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.53125, |
|
"learning_rate": 2.4e-07, |
|
"loss": -0.0, |
|
"num_tokens": 227586.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 53 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 697.0, |
|
"completions/max_terminated_length": 697.0, |
|
"completions/mean_length": 603.25, |
|
"completions/mean_terminated_length": 603.25, |
|
"completions/min_length": 522.0, |
|
"completions/min_terminated_length": 522.0, |
|
"epoch": 0.005016955451293724, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.25, |
|
"learning_rate": 2.3499999999999997e-07, |
|
"loss": 0.0037, |
|
"num_tokens": 231885.0, |
|
"reward": -0.25025010108947754, |
|
"reward_std": 5.067480087280273, |
|
"rewards/chatgpt_combined_reward/mean": -0.25025010108947754, |
|
"rewards/chatgpt_combined_reward/std": 8.490804672241211, |
|
"step": 54 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 601.0, |
|
"completions/mean_length": 667.75, |
|
"completions/mean_terminated_length": 549.0, |
|
"completions/min_length": 521.0, |
|
"completions/min_terminated_length": 521.0, |
|
"epoch": 0.005109862033725089, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 4.78125, |
|
"learning_rate": 2.3e-07, |
|
"loss": -0.0456, |
|
"num_tokens": 236320.0, |
|
"reward": -2.833625078201294, |
|
"reward_std": 0.7069300413131714, |
|
"rewards/chatgpt_combined_reward/mean": -2.833625078201294, |
|
"rewards/chatgpt_combined_reward/std": 8.315181732177734, |
|
"step": 55 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 650.0, |
|
"completions/max_terminated_length": 650.0, |
|
"completions/mean_length": 571.25, |
|
"completions/mean_terminated_length": 571.25, |
|
"completions/min_length": 522.0, |
|
"completions/min_terminated_length": 522.0, |
|
"epoch": 0.005202768616156455, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.40625, |
|
"learning_rate": 2.25e-07, |
|
"loss": 0.0162, |
|
"num_tokens": 239589.0, |
|
"reward": 7.665875434875488, |
|
"reward_std": 0.9427504539489746, |
|
"rewards/chatgpt_combined_reward/mean": 7.665875434875488, |
|
"rewards/chatgpt_combined_reward/std": 0.8164288401603699, |
|
"step": 56 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 585.0, |
|
"completions/max_terminated_length": 585.0, |
|
"completions/mean_length": 555.75, |
|
"completions/mean_terminated_length": 555.75, |
|
"completions/min_length": 524.0, |
|
"completions/min_terminated_length": 524.0, |
|
"epoch": 0.00529567519858782, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 2.1999999999999998e-07, |
|
"loss": 0.0021, |
|
"num_tokens": 243860.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 57 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 724.0, |
|
"completions/max_terminated_length": 724.0, |
|
"completions/mean_length": 616.0, |
|
"completions/mean_terminated_length": 616.0, |
|
"completions/min_length": 562.0, |
|
"completions/min_terminated_length": 562.0, |
|
"epoch": 0.005388581781019185, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.53125, |
|
"learning_rate": 2.1499999999999998e-07, |
|
"loss": 0.0464, |
|
"num_tokens": 248372.0, |
|
"reward": -7.499750137329102, |
|
"reward_std": 3.5358872413635254, |
|
"rewards/chatgpt_combined_reward/mean": -7.499750137329102, |
|
"rewards/chatgpt_combined_reward/std": 3.191626787185669, |
|
"step": 58 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 835.0, |
|
"completions/mean_length": 726.5, |
|
"completions/mean_terminated_length": 627.3333740234375, |
|
"completions/min_length": 517.0, |
|
"completions/min_terminated_length": 517.0, |
|
"epoch": 0.0054814883634505505, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.875, |
|
"learning_rate": 2.0999999999999997e-07, |
|
"loss": 0.0292, |
|
"num_tokens": 253326.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 59 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 665.0, |
|
"completions/max_terminated_length": 665.0, |
|
"completions/mean_length": 558.0, |
|
"completions/mean_terminated_length": 558.0, |
|
"completions/min_length": 515.0, |
|
"completions/min_terminated_length": 515.0, |
|
"epoch": 0.005574394945881916, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 10.375, |
|
"learning_rate": 2.0499999999999997e-07, |
|
"loss": 0.0376, |
|
"num_tokens": 257606.0, |
|
"reward": -7.083125114440918, |
|
"reward_std": 4.125083923339844, |
|
"rewards/chatgpt_combined_reward/mean": -7.083125114440918, |
|
"rewards/chatgpt_combined_reward/std": 3.436143636703491, |
|
"step": 60 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 692.0, |
|
"completions/max_terminated_length": 692.0, |
|
"completions/mean_length": 613.25, |
|
"completions/mean_terminated_length": 613.25, |
|
"completions/min_length": 514.0, |
|
"completions/min_terminated_length": 514.0, |
|
"epoch": 0.005667301528313281, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 9.75, |
|
"learning_rate": 2e-07, |
|
"loss": -0.0, |
|
"num_tokens": 262107.0, |
|
"reward": -5.833000183105469, |
|
"reward_std": 3.535533905029297, |
|
"rewards/chatgpt_combined_reward/mean": -5.833000183105469, |
|
"rewards/chatgpt_combined_reward/std": 3.469496726989746, |
|
"step": 61 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 572.0, |
|
"completions/mean_length": 672.0, |
|
"completions/mean_terminated_length": 554.6666870117188, |
|
"completions/min_length": 537.0, |
|
"completions/min_terminated_length": 537.0, |
|
"epoch": 0.005760208110744646, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.5625, |
|
"learning_rate": 1.9499999999999999e-07, |
|
"loss": 0.1156, |
|
"num_tokens": 266843.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 62 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 884.0, |
|
"completions/max_terminated_length": 884.0, |
|
"completions/mean_length": 666.5, |
|
"completions/mean_terminated_length": 666.5, |
|
"completions/min_length": 543.0, |
|
"completions/min_terminated_length": 543.0, |
|
"epoch": 0.005853114693176012, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 1.8999999999999998e-07, |
|
"loss": 0.0609, |
|
"num_tokens": 271557.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 63 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 667.0, |
|
"completions/mean_length": 683.0, |
|
"completions/mean_terminated_length": 569.3333740234375, |
|
"completions/min_length": 518.0, |
|
"completions/min_terminated_length": 518.0, |
|
"epoch": 0.0059460212756073764, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 1.85e-07, |
|
"loss": -0.0, |
|
"num_tokens": 276127.0, |
|
"reward": -0.0002503395080566406, |
|
"reward_std": 3.535533905029297, |
|
"rewards/chatgpt_combined_reward/mean": -0.0002503395080566406, |
|
"rewards/chatgpt_combined_reward/std": 9.128161430358887, |
|
"step": 64 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 990.0, |
|
"completions/mean_length": 838.0, |
|
"completions/mean_terminated_length": 776.0, |
|
"completions/min_length": 549.0, |
|
"completions/min_terminated_length": 549.0, |
|
"epoch": 0.006038927858038742, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 1.8e-07, |
|
"loss": 0.0668, |
|
"num_tokens": 281527.0, |
|
"reward": -9.316499710083008, |
|
"reward_std": 1.3908792734146118, |
|
"rewards/chatgpt_combined_reward/mean": -9.316499710083008, |
|
"rewards/chatgpt_combined_reward/std": 1.61105215549469, |
|
"step": 65 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.5, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 803.0, |
|
"completions/mean_length": 881.75, |
|
"completions/mean_terminated_length": 739.5, |
|
"completions/min_length": 676.0, |
|
"completions/min_terminated_length": 676.0, |
|
"epoch": 0.0061318344404701075, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.875, |
|
"learning_rate": 1.75e-07, |
|
"loss": 0.1151, |
|
"num_tokens": 287102.0, |
|
"reward": -8.89987564086914, |
|
"reward_std": 1.7679438591003418, |
|
"rewards/chatgpt_combined_reward/mean": -8.89987564086914, |
|
"rewards/chatgpt_combined_reward/std": 2.2138428688049316, |
|
"step": 66 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 665.0, |
|
"completions/max_terminated_length": 665.0, |
|
"completions/mean_length": 606.5, |
|
"completions/mean_terminated_length": 606.5, |
|
"completions/min_length": 550.0, |
|
"completions/min_terminated_length": 550.0, |
|
"epoch": 0.006224741022901472, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.28125, |
|
"learning_rate": 1.7000000000000001e-07, |
|
"loss": 0.0032, |
|
"num_tokens": 291296.0, |
|
"reward": -2.333750009536743, |
|
"reward_std": 1.4142135381698608, |
|
"rewards/chatgpt_combined_reward/mean": -2.333750009536743, |
|
"rewards/chatgpt_combined_reward/std": 9.001585006713867, |
|
"step": 67 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 658.0, |
|
"completions/max_terminated_length": 658.0, |
|
"completions/mean_length": 580.5, |
|
"completions/mean_terminated_length": 580.5, |
|
"completions/min_length": 544.0, |
|
"completions/min_terminated_length": 544.0, |
|
"epoch": 0.006317647605332838, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.625, |
|
"learning_rate": 1.65e-07, |
|
"loss": 0.026, |
|
"num_tokens": 295666.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 68 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 621.0, |
|
"completions/mean_length": 673.0, |
|
"completions/mean_terminated_length": 556.0, |
|
"completions/min_length": 515.0, |
|
"completions/min_terminated_length": 515.0, |
|
"epoch": 0.006410554187764203, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.09375, |
|
"learning_rate": 1.6e-07, |
|
"loss": 0.1442, |
|
"num_tokens": 300406.0, |
|
"reward": -7.499750137329102, |
|
"reward_std": 3.5358872413635254, |
|
"rewards/chatgpt_combined_reward/mean": -7.499750137329102, |
|
"rewards/chatgpt_combined_reward/std": 3.191626787185669, |
|
"step": 69 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 692.0, |
|
"completions/max_terminated_length": 692.0, |
|
"completions/mean_length": 593.25, |
|
"completions/mean_terminated_length": 593.25, |
|
"completions/min_length": 520.0, |
|
"completions/min_terminated_length": 520.0, |
|
"epoch": 0.006503460770195569, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 4.53125, |
|
"learning_rate": 1.55e-07, |
|
"loss": 0.031, |
|
"num_tokens": 304811.0, |
|
"reward": -0.41687512397766113, |
|
"reward_std": 1.7679438591003418, |
|
"rewards/chatgpt_combined_reward/mean": -0.41687512397766113, |
|
"rewards/chatgpt_combined_reward/std": 8.42953109741211, |
|
"step": 70 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 681.0, |
|
"completions/max_terminated_length": 681.0, |
|
"completions/mean_length": 581.25, |
|
"completions/mean_terminated_length": 581.25, |
|
"completions/min_length": 514.0, |
|
"completions/min_terminated_length": 514.0, |
|
"epoch": 0.006596367352626933, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.375, |
|
"learning_rate": 1.5e-07, |
|
"loss": 0.0708, |
|
"num_tokens": 308360.0, |
|
"reward": -2.0002501010894775, |
|
"reward_std": 5.42103385925293, |
|
"rewards/chatgpt_combined_reward/mean": -2.0002501010894775, |
|
"rewards/chatgpt_combined_reward/std": 6.996899127960205, |
|
"step": 71 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 852.0, |
|
"completions/max_terminated_length": 852.0, |
|
"completions/mean_length": 611.75, |
|
"completions/mean_terminated_length": 611.75, |
|
"completions/min_length": 522.0, |
|
"completions/min_terminated_length": 522.0, |
|
"epoch": 0.006689273935058299, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 1.45e-07, |
|
"loss": 0.0753, |
|
"num_tokens": 312855.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 72 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 683.0, |
|
"completions/max_terminated_length": 683.0, |
|
"completions/mean_length": 599.75, |
|
"completions/mean_terminated_length": 599.75, |
|
"completions/min_length": 556.0, |
|
"completions/min_terminated_length": 556.0, |
|
"epoch": 0.0067821805174896645, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.625, |
|
"learning_rate": 1.4e-07, |
|
"loss": -0.0233, |
|
"num_tokens": 317276.0, |
|
"reward": -1.2503752708435059, |
|
"reward_std": 1.7675901651382446, |
|
"rewards/chatgpt_combined_reward/mean": -1.2503752708435059, |
|
"rewards/chatgpt_combined_reward/std": 10.30729866027832, |
|
"step": 73 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 685.0, |
|
"completions/mean_length": 707.5, |
|
"completions/mean_terminated_length": 602.0, |
|
"completions/min_length": 517.0, |
|
"completions/min_terminated_length": 517.0, |
|
"epoch": 0.006875087099921029, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 3.984375, |
|
"learning_rate": 1.35e-07, |
|
"loss": 0.0693, |
|
"num_tokens": 322154.0, |
|
"reward": -7.499750137329102, |
|
"reward_std": 1.1783934831619263, |
|
"rewards/chatgpt_combined_reward/mean": -7.499750137329102, |
|
"rewards/chatgpt_combined_reward/std": 3.191626787185669, |
|
"step": 74 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 723.0, |
|
"completions/max_terminated_length": 723.0, |
|
"completions/mean_length": 623.0, |
|
"completions/mean_terminated_length": 623.0, |
|
"completions/min_length": 515.0, |
|
"completions/min_terminated_length": 515.0, |
|
"epoch": 0.006967993682352395, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.6875, |
|
"learning_rate": 1.3e-07, |
|
"loss": 0.0071, |
|
"num_tokens": 325852.0, |
|
"reward": -2.5002501010894775, |
|
"reward_std": 1.1783934831619263, |
|
"rewards/chatgpt_combined_reward/mean": -2.5002501010894775, |
|
"rewards/chatgpt_combined_reward/std": 8.766212463378906, |
|
"step": 75 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 588.0, |
|
"completions/max_terminated_length": 588.0, |
|
"completions/mean_length": 543.0, |
|
"completions/mean_terminated_length": 543.0, |
|
"completions/min_length": 515.0, |
|
"completions/min_terminated_length": 515.0, |
|
"epoch": 0.00706090026478376, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.875, |
|
"learning_rate": 1.25e-07, |
|
"loss": -0.0118, |
|
"num_tokens": 330072.0, |
|
"reward": -4.583125114440918, |
|
"reward_std": 1.7679438591003418, |
|
"rewards/chatgpt_combined_reward/mean": -4.583125114440918, |
|
"rewards/chatgpt_combined_reward/std": 4.589576721191406, |
|
"step": 76 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 540.0, |
|
"completions/mean_length": 656.0, |
|
"completions/mean_terminated_length": 533.3333740234375, |
|
"completions/min_length": 527.0, |
|
"completions/min_terminated_length": 527.0, |
|
"epoch": 0.007153806847215125, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.5, |
|
"learning_rate": 1.2e-07, |
|
"loss": 0.1072, |
|
"num_tokens": 334744.0, |
|
"reward": -7.90000057220459, |
|
"reward_std": 3.3941125869750977, |
|
"rewards/chatgpt_combined_reward/mean": -7.90000057220459, |
|
"rewards/chatgpt_combined_reward/std": 4.806246280670166, |
|
"step": 77 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 814.0, |
|
"completions/max_terminated_length": 814.0, |
|
"completions/mean_length": 618.25, |
|
"completions/mean_terminated_length": 618.25, |
|
"completions/min_length": 519.0, |
|
"completions/min_terminated_length": 519.0, |
|
"epoch": 0.00724671342964649, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 4.4375, |
|
"learning_rate": 1.15e-07, |
|
"loss": -0.0338, |
|
"num_tokens": 339217.0, |
|
"reward": -2.0003750324249268, |
|
"reward_std": 0.4714634120464325, |
|
"rewards/chatgpt_combined_reward/mean": -2.0003750324249268, |
|
"rewards/chatgpt_combined_reward/std": 9.253199577331543, |
|
"step": 78 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 664.0, |
|
"completions/max_terminated_length": 664.0, |
|
"completions/mean_length": 561.75, |
|
"completions/mean_terminated_length": 561.75, |
|
"completions/min_length": 520.0, |
|
"completions/min_terminated_length": 520.0, |
|
"epoch": 0.007339620012077856, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.6875, |
|
"learning_rate": 1.0999999999999999e-07, |
|
"loss": -0.0368, |
|
"num_tokens": 342692.0, |
|
"reward": -1.2501251697540283, |
|
"reward_std": 2.9463372230529785, |
|
"rewards/chatgpt_combined_reward/mean": -1.2501251697540283, |
|
"rewards/chatgpt_combined_reward/std": 7.622077465057373, |
|
"step": 79 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 818.0, |
|
"completions/max_terminated_length": 818.0, |
|
"completions/mean_length": 644.25, |
|
"completions/mean_terminated_length": 644.25, |
|
"completions/min_length": 532.0, |
|
"completions/min_terminated_length": 532.0, |
|
"epoch": 0.0074325265945092206, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.8125, |
|
"learning_rate": 1.0499999999999999e-07, |
|
"loss": -0.1178, |
|
"num_tokens": 346483.0, |
|
"reward": -1.5832499265670776, |
|
"reward_std": 2.23905348777771, |
|
"rewards/chatgpt_combined_reward/mean": -1.5832499265670776, |
|
"rewards/chatgpt_combined_reward/std": 5.307532787322998, |
|
"step": 80 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.5, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 518.0, |
|
"completions/mean_length": 770.5, |
|
"completions/mean_terminated_length": 517.0, |
|
"completions/min_length": 516.0, |
|
"completions/min_terminated_length": 516.0, |
|
"epoch": 0.007525433176940586, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 1e-07, |
|
"loss": -0.0007, |
|
"num_tokens": 351613.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 81 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 636.0, |
|
"completions/max_terminated_length": 636.0, |
|
"completions/mean_length": 553.5, |
|
"completions/mean_terminated_length": 553.5, |
|
"completions/min_length": 525.0, |
|
"completions/min_terminated_length": 525.0, |
|
"epoch": 0.007618339759371952, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.625, |
|
"learning_rate": 9.499999999999999e-08, |
|
"loss": 0.0164, |
|
"num_tokens": 355029.0, |
|
"reward": -2.7502501010894775, |
|
"reward_std": 3.8887336254119873, |
|
"rewards/chatgpt_combined_reward/mean": -2.7502501010894775, |
|
"rewards/chatgpt_combined_reward/std": 9.499552726745605, |
|
"step": 82 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 626.0, |
|
"completions/max_terminated_length": 626.0, |
|
"completions/mean_length": 580.75, |
|
"completions/mean_terminated_length": 580.75, |
|
"completions/min_length": 532.0, |
|
"completions/min_terminated_length": 532.0, |
|
"epoch": 0.007711246341803317, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.6875, |
|
"learning_rate": 9e-08, |
|
"loss": 0.0014, |
|
"num_tokens": 358566.0, |
|
"reward": -0.2505002021789551, |
|
"reward_std": 0.3535533845424652, |
|
"rewards/chatgpt_combined_reward/mean": -0.2505002021789551, |
|
"rewards/chatgpt_combined_reward/std": 11.265152931213379, |
|
"step": 83 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 693.0, |
|
"completions/mean_length": 705.5, |
|
"completions/mean_terminated_length": 599.3333740234375, |
|
"completions/min_length": 515.0, |
|
"completions/min_terminated_length": 515.0, |
|
"epoch": 0.007804152924234682, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 8.500000000000001e-08, |
|
"loss": -0.1453, |
|
"num_tokens": 363436.0, |
|
"reward": -3.749875068664551, |
|
"reward_std": 4.124730587005615, |
|
"rewards/chatgpt_combined_reward/mean": -3.749875068664551, |
|
"rewards/chatgpt_combined_reward/std": 4.383296489715576, |
|
"step": 84 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 890.0, |
|
"completions/max_terminated_length": 890.0, |
|
"completions/mean_length": 618.75, |
|
"completions/mean_terminated_length": 618.75, |
|
"completions/min_length": 522.0, |
|
"completions/min_terminated_length": 522.0, |
|
"epoch": 0.007897059506666047, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.125, |
|
"learning_rate": 8e-08, |
|
"loss": -0.086, |
|
"num_tokens": 367959.0, |
|
"reward": -8.89987564086914, |
|
"reward_std": 1.9800759553909302, |
|
"rewards/chatgpt_combined_reward/mean": -8.89987564086914, |
|
"rewards/chatgpt_combined_reward/std": 2.4292385578155518, |
|
"step": 85 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 519.0, |
|
"completions/mean_length": 644.25, |
|
"completions/mean_terminated_length": 517.6666870117188, |
|
"completions/min_length": 517.0, |
|
"completions/min_terminated_length": 517.0, |
|
"epoch": 0.007989966089097413, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.0625, |
|
"learning_rate": 7.5e-08, |
|
"loss": 0.1156, |
|
"num_tokens": 370960.0, |
|
"reward": 3.832624912261963, |
|
"reward_std": 3.771176815032959, |
|
"rewards/chatgpt_combined_reward/mean": 3.832624912261963, |
|
"rewards/chatgpt_combined_reward/std": 4.7957444190979, |
|
"step": 86 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 731.0, |
|
"completions/max_terminated_length": 731.0, |
|
"completions/mean_length": 585.75, |
|
"completions/mean_terminated_length": 585.75, |
|
"completions/min_length": 520.0, |
|
"completions/min_terminated_length": 520.0, |
|
"epoch": 0.008082872671528778, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.9375, |
|
"learning_rate": 7e-08, |
|
"loss": 0.0622, |
|
"num_tokens": 375351.0, |
|
"reward": -5.983250141143799, |
|
"reward_std": 5.892674446105957, |
|
"rewards/chatgpt_combined_reward/mean": -5.983250141143799, |
|
"rewards/chatgpt_combined_reward/std": 4.826911449432373, |
|
"step": 87 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1002.0, |
|
"completions/max_terminated_length": 1002.0, |
|
"completions/mean_length": 724.25, |
|
"completions/mean_terminated_length": 724.25, |
|
"completions/min_length": 562.0, |
|
"completions/min_terminated_length": 562.0, |
|
"epoch": 0.008175779253960144, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.625, |
|
"learning_rate": 6.5e-08, |
|
"loss": -0.0, |
|
"num_tokens": 380296.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 88 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 746.0, |
|
"completions/max_terminated_length": 746.0, |
|
"completions/mean_length": 592.25, |
|
"completions/mean_terminated_length": 592.25, |
|
"completions/min_length": 517.0, |
|
"completions/min_terminated_length": 517.0, |
|
"epoch": 0.008268685836391508, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.75, |
|
"learning_rate": 6e-08, |
|
"loss": -0.0, |
|
"num_tokens": 384329.0, |
|
"reward": 4.832625389099121, |
|
"reward_std": 3.771177291870117, |
|
"rewards/chatgpt_combined_reward/mean": 4.832625389099121, |
|
"rewards/chatgpt_combined_reward/std": 3.911078929901123, |
|
"step": 89 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 698.0, |
|
"completions/max_terminated_length": 698.0, |
|
"completions/mean_length": 633.0, |
|
"completions/mean_terminated_length": 633.0, |
|
"completions/min_length": 598.0, |
|
"completions/min_terminated_length": 598.0, |
|
"epoch": 0.008361592418822873, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 7.46875, |
|
"learning_rate": 5.4999999999999996e-08, |
|
"loss": -0.0157, |
|
"num_tokens": 388909.0, |
|
"reward": -5.983250141143799, |
|
"reward_std": 0.21213209629058838, |
|
"rewards/chatgpt_combined_reward/mean": -5.983250141143799, |
|
"rewards/chatgpt_combined_reward/std": 4.468296527862549, |
|
"step": 90 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 750.0, |
|
"completions/max_terminated_length": 750.0, |
|
"completions/mean_length": 584.5, |
|
"completions/mean_terminated_length": 584.5, |
|
"completions/min_length": 523.0, |
|
"completions/min_terminated_length": 523.0, |
|
"epoch": 0.008454499001254239, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 4.78125, |
|
"learning_rate": 5e-08, |
|
"loss": 0.0284, |
|
"num_tokens": 392953.0, |
|
"reward": -2.083624839782715, |
|
"reward_std": 1.7675902843475342, |
|
"rewards/chatgpt_combined_reward/mean": -2.083624839782715, |
|
"rewards/chatgpt_combined_reward/std": 9.366135597229004, |
|
"step": 91 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 686.0, |
|
"completions/mean_length": 714.25, |
|
"completions/mean_terminated_length": 611.0, |
|
"completions/min_length": 522.0, |
|
"completions/min_terminated_length": 522.0, |
|
"epoch": 0.008547405583685604, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.0, |
|
"learning_rate": 4.5e-08, |
|
"loss": 0.1335, |
|
"num_tokens": 397858.0, |
|
"reward": -9.316499710083008, |
|
"reward_std": 1.3908792734146118, |
|
"rewards/chatgpt_combined_reward/mean": -9.316499710083008, |
|
"rewards/chatgpt_combined_reward/std": 1.61105215549469, |
|
"step": 92 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 848.0, |
|
"completions/max_terminated_length": 848.0, |
|
"completions/mean_length": 737.75, |
|
"completions/mean_terminated_length": 737.75, |
|
"completions/min_length": 639.0, |
|
"completions/min_terminated_length": 639.0, |
|
"epoch": 0.00864031216611697, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 6.6875, |
|
"learning_rate": 4e-08, |
|
"loss": -0.0, |
|
"num_tokens": 402857.0, |
|
"reward": -8.15000057220459, |
|
"reward_std": 0.21213217079639435, |
|
"rewards/chatgpt_combined_reward/mean": -8.15000057220459, |
|
"rewards/chatgpt_combined_reward/std": 0.387298583984375, |
|
"step": 93 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 958.0, |
|
"completions/max_terminated_length": 958.0, |
|
"completions/mean_length": 649.0, |
|
"completions/mean_terminated_length": 649.0, |
|
"completions/min_length": 526.0, |
|
"completions/min_terminated_length": 526.0, |
|
"epoch": 0.008733218748548335, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 9.5, |
|
"learning_rate": 3.5e-08, |
|
"loss": 0.0, |
|
"num_tokens": 407501.0, |
|
"reward": -9.316499710083008, |
|
"reward_std": 1.1787470579147339, |
|
"rewards/chatgpt_combined_reward/mean": -9.316499710083008, |
|
"rewards/chatgpt_combined_reward/std": 1.8061809539794922, |
|
"step": 94 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 795.0, |
|
"completions/max_terminated_length": 795.0, |
|
"completions/mean_length": 645.0, |
|
"completions/mean_terminated_length": 645.0, |
|
"completions/min_length": 530.0, |
|
"completions/min_terminated_length": 530.0, |
|
"epoch": 0.0088261253309797, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 8.75, |
|
"learning_rate": 3e-08, |
|
"loss": -0.0538, |
|
"num_tokens": 412129.0, |
|
"reward": -2.583124876022339, |
|
"reward_std": 1.2963035106658936, |
|
"rewards/chatgpt_combined_reward/mean": -2.583124876022339, |
|
"rewards/chatgpt_combined_reward/std": 2.114532947540283, |
|
"step": 95 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 806.0, |
|
"completions/mean_length": 732.25, |
|
"completions/mean_terminated_length": 635.0, |
|
"completions/min_length": 525.0, |
|
"completions/min_terminated_length": 525.0, |
|
"epoch": 0.008919031913411065, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.0, |
|
"learning_rate": 2.5e-08, |
|
"loss": 0.0132, |
|
"num_tokens": 416252.0, |
|
"reward": -3.1670000553131104, |
|
"reward_std": 0.2354665994644165, |
|
"rewards/chatgpt_combined_reward/mean": -3.1670000553131104, |
|
"rewards/chatgpt_combined_reward/std": 7.894752502441406, |
|
"step": 96 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 772.0, |
|
"completions/max_terminated_length": 772.0, |
|
"completions/mean_length": 650.0, |
|
"completions/mean_terminated_length": 650.0, |
|
"completions/min_length": 518.0, |
|
"completions/min_terminated_length": 518.0, |
|
"epoch": 0.00901193849584243, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 4.4375, |
|
"learning_rate": 2e-08, |
|
"loss": -0.0286, |
|
"num_tokens": 420900.0, |
|
"reward": -5.916625022888184, |
|
"reward_std": 0.11791006475687027, |
|
"rewards/chatgpt_combined_reward/mean": -5.916625022888184, |
|
"rewards/chatgpt_combined_reward/std": 4.717040538787842, |
|
"step": 97 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 652.0, |
|
"completions/mean_length": 683.25, |
|
"completions/mean_terminated_length": 569.6666870117188, |
|
"completions/min_length": 516.0, |
|
"completions/min_terminated_length": 516.0, |
|
"epoch": 0.009104845078273796, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.40625, |
|
"learning_rate": 1.5e-08, |
|
"loss": 0.0434, |
|
"num_tokens": 425291.0, |
|
"reward": -0.8337502479553223, |
|
"reward_std": 1.1783933639526367, |
|
"rewards/chatgpt_combined_reward/mean": -0.8337502479553223, |
|
"rewards/chatgpt_combined_reward/std": 10.671379089355469, |
|
"step": 98 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 622.0, |
|
"completions/max_terminated_length": 622.0, |
|
"completions/mean_length": 573.5, |
|
"completions/mean_terminated_length": 573.5, |
|
"completions/min_length": 519.0, |
|
"completions/min_terminated_length": 519.0, |
|
"epoch": 0.009197751660705161, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.0625, |
|
"learning_rate": 1e-08, |
|
"loss": -0.0229, |
|
"num_tokens": 429633.0, |
|
"reward": -7.749750137329102, |
|
"reward_std": 0.8248399496078491, |
|
"rewards/chatgpt_combined_reward/mean": -7.749750137329102, |
|
"rewards/chatgpt_combined_reward/std": 2.7674262523651123, |
|
"step": 99 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 757.0, |
|
"completions/max_terminated_length": 757.0, |
|
"completions/mean_length": 595.25, |
|
"completions/mean_terminated_length": 595.25, |
|
"completions/min_length": 531.0, |
|
"completions/min_terminated_length": 531.0, |
|
"epoch": 0.009290658243136527, |
|
"frac_reward_zero_std": 0.5, |
|
"grad_norm": 5.25, |
|
"learning_rate": 5e-09, |
|
"loss": 0.0081, |
|
"num_tokens": 433854.0, |
|
"reward": -2.0003747940063477, |
|
"reward_std": 1.6498571634292603, |
|
"rewards/chatgpt_combined_reward/mean": -2.0003747940063477, |
|
"rewards/chatgpt_combined_reward/std": 9.431580543518066, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 433854, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|