{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.009290658243136527, "eval_steps": 500, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 622.0, "completions/max_terminated_length": 622.0, "completions/mean_length": 551.0, "completions/mean_terminated_length": 551.0, "completions/min_length": 515.0, "completions/min_terminated_length": 515.0, "epoch": 9.290658243136526e-05, "frac_reward_zero_std": 0.0, "grad_norm": 8.375, "learning_rate": 5e-07, "loss": 0.0, "num_tokens": 4252.0, "reward": -8.15000057220459, "reward_std": 3.0405590534210205, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 4.306971073150635, "step": 1 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 559.0, "completions/max_terminated_length": 559.0, "completions/mean_length": 538.5, "completions/mean_terminated_length": 538.5, "completions/min_length": 513.0, "completions/min_terminated_length": 513.0, "epoch": 0.00018581316486273051, "frac_reward_zero_std": 0.5, "grad_norm": 4.875, "learning_rate": 4.95e-07, "loss": -0.0058, "num_tokens": 8340.0, "reward": -3.500124931335449, "reward_std": 1.414036750793457, "rewards/chatgpt_combined_reward/mean": -3.500124931335449, "rewards/chatgpt_combined_reward/std": 7.6809611320495605, "step": 2 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 730.0, "completions/max_terminated_length": 730.0, "completions/mean_length": 598.75, "completions/mean_terminated_length": 598.75, "completions/min_length": 523.0, "completions/min_terminated_length": 523.0, "epoch": 0.00027871974729409577, "frac_reward_zero_std": 0.5, "grad_norm": 5.4375, "learning_rate": 4.9e-07, "loss": -0.0225, "num_tokens": 12655.0, "reward": -1.5003752708435059, "reward_std": 1.414036750793457, "rewards/chatgpt_combined_reward/mean": -1.5003752708435059, "rewards/chatgpt_combined_reward/std": 9.949413299560547, "step": 3 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1024.0, "completions/max_terminated_length": 648.0, "completions/mean_length": 803.5, "completions/mean_terminated_length": 583.0, "completions/min_length": 518.0, "completions/min_terminated_length": 518.0, "epoch": 0.00037162632972546103, "frac_reward_zero_std": 0.0, "grad_norm": 8.9375, "learning_rate": 4.85e-07, "loss": -0.0365, "num_tokens": 17917.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 4 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 674.0, "completions/max_terminated_length": 674.0, "completions/mean_length": 566.25, "completions/mean_terminated_length": 566.25, "completions/min_length": 516.0, "completions/min_terminated_length": 516.0, "epoch": 0.0004645329121568263, "frac_reward_zero_std": 0.5, "grad_norm": 5.34375, "learning_rate": 4.8e-07, "loss": -0.0466, "num_tokens": 21406.0, "reward": -1.7503750324249268, "reward_std": 2.4746968746185303, "rewards/chatgpt_combined_reward/mean": -1.7503750324249268, "rewards/chatgpt_combined_reward/std": 9.945212364196777, "step": 5 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 778.0, "completions/max_terminated_length": 778.0, "completions/mean_length": 599.0, "completions/mean_terminated_length": 599.0, "completions/min_length": 513.0, "completions/min_terminated_length": 513.0, "epoch": 0.0005574394945881915, "frac_reward_zero_std": 0.0, "grad_norm": 10.0625, "learning_rate": 4.7499999999999995e-07, "loss": -0.0, "num_tokens": 25850.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 6 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 600.0, "completions/max_terminated_length": 600.0, "completions/mean_length": 550.5, "completions/mean_terminated_length": 550.5, "completions/min_length": 527.0, "completions/min_terminated_length": 527.0, "epoch": 0.0006503460770195569, "frac_reward_zero_std": 0.5, "grad_norm": 5.5625, "learning_rate": 4.6999999999999995e-07, "loss": -0.009, "num_tokens": 29262.0, "reward": -2.9171252250671387, "reward_std": 4.124730587005615, "rewards/chatgpt_combined_reward/mean": -2.9171252250671387, "rewards/chatgpt_combined_reward/std": 9.46435546875, "step": 7 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 607.0, "completions/max_terminated_length": 607.0, "completions/mean_length": 544.25, "completions/mean_terminated_length": 544.25, "completions/min_length": 515.0, "completions/min_terminated_length": 515.0, "epoch": 0.0007432526594509221, "frac_reward_zero_std": 0.5, "grad_norm": 5.4375, "learning_rate": 4.65e-07, "loss": -0.0106, "num_tokens": 32691.0, "reward": -2.583625078201294, "reward_std": 1.296303391456604, "rewards/chatgpt_combined_reward/mean": -2.583625078201294, "rewards/chatgpt_combined_reward/std": 8.693524360656738, "step": 8 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1024.0, "completions/max_terminated_length": 543.0, "completions/mean_length": 776.5, "completions/mean_terminated_length": 529.0, "completions/min_length": 515.0, "completions/min_terminated_length": 515.0, "epoch": 0.0008361592418822874, "frac_reward_zero_std": 0.0, "grad_norm": 9.25, "learning_rate": 4.6e-07, "loss": 0.2254, "num_tokens": 37845.0, "reward": -6.249625205993652, "reward_std": 2.9463372230529785, "rewards/chatgpt_combined_reward/mean": -6.249625205993652, "rewards/chatgpt_combined_reward/std": 2.846529960632324, "step": 9 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 537.0, "completions/max_terminated_length": 537.0, "completions/mean_length": 528.75, "completions/mean_terminated_length": 528.75, "completions/min_length": 518.0, "completions/min_terminated_length": 518.0, "epoch": 0.0009290658243136526, "frac_reward_zero_std": 0.0, "grad_norm": 8.1875, "learning_rate": 4.55e-07, "loss": 0.0017, "num_tokens": 41190.0, "reward": 0.33299994468688965, "reward_std": 8.249107360839844, "rewards/chatgpt_combined_reward/mean": 0.33299994468688965, "rewards/chatgpt_combined_reward/std": 7.683218955993652, "step": 10 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 554.0, "completions/mean_length": 655.75, "completions/mean_terminated_length": 533.0, "completions/min_length": 516.0, "completions/min_terminated_length": 516.0, "epoch": 0.001021972406745018, "frac_reward_zero_std": 0.0, "grad_norm": 8.875, "learning_rate": 4.5e-07, "loss": 0.1097, "num_tokens": 45861.0, "reward": -8.89987564086914, "reward_std": 1.7679438591003418, "rewards/chatgpt_combined_reward/mean": -8.89987564086914, "rewards/chatgpt_combined_reward/std": 2.62703275680542, "step": 11 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 561.0, "completions/max_terminated_length": 561.0, "completions/mean_length": 529.5, "completions/mean_terminated_length": 529.5, "completions/min_length": 513.0, "completions/min_terminated_length": 513.0, "epoch": 0.001114878989176383, "frac_reward_zero_std": 0.5, "grad_norm": 5.59375, "learning_rate": 4.45e-07, "loss": -0.0048, "num_tokens": 49193.0, "reward": -2.2503747940063477, "reward_std": 2.003410577774048, "rewards/chatgpt_combined_reward/mean": -2.2503747940063477, "rewards/chatgpt_combined_reward/std": 9.242679595947266, "step": 12 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 591.0, "completions/max_terminated_length": 591.0, "completions/mean_length": 558.5, "completions/mean_terminated_length": 558.5, "completions/min_length": 515.0, "completions/min_terminated_length": 515.0, "epoch": 0.0012077855716077484, "frac_reward_zero_std": 0.0, "grad_norm": 7.90625, "learning_rate": 4.3999999999999997e-07, "loss": 0.0078, "num_tokens": 53475.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 13 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1008.0, "completions/max_terminated_length": 1008.0, "completions/mean_length": 680.75, "completions/mean_terminated_length": 680.75, "completions/min_length": 523.0, "completions/min_terminated_length": 523.0, "epoch": 0.0013006921540391137, "frac_reward_zero_std": 0.0, "grad_norm": 6.9375, "learning_rate": 4.3499999999999996e-07, "loss": 0.0722, "num_tokens": 58246.0, "reward": -8.89987564086914, "reward_std": 1.7679438591003418, "rewards/chatgpt_combined_reward/mean": -8.89987564086914, "rewards/chatgpt_combined_reward/std": 2.62703275680542, "step": 14 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 567.0, "completions/max_terminated_length": 567.0, "completions/mean_length": 533.5, "completions/mean_terminated_length": 533.5, "completions/min_length": 514.0, "completions/min_terminated_length": 514.0, "epoch": 0.001393598736470479, "frac_reward_zero_std": 0.5, "grad_norm": 5.53125, "learning_rate": 4.2999999999999996e-07, "loss": 0.0044, "num_tokens": 62196.0, "reward": -2.083625078201294, "reward_std": 0.5891967415809631, "rewards/chatgpt_combined_reward/mean": -2.083625078201294, "rewards/chatgpt_combined_reward/std": 9.166325569152832, "step": 15 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1024.0, "completions/max_terminated_length": 729.0, "completions/mean_length": 842.0, "completions/mean_terminated_length": 660.0, "completions/min_length": 591.0, "completions/min_terminated_length": 591.0, "epoch": 0.0014865053189018441, "frac_reward_zero_std": 0.0, "grad_norm": 8.75, "learning_rate": 4.2499999999999995e-07, "loss": -0.1542, "num_tokens": 67612.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 16 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 682.0, "completions/max_terminated_length": 682.0, "completions/mean_length": 560.0, "completions/mean_terminated_length": 560.0, "completions/min_length": 514.0, "completions/min_terminated_length": 514.0, "epoch": 0.0015794119013332094, "frac_reward_zero_std": 0.5, "grad_norm": 5.375, "learning_rate": 4.1999999999999995e-07, "loss": 0.0252, "num_tokens": 71066.0, "reward": -1.6670000553131104, "reward_std": 2.3567867279052734, "rewards/chatgpt_combined_reward/mean": -1.6670000553131104, "rewards/chatgpt_combined_reward/std": 9.999555587768555, "step": 17 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 699.0, "completions/max_terminated_length": 699.0, "completions/mean_length": 607.5, "completions/mean_terminated_length": 607.5, "completions/min_length": 521.0, "completions/min_terminated_length": 521.0, "epoch": 0.0016723184837645747, "frac_reward_zero_std": 0.0, "grad_norm": 9.125, "learning_rate": 4.1499999999999994e-07, "loss": -0.0983, "num_tokens": 75544.0, "reward": -8.89987564086914, "reward_std": 1.7679438591003418, "rewards/chatgpt_combined_reward/mean": -8.89987564086914, "rewards/chatgpt_combined_reward/std": 2.2138428688049316, "step": 18 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 754.0, "completions/max_terminated_length": 754.0, "completions/mean_length": 596.75, "completions/mean_terminated_length": 596.75, "completions/min_length": 529.0, "completions/min_terminated_length": 529.0, "epoch": 0.00176522506619594, "frac_reward_zero_std": 0.0, "grad_norm": 8.8125, "learning_rate": 4.0999999999999994e-07, "loss": 0.0, "num_tokens": 79979.0, "reward": -8.48324966430664, "reward_std": 2.357140302658081, "rewards/chatgpt_combined_reward/mean": -8.48324966430664, "rewards/chatgpt_combined_reward/std": 3.0433735847473145, "step": 19 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 593.0, "completions/max_terminated_length": 593.0, "completions/mean_length": 545.25, "completions/mean_terminated_length": 545.25, "completions/min_length": 513.0, "completions/min_terminated_length": 513.0, "epoch": 0.0018581316486273051, "frac_reward_zero_std": 0.5, "grad_norm": 6.03125, "learning_rate": 4.05e-07, "loss": -0.0202, "num_tokens": 83912.0, "reward": -2.333625078201294, "reward_std": 1.414036750793457, "rewards/chatgpt_combined_reward/mean": -2.333625078201294, "rewards/chatgpt_combined_reward/std": 9.001688957214355, "step": 20 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 832.0, "completions/max_terminated_length": 832.0, "completions/mean_length": 612.0, "completions/mean_terminated_length": 612.0, "completions/min_length": 524.0, "completions/min_terminated_length": 524.0, "epoch": 0.0019510382310586705, "frac_reward_zero_std": 0.5, "grad_norm": 5.5, "learning_rate": 4e-07, "loss": 0.0457, "num_tokens": 88190.0, "reward": -2.1670000553131104, "reward_std": 0.47128671407699585, "rewards/chatgpt_combined_reward/mean": -2.1670000553131104, "rewards/chatgpt_combined_reward/std": 9.061125755310059, "step": 21 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 632.0, "completions/mean_length": 709.5, "completions/mean_terminated_length": 604.6666870117188, "completions/min_length": 582.0, "completions/min_terminated_length": 582.0, "epoch": 0.002043944813490036, "frac_reward_zero_std": 0.0, "grad_norm": 7.75, "learning_rate": 3.95e-07, "loss": 0.089, "num_tokens": 93076.0, "reward": -6.816500663757324, "reward_std": 0.21213209629058838, "rewards/chatgpt_combined_reward/mean": -6.816500663757324, "rewards/chatgpt_combined_reward/std": 4.199178218841553, "step": 22 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 547.0, "completions/max_terminated_length": 547.0, "completions/mean_length": 533.0, "completions/mean_terminated_length": 533.0, "completions/min_length": 517.0, "completions/min_terminated_length": 517.0, "epoch": 0.002136851395921401, "frac_reward_zero_std": 0.5, "grad_norm": 6.6875, "learning_rate": 3.8999999999999997e-07, "loss": 0.0049, "num_tokens": 96414.0, "reward": -2.5003750324249268, "reward_std": 1.6498569250106812, "rewards/chatgpt_combined_reward/mean": -2.5003750324249268, "rewards/chatgpt_combined_reward/std": 8.866897583007812, "step": 23 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 992.0, "completions/max_terminated_length": 992.0, "completions/mean_length": 722.25, "completions/mean_terminated_length": 722.25, "completions/min_length": 521.0, "completions/min_terminated_length": 521.0, "epoch": 0.002229757978352766, "frac_reward_zero_std": 0.5, "grad_norm": 4.3125, "learning_rate": 3.8499999999999997e-07, "loss": 0.0319, "num_tokens": 100539.0, "reward": -2.083624839782715, "reward_std": 1.7675902843475342, "rewards/chatgpt_combined_reward/mean": -2.083624839782715, "rewards/chatgpt_combined_reward/std": 9.366135597229004, "step": 24 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 716.0, "completions/max_terminated_length": 716.0, "completions/mean_length": 616.5, "completions/mean_terminated_length": 616.5, "completions/min_length": 517.0, "completions/min_terminated_length": 517.0, "epoch": 0.0023226645607841317, "frac_reward_zero_std": 0.5, "grad_norm": 4.90625, "learning_rate": 3.7999999999999996e-07, "loss": 0.0023, "num_tokens": 104789.0, "reward": -2.333750009536743, "reward_std": 0.942926824092865, "rewards/chatgpt_combined_reward/mean": -2.333750009536743, "rewards/chatgpt_combined_reward/std": 8.91893196105957, "step": 25 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 801.0, "completions/max_terminated_length": 801.0, "completions/mean_length": 624.0, "completions/mean_terminated_length": 624.0, "completions/min_length": 518.0, "completions/min_terminated_length": 518.0, "epoch": 0.002415571143215497, "frac_reward_zero_std": 0.0, "grad_norm": 8.0625, "learning_rate": 3.75e-07, "loss": -0.0, "num_tokens": 109333.0, "reward": -7.916375160217285, "reward_std": 2.9466910362243652, "rewards/chatgpt_combined_reward/mean": -7.916375160217285, "rewards/chatgpt_combined_reward/std": 2.500305652618408, "step": 26 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 602.0, "completions/max_terminated_length": 602.0, "completions/mean_length": 579.5, "completions/mean_terminated_length": 579.5, "completions/min_length": 539.0, "completions/min_terminated_length": 539.0, "epoch": 0.002508477725646862, "frac_reward_zero_std": 0.0, "grad_norm": 7.21875, "learning_rate": 3.7e-07, "loss": 0.015, "num_tokens": 112875.0, "reward": 2.384185791015625e-07, "reward_std": 2.3567869663238525, "rewards/chatgpt_combined_reward/mean": 2.384185791015625e-07, "rewards/chatgpt_combined_reward/std": 7.070361137390137, "step": 27 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 695.0, "completions/mean_length": 694.0, "completions/mean_terminated_length": 584.0, "completions/min_length": 516.0, "completions/min_terminated_length": 516.0, "epoch": 0.0026013843080782274, "frac_reward_zero_std": 0.0, "grad_norm": 8.5625, "learning_rate": 3.65e-07, "loss": -0.076, "num_tokens": 117699.0, "reward": -5.416375160217285, "reward_std": 2.9463372230529785, "rewards/chatgpt_combined_reward/mean": -5.416375160217285, "rewards/chatgpt_combined_reward/std": 3.4360225200653076, "step": 28 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1024.0, "completions/max_terminated_length": 571.0, "completions/mean_length": 910.75, "completions/mean_terminated_length": 571.0, "completions/min_length": 571.0, "completions/min_terminated_length": 571.0, "epoch": 0.0026942908905095925, "frac_reward_zero_std": 0.0, "grad_norm": 8.4375, "learning_rate": 3.6e-07, "loss": 0.1004, "num_tokens": 123390.0, "reward": -8.48324966430664, "reward_std": 2.357140302658081, "rewards/chatgpt_combined_reward/mean": -8.48324966430664, "rewards/chatgpt_combined_reward/std": 3.0433735847473145, "step": 29 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 636.0, "completions/mean_length": 700.75, "completions/mean_terminated_length": 593.0, "completions/min_length": 517.0, "completions/min_terminated_length": 517.0, "epoch": 0.002787197472940958, "frac_reward_zero_std": 0.5, "grad_norm": 4.5, "learning_rate": 3.55e-07, "loss": 0.0609, "num_tokens": 128241.0, "reward": -7.499750137329102, "reward_std": 1.1783934831619263, "rewards/chatgpt_combined_reward/mean": -7.499750137329102, "rewards/chatgpt_combined_reward/std": 3.191626787185669, "step": 30 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 733.0, "completions/max_terminated_length": 733.0, "completions/mean_length": 613.0, "completions/mean_terminated_length": 613.0, "completions/min_length": 513.0, "completions/min_terminated_length": 513.0, "epoch": 0.002880104055372323, "frac_reward_zero_std": 0.5, "grad_norm": 5.21875, "learning_rate": 3.5e-07, "loss": 0.0078, "num_tokens": 132735.0, "reward": -2.5002501010894775, "reward_std": 1.1783934831619263, "rewards/chatgpt_combined_reward/mean": -2.5002501010894775, "rewards/chatgpt_combined_reward/std": 8.766212463378906, "step": 31 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 995.0, "completions/max_terminated_length": 995.0, "completions/mean_length": 712.0, "completions/mean_terminated_length": 712.0, "completions/min_length": 534.0, "completions/min_terminated_length": 534.0, "epoch": 0.0029730106378036882, "frac_reward_zero_std": 0.5, "grad_norm": 4.46875, "learning_rate": 3.45e-07, "loss": 0.0859, "num_tokens": 137571.0, "reward": -1.7503750324249268, "reward_std": 2.4746968746185303, "rewards/chatgpt_combined_reward/mean": -1.7503750324249268, "rewards/chatgpt_combined_reward/std": 9.945212364196777, "step": 32 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 570.0, "completions/max_terminated_length": 570.0, "completions/mean_length": 531.0, "completions/mean_terminated_length": 531.0, "completions/min_length": 513.0, "completions/min_terminated_length": 513.0, "epoch": 0.0030659172202350538, "frac_reward_zero_std": 0.0, "grad_norm": 7.46875, "learning_rate": 3.4000000000000003e-07, "loss": 0.0149, "num_tokens": 141743.0, "reward": -9.316499710083008, "reward_std": 1.1787471771240234, "rewards/chatgpt_combined_reward/mean": -9.316499710083008, "rewards/chatgpt_combined_reward/std": 1.3887726068496704, "step": 33 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 721.0, "completions/mean_length": 750.75, "completions/mean_terminated_length": 659.6666870117188, "completions/min_length": 605.0, "completions/min_terminated_length": 605.0, "epoch": 0.003158823802666419, "frac_reward_zero_std": 0.5, "grad_norm": 5.34375, "learning_rate": 3.35e-07, "loss": -0.0175, "num_tokens": 146734.0, "reward": -2.083624839782715, "reward_std": 1.7675902843475342, "rewards/chatgpt_combined_reward/mean": -2.083624839782715, "rewards/chatgpt_combined_reward/std": 9.366135597229004, "step": 34 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 643.0, "completions/max_terminated_length": 643.0, "completions/mean_length": 552.5, "completions/mean_terminated_length": 552.5, "completions/min_length": 514.0, "completions/min_terminated_length": 514.0, "epoch": 0.0032517303850977844, "frac_reward_zero_std": 0.5, "grad_norm": 5.09375, "learning_rate": 3.3e-07, "loss": -0.0381, "num_tokens": 150992.0, "reward": -6.666500091552734, "reward_std": 1.1783933639526367, "rewards/chatgpt_combined_reward/mean": -6.666500091552734, "rewards/chatgpt_combined_reward/std": 4.0826191902160645, "step": 35 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 738.0, "completions/max_terminated_length": 738.0, "completions/mean_length": 577.25, "completions/mean_terminated_length": 577.25, "completions/min_length": 522.0, "completions/min_terminated_length": 522.0, "epoch": 0.0033446369675291495, "frac_reward_zero_std": 0.0, "grad_norm": 8.5625, "learning_rate": 3.25e-07, "loss": -0.059, "num_tokens": 155349.0, "reward": -6.249875068664551, "reward_std": 5.3034772872924805, "rewards/chatgpt_combined_reward/mean": -6.249875068664551, "rewards/chatgpt_combined_reward/std": 4.383391380310059, "step": 36 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 801.0, "completions/max_terminated_length": 801.0, "completions/mean_length": 589.5, "completions/mean_terminated_length": 589.5, "completions/min_length": 517.0, "completions/min_terminated_length": 517.0, "epoch": 0.0034375435499605146, "frac_reward_zero_std": 0.0, "grad_norm": 7.125, "learning_rate": 3.2e-07, "loss": -0.0, "num_tokens": 159755.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 37 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 713.0, "completions/max_terminated_length": 713.0, "completions/mean_length": 602.5, "completions/mean_terminated_length": 602.5, "completions/min_length": 548.0, "completions/min_terminated_length": 548.0, "epoch": 0.00353045013239188, "frac_reward_zero_std": 0.0, "grad_norm": 8.25, "learning_rate": 3.15e-07, "loss": -0.0297, "num_tokens": 164213.0, "reward": -8.066625595092773, "reward_std": 2.9463369846343994, "rewards/chatgpt_combined_reward/mean": -8.066625595092773, "rewards/chatgpt_combined_reward/std": 4.283124923706055, "step": 38 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 810.0, "completions/max_terminated_length": 810.0, "completions/mean_length": 696.5, "completions/mean_terminated_length": 696.5, "completions/min_length": 536.0, "completions/min_terminated_length": 536.0, "epoch": 0.003623356714823245, "frac_reward_zero_std": 0.0, "grad_norm": 6.875, "learning_rate": 3.1e-07, "loss": -0.0857, "num_tokens": 169047.0, "reward": -8.483250617980957, "reward_std": 2.357140302658081, "rewards/chatgpt_combined_reward/mean": -8.483250617980957, "rewards/chatgpt_combined_reward/std": 3.453827142715454, "step": 39 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 575.0, "completions/mean_length": 666.0, "completions/mean_terminated_length": 546.6666870117188, "completions/min_length": 530.0, "completions/min_terminated_length": 530.0, "epoch": 0.0037162632972546103, "frac_reward_zero_std": 0.0, "grad_norm": 8.25, "learning_rate": 3.05e-07, "loss": -0.0, "num_tokens": 172953.0, "reward": -0.25024986267089844, "reward_std": 3.889087200164795, "rewards/chatgpt_combined_reward/mean": -0.25024986267089844, "rewards/chatgpt_combined_reward/std": 8.088777542114258, "step": 40 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 575.0, "completions/max_terminated_length": 575.0, "completions/mean_length": 537.25, "completions/mean_terminated_length": 537.25, "completions/min_length": 522.0, "completions/min_terminated_length": 522.0, "epoch": 0.003809169879685976, "frac_reward_zero_std": 0.5, "grad_norm": 5.71875, "learning_rate": 3e-07, "loss": -0.0072, "num_tokens": 176344.0, "reward": -3.666874885559082, "reward_std": 0.9427501559257507, "rewards/chatgpt_combined_reward/mean": -3.666874885559082, "rewards/chatgpt_combined_reward/std": 7.393443584442139, "step": 41 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 670.0, "completions/max_terminated_length": 670.0, "completions/mean_length": 577.0, "completions/mean_terminated_length": 577.0, "completions/min_length": 538.0, "completions/min_terminated_length": 538.0, "epoch": 0.003902076462117341, "frac_reward_zero_std": 0.0, "grad_norm": 8.0, "learning_rate": 2.95e-07, "loss": -0.0, "num_tokens": 180700.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 42 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 741.0, "completions/max_terminated_length": 741.0, "completions/mean_length": 606.0, "completions/mean_terminated_length": 606.0, "completions/min_length": 522.0, "completions/min_terminated_length": 522.0, "epoch": 0.003994983044548706, "frac_reward_zero_std": 0.0, "grad_norm": 7.625, "learning_rate": 2.9e-07, "loss": 0.033, "num_tokens": 185098.0, "reward": -1.9167499542236328, "reward_std": 5.06748104095459, "rewards/chatgpt_combined_reward/mean": -1.9167499542236328, "rewards/chatgpt_combined_reward/std": 5.698338031768799, "step": 43 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 575.0, "completions/max_terminated_length": 575.0, "completions/mean_length": 560.0, "completions/mean_terminated_length": 560.0, "completions/min_length": 544.0, "completions/min_terminated_length": 544.0, "epoch": 0.004087889626980072, "frac_reward_zero_std": 0.0, "grad_norm": 8.4375, "learning_rate": 2.8499999999999997e-07, "loss": 0.0152, "num_tokens": 188550.0, "reward": -1.6668751239776611, "reward_std": 1.8856770992279053, "rewards/chatgpt_combined_reward/mean": -1.6668751239776611, "rewards/chatgpt_combined_reward/std": 7.039121150970459, "step": 44 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1024.0, "completions/max_terminated_length": 573.0, "completions/mean_length": 785.0, "completions/mean_terminated_length": 546.0, "completions/min_length": 519.0, "completions/min_terminated_length": 519.0, "epoch": 0.004180796209411437, "frac_reward_zero_std": 0.0, "grad_norm": 8.75, "learning_rate": 2.8e-07, "loss": -0.0, "num_tokens": 193738.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 45 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 534.0, "completions/mean_length": 651.5, "completions/mean_terminated_length": 527.3333740234375, "completions/min_length": 515.0, "completions/min_terminated_length": 515.0, "epoch": 0.004273702791842802, "frac_reward_zero_std": 0.0, "grad_norm": 7.6875, "learning_rate": 2.75e-07, "loss": 0.1173, "num_tokens": 198392.0, "reward": -8.066625595092773, "reward_std": 3.1584692001342773, "rewards/chatgpt_combined_reward/mean": -8.066625595092773, "rewards/chatgpt_combined_reward/std": 4.083926677703857, "step": 46 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 761.0, "completions/max_terminated_length": 761.0, "completions/mean_length": 585.5, "completions/mean_terminated_length": 585.5, "completions/min_length": 518.0, "completions/min_terminated_length": 518.0, "epoch": 0.004366609374274168, "frac_reward_zero_std": 0.5, "grad_norm": 5.21875, "learning_rate": 2.7e-07, "loss": 0.0291, "num_tokens": 201948.0, "reward": -1.333749771118164, "reward_std": 0.7071069478988647, "rewards/chatgpt_combined_reward/mean": -1.333749771118164, "rewards/chatgpt_combined_reward/std": 10.040179252624512, "step": 47 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 619.0, "completions/max_terminated_length": 619.0, "completions/mean_length": 579.0, "completions/mean_terminated_length": 579.0, "completions/min_length": 526.0, "completions/min_terminated_length": 526.0, "epoch": 0.004459515956705532, "frac_reward_zero_std": 0.0, "grad_norm": 9.25, "learning_rate": 2.65e-07, "loss": -0.0357, "num_tokens": 205824.0, "reward": 2.082624673843384, "reward_std": 2.2388768196105957, "rewards/chatgpt_combined_reward/mean": 2.082624673843384, "rewards/chatgpt_combined_reward/std": 5.983651638031006, "step": 48 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 664.0, "completions/max_terminated_length": 664.0, "completions/mean_length": 567.25, "completions/mean_terminated_length": 567.25, "completions/min_length": 524.0, "completions/min_terminated_length": 524.0, "epoch": 0.004552422539136898, "frac_reward_zero_std": 0.0, "grad_norm": 8.125, "learning_rate": 2.6e-07, "loss": 0.0394, "num_tokens": 210141.0, "reward": -9.316500663757324, "reward_std": 1.3908790349960327, "rewards/chatgpt_combined_reward/mean": -9.316500663757324, "rewards/chatgpt_combined_reward/std": 1.982193112373352, "step": 49 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 739.0, "completions/max_terminated_length": 739.0, "completions/mean_length": 640.0, "completions/mean_terminated_length": 640.0, "completions/min_length": 538.0, "completions/min_terminated_length": 538.0, "epoch": 0.004645329121568263, "frac_reward_zero_std": 0.5, "grad_norm": 5.75, "learning_rate": 2.55e-07, "loss": 0.0381, "num_tokens": 213923.0, "reward": -4.583374977111816, "reward_std": 2.945983409881592, "rewards/chatgpt_combined_reward/mean": -4.583374977111816, "rewards/chatgpt_combined_reward/std": 7.119798183441162, "step": 50 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 559.0, "completions/mean_length": 661.0, "completions/mean_terminated_length": 540.0, "completions/min_length": 530.0, "completions/min_terminated_length": 530.0, "epoch": 0.004738235703999628, "frac_reward_zero_std": 0.0, "grad_norm": 9.125, "learning_rate": 2.5e-07, "loss": -0.0, "num_tokens": 218615.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 51 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 829.0, "completions/max_terminated_length": 829.0, "completions/mean_length": 595.25, "completions/mean_terminated_length": 595.25, "completions/min_length": 515.0, "completions/min_terminated_length": 515.0, "epoch": 0.004831142286430994, "frac_reward_zero_std": 0.5, "grad_norm": 4.65625, "learning_rate": 2.45e-07, "loss": 0.0813, "num_tokens": 222836.0, "reward": -3.750124931335449, "reward_std": 4.124377250671387, "rewards/chatgpt_combined_reward/mean": -3.750124931335449, "rewards/chatgpt_combined_reward/std": 8.646496772766113, "step": 52 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1000.0, "completions/max_terminated_length": 1000.0, "completions/mean_length": 675.5, "completions/mean_terminated_length": 675.5, "completions/min_length": 517.0, "completions/min_terminated_length": 517.0, "epoch": 0.004924048868862359, "frac_reward_zero_std": 0.0, "grad_norm": 7.53125, "learning_rate": 2.4e-07, "loss": -0.0, "num_tokens": 227586.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 53 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 697.0, "completions/max_terminated_length": 697.0, "completions/mean_length": 603.25, "completions/mean_terminated_length": 603.25, "completions/min_length": 522.0, "completions/min_terminated_length": 522.0, "epoch": 0.005016955451293724, "frac_reward_zero_std": 0.0, "grad_norm": 8.25, "learning_rate": 2.3499999999999997e-07, "loss": 0.0037, "num_tokens": 231885.0, "reward": -0.25025010108947754, "reward_std": 5.067480087280273, "rewards/chatgpt_combined_reward/mean": -0.25025010108947754, "rewards/chatgpt_combined_reward/std": 8.490804672241211, "step": 54 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 601.0, "completions/mean_length": 667.75, "completions/mean_terminated_length": 549.0, "completions/min_length": 521.0, "completions/min_terminated_length": 521.0, "epoch": 0.005109862033725089, "frac_reward_zero_std": 0.5, "grad_norm": 4.78125, "learning_rate": 2.3e-07, "loss": -0.0456, "num_tokens": 236320.0, "reward": -2.833625078201294, "reward_std": 0.7069300413131714, "rewards/chatgpt_combined_reward/mean": -2.833625078201294, "rewards/chatgpt_combined_reward/std": 8.315181732177734, "step": 55 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 650.0, "completions/max_terminated_length": 650.0, "completions/mean_length": 571.25, "completions/mean_terminated_length": 571.25, "completions/min_length": 522.0, "completions/min_terminated_length": 522.0, "epoch": 0.005202768616156455, "frac_reward_zero_std": 0.0, "grad_norm": 7.40625, "learning_rate": 2.25e-07, "loss": 0.0162, "num_tokens": 239589.0, "reward": 7.665875434875488, "reward_std": 0.9427504539489746, "rewards/chatgpt_combined_reward/mean": 7.665875434875488, "rewards/chatgpt_combined_reward/std": 0.8164288401603699, "step": 56 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 585.0, "completions/max_terminated_length": 585.0, "completions/mean_length": 555.75, "completions/mean_terminated_length": 555.75, "completions/min_length": 524.0, "completions/min_terminated_length": 524.0, "epoch": 0.00529567519858782, "frac_reward_zero_std": 0.0, "grad_norm": 8.3125, "learning_rate": 2.1999999999999998e-07, "loss": 0.0021, "num_tokens": 243860.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 57 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 724.0, "completions/max_terminated_length": 724.0, "completions/mean_length": 616.0, "completions/mean_terminated_length": 616.0, "completions/min_length": 562.0, "completions/min_terminated_length": 562.0, "epoch": 0.005388581781019185, "frac_reward_zero_std": 0.0, "grad_norm": 7.53125, "learning_rate": 2.1499999999999998e-07, "loss": 0.0464, "num_tokens": 248372.0, "reward": -7.499750137329102, "reward_std": 3.5358872413635254, "rewards/chatgpt_combined_reward/mean": -7.499750137329102, "rewards/chatgpt_combined_reward/std": 3.191626787185669, "step": 58 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 835.0, "completions/mean_length": 726.5, "completions/mean_terminated_length": 627.3333740234375, "completions/min_length": 517.0, "completions/min_terminated_length": 517.0, "epoch": 0.0054814883634505505, "frac_reward_zero_std": 0.0, "grad_norm": 7.875, "learning_rate": 2.0999999999999997e-07, "loss": 0.0292, "num_tokens": 253326.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 59 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 665.0, "completions/max_terminated_length": 665.0, "completions/mean_length": 558.0, "completions/mean_terminated_length": 558.0, "completions/min_length": 515.0, "completions/min_terminated_length": 515.0, "epoch": 0.005574394945881916, "frac_reward_zero_std": 0.0, "grad_norm": 10.375, "learning_rate": 2.0499999999999997e-07, "loss": 0.0376, "num_tokens": 257606.0, "reward": -7.083125114440918, "reward_std": 4.125083923339844, "rewards/chatgpt_combined_reward/mean": -7.083125114440918, "rewards/chatgpt_combined_reward/std": 3.436143636703491, "step": 60 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 692.0, "completions/max_terminated_length": 692.0, "completions/mean_length": 613.25, "completions/mean_terminated_length": 613.25, "completions/min_length": 514.0, "completions/min_terminated_length": 514.0, "epoch": 0.005667301528313281, "frac_reward_zero_std": 0.0, "grad_norm": 9.75, "learning_rate": 2e-07, "loss": -0.0, "num_tokens": 262107.0, "reward": -5.833000183105469, "reward_std": 3.535533905029297, "rewards/chatgpt_combined_reward/mean": -5.833000183105469, "rewards/chatgpt_combined_reward/std": 3.469496726989746, "step": 61 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 572.0, "completions/mean_length": 672.0, "completions/mean_terminated_length": 554.6666870117188, "completions/min_length": 537.0, "completions/min_terminated_length": 537.0, "epoch": 0.005760208110744646, "frac_reward_zero_std": 0.0, "grad_norm": 7.5625, "learning_rate": 1.9499999999999999e-07, "loss": 0.1156, "num_tokens": 266843.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 62 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 884.0, "completions/max_terminated_length": 884.0, "completions/mean_length": 666.5, "completions/mean_terminated_length": 666.5, "completions/min_length": 543.0, "completions/min_terminated_length": 543.0, "epoch": 0.005853114693176012, "frac_reward_zero_std": 0.0, "grad_norm": 9.8125, "learning_rate": 1.8999999999999998e-07, "loss": 0.0609, "num_tokens": 271557.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 63 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 667.0, "completions/mean_length": 683.0, "completions/mean_terminated_length": 569.3333740234375, "completions/min_length": 518.0, "completions/min_terminated_length": 518.0, "epoch": 0.0059460212756073764, "frac_reward_zero_std": 0.0, "grad_norm": 8.8125, "learning_rate": 1.85e-07, "loss": -0.0, "num_tokens": 276127.0, "reward": -0.0002503395080566406, "reward_std": 3.535533905029297, "rewards/chatgpt_combined_reward/mean": -0.0002503395080566406, "rewards/chatgpt_combined_reward/std": 9.128161430358887, "step": 64 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 990.0, "completions/mean_length": 838.0, "completions/mean_terminated_length": 776.0, "completions/min_length": 549.0, "completions/min_terminated_length": 549.0, "epoch": 0.006038927858038742, "frac_reward_zero_std": 0.0, "grad_norm": 8.8125, "learning_rate": 1.8e-07, "loss": 0.0668, "num_tokens": 281527.0, "reward": -9.316499710083008, "reward_std": 1.3908792734146118, "rewards/chatgpt_combined_reward/mean": -9.316499710083008, "rewards/chatgpt_combined_reward/std": 1.61105215549469, "step": 65 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1024.0, "completions/max_terminated_length": 803.0, "completions/mean_length": 881.75, "completions/mean_terminated_length": 739.5, "completions/min_length": 676.0, "completions/min_terminated_length": 676.0, "epoch": 0.0061318344404701075, "frac_reward_zero_std": 0.0, "grad_norm": 7.875, "learning_rate": 1.75e-07, "loss": 0.1151, "num_tokens": 287102.0, "reward": -8.89987564086914, "reward_std": 1.7679438591003418, "rewards/chatgpt_combined_reward/mean": -8.89987564086914, "rewards/chatgpt_combined_reward/std": 2.2138428688049316, "step": 66 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 665.0, "completions/max_terminated_length": 665.0, "completions/mean_length": 606.5, "completions/mean_terminated_length": 606.5, "completions/min_length": 550.0, "completions/min_terminated_length": 550.0, "epoch": 0.006224741022901472, "frac_reward_zero_std": 0.5, "grad_norm": 5.28125, "learning_rate": 1.7000000000000001e-07, "loss": 0.0032, "num_tokens": 291296.0, "reward": -2.333750009536743, "reward_std": 1.4142135381698608, "rewards/chatgpt_combined_reward/mean": -2.333750009536743, "rewards/chatgpt_combined_reward/std": 9.001585006713867, "step": 67 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 658.0, "completions/max_terminated_length": 658.0, "completions/mean_length": 580.5, "completions/mean_terminated_length": 580.5, "completions/min_length": 544.0, "completions/min_terminated_length": 544.0, "epoch": 0.006317647605332838, "frac_reward_zero_std": 0.0, "grad_norm": 7.625, "learning_rate": 1.65e-07, "loss": 0.026, "num_tokens": 295666.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 68 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 621.0, "completions/mean_length": 673.0, "completions/mean_terminated_length": 556.0, "completions/min_length": 515.0, "completions/min_terminated_length": 515.0, "epoch": 0.006410554187764203, "frac_reward_zero_std": 0.0, "grad_norm": 7.09375, "learning_rate": 1.6e-07, "loss": 0.1442, "num_tokens": 300406.0, "reward": -7.499750137329102, "reward_std": 3.5358872413635254, "rewards/chatgpt_combined_reward/mean": -7.499750137329102, "rewards/chatgpt_combined_reward/std": 3.191626787185669, "step": 69 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 692.0, "completions/max_terminated_length": 692.0, "completions/mean_length": 593.25, "completions/mean_terminated_length": 593.25, "completions/min_length": 520.0, "completions/min_terminated_length": 520.0, "epoch": 0.006503460770195569, "frac_reward_zero_std": 0.5, "grad_norm": 4.53125, "learning_rate": 1.55e-07, "loss": 0.031, "num_tokens": 304811.0, "reward": -0.41687512397766113, "reward_std": 1.7679438591003418, "rewards/chatgpt_combined_reward/mean": -0.41687512397766113, "rewards/chatgpt_combined_reward/std": 8.42953109741211, "step": 70 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 681.0, "completions/max_terminated_length": 681.0, "completions/mean_length": 581.25, "completions/mean_terminated_length": 581.25, "completions/min_length": 514.0, "completions/min_terminated_length": 514.0, "epoch": 0.006596367352626933, "frac_reward_zero_std": 0.0, "grad_norm": 8.375, "learning_rate": 1.5e-07, "loss": 0.0708, "num_tokens": 308360.0, "reward": -2.0002501010894775, "reward_std": 5.42103385925293, "rewards/chatgpt_combined_reward/mean": -2.0002501010894775, "rewards/chatgpt_combined_reward/std": 6.996899127960205, "step": 71 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 852.0, "completions/max_terminated_length": 852.0, "completions/mean_length": 611.75, "completions/mean_terminated_length": 611.75, "completions/min_length": 522.0, "completions/min_terminated_length": 522.0, "epoch": 0.006689273935058299, "frac_reward_zero_std": 0.0, "grad_norm": 9.1875, "learning_rate": 1.45e-07, "loss": 0.0753, "num_tokens": 312855.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 72 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 683.0, "completions/max_terminated_length": 683.0, "completions/mean_length": 599.75, "completions/mean_terminated_length": 599.75, "completions/min_length": 556.0, "completions/min_terminated_length": 556.0, "epoch": 0.0067821805174896645, "frac_reward_zero_std": 0.5, "grad_norm": 5.625, "learning_rate": 1.4e-07, "loss": -0.0233, "num_tokens": 317276.0, "reward": -1.2503752708435059, "reward_std": 1.7675901651382446, "rewards/chatgpt_combined_reward/mean": -1.2503752708435059, "rewards/chatgpt_combined_reward/std": 10.30729866027832, "step": 73 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 685.0, "completions/mean_length": 707.5, "completions/mean_terminated_length": 602.0, "completions/min_length": 517.0, "completions/min_terminated_length": 517.0, "epoch": 0.006875087099921029, "frac_reward_zero_std": 0.5, "grad_norm": 3.984375, "learning_rate": 1.35e-07, "loss": 0.0693, "num_tokens": 322154.0, "reward": -7.499750137329102, "reward_std": 1.1783934831619263, "rewards/chatgpt_combined_reward/mean": -7.499750137329102, "rewards/chatgpt_combined_reward/std": 3.191626787185669, "step": 74 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 723.0, "completions/max_terminated_length": 723.0, "completions/mean_length": 623.0, "completions/mean_terminated_length": 623.0, "completions/min_length": 515.0, "completions/min_terminated_length": 515.0, "epoch": 0.006967993682352395, "frac_reward_zero_std": 0.5, "grad_norm": 5.6875, "learning_rate": 1.3e-07, "loss": 0.0071, "num_tokens": 325852.0, "reward": -2.5002501010894775, "reward_std": 1.1783934831619263, "rewards/chatgpt_combined_reward/mean": -2.5002501010894775, "rewards/chatgpt_combined_reward/std": 8.766212463378906, "step": 75 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 588.0, "completions/max_terminated_length": 588.0, "completions/mean_length": 543.0, "completions/mean_terminated_length": 543.0, "completions/min_length": 515.0, "completions/min_terminated_length": 515.0, "epoch": 0.00706090026478376, "frac_reward_zero_std": 0.0, "grad_norm": 8.875, "learning_rate": 1.25e-07, "loss": -0.0118, "num_tokens": 330072.0, "reward": -4.583125114440918, "reward_std": 1.7679438591003418, "rewards/chatgpt_combined_reward/mean": -4.583125114440918, "rewards/chatgpt_combined_reward/std": 4.589576721191406, "step": 76 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 540.0, "completions/mean_length": 656.0, "completions/mean_terminated_length": 533.3333740234375, "completions/min_length": 527.0, "completions/min_terminated_length": 527.0, "epoch": 0.007153806847215125, "frac_reward_zero_std": 0.0, "grad_norm": 8.5, "learning_rate": 1.2e-07, "loss": 0.1072, "num_tokens": 334744.0, "reward": -7.90000057220459, "reward_std": 3.3941125869750977, "rewards/chatgpt_combined_reward/mean": -7.90000057220459, "rewards/chatgpt_combined_reward/std": 4.806246280670166, "step": 77 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 814.0, "completions/max_terminated_length": 814.0, "completions/mean_length": 618.25, "completions/mean_terminated_length": 618.25, "completions/min_length": 519.0, "completions/min_terminated_length": 519.0, "epoch": 0.00724671342964649, "frac_reward_zero_std": 0.5, "grad_norm": 4.4375, "learning_rate": 1.15e-07, "loss": -0.0338, "num_tokens": 339217.0, "reward": -2.0003750324249268, "reward_std": 0.4714634120464325, "rewards/chatgpt_combined_reward/mean": -2.0003750324249268, "rewards/chatgpt_combined_reward/std": 9.253199577331543, "step": 78 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 664.0, "completions/max_terminated_length": 664.0, "completions/mean_length": 561.75, "completions/mean_terminated_length": 561.75, "completions/min_length": 520.0, "completions/min_terminated_length": 520.0, "epoch": 0.007339620012077856, "frac_reward_zero_std": 0.0, "grad_norm": 8.6875, "learning_rate": 1.0999999999999999e-07, "loss": -0.0368, "num_tokens": 342692.0, "reward": -1.2501251697540283, "reward_std": 2.9463372230529785, "rewards/chatgpt_combined_reward/mean": -1.2501251697540283, "rewards/chatgpt_combined_reward/std": 7.622077465057373, "step": 79 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 818.0, "completions/max_terminated_length": 818.0, "completions/mean_length": 644.25, "completions/mean_terminated_length": 644.25, "completions/min_length": 532.0, "completions/min_terminated_length": 532.0, "epoch": 0.0074325265945092206, "frac_reward_zero_std": 0.0, "grad_norm": 7.8125, "learning_rate": 1.0499999999999999e-07, "loss": -0.1178, "num_tokens": 346483.0, "reward": -1.5832499265670776, "reward_std": 2.23905348777771, "rewards/chatgpt_combined_reward/mean": -1.5832499265670776, "rewards/chatgpt_combined_reward/std": 5.307532787322998, "step": 80 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1024.0, "completions/max_terminated_length": 518.0, "completions/mean_length": 770.5, "completions/mean_terminated_length": 517.0, "completions/min_length": 516.0, "completions/min_terminated_length": 516.0, "epoch": 0.007525433176940586, "frac_reward_zero_std": 0.0, "grad_norm": 9.3125, "learning_rate": 1e-07, "loss": -0.0007, "num_tokens": 351613.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 81 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 636.0, "completions/max_terminated_length": 636.0, "completions/mean_length": 553.5, "completions/mean_terminated_length": 553.5, "completions/min_length": 525.0, "completions/min_terminated_length": 525.0, "epoch": 0.007618339759371952, "frac_reward_zero_std": 0.5, "grad_norm": 5.625, "learning_rate": 9.499999999999999e-08, "loss": 0.0164, "num_tokens": 355029.0, "reward": -2.7502501010894775, "reward_std": 3.8887336254119873, "rewards/chatgpt_combined_reward/mean": -2.7502501010894775, "rewards/chatgpt_combined_reward/std": 9.499552726745605, "step": 82 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 626.0, "completions/max_terminated_length": 626.0, "completions/mean_length": 580.75, "completions/mean_terminated_length": 580.75, "completions/min_length": 532.0, "completions/min_terminated_length": 532.0, "epoch": 0.007711246341803317, "frac_reward_zero_std": 0.5, "grad_norm": 5.6875, "learning_rate": 9e-08, "loss": 0.0014, "num_tokens": 358566.0, "reward": -0.2505002021789551, "reward_std": 0.3535533845424652, "rewards/chatgpt_combined_reward/mean": -0.2505002021789551, "rewards/chatgpt_combined_reward/std": 11.265152931213379, "step": 83 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 693.0, "completions/mean_length": 705.5, "completions/mean_terminated_length": 599.3333740234375, "completions/min_length": 515.0, "completions/min_terminated_length": 515.0, "epoch": 0.007804152924234682, "frac_reward_zero_std": 0.0, "grad_norm": 8.8125, "learning_rate": 8.500000000000001e-08, "loss": -0.1453, "num_tokens": 363436.0, "reward": -3.749875068664551, "reward_std": 4.124730587005615, "rewards/chatgpt_combined_reward/mean": -3.749875068664551, "rewards/chatgpt_combined_reward/std": 4.383296489715576, "step": 84 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 890.0, "completions/max_terminated_length": 890.0, "completions/mean_length": 618.75, "completions/mean_terminated_length": 618.75, "completions/min_length": 522.0, "completions/min_terminated_length": 522.0, "epoch": 0.007897059506666047, "frac_reward_zero_std": 0.0, "grad_norm": 7.125, "learning_rate": 8e-08, "loss": -0.086, "num_tokens": 367959.0, "reward": -8.89987564086914, "reward_std": 1.9800759553909302, "rewards/chatgpt_combined_reward/mean": -8.89987564086914, "rewards/chatgpt_combined_reward/std": 2.4292385578155518, "step": 85 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 519.0, "completions/mean_length": 644.25, "completions/mean_terminated_length": 517.6666870117188, "completions/min_length": 517.0, "completions/min_terminated_length": 517.0, "epoch": 0.007989966089097413, "frac_reward_zero_std": 0.0, "grad_norm": 7.0625, "learning_rate": 7.5e-08, "loss": 0.1156, "num_tokens": 370960.0, "reward": 3.832624912261963, "reward_std": 3.771176815032959, "rewards/chatgpt_combined_reward/mean": 3.832624912261963, "rewards/chatgpt_combined_reward/std": 4.7957444190979, "step": 86 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 731.0, "completions/max_terminated_length": 731.0, "completions/mean_length": 585.75, "completions/mean_terminated_length": 585.75, "completions/min_length": 520.0, "completions/min_terminated_length": 520.0, "epoch": 0.008082872671528778, "frac_reward_zero_std": 0.0, "grad_norm": 7.9375, "learning_rate": 7e-08, "loss": 0.0622, "num_tokens": 375351.0, "reward": -5.983250141143799, "reward_std": 5.892674446105957, "rewards/chatgpt_combined_reward/mean": -5.983250141143799, "rewards/chatgpt_combined_reward/std": 4.826911449432373, "step": 87 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1002.0, "completions/max_terminated_length": 1002.0, "completions/mean_length": 724.25, "completions/mean_terminated_length": 724.25, "completions/min_length": 562.0, "completions/min_terminated_length": 562.0, "epoch": 0.008175779253960144, "frac_reward_zero_std": 0.0, "grad_norm": 7.625, "learning_rate": 6.5e-08, "loss": -0.0, "num_tokens": 380296.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 88 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 746.0, "completions/max_terminated_length": 746.0, "completions/mean_length": 592.25, "completions/mean_terminated_length": 592.25, "completions/min_length": 517.0, "completions/min_terminated_length": 517.0, "epoch": 0.008268685836391508, "frac_reward_zero_std": 0.0, "grad_norm": 7.75, "learning_rate": 6e-08, "loss": -0.0, "num_tokens": 384329.0, "reward": 4.832625389099121, "reward_std": 3.771177291870117, "rewards/chatgpt_combined_reward/mean": 4.832625389099121, "rewards/chatgpt_combined_reward/std": 3.911078929901123, "step": 89 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 698.0, "completions/max_terminated_length": 698.0, "completions/mean_length": 633.0, "completions/mean_terminated_length": 633.0, "completions/min_length": 598.0, "completions/min_terminated_length": 598.0, "epoch": 0.008361592418822873, "frac_reward_zero_std": 0.0, "grad_norm": 7.46875, "learning_rate": 5.4999999999999996e-08, "loss": -0.0157, "num_tokens": 388909.0, "reward": -5.983250141143799, "reward_std": 0.21213209629058838, "rewards/chatgpt_combined_reward/mean": -5.983250141143799, "rewards/chatgpt_combined_reward/std": 4.468296527862549, "step": 90 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 750.0, "completions/max_terminated_length": 750.0, "completions/mean_length": 584.5, "completions/mean_terminated_length": 584.5, "completions/min_length": 523.0, "completions/min_terminated_length": 523.0, "epoch": 0.008454499001254239, "frac_reward_zero_std": 0.5, "grad_norm": 4.78125, "learning_rate": 5e-08, "loss": 0.0284, "num_tokens": 392953.0, "reward": -2.083624839782715, "reward_std": 1.7675902843475342, "rewards/chatgpt_combined_reward/mean": -2.083624839782715, "rewards/chatgpt_combined_reward/std": 9.366135597229004, "step": 91 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 686.0, "completions/mean_length": 714.25, "completions/mean_terminated_length": 611.0, "completions/min_length": 522.0, "completions/min_terminated_length": 522.0, "epoch": 0.008547405583685604, "frac_reward_zero_std": 0.0, "grad_norm": 8.0, "learning_rate": 4.5e-08, "loss": 0.1335, "num_tokens": 397858.0, "reward": -9.316499710083008, "reward_std": 1.3908792734146118, "rewards/chatgpt_combined_reward/mean": -9.316499710083008, "rewards/chatgpt_combined_reward/std": 1.61105215549469, "step": 92 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 848.0, "completions/max_terminated_length": 848.0, "completions/mean_length": 737.75, "completions/mean_terminated_length": 737.75, "completions/min_length": 639.0, "completions/min_terminated_length": 639.0, "epoch": 0.00864031216611697, "frac_reward_zero_std": 0.0, "grad_norm": 6.6875, "learning_rate": 4e-08, "loss": -0.0, "num_tokens": 402857.0, "reward": -8.15000057220459, "reward_std": 0.21213217079639435, "rewards/chatgpt_combined_reward/mean": -8.15000057220459, "rewards/chatgpt_combined_reward/std": 0.387298583984375, "step": 93 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 958.0, "completions/max_terminated_length": 958.0, "completions/mean_length": 649.0, "completions/mean_terminated_length": 649.0, "completions/min_length": 526.0, "completions/min_terminated_length": 526.0, "epoch": 0.008733218748548335, "frac_reward_zero_std": 0.0, "grad_norm": 9.5, "learning_rate": 3.5e-08, "loss": 0.0, "num_tokens": 407501.0, "reward": -9.316499710083008, "reward_std": 1.1787470579147339, "rewards/chatgpt_combined_reward/mean": -9.316499710083008, "rewards/chatgpt_combined_reward/std": 1.8061809539794922, "step": 94 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 795.0, "completions/max_terminated_length": 795.0, "completions/mean_length": 645.0, "completions/mean_terminated_length": 645.0, "completions/min_length": 530.0, "completions/min_terminated_length": 530.0, "epoch": 0.0088261253309797, "frac_reward_zero_std": 0.0, "grad_norm": 8.75, "learning_rate": 3e-08, "loss": -0.0538, "num_tokens": 412129.0, "reward": -2.583124876022339, "reward_std": 1.2963035106658936, "rewards/chatgpt_combined_reward/mean": -2.583124876022339, "rewards/chatgpt_combined_reward/std": 2.114532947540283, "step": 95 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 806.0, "completions/mean_length": 732.25, "completions/mean_terminated_length": 635.0, "completions/min_length": 525.0, "completions/min_terminated_length": 525.0, "epoch": 0.008919031913411065, "frac_reward_zero_std": 0.5, "grad_norm": 5.0, "learning_rate": 2.5e-08, "loss": 0.0132, "num_tokens": 416252.0, "reward": -3.1670000553131104, "reward_std": 0.2354665994644165, "rewards/chatgpt_combined_reward/mean": -3.1670000553131104, "rewards/chatgpt_combined_reward/std": 7.894752502441406, "step": 96 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 772.0, "completions/max_terminated_length": 772.0, "completions/mean_length": 650.0, "completions/mean_terminated_length": 650.0, "completions/min_length": 518.0, "completions/min_terminated_length": 518.0, "epoch": 0.00901193849584243, "frac_reward_zero_std": 0.5, "grad_norm": 4.4375, "learning_rate": 2e-08, "loss": -0.0286, "num_tokens": 420900.0, "reward": -5.916625022888184, "reward_std": 0.11791006475687027, "rewards/chatgpt_combined_reward/mean": -5.916625022888184, "rewards/chatgpt_combined_reward/std": 4.717040538787842, "step": 97 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1024.0, "completions/max_terminated_length": 652.0, "completions/mean_length": 683.25, "completions/mean_terminated_length": 569.6666870117188, "completions/min_length": 516.0, "completions/min_terminated_length": 516.0, "epoch": 0.009104845078273796, "frac_reward_zero_std": 0.5, "grad_norm": 5.40625, "learning_rate": 1.5e-08, "loss": 0.0434, "num_tokens": 425291.0, "reward": -0.8337502479553223, "reward_std": 1.1783933639526367, "rewards/chatgpt_combined_reward/mean": -0.8337502479553223, "rewards/chatgpt_combined_reward/std": 10.671379089355469, "step": 98 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 622.0, "completions/max_terminated_length": 622.0, "completions/mean_length": 573.5, "completions/mean_terminated_length": 573.5, "completions/min_length": 519.0, "completions/min_terminated_length": 519.0, "epoch": 0.009197751660705161, "frac_reward_zero_std": 0.5, "grad_norm": 5.0625, "learning_rate": 1e-08, "loss": -0.0229, "num_tokens": 429633.0, "reward": -7.749750137329102, "reward_std": 0.8248399496078491, "rewards/chatgpt_combined_reward/mean": -7.749750137329102, "rewards/chatgpt_combined_reward/std": 2.7674262523651123, "step": 99 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 757.0, "completions/max_terminated_length": 757.0, "completions/mean_length": 595.25, "completions/mean_terminated_length": 595.25, "completions/min_length": 531.0, "completions/min_terminated_length": 531.0, "epoch": 0.009290658243136527, "frac_reward_zero_std": 0.5, "grad_norm": 5.25, "learning_rate": 5e-09, "loss": 0.0081, "num_tokens": 433854.0, "reward": -2.0003747940063477, "reward_std": 1.6498571634292603, "rewards/chatgpt_combined_reward/mean": -2.0003747940063477, "rewards/chatgpt_combined_reward/std": 9.431580543518066, "step": 100 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 433854, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }