dpo-selective-buffer-spo-shift / eval_results.json
wxzhang's picture
Model save
5d84c8e verified
raw
history blame contribute delete
685 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": -2.0824697017669678,
"eval_logits/rejected": -1.830767273902893,
"eval_logps/chosen": -131.0029296875,
"eval_logps/rejected": -92.43484497070312,
"eval_loss": 0.6776853799819946,
"eval_rewards/accuracies": 0.4693247675895691,
"eval_rewards/chosen": -0.13705651462078094,
"eval_rewards/margins": -0.0540921576321125,
"eval_rewards/rejected": -0.08296435326337814,
"eval_rewards/safe_rewards": -0.1332445591688156,
"eval_rewards/unsafe_rewards": -0.12632378935813904,
"eval_runtime": 2192.4744,
"eval_samples": 33044,
"eval_samples_per_second": 15.072,
"eval_steps_per_second": 0.942
}