{ "epoch": 1.0, "eval_logits/chosen": -1.9525097608566284, "eval_logits/rejected": -1.7713532447814941, "eval_logps/chosen": -224.6856231689453, "eval_logps/rejected": -193.18142700195312, "eval_loss": 4322.0576171875, "eval_rewards/accuracies": 0.6033397912979126, "eval_rewards/chosen": -0.9425504207611084, "eval_rewards/margins": 0.06460745632648468, "eval_rewards/rejected": -1.0071579217910767, "eval_rewards/safe_rewards": -0.9376580119132996, "eval_rewards/unsafe_rewards": -0.9381570816040039, "eval_runtime": 993.6614, "eval_samples": 33044, "eval_samples_per_second": 33.255, "eval_steps_per_second": 1.04 }