{ "epoch": 3.0, "eval_logits/chosen": -2.1648077964782715, "eval_logits/rejected": -2.102295398712158, "eval_logps/chosen": -294.6768493652344, "eval_logps/rejected": -329.85784912109375, "eval_loss": 0.6326602697372437, "eval_rewards/accuracies": 0.6725000143051147, "eval_rewards/chosen": -9.871585845947266, "eval_rewards/margins": 4.674891948699951, "eval_rewards/rejected": -14.546478271484375, "eval_runtime": 132.202, "eval_samples": 3156, "eval_samples_per_second": 23.873, "eval_steps_per_second": 0.378 }