{ "epoch": 0.9989071038251366, "eval_logits/chosen": -1.200439691543579, "eval_logits/rejected": -1.150753140449524, "eval_logps/chosen": -2.5561068058013916, "eval_logps/rejected": -3.309354782104492, "eval_loss": 1.7538212537765503, "eval_rewards/accuracies": 0.8433734774589539, "eval_rewards/chosen": -25.561067581176758, "eval_rewards/margins": 7.532484531402588, "eval_rewards/rejected": -33.09355163574219, "eval_runtime": 33.6933, "eval_samples": 1318, "eval_samples_per_second": 39.118, "eval_semantic_entropy": 0.36752766370773315, "eval_steps_per_second": 2.463 }