{ "epoch": 5.992211163998269, "eval_logits/chosen": -0.271484375, "eval_logits/rejected": -0.1357421875, "eval_logps/chosen": -110.5, "eval_logps/rejected": -129.0, "eval_loss": 0.6188219785690308, "eval_rewards/accuracies": 0.5, "eval_rewards/chosen": 1.0859375, "eval_rewards/margins": 4.0, "eval_rewards/rejected": -2.90625, "eval_runtime": 53.3159, "eval_samples_per_second": 72.999, "eval_steps_per_second": 2.288 }