{ "epoch": 1.0, "eval_logits/chosen": -4.456838607788086, "eval_logits/rejected": -4.443554401397705, "eval_logps/chosen": -375.13671875, "eval_logps/rejected": -394.703125, "eval_loss": 0.6773372888565063, "eval_rewards/accuracies": 0.5686666369438171, "eval_rewards/chosen": -1.5442434549331665, "eval_rewards/margins": 0.13949742913246155, "eval_rewards/rejected": -1.6837408542633057, "eval_runtime": 1623.421, "eval_samples": 2994, "eval_samples_per_second": 1.844, "eval_steps_per_second": 0.231 }