{ "epoch": 1.0, "eval_logps/chosen": -2.755657434463501, "eval_logps/rejected": -1.0496879816055298, "eval_loss": 21.31629180908203, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -27.55657386779785, "eval_rewards/margins": -17.059694290161133, "eval_rewards/rejected": -10.496879577636719, "eval_runtime": 4.3483, "eval_samples": 12, "eval_samples_per_second": 2.76, "eval_steps_per_second": 0.69 }