{ "epoch": 1.0, "eval_logits/chosen": -0.6058345437049866, "eval_logits/rejected": 0.2021634727716446, "eval_logps/chosen": -325.1861572265625, "eval_logps/rejected": -342.98297119140625, "eval_loss": 0.11247362196445465, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -0.4009607136249542, "eval_rewards/margins": 0.5636443495750427, "eval_rewards/rejected": -0.9646049737930298, "eval_runtime": 95.1407, "eval_samples": 1818, "eval_samples_per_second": 19.109, "eval_steps_per_second": 0.305, "total_flos": 0.0, "train_loss": 0.010263856175705927, "train_runtime": 11602.0724, "train_samples": 55758, "train_samples_per_second": 4.806, "train_steps_per_second": 0.038 }