{ "epoch": 1.0, "eval_logits/chosen": 0.41779446601867676, "eval_logits/rejected": 1.454390525817871, "eval_logps/chosen": -324.46234130859375, "eval_logps/rejected": -342.5584716796875, "eval_loss": 0.11311393231153488, "eval_rewards/accuracies": 0.7543103694915771, "eval_rewards/chosen": -0.393722802400589, "eval_rewards/margins": 0.566637396812439, "eval_rewards/rejected": -0.9603601098060608, "eval_runtime": 93.576, "eval_samples": 1818, "eval_samples_per_second": 19.428, "eval_steps_per_second": 0.31, "total_flos": 0.0, "train_loss": 0.13197535372108493, "train_runtime": 11523.0815, "train_samples": 55758, "train_samples_per_second": 4.839, "train_steps_per_second": 0.038 }