{ "epoch": 1.0, "eval_logits/chosen": -3.164646625518799, "eval_logits/rejected": -3.153470039367676, "eval_logps/chosen": -543.001708984375, "eval_logps/rejected": -567.34228515625, "eval_loss": 0.7142251133918762, "eval_rewards/accuracies": 0.546875, "eval_rewards/chosen": -1.4159148931503296, "eval_rewards/margins": 0.08076205104589462, "eval_rewards/rejected": -1.496677041053772, "eval_runtime": 125.7685, "eval_samples": 1000, "eval_samples_per_second": 7.951, "eval_steps_per_second": 0.254, "train_loss": 0.6047722502396657, "train_runtime": 5332.3567, "train_samples": 19958, "train_samples_per_second": 3.75, "train_steps_per_second": 0.059 }