{ "epoch": 1.0, "eval_logits/chosen": -1.9525097608566284, "eval_logits/rejected": -1.7713532447814941, "eval_logps/chosen": -224.6856231689453, "eval_logps/rejected": -193.18142700195312, "eval_loss": 4322.0576171875, "eval_rewards/accuracies": 0.6033397912979126, "eval_rewards/chosen": -0.9425504207611084, "eval_rewards/margins": 0.06460745632648468, "eval_rewards/rejected": -1.0071579217910767, "eval_rewards/safe_rewards": -0.9376580119132996, "eval_rewards/unsafe_rewards": -0.9381570816040039, "eval_runtime": 993.6614, "eval_samples": 33044, "eval_samples_per_second": 33.255, "eval_steps_per_second": 1.04, "train_loss": 11692.422362755651, "train_runtime": 23508.9776, "train_samples": 59478, "train_samples_per_second": 2.53, "train_steps_per_second": 0.079 }