{ "epoch": 1.0, "eval_logits/chosen": -1.3773202896118164, "eval_logits/rejected": -1.3735064268112183, "eval_logps/chosen": -342.40301513671875, "eval_logps/rejected": -313.79144287109375, "eval_loss": 0.43090611696243286, "eval_pred_label": 2268.77783203125, "eval_rewards/accuracies": 0.7460317611694336, "eval_rewards/chosen": 1.2747503519058228, "eval_rewards/margins": 1.4222474098205566, "eval_rewards/rejected": -0.14749698340892792, "eval_runtime": 274.7725, "eval_samples": 2000, "eval_samples_per_second": 7.279, "eval_steps_per_second": 0.229, "eval_use_label": 5751.22216796875, "train_loss": 0.47434414693822413, "train_runtime": 13309.0876, "train_samples": 61135, "train_samples_per_second": 4.593, "train_steps_per_second": 0.072 }