{ "epoch": 0.9990049751243781, "eval_logits/chosen": 1.5755633115768433, "eval_logits/rejected": 1.1696137189865112, "eval_logps/chosen": -600.6971435546875, "eval_logps/rejected": -556.9004516601562, "eval_loss": 0.3912544250488281, "eval_rewards/accuracies": 0.9417910575866699, "eval_rewards/chosen": 0.17757247388362885, "eval_rewards/margins": 0.851599395275116, "eval_rewards/rejected": -0.6740269660949707, "eval_runtime": 879.0322, "eval_samples": 5359, "eval_samples_per_second": 6.096, "eval_steps_per_second": 0.381, "total_flos": 0.0, "train_loss": 0.45811287039621257, "train_runtime": 34472.405, "train_samples": 48227, "train_samples_per_second": 1.399, "train_steps_per_second": 0.022 }