NicholasCorrado's picture
End of training
3ef9c96 verified
{
"epoch": 1.0,
"eval_logits/chosen": -1.1779704093933105,
"eval_logits/rejected": -1.2511389255523682,
"eval_logps/chosen": -335.2599792480469,
"eval_logps/rejected": -317.35614013671875,
"eval_loss": 0.6636306643486023,
"eval_rewards/accuracies": 0.6746031641960144,
"eval_rewards/chosen": 0.04071044921875,
"eval_rewards/margins": 0.0732826218008995,
"eval_rewards/rejected": -0.032572176307439804,
"eval_runtime": 91.7551,
"eval_samples": 2000,
"eval_samples_per_second": 21.797,
"eval_steps_per_second": 0.687
}