dpo-selective-buffer-safeipo / eval_results.json
wxzhang's picture
Model save
581934d verified
raw
history blame
682 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": -1.9525097608566284,
"eval_logits/rejected": -1.7713532447814941,
"eval_logps/chosen": -224.6856231689453,
"eval_logps/rejected": -193.18142700195312,
"eval_loss": 4322.0576171875,
"eval_rewards/accuracies": 0.6033397912979126,
"eval_rewards/chosen": -0.9425504207611084,
"eval_rewards/margins": 0.06460745632648468,
"eval_rewards/rejected": -1.0071579217910767,
"eval_rewards/safe_rewards": -0.9376580119132996,
"eval_rewards/unsafe_rewards": -0.9381570816040039,
"eval_runtime": 993.6614,
"eval_samples": 33044,
"eval_samples_per_second": 33.255,
"eval_steps_per_second": 1.04
}