dpo-selective-buffer-safeipo / all_results.json
wxzhang's picture
Model save
581934d verified
raw
history blame
856 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": -1.9525097608566284,
"eval_logits/rejected": -1.7713532447814941,
"eval_logps/chosen": -224.6856231689453,
"eval_logps/rejected": -193.18142700195312,
"eval_loss": 4322.0576171875,
"eval_rewards/accuracies": 0.6033397912979126,
"eval_rewards/chosen": -0.9425504207611084,
"eval_rewards/margins": 0.06460745632648468,
"eval_rewards/rejected": -1.0071579217910767,
"eval_rewards/safe_rewards": -0.9376580119132996,
"eval_rewards/unsafe_rewards": -0.9381570816040039,
"eval_runtime": 993.6614,
"eval_samples": 33044,
"eval_samples_per_second": 33.255,
"eval_steps_per_second": 1.04,
"train_loss": 11692.422362755651,
"train_runtime": 23508.9776,
"train_samples": 59478,
"train_samples_per_second": 2.53,
"train_steps_per_second": 0.079
}