|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.8064, |
|
"eval_steps": 63, |
|
"global_step": 252, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2016, |
|
"eval_logps/chosen": -106.34315490722656, |
|
"eval_logps/ref_chosen": -70.265625, |
|
"eval_logps/ref_rejected": -63.51186752319336, |
|
"eval_logps/rejected": -106.34315490722656, |
|
"eval_loss": 0.6557361483573914, |
|
"eval_rewards/accuracies": 0.6162024772678456, |
|
"eval_rewards/chosen": -1.8038759231567383, |
|
"eval_rewards/margins": 0.1433669477701187, |
|
"eval_rewards/rejected": -1.9472428560256958, |
|
"eval_runtime": 2599.3441, |
|
"eval_samples_per_second": 32.24, |
|
"eval_steps_per_second": 4.03, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 5.536309242248535, |
|
"learning_rate": 2.3016987391917016e-05, |
|
"logps/chosen": -187.26882934570312, |
|
"logps/ref_chosen": -199.84158325195312, |
|
"logps/ref_rejected": -140.63331604003906, |
|
"logps/rejected": -187.26882934570312, |
|
"loss": 0.4864, |
|
"rewards/accuracies": 0.7975000143051147, |
|
"rewards/chosen": 0.6286371350288391, |
|
"rewards/margins": 0.6605393886566162, |
|
"rewards/rejected": -0.031902212649583817, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4032, |
|
"eval_logps/chosen": -123.7810287475586, |
|
"eval_logps/ref_chosen": -70.265625, |
|
"eval_logps/ref_rejected": -63.51186752319336, |
|
"eval_logps/rejected": -123.7810287475586, |
|
"eval_loss": 0.6727969646453857, |
|
"eval_rewards/accuracies": 0.5971814515166702, |
|
"eval_rewards/chosen": -2.6757709980010986, |
|
"eval_rewards/margins": 0.16077223420143127, |
|
"eval_rewards/rejected": -2.836543321609497, |
|
"eval_runtime": 2597.2346, |
|
"eval_samples_per_second": 32.266, |
|
"eval_steps_per_second": 4.034, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.6048, |
|
"eval_logps/chosen": -136.31578063964844, |
|
"eval_logps/ref_chosen": -70.265625, |
|
"eval_logps/ref_rejected": -63.51186752319336, |
|
"eval_logps/rejected": -136.31578063964844, |
|
"eval_loss": 0.683315098285675, |
|
"eval_rewards/accuracies": 0.5913224028066156, |
|
"eval_rewards/chosen": -3.3025076389312744, |
|
"eval_rewards/margins": 0.17308557033538818, |
|
"eval_rewards/rejected": -3.475592851638794, |
|
"eval_runtime": 2597.1774, |
|
"eval_samples_per_second": 32.267, |
|
"eval_steps_per_second": 4.034, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 6.213208198547363, |
|
"learning_rate": 8.569611578954186e-06, |
|
"logps/chosen": -181.8887176513672, |
|
"logps/ref_chosen": -198.8422088623047, |
|
"logps/ref_rejected": -139.47769165039062, |
|
"logps/rejected": -181.8887176513672, |
|
"loss": 0.3802, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": 0.8476755023002625, |
|
"rewards/margins": 1.189441204071045, |
|
"rewards/rejected": -0.3417656719684601, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8064, |
|
"eval_logps/chosen": -139.97152709960938, |
|
"eval_logps/ref_chosen": -70.265625, |
|
"eval_logps/ref_rejected": -63.51186752319336, |
|
"eval_logps/rejected": -139.97152709960938, |
|
"eval_loss": 0.6932100653648376, |
|
"eval_rewards/accuracies": 0.5844013269373046, |
|
"eval_rewards/chosen": -3.4852941036224365, |
|
"eval_rewards/margins": 0.16311165690422058, |
|
"eval_rewards/rejected": -3.6484062671661377, |
|
"eval_runtime": 2597.2713, |
|
"eval_samples_per_second": 32.265, |
|
"eval_steps_per_second": 4.033, |
|
"step": 252 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 312, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 63, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|