|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9874476987447699, |
|
"eval_steps": 500, |
|
"global_step": 59, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016736401673640166, |
|
"grad_norm": 11.561801906529812, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/chosen": -2.829817771911621, |
|
"logits/rejected": -2.7577133178710938, |
|
"logps/chosen": -192.45741271972656, |
|
"logps/pi_response": -109.17219543457031, |
|
"logps/ref_response": -109.17219543457031, |
|
"logps/rejected": -234.76165771484375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.16736401673640167, |
|
"grad_norm": 11.688151121353982, |
|
"learning_rate": 4.930057285201027e-07, |
|
"logits/chosen": -2.7516937255859375, |
|
"logits/rejected": -2.7037875652313232, |
|
"logps/chosen": -220.56422424316406, |
|
"logps/pi_response": -114.52010345458984, |
|
"logps/ref_response": -113.88327026367188, |
|
"logps/rejected": -259.45562744140625, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0390845388174057, |
|
"rewards/margins": 0.019516173750162125, |
|
"rewards/rejected": -0.05860070511698723, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.33472803347280333, |
|
"grad_norm": 21.166335476353083, |
|
"learning_rate": 4.187457503795526e-07, |
|
"logits/chosen": -2.7673466205596924, |
|
"logits/rejected": -2.7233119010925293, |
|
"logps/chosen": -245.68881225585938, |
|
"logps/pi_response": -113.6824722290039, |
|
"logps/ref_response": -116.56245422363281, |
|
"logps/rejected": -309.20391845703125, |
|
"loss": 0.6313, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.22226174175739288, |
|
"rewards/margins": 0.28156232833862305, |
|
"rewards/rejected": -0.5038241147994995, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.502092050209205, |
|
"grad_norm": 16.990210994924944, |
|
"learning_rate": 2.8691164100062034e-07, |
|
"logits/chosen": -2.752291440963745, |
|
"logits/rejected": -2.7067503929138184, |
|
"logps/chosen": -281.0353088378906, |
|
"logps/pi_response": -142.60507202148438, |
|
"logps/ref_response": -121.12711334228516, |
|
"logps/rejected": -355.6138610839844, |
|
"loss": 0.5841, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.4965749680995941, |
|
"rewards/margins": 0.4751533567905426, |
|
"rewards/rejected": -0.9717282056808472, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6694560669456067, |
|
"grad_norm": 16.142094423990546, |
|
"learning_rate": 1.4248369943086995e-07, |
|
"logits/chosen": -2.6668388843536377, |
|
"logits/rejected": -2.6179091930389404, |
|
"logps/chosen": -288.419921875, |
|
"logps/pi_response": -156.54928588867188, |
|
"logps/ref_response": -113.27392578125, |
|
"logps/rejected": -391.61376953125, |
|
"loss": 0.5456, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.6666512489318848, |
|
"rewards/margins": 0.5951187014579773, |
|
"rewards/rejected": -1.2617700099945068, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8368200836820083, |
|
"grad_norm": 20.707041980035317, |
|
"learning_rate": 3.473909705816111e-08, |
|
"logits/chosen": -2.6176280975341797, |
|
"logits/rejected": -2.5751731395721436, |
|
"logps/chosen": -286.2168884277344, |
|
"logps/pi_response": -157.565673828125, |
|
"logps/ref_response": -112.81059265136719, |
|
"logps/rejected": -343.13922119140625, |
|
"loss": 0.5484, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.6077224016189575, |
|
"rewards/margins": 0.5270937085151672, |
|
"rewards/rejected": -1.13481605052948, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.9874476987447699, |
|
"step": 59, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5870478193638689, |
|
"train_runtime": 2672.8955, |
|
"train_samples_per_second": 5.718, |
|
"train_steps_per_second": 0.022 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 59, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|