{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984,
  "eval_steps": 500,
  "global_step": 156,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 3.125e-07,
      "logits/chosen": -0.18967239558696747,
      "logits/rejected": -0.41899582743644714,
      "logps/chosen": -1382.499267578125,
      "logps/rejected": -2863.098388671875,
      "loss": 0.5,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.125e-06,
      "logits/chosen": -0.15270556509494781,
      "logits/rejected": -0.2915438413619995,
      "logps/chosen": -2153.170654296875,
      "logps/rejected": -3371.856689453125,
      "loss": 0.4997,
      "rewards/accuracies": 0.3993055522441864,
      "rewards/chosen": -0.007403078954666853,
      "rewards/margins": 0.0016438195016235113,
      "rewards/rejected": -0.00904689822345972,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.989935734988098e-06,
      "logits/chosen": -0.2100469321012497,
      "logits/rejected": -0.32501596212387085,
      "logps/chosen": -2614.16064453125,
      "logps/rejected": -3642.28076171875,
      "loss": 0.4963,
      "rewards/accuracies": 0.4937500059604645,
      "rewards/chosen": -0.1257179230451584,
      "rewards/margins": 0.020404411479830742,
      "rewards/rejected": -0.14612232148647308,
      "step": 20
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.8776412907378845e-06,
      "logits/chosen": -0.19536757469177246,
      "logits/rejected": -0.4390452802181244,
      "logps/chosen": -2770.899169921875,
      "logps/rejected": -4525.7744140625,
      "loss": 0.4889,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.5548638701438904,
      "rewards/margins": 0.1145726665854454,
      "rewards/rejected": -0.6694365739822388,
      "step": 30
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.646121984004666e-06,
      "logits/chosen": -0.23629632592201233,
      "logits/rejected": -0.5095050930976868,
      "logps/chosen": -3372.510498046875,
      "logps/rejected": -5071.37451171875,
      "loss": 0.4868,
      "rewards/accuracies": 0.503125011920929,
      "rewards/chosen": -1.0432322025299072,
      "rewards/margins": 0.22232362627983093,
      "rewards/rejected": -1.26555597782135,
      "step": 40
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.3069871595684795e-06,
      "logits/chosen": -0.32125282287597656,
      "logits/rejected": -0.562100887298584,
      "logps/chosen": -3342.119873046875,
      "logps/rejected": -5389.53955078125,
      "loss": 0.4856,
      "rewards/accuracies": 0.546875,
      "rewards/chosen": -0.9807957410812378,
      "rewards/margins": 0.4719497263431549,
      "rewards/rejected": -1.4527455568313599,
      "step": 50
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.8772424536302565e-06,
      "logits/chosen": -0.3323759138584137,
      "logits/rejected": -0.5304074883460999,
      "logps/chosen": -3480.371826171875,
      "logps/rejected": -4698.51025390625,
      "loss": 0.4883,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.97138512134552,
      "rewards/margins": 0.20621006190776825,
      "rewards/rejected": -1.1775951385498047,
      "step": 60
    },
    {
      "epoch": 0.45,
      "learning_rate": 3.3784370602033572e-06,
      "logits/chosen": -0.3379088044166565,
      "logits/rejected": -0.5837884545326233,
      "logps/chosen": -3488.02587890625,
      "logps/rejected": -5069.7412109375,
      "loss": 0.4871,
      "rewards/accuracies": 0.4937500059604645,
      "rewards/chosen": -1.0387831926345825,
      "rewards/margins": 0.1595762073993683,
      "rewards/rejected": -1.198359489440918,
      "step": 70
    },
    {
      "epoch": 0.51,
      "learning_rate": 2.835583164544139e-06,
      "logits/chosen": -0.2774398624897003,
      "logits/rejected": -0.5058914422988892,
      "logps/chosen": -2817.748779296875,
      "logps/rejected": -4369.88720703125,
      "loss": 0.4891,
      "rewards/accuracies": 0.48124998807907104,
      "rewards/chosen": -0.8934494853019714,
      "rewards/margins": 0.030101608484983444,
      "rewards/rejected": -0.9235512018203735,
      "step": 80
    },
    {
      "epoch": 0.58,
      "learning_rate": 2.2759017277414165e-06,
      "logits/chosen": -0.21915356814861298,
      "logits/rejected": -0.47689905762672424,
      "logps/chosen": -2961.93994140625,
      "logps/rejected": -4903.95166015625,
      "loss": 0.4839,
      "rewards/accuracies": 0.5531250238418579,
      "rewards/chosen": -0.7773372530937195,
      "rewards/margins": 0.3540068566799164,
      "rewards/rejected": -1.1313440799713135,
      "step": 90
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.7274575140626318e-06,
      "logits/chosen": -0.3382042348384857,
      "logits/rejected": -0.6111919283866882,
      "logps/chosen": -2994.310302734375,
      "logps/rejected": -4617.9794921875,
      "loss": 0.4856,
      "rewards/accuracies": 0.4937500059604645,
      "rewards/chosen": -0.9103735089302063,
      "rewards/margins": 0.1410413533449173,
      "rewards/rejected": -1.051414966583252,
      "step": 100
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.217751806485235e-06,
      "logits/chosen": -0.28220734000205994,
      "logits/rejected": -0.5584867000579834,
      "logps/chosen": -2910.51953125,
      "logps/rejected": -4760.12158203125,
      "loss": 0.4865,
      "rewards/accuracies": 0.503125011920929,
      "rewards/chosen": -0.8143825531005859,
      "rewards/margins": 0.21515560150146484,
      "rewards/rejected": -1.0295381546020508,
      "step": 110
    },
    {
      "epoch": 0.77,
      "learning_rate": 7.723433775328385e-07,
      "logits/chosen": -0.281380295753479,
      "logits/rejected": -0.500409722328186,
      "logps/chosen": -2965.12841796875,
      "logps/rejected": -4710.0673828125,
      "loss": 0.486,
      "rewards/accuracies": 0.5531250238418579,
      "rewards/chosen": -0.8462094068527222,
      "rewards/margins": 0.2777535617351532,
      "rewards/rejected": -1.1239629983901978,
      "step": 120
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.1356686569674344e-07,
      "logits/chosen": -0.24006013572216034,
      "logits/rejected": -0.46842899918556213,
      "logps/chosen": -3051.799072265625,
      "logps/rejected": -4552.3515625,
      "loss": 0.4854,
      "rewards/accuracies": 0.546875,
      "rewards/chosen": -0.8304941058158875,
      "rewards/margins": 0.16177485883235931,
      "rewards/rejected": -0.9922689199447632,
      "step": 130
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.59412823400657e-07,
      "logits/chosen": -0.2925623953342438,
      "logits/rejected": -0.4558919072151184,
      "logps/chosen": -3453.52880859375,
      "logps/rejected": -4216.4990234375,
      "loss": 0.4876,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": -0.9790051579475403,
      "rewards/margins": -0.0291127972304821,
      "rewards/rejected": -0.9498924016952515,
      "step": 140
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.262559558016325e-08,
      "logits/chosen": -0.29268592596054077,
      "logits/rejected": -0.5145989656448364,
      "logps/chosen": -2635.45703125,
      "logps/rejected": -4756.46875,
      "loss": 0.4888,
      "rewards/accuracies": 0.546875,
      "rewards/chosen": -0.7879113554954529,
      "rewards/margins": 0.24760587513446808,
      "rewards/rejected": -1.0355170965194702,
      "step": 150
    },
    {
      "epoch": 1.0,
      "step": 156,
      "total_flos": 0.0,
      "train_loss": 0.4880518607604198,
      "train_runtime": 4047.7642,
      "train_samples_per_second": 4.941,
      "train_steps_per_second": 0.039
    }
  ],
  "logging_steps": 10,
  "max_steps": 156,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}