|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1427, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 7.579595964430724, |
|
"learning_rate": 3.4965034965034967e-08, |
|
"log_odds_chosen": 0.4541015625, |
|
"log_odds_ratio": -0.677050769329071, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.015625, |
|
"logps/chosen": -1.8671875, |
|
"logps/rejected": -2.265625, |
|
"loss": 1.5655, |
|
"nll_loss": 1.5, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.1865234375, |
|
"rewards/margins": 0.0400390625, |
|
"rewards/rejected": -0.2265625, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 7.5249840340758425, |
|
"learning_rate": 6.993006993006993e-08, |
|
"log_odds_chosen": 0.22894287109375, |
|
"log_odds_ratio": -0.7715820074081421, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -1.9921875, |
|
"logps/rejected": -2.1875, |
|
"loss": 1.5773, |
|
"nll_loss": 1.4921875, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.19921875, |
|
"rewards/margins": 0.01953125, |
|
"rewards/rejected": -0.21875, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 5.9119698758492865, |
|
"learning_rate": 1.0489510489510489e-07, |
|
"log_odds_chosen": 0.28911131620407104, |
|
"log_odds_ratio": -0.8036133050918579, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -2.109375, |
|
"logps/rejected": -2.34375, |
|
"loss": 1.5587, |
|
"nll_loss": 1.515625, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.2109375, |
|
"rewards/margins": 0.0242919921875, |
|
"rewards/rejected": -0.2353515625, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.999607089742296, |
|
"learning_rate": 1.3986013986013987e-07, |
|
"log_odds_chosen": 0.25310057401657104, |
|
"log_odds_ratio": -0.762402355670929, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.015625, |
|
"logps/chosen": -1.921875, |
|
"logps/rejected": -2.125, |
|
"loss": 1.5862, |
|
"nll_loss": 1.4765625, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.1923828125, |
|
"rewards/margins": 0.0208740234375, |
|
"rewards/rejected": -0.212890625, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.309681737493818, |
|
"learning_rate": 1.7482517482517481e-07, |
|
"log_odds_chosen": 0.25770264863967896, |
|
"log_odds_ratio": -0.697558581829071, |
|
"logits/chosen": -2.109375, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -1.921875, |
|
"logps/rejected": -2.140625, |
|
"loss": 1.5736, |
|
"nll_loss": 1.546875, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1923828125, |
|
"rewards/margins": 0.0218505859375, |
|
"rewards/rejected": -0.2138671875, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.569709247368732, |
|
"learning_rate": 2.0979020979020979e-07, |
|
"log_odds_chosen": 0.311614990234375, |
|
"log_odds_ratio": -0.695605456829071, |
|
"logits/chosen": -2.109375, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -1.96875, |
|
"logps/rejected": -2.25, |
|
"loss": 1.5668, |
|
"nll_loss": 1.484375, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.197265625, |
|
"rewards/margins": 0.02734375, |
|
"rewards/rejected": -0.224609375, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.773237000535201, |
|
"learning_rate": 2.4475524475524473e-07, |
|
"log_odds_chosen": 0.14488525688648224, |
|
"log_odds_ratio": -0.786425769329071, |
|
"logits/chosen": -2.171875, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -2.0, |
|
"logps/rejected": -2.125, |
|
"loss": 1.5723, |
|
"nll_loss": 1.5234375, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.2001953125, |
|
"rewards/margins": 0.01287841796875, |
|
"rewards/rejected": -0.212890625, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.5756538810115925, |
|
"learning_rate": 2.7972027972027973e-07, |
|
"log_odds_chosen": 0.2967529296875, |
|
"log_odds_ratio": -0.7289062738418579, |
|
"logits/chosen": -2.109375, |
|
"logits/rejected": -2.140625, |
|
"logps/chosen": -1.8125, |
|
"logps/rejected": -2.046875, |
|
"loss": 1.4668, |
|
"nll_loss": 1.3515625, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.181640625, |
|
"rewards/margins": 0.023681640625, |
|
"rewards/rejected": -0.205078125, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.2269586617524535, |
|
"learning_rate": 3.146853146853147e-07, |
|
"log_odds_chosen": 0.2103271484375, |
|
"log_odds_ratio": -0.73095703125, |
|
"logits/chosen": -2.171875, |
|
"logits/rejected": -2.140625, |
|
"logps/chosen": -1.8515625, |
|
"logps/rejected": -2.03125, |
|
"loss": 1.5357, |
|
"nll_loss": 1.4921875, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.185546875, |
|
"rewards/margins": 0.0186767578125, |
|
"rewards/rejected": -0.2041015625, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 7.493002974943704, |
|
"learning_rate": 3.4965034965034963e-07, |
|
"log_odds_chosen": 0.14136353135108948, |
|
"log_odds_ratio": -0.710644543170929, |
|
"logits/chosen": -2.171875, |
|
"logits/rejected": -2.1875, |
|
"logps/chosen": -1.921875, |
|
"logps/rejected": -2.03125, |
|
"loss": 1.5193, |
|
"nll_loss": 1.4375, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.19140625, |
|
"rewards/margins": 0.010498046875, |
|
"rewards/rejected": -0.2021484375, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 6.173840413359453, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"log_odds_chosen": 0.18465575575828552, |
|
"log_odds_ratio": -0.7144531011581421, |
|
"logits/chosen": -2.265625, |
|
"logits/rejected": -2.296875, |
|
"logps/chosen": -1.734375, |
|
"logps/rejected": -1.890625, |
|
"loss": 1.5043, |
|
"nll_loss": 1.421875, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.173828125, |
|
"rewards/margins": 0.01556396484375, |
|
"rewards/rejected": -0.189453125, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.9756802927191024, |
|
"learning_rate": 4.1958041958041957e-07, |
|
"log_odds_chosen": 0.22193603217601776, |
|
"log_odds_ratio": -0.7044922113418579, |
|
"logits/chosen": -2.296875, |
|
"logits/rejected": -2.359375, |
|
"logps/chosen": -1.4921875, |
|
"logps/rejected": -1.703125, |
|
"loss": 1.4443, |
|
"nll_loss": 1.3125, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1494140625, |
|
"rewards/margins": 0.0208740234375, |
|
"rewards/rejected": -0.169921875, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.49115617326949, |
|
"learning_rate": 4.545454545454545e-07, |
|
"log_odds_chosen": 0.07476196438074112, |
|
"log_odds_ratio": -0.741992175579071, |
|
"logits/chosen": -2.28125, |
|
"logits/rejected": -2.359375, |
|
"logps/chosen": -1.5703125, |
|
"logps/rejected": -1.640625, |
|
"loss": 1.4535, |
|
"nll_loss": 1.40625, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1572265625, |
|
"rewards/margins": 0.006622314453125, |
|
"rewards/rejected": -0.1640625, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.2300418693285047, |
|
"learning_rate": 4.895104895104895e-07, |
|
"log_odds_chosen": 0.23361817002296448, |
|
"log_odds_ratio": -0.694628894329071, |
|
"logits/chosen": -2.265625, |
|
"logits/rejected": -2.375, |
|
"logps/chosen": -1.5, |
|
"logps/rejected": -1.703125, |
|
"loss": 1.4352, |
|
"nll_loss": 1.3828125, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.150390625, |
|
"rewards/margins": 0.019775390625, |
|
"rewards/rejected": -0.169921875, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 3.733581414979026, |
|
"learning_rate": 4.999633338614865e-07, |
|
"log_odds_chosen": 0.0267333984375, |
|
"log_odds_ratio": -0.739453136920929, |
|
"logits/chosen": -2.296875, |
|
"logits/rejected": -2.375, |
|
"logps/chosen": -1.5234375, |
|
"logps/rejected": -1.546875, |
|
"loss": 1.4385, |
|
"nll_loss": 1.3671875, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.15234375, |
|
"rewards/margins": 0.0021209716796875, |
|
"rewards/rejected": -0.154296875, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 4.063546439288504, |
|
"learning_rate": 4.997837705025723e-07, |
|
"log_odds_chosen": 0.14241942763328552, |
|
"log_odds_ratio": -0.7054687738418579, |
|
"logits/chosen": -2.203125, |
|
"logits/rejected": -2.3125, |
|
"logps/chosen": -1.3828125, |
|
"logps/rejected": -1.4921875, |
|
"loss": 1.3762, |
|
"nll_loss": 1.2734375, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.138671875, |
|
"rewards/margins": 0.0108642578125, |
|
"rewards/rejected": -0.1494140625, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.8382728420037524, |
|
"learning_rate": 4.994546826814266e-07, |
|
"log_odds_chosen": 0.19875487685203552, |
|
"log_odds_ratio": -0.658203125, |
|
"logits/chosen": -2.375, |
|
"logits/rejected": -2.46875, |
|
"logps/chosen": -1.3359375, |
|
"logps/rejected": -1.484375, |
|
"loss": 1.3645, |
|
"nll_loss": 1.3046875, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.1337890625, |
|
"rewards/margins": 0.0146484375, |
|
"rewards/rejected": -0.1484375, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.5209932782786, |
|
"learning_rate": 4.989762673951533e-07, |
|
"log_odds_chosen": 0.16008301079273224, |
|
"log_odds_ratio": -0.6830078363418579, |
|
"logits/chosen": -2.359375, |
|
"logits/rejected": -2.515625, |
|
"logps/chosen": -1.2890625, |
|
"logps/rejected": -1.40625, |
|
"loss": 1.3044, |
|
"nll_loss": 1.2265625, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.12890625, |
|
"rewards/margins": 0.01129150390625, |
|
"rewards/rejected": -0.140625, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.7624176664717623, |
|
"learning_rate": 4.983488110306074e-07, |
|
"log_odds_chosen": 0.08121337741613388, |
|
"log_odds_ratio": -0.7232421636581421, |
|
"logits/chosen": -2.375, |
|
"logits/rejected": -2.4375, |
|
"logps/chosen": -1.34375, |
|
"logps/rejected": -1.3984375, |
|
"loss": 1.3099, |
|
"nll_loss": 1.265625, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1337890625, |
|
"rewards/margins": 0.005584716796875, |
|
"rewards/rejected": -0.1396484375, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.5951453466247916, |
|
"learning_rate": 4.975726891929584e-07, |
|
"log_odds_chosen": 0.22237548232078552, |
|
"log_odds_ratio": -0.66455078125, |
|
"logits/chosen": -2.359375, |
|
"logits/rejected": -2.46875, |
|
"logps/chosen": -1.2421875, |
|
"logps/rejected": -1.40625, |
|
"loss": 1.3293, |
|
"nll_loss": 1.1953125, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.1240234375, |
|
"rewards/margins": 0.0169677734375, |
|
"rewards/rejected": -0.140625, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.745043363887144, |
|
"learning_rate": 4.966483664808476e-07, |
|
"log_odds_chosen": 0.13164062798023224, |
|
"log_odds_ratio": -0.7242187261581421, |
|
"logits/chosen": -2.21875, |
|
"logits/rejected": -2.21875, |
|
"logps/chosen": -1.21875, |
|
"logps/rejected": -1.3203125, |
|
"loss": 1.3097, |
|
"nll_loss": 1.1875, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.12158203125, |
|
"rewards/margins": 0.010498046875, |
|
"rewards/rejected": -0.1318359375, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.544921558378996, |
|
"learning_rate": 4.955763962082722e-07, |
|
"log_odds_chosen": 0.2619995176792145, |
|
"log_odds_ratio": -0.673046886920929, |
|
"logits/chosen": -2.4375, |
|
"logits/rejected": -2.453125, |
|
"logps/chosen": -1.265625, |
|
"logps/rejected": -1.46875, |
|
"loss": 1.299, |
|
"nll_loss": 1.2734375, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.126953125, |
|
"rewards/margins": 0.0205078125, |
|
"rewards/rejected": -0.1474609375, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.7422926363251467, |
|
"learning_rate": 4.943574200733625e-07, |
|
"log_odds_chosen": 0.197174072265625, |
|
"log_odds_ratio": -0.6680663824081421, |
|
"logits/chosen": -2.296875, |
|
"logits/rejected": -2.390625, |
|
"logps/chosen": -1.234375, |
|
"logps/rejected": -1.375, |
|
"loss": 1.3089, |
|
"nll_loss": 1.234375, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.123046875, |
|
"rewards/margins": 0.0142822265625, |
|
"rewards/rejected": -0.1376953125, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.5904882528928845, |
|
"learning_rate": 4.929921677742516e-07, |
|
"log_odds_chosen": 0.24537964165210724, |
|
"log_odds_ratio": -0.657031238079071, |
|
"logits/chosen": -2.328125, |
|
"logits/rejected": -2.421875, |
|
"logps/chosen": -1.2734375, |
|
"logps/rejected": -1.4453125, |
|
"loss": 1.3289, |
|
"nll_loss": 1.2421875, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1279296875, |
|
"rewards/margins": 0.0167236328125, |
|
"rewards/rejected": -0.14453125, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.57374464764083, |
|
"learning_rate": 4.91481456572267e-07, |
|
"log_odds_chosen": 0.1439208984375, |
|
"log_odds_ratio": -0.7132812738418579, |
|
"logits/chosen": -2.296875, |
|
"logits/rejected": -2.328125, |
|
"logps/chosen": -1.21875, |
|
"logps/rejected": -1.3203125, |
|
"loss": 1.2807, |
|
"nll_loss": 1.2109375, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1220703125, |
|
"rewards/margins": 0.01019287109375, |
|
"rewards/rejected": -0.1318359375, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.5866184628438873, |
|
"learning_rate": 4.898261908027049e-07, |
|
"log_odds_chosen": 0.20936889946460724, |
|
"log_odds_ratio": -0.6578124761581421, |
|
"logits/chosen": -2.28125, |
|
"logits/rejected": -2.359375, |
|
"logps/chosen": -1.2421875, |
|
"logps/rejected": -1.40625, |
|
"loss": 1.2753, |
|
"nll_loss": 1.21875, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.1240234375, |
|
"rewards/margins": 0.01611328125, |
|
"rewards/rejected": -0.140625, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.7405175636819643, |
|
"learning_rate": 4.880273613334809e-07, |
|
"log_odds_chosen": 0.22337035834789276, |
|
"log_odds_ratio": -0.7001953125, |
|
"logits/chosen": -2.328125, |
|
"logits/rejected": -2.4375, |
|
"logps/chosen": -1.2265625, |
|
"logps/rejected": -1.40625, |
|
"loss": 1.2719, |
|
"nll_loss": 1.203125, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.123046875, |
|
"rewards/margins": 0.0179443359375, |
|
"rewards/rejected": -0.140625, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.7574742318554923, |
|
"learning_rate": 4.86086044971981e-07, |
|
"log_odds_chosen": 0.15998534858226776, |
|
"log_odds_ratio": -0.677539050579071, |
|
"logits/chosen": -2.25, |
|
"logits/rejected": -2.328125, |
|
"logps/chosen": -1.2578125, |
|
"logps/rejected": -1.3671875, |
|
"loss": 1.3146, |
|
"nll_loss": 1.28125, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.1259765625, |
|
"rewards/margins": 0.0103759765625, |
|
"rewards/rejected": -0.13671875, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.3017737036995194, |
|
"learning_rate": 4.840034038204686e-07, |
|
"log_odds_chosen": 0.244415283203125, |
|
"log_odds_ratio": -0.649121105670929, |
|
"logits/chosen": -2.328125, |
|
"logits/rejected": -2.4375, |
|
"logps/chosen": -1.1171875, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.2796, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.111328125, |
|
"rewards/margins": 0.0159912109375, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.4440053287886823, |
|
"learning_rate": 4.817806845804308e-07, |
|
"log_odds_chosen": 0.03922118991613388, |
|
"log_odds_ratio": -0.7237304449081421, |
|
"logits/chosen": -2.28125, |
|
"logits/rejected": -2.296875, |
|
"logps/chosen": -1.25, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.3285, |
|
"nll_loss": 1.25, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.125, |
|
"rewards/margins": 0.002655029296875, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.404175576750603, |
|
"learning_rate": 4.794192178062845e-07, |
|
"log_odds_chosen": 0.07508544623851776, |
|
"log_odds_ratio": -0.7403320074081421, |
|
"logits/chosen": -2.265625, |
|
"logits/rejected": -2.265625, |
|
"logps/chosen": -1.2265625, |
|
"logps/rejected": -1.2890625, |
|
"loss": 1.2926, |
|
"nll_loss": 1.2265625, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.123046875, |
|
"rewards/margins": 0.006195068359375, |
|
"rewards/rejected": -0.12890625, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.4237084015053982, |
|
"learning_rate": 4.769204171088849e-07, |
|
"log_odds_chosen": 0.21917724609375, |
|
"log_odds_ratio": -0.671875, |
|
"logits/chosen": -2.359375, |
|
"logits/rejected": -2.4375, |
|
"logps/chosen": -1.203125, |
|
"logps/rejected": -1.34375, |
|
"loss": 1.3217, |
|
"nll_loss": 1.2578125, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.1201171875, |
|
"rewards/margins": 0.0147705078125, |
|
"rewards/rejected": -0.134765625, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.684798684422195, |
|
"learning_rate": 4.742857783093166e-07, |
|
"log_odds_chosen": 0.278564453125, |
|
"log_odds_ratio": -0.6395508050918579, |
|
"logits/chosen": -2.203125, |
|
"logits/rejected": -2.328125, |
|
"logps/chosen": -1.140625, |
|
"logps/rejected": -1.3359375, |
|
"loss": 1.2549, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.11376953125, |
|
"rewards/margins": 0.0201416015625, |
|
"rewards/rejected": -0.1337890625, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.6492535837021225, |
|
"learning_rate": 4.7151687854347157e-07, |
|
"log_odds_chosen": 0.1578369140625, |
|
"log_odds_ratio": -0.69482421875, |
|
"logits/chosen": -2.25, |
|
"logits/rejected": -2.34375, |
|
"logps/chosen": -1.1640625, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.2529, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.11669921875, |
|
"rewards/margins": 0.0106201171875, |
|
"rewards/rejected": -0.126953125, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.8702543670932075, |
|
"learning_rate": 4.6861537531795094e-07, |
|
"log_odds_chosen": 0.20824584364891052, |
|
"log_odds_ratio": -0.66015625, |
|
"logits/chosen": -2.265625, |
|
"logits/rejected": -2.359375, |
|
"logps/chosen": -1.21875, |
|
"logps/rejected": -1.3828125, |
|
"loss": 1.2876, |
|
"nll_loss": 1.25, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1220703125, |
|
"rewards/margins": 0.016357421875, |
|
"rewards/rejected": -0.138671875, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.847118271789134, |
|
"learning_rate": 4.655830055178557e-07, |
|
"log_odds_chosen": 0.23935547471046448, |
|
"log_odds_ratio": -0.6680663824081421, |
|
"logits/chosen": -2.1875, |
|
"logits/rejected": -2.328125, |
|
"logps/chosen": -1.1328125, |
|
"logps/rejected": -1.3046875, |
|
"loss": 1.2766, |
|
"nll_loss": 1.171875, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11328125, |
|
"rewards/margins": 0.016845703125, |
|
"rewards/rejected": -0.130859375, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 2.510359096280682, |
|
"learning_rate": 4.6242158436706015e-07, |
|
"log_odds_chosen": 0.16656494140625, |
|
"log_odds_ratio": -0.706347644329071, |
|
"logits/chosen": -2.296875, |
|
"logits/rejected": -2.359375, |
|
"logps/chosen": -1.1875, |
|
"logps/rejected": -1.3046875, |
|
"loss": 1.2963, |
|
"nll_loss": 1.2265625, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.119140625, |
|
"rewards/margins": 0.011474609375, |
|
"rewards/rejected": -0.130859375, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.2404018809724606, |
|
"learning_rate": 4.5913300434159087e-07, |
|
"log_odds_chosen": 0.2242431640625, |
|
"log_odds_ratio": -0.6768554449081421, |
|
"logits/chosen": -2.28125, |
|
"logits/rejected": -2.359375, |
|
"logps/chosen": -1.21875, |
|
"logps/rejected": -1.390625, |
|
"loss": 1.293, |
|
"nll_loss": 1.21875, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1220703125, |
|
"rewards/margins": 0.016845703125, |
|
"rewards/rejected": -0.138671875, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.3810451499627434, |
|
"learning_rate": 4.5571923403676047e-07, |
|
"log_odds_chosen": 0.30363768339157104, |
|
"log_odds_ratio": -0.6507812738418579, |
|
"logits/chosen": -2.1875, |
|
"logits/rejected": -2.265625, |
|
"logps/chosen": -1.1328125, |
|
"logps/rejected": -1.3359375, |
|
"loss": 1.2838, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.11328125, |
|
"rewards/margins": 0.0201416015625, |
|
"rewards/rejected": -0.1328125, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.671734360059129, |
|
"learning_rate": 4.5218231698873627e-07, |
|
"log_odds_chosen": 0.0892333984375, |
|
"log_odds_ratio": -0.735058605670929, |
|
"logits/chosen": -2.296875, |
|
"logits/rejected": -2.3125, |
|
"logps/chosen": -1.234375, |
|
"logps/rejected": -1.3046875, |
|
"loss": 1.3283, |
|
"nll_loss": 1.296875, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.12353515625, |
|
"rewards/margins": 0.0067138671875, |
|
"rewards/rejected": -0.1298828125, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.5118185799490043, |
|
"learning_rate": 4.4852437045124735e-07, |
|
"log_odds_chosen": 0.08201904594898224, |
|
"log_odds_ratio": -0.7183593511581421, |
|
"logits/chosen": -2.1875, |
|
"logits/rejected": -2.3125, |
|
"logps/chosen": -1.21875, |
|
"logps/rejected": -1.265625, |
|
"loss": 1.2777, |
|
"nll_loss": 1.25, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.12158203125, |
|
"rewards/margins": 0.0047607421875, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.5063550386397604, |
|
"learning_rate": 4.447475841281635e-07, |
|
"log_odds_chosen": 0.2621704041957855, |
|
"log_odds_ratio": -0.6456054449081421, |
|
"logits/chosen": -2.203125, |
|
"logits/rejected": -2.296875, |
|
"logps/chosen": -1.1796875, |
|
"logps/rejected": -1.3515625, |
|
"loss": 1.256, |
|
"nll_loss": 1.25, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.11767578125, |
|
"rewards/margins": 0.01708984375, |
|
"rewards/rejected": -0.134765625, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.3631407616001243, |
|
"learning_rate": 4.408542188627044e-07, |
|
"log_odds_chosen": 0.12116088718175888, |
|
"log_odds_ratio": -0.7083984613418579, |
|
"logits/chosen": -2.3125, |
|
"logits/rejected": -2.328125, |
|
"logps/chosen": -1.1796875, |
|
"logps/rejected": -1.265625, |
|
"loss": 1.2792, |
|
"nll_loss": 1.1953125, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1181640625, |
|
"rewards/margins": 0.00860595703125, |
|
"rewards/rejected": -0.126953125, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.6616497499686393, |
|
"learning_rate": 4.368466052840636e-07, |
|
"log_odds_chosen": 0.17690429091453552, |
|
"log_odds_ratio": -0.6680663824081421, |
|
"logits/chosen": -2.1875, |
|
"logits/rejected": -2.3125, |
|
"logps/chosen": -1.171875, |
|
"logps/rejected": -1.296875, |
|
"loss": 1.2943, |
|
"nll_loss": 1.2109375, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.11669921875, |
|
"rewards/margins": 0.01300048828125, |
|
"rewards/rejected": -0.1298828125, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.5349037092042406, |
|
"learning_rate": 4.327271424122573e-07, |
|
"log_odds_chosen": 0.2679077088832855, |
|
"log_odds_ratio": -0.6576172113418579, |
|
"logits/chosen": -2.1875, |
|
"logits/rejected": -2.28125, |
|
"logps/chosen": -1.1328125, |
|
"logps/rejected": -1.3359375, |
|
"loss": 1.2823, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.11328125, |
|
"rewards/margins": 0.019775390625, |
|
"rewards/rejected": -0.1328125, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.485655487242489, |
|
"learning_rate": 4.284982962220337e-07, |
|
"log_odds_chosen": 0.212890625, |
|
"log_odds_ratio": -0.680371105670929, |
|
"logits/chosen": -2.15625, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -1.15625, |
|
"logps/rejected": -1.296875, |
|
"loss": 1.2641, |
|
"nll_loss": 1.171875, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.115234375, |
|
"rewards/margins": 0.0140380859375, |
|
"rewards/rejected": -0.12890625, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.569720946707249, |
|
"learning_rate": 4.241625981667023e-07, |
|
"log_odds_chosen": 0.20276489853858948, |
|
"log_odds_ratio": -0.681933581829071, |
|
"logits/chosen": -2.25, |
|
"logits/rejected": -2.28125, |
|
"logps/chosen": -1.1875, |
|
"logps/rejected": -1.3125, |
|
"loss": 1.2584, |
|
"nll_loss": 1.21875, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.11865234375, |
|
"rewards/margins": 0.01312255859375, |
|
"rewards/rejected": -0.1318359375, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.6019962569531323, |
|
"learning_rate": 4.19722643662766e-07, |
|
"log_odds_chosen": 0.2719970643520355, |
|
"log_odds_ratio": -0.64111328125, |
|
"logits/chosen": -2.203125, |
|
"logits/rejected": -2.28125, |
|
"logps/chosen": -1.1328125, |
|
"logps/rejected": -1.328125, |
|
"loss": 1.2457, |
|
"nll_loss": 1.15625, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.11279296875, |
|
"rewards/margins": 0.01953125, |
|
"rewards/rejected": -0.1328125, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.378793105209263, |
|
"learning_rate": 4.151810905362643e-07, |
|
"log_odds_chosen": 0.15797118842601776, |
|
"log_odds_ratio": -0.710156261920929, |
|
"logits/chosen": -2.109375, |
|
"logits/rejected": -2.25, |
|
"logps/chosen": -1.1484375, |
|
"logps/rejected": -1.25, |
|
"loss": 1.2691, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.11474609375, |
|
"rewards/margins": 0.01019287109375, |
|
"rewards/rejected": -0.125, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.461627110160161, |
|
"learning_rate": 4.105406574317578e-07, |
|
"log_odds_chosen": 0.16668701171875, |
|
"log_odds_ratio": -0.723925769329071, |
|
"logits/chosen": -2.09375, |
|
"logits/rejected": -2.171875, |
|
"logps/chosen": -1.2265625, |
|
"logps/rejected": -1.34375, |
|
"loss": 1.2486, |
|
"nll_loss": 1.25, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.1220703125, |
|
"rewards/margins": 0.01226806640625, |
|
"rewards/rejected": -0.134765625, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 2.3991709623693485, |
|
"learning_rate": 4.0580412218490424e-07, |
|
"log_odds_chosen": 0.3194335997104645, |
|
"log_odds_ratio": -0.61669921875, |
|
"logits/chosen": -2.15625, |
|
"logits/rejected": -2.25, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.34375, |
|
"loss": 1.261, |
|
"nll_loss": 1.203125, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.11279296875, |
|
"rewards/margins": 0.0218505859375, |
|
"rewards/rejected": -0.134765625, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 2.420136778893825, |
|
"learning_rate": 4.009743201596034e-07, |
|
"log_odds_chosen": 0.32293701171875, |
|
"log_odds_ratio": -0.66357421875, |
|
"logits/chosen": -2.203125, |
|
"logits/rejected": -2.296875, |
|
"logps/chosen": -1.140625, |
|
"logps/rejected": -1.3671875, |
|
"loss": 1.279, |
|
"nll_loss": 1.203125, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1142578125, |
|
"rewards/margins": 0.0223388671875, |
|
"rewards/rejected": -0.13671875, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.576899517162626, |
|
"learning_rate": 3.960541425507039e-07, |
|
"log_odds_chosen": 0.18942871689796448, |
|
"log_odds_ratio": -0.6888672113418579, |
|
"logits/chosen": -2.171875, |
|
"logits/rejected": -2.296875, |
|
"logps/chosen": -1.171875, |
|
"logps/rejected": -1.3203125, |
|
"loss": 1.3049, |
|
"nll_loss": 1.2265625, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.1171875, |
|
"rewards/margins": 0.01458740234375, |
|
"rewards/rejected": -0.1318359375, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.7625972509957144, |
|
"learning_rate": 3.9104653465328853e-07, |
|
"log_odds_chosen": 0.09678955376148224, |
|
"log_odds_ratio": -0.721875011920929, |
|
"logits/chosen": -2.109375, |
|
"logits/rejected": -2.1875, |
|
"logps/chosen": -1.203125, |
|
"logps/rejected": -1.265625, |
|
"loss": 1.2931, |
|
"nll_loss": 1.234375, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1201171875, |
|
"rewards/margins": 0.006103515625, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.45849449078066, |
|
"learning_rate": 3.8595449409957516e-07, |
|
"log_odds_chosen": 0.20249633491039276, |
|
"log_odds_ratio": -0.668749988079071, |
|
"logits/chosen": -1.9921875, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.1015625, |
|
"logps/rejected": -1.2421875, |
|
"loss": 1.2424, |
|
"nll_loss": 1.125, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1103515625, |
|
"rewards/margins": 0.01397705078125, |
|
"rewards/rejected": -0.1240234375, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.336077999068494, |
|
"learning_rate": 3.807810690644868e-07, |
|
"log_odds_chosen": 0.15464477241039276, |
|
"log_odds_ratio": -0.6904296875, |
|
"logits/chosen": -2.0625, |
|
"logits/rejected": -2.1875, |
|
"logps/chosen": -1.1875, |
|
"logps/rejected": -1.3046875, |
|
"loss": 1.2639, |
|
"nll_loss": 1.2109375, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.119140625, |
|
"rewards/margins": 0.011474609375, |
|
"rewards/rejected": -0.1298828125, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.8019381749527845, |
|
"learning_rate": 3.75529356440967e-07, |
|
"log_odds_chosen": -0.002349853515625, |
|
"log_odds_ratio": -0.763867199420929, |
|
"logits/chosen": -2.140625, |
|
"logits/rejected": -2.1875, |
|
"logps/chosen": -1.1875, |
|
"logps/rejected": -1.203125, |
|
"loss": 1.2792, |
|
"nll_loss": 1.21875, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.119140625, |
|
"rewards/margins": 0.0014190673828125, |
|
"rewards/rejected": -0.12060546875, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.904460591649426, |
|
"learning_rate": 3.702024999861312e-07, |
|
"log_odds_chosen": 0.20991210639476776, |
|
"log_odds_ratio": -0.690234363079071, |
|
"logits/chosen": -2.140625, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -1.1484375, |
|
"logps/rejected": -1.3125, |
|
"loss": 1.2689, |
|
"nll_loss": 1.21875, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.115234375, |
|
"rewards/margins": 0.0157470703125, |
|
"rewards/rejected": -0.130859375, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.839022537148172, |
|
"learning_rate": 3.648036884393646e-07, |
|
"log_odds_chosen": 0.11424560844898224, |
|
"log_odds_ratio": -0.715136706829071, |
|
"logits/chosen": -2.078125, |
|
"logits/rejected": -2.234375, |
|
"logps/chosen": -1.1953125, |
|
"logps/rejected": -1.28125, |
|
"loss": 1.2823, |
|
"nll_loss": 1.234375, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.119140625, |
|
"rewards/margins": 0.008544921875, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.6089925230689737, |
|
"learning_rate": 3.593361536134931e-07, |
|
"log_odds_chosen": 0.2776428163051605, |
|
"log_odds_ratio": -0.6419922113418579, |
|
"logits/chosen": -1.9765625, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.2798, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1083984375, |
|
"rewards/margins": 0.0189208984375, |
|
"rewards/rejected": -0.126953125, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.603291469222202, |
|
"learning_rate": 3.538031684601698e-07, |
|
"log_odds_chosen": 0.06999512016773224, |
|
"log_odds_ratio": -0.740429699420929, |
|
"logits/chosen": -2.125, |
|
"logits/rejected": -2.234375, |
|
"logps/chosen": -1.171875, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.2504, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.1171875, |
|
"rewards/margins": 0.006103515625, |
|
"rewards/rejected": -0.123046875, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.524850145022778, |
|
"learning_rate": 3.4820804511063496e-07, |
|
"log_odds_chosen": 0.2641845643520355, |
|
"log_odds_ratio": -0.642285168170929, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.09375, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.25, |
|
"loss": 1.2574, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.10888671875, |
|
"rewards/margins": 0.0157470703125, |
|
"rewards/rejected": -0.125, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.821337451607177, |
|
"learning_rate": 3.425541328930226e-07, |
|
"log_odds_chosen": 0.26170653104782104, |
|
"log_odds_ratio": -0.6304687261581421, |
|
"logits/chosen": -2.046875, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.3125, |
|
"loss": 1.2199, |
|
"nll_loss": 1.15625, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.11279296875, |
|
"rewards/margins": 0.0184326171875, |
|
"rewards/rejected": -0.130859375, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.5621913998689734, |
|
"learning_rate": 3.3684481632740025e-07, |
|
"log_odds_chosen": 0.131927490234375, |
|
"log_odds_ratio": -0.692578136920929, |
|
"logits/chosen": -2.1875, |
|
"logits/rejected": -2.21875, |
|
"logps/chosen": -1.1875, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.2889, |
|
"nll_loss": 1.2421875, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.119140625, |
|
"rewards/margins": 0.0086669921875, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.556936502980912, |
|
"learning_rate": 3.310835130997428e-07, |
|
"log_odds_chosen": 0.18841552734375, |
|
"log_odds_ratio": -0.67431640625, |
|
"logits/chosen": -2.125, |
|
"logits/rejected": -2.234375, |
|
"logps/chosen": -1.1953125, |
|
"logps/rejected": -1.3359375, |
|
"loss": 1.3018, |
|
"nll_loss": 1.2578125, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.11962890625, |
|
"rewards/margins": 0.01348876953125, |
|
"rewards/rejected": -0.1337890625, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.509648126252326, |
|
"learning_rate": 3.2527367201605215e-07, |
|
"log_odds_chosen": 0.28242188692092896, |
|
"log_odds_ratio": -0.6371093988418579, |
|
"logits/chosen": -2.0625, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -1.15625, |
|
"logps/rejected": -1.3359375, |
|
"loss": 1.2715, |
|
"nll_loss": 1.2109375, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.115234375, |
|
"rewards/margins": 0.01806640625, |
|
"rewards/rejected": -0.1337890625, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.572686228637093, |
|
"learning_rate": 3.194187709378479e-07, |
|
"log_odds_chosen": 0.10871581733226776, |
|
"log_odds_ratio": -0.723828136920929, |
|
"logits/chosen": -2.09375, |
|
"logits/rejected": -2.171875, |
|
"logps/chosen": -1.171875, |
|
"logps/rejected": -1.2421875, |
|
"loss": 1.2564, |
|
"nll_loss": 1.2421875, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.11669921875, |
|
"rewards/margins": 0.00714111328125, |
|
"rewards/rejected": -0.1240234375, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.48714277601033, |
|
"learning_rate": 3.135223147002658e-07, |
|
"log_odds_chosen": 0.17313842475414276, |
|
"log_odds_ratio": -0.698437511920929, |
|
"logits/chosen": -2.15625, |
|
"logits/rejected": -2.21875, |
|
"logps/chosen": -1.1328125, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.2525, |
|
"nll_loss": 1.1875, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.11328125, |
|
"rewards/margins": 0.012451171875, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.5499221997702164, |
|
"learning_rate": 3.075878330140079e-07, |
|
"log_odds_chosen": 0.22934570908546448, |
|
"log_odds_ratio": -0.6927734613418579, |
|
"logits/chosen": -2.109375, |
|
"logits/rejected": -2.1875, |
|
"logps/chosen": -1.203125, |
|
"logps/rejected": -1.3515625, |
|
"loss": 1.3172, |
|
"nll_loss": 1.25, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12060546875, |
|
"rewards/margins": 0.01470947265625, |
|
"rewards/rejected": -0.134765625, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.4807391943530206, |
|
"learning_rate": 3.0161887835240353e-07, |
|
"log_odds_chosen": 0.16926269233226776, |
|
"log_odds_ratio": -0.692578136920929, |
|
"logits/chosen": -2.078125, |
|
"logits/rejected": -2.140625, |
|
"logps/chosen": -1.1640625, |
|
"logps/rejected": -1.28125, |
|
"loss": 1.2796, |
|
"nll_loss": 1.21875, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.11669921875, |
|
"rewards/margins": 0.0113525390625, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.789334396425448, |
|
"learning_rate": 2.956190238248425e-07, |
|
"log_odds_chosen": 0.1761474609375, |
|
"log_odds_ratio": -0.71728515625, |
|
"logits/chosen": -2.0625, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -1.15625, |
|
"logps/rejected": -1.28125, |
|
"loss": 1.2759, |
|
"nll_loss": 1.265625, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.11572265625, |
|
"rewards/margins": 0.0126953125, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.4358280165677817, |
|
"learning_rate": 2.8959186103785694e-07, |
|
"log_odds_chosen": 0.3479553163051605, |
|
"log_odds_ratio": -0.630664050579071, |
|
"logits/chosen": -2.0, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.3046875, |
|
"loss": 1.2165, |
|
"nll_loss": 1.125, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.10595703125, |
|
"rewards/margins": 0.0245361328125, |
|
"rewards/rejected": -0.130859375, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.88074218743752, |
|
"learning_rate": 2.8354099794512876e-07, |
|
"log_odds_chosen": 0.14354248344898224, |
|
"log_odds_ratio": -0.6961914300918579, |
|
"logits/chosen": -2.078125, |
|
"logits/rejected": -2.21875, |
|
"logps/chosen": -1.109375, |
|
"logps/rejected": -1.1953125, |
|
"loss": 1.277, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.111328125, |
|
"rewards/margins": 0.00830078125, |
|
"rewards/rejected": -0.11962890625, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.511351318161689, |
|
"learning_rate": 2.774700566877129e-07, |
|
"log_odds_chosen": 0.21488037705421448, |
|
"log_odds_ratio": -0.669921875, |
|
"logits/chosen": -2.046875, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.140625, |
|
"logps/rejected": -1.3046875, |
|
"loss": 1.2247, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.1142578125, |
|
"rewards/margins": 0.0164794921875, |
|
"rewards/rejected": -0.130859375, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 3.010290770764348, |
|
"learning_rate": 2.713826714257669e-07, |
|
"log_odds_chosen": 0.19861450791358948, |
|
"log_odds_ratio": -0.6786133050918579, |
|
"logits/chosen": -2.109375, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.1796875, |
|
"logps/rejected": -1.3046875, |
|
"loss": 1.236, |
|
"nll_loss": 1.2109375, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.11767578125, |
|
"rewards/margins": 0.01239013671875, |
|
"rewards/rejected": -0.1298828125, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.27093551536218, |
|
"learning_rate": 2.6528248616308595e-07, |
|
"log_odds_chosen": 0.31080931425094604, |
|
"log_odds_ratio": -0.6333984136581421, |
|
"logits/chosen": -2.0625, |
|
"logits/rejected": -2.140625, |
|
"logps/chosen": -1.1484375, |
|
"logps/rejected": -1.3671875, |
|
"loss": 1.2256, |
|
"nll_loss": 1.1953125, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.115234375, |
|
"rewards/margins": 0.0216064453125, |
|
"rewards/rejected": -0.13671875, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.5700766425464, |
|
"learning_rate": 2.591731525657454e-07, |
|
"log_odds_chosen": 0.2696166932582855, |
|
"log_odds_ratio": -0.6527343988418579, |
|
"logits/chosen": -2.046875, |
|
"logits/rejected": -2.140625, |
|
"logps/chosen": -1.1171875, |
|
"logps/rejected": -1.296875, |
|
"loss": 1.2227, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.11181640625, |
|
"rewards/margins": 0.0177001953125, |
|
"rewards/rejected": -0.1298828125, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.3277253612496933, |
|
"learning_rate": 2.5305832777615644e-07, |
|
"log_odds_chosen": 0.19566650688648224, |
|
"log_odds_ratio": -0.688183605670929, |
|
"logits/chosen": -2.109375, |
|
"logits/rejected": -2.1875, |
|
"logps/chosen": -1.1640625, |
|
"logps/rejected": -1.296875, |
|
"loss": 1.2602, |
|
"nll_loss": 1.25, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1162109375, |
|
"rewards/margins": 0.01287841796875, |
|
"rewards/rejected": -0.12890625, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.847511378794556, |
|
"learning_rate": 2.469416722238436e-07, |
|
"log_odds_chosen": 0.1658935546875, |
|
"log_odds_ratio": -0.706738293170929, |
|
"logits/chosen": -2.078125, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -1.1875, |
|
"logps/rejected": -1.296875, |
|
"loss": 1.2591, |
|
"nll_loss": 1.21875, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.119140625, |
|
"rewards/margins": 0.01080322265625, |
|
"rewards/rejected": -0.1298828125, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.6549295176096672, |
|
"learning_rate": 2.4082684743425454e-07, |
|
"log_odds_chosen": 0.08632812649011612, |
|
"log_odds_ratio": -0.722851574420929, |
|
"logits/chosen": -2.0625, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -1.140625, |
|
"logps/rejected": -1.203125, |
|
"loss": 1.2585, |
|
"nll_loss": 1.1875, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.11376953125, |
|
"rewards/margins": 0.0067138671875, |
|
"rewards/rejected": -0.12060546875, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.743059460411326, |
|
"learning_rate": 2.3471751383691403e-07, |
|
"log_odds_chosen": 0.22829589247703552, |
|
"log_odds_ratio": -0.6942383050918579, |
|
"logits/chosen": -2.046875, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -1.140625, |
|
"logps/rejected": -1.2890625, |
|
"loss": 1.2288, |
|
"nll_loss": 1.1875, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.1142578125, |
|
"rewards/margins": 0.014892578125, |
|
"rewards/rejected": -0.12890625, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.5772075525222253, |
|
"learning_rate": 2.2861732857423306e-07, |
|
"log_odds_chosen": 0.27777099609375, |
|
"log_odds_ratio": -0.6568359136581421, |
|
"logits/chosen": -1.9609375, |
|
"logits/rejected": -2.015625, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.28125, |
|
"loss": 1.2301, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.1083984375, |
|
"rewards/margins": 0.02001953125, |
|
"rewards/rejected": -0.12890625, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.9548400351876465, |
|
"learning_rate": 2.225299433122871e-07, |
|
"log_odds_chosen": 0.19624023139476776, |
|
"log_odds_ratio": -0.6937500238418579, |
|
"logits/chosen": -2.046875, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.21875, |
|
"logps/rejected": -1.3828125, |
|
"loss": 1.2425, |
|
"nll_loss": 1.21875, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1220703125, |
|
"rewards/margins": 0.015869140625, |
|
"rewards/rejected": -0.1376953125, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.0054061279771362, |
|
"learning_rate": 2.1645900205487122e-07, |
|
"log_odds_chosen": 0.21121826767921448, |
|
"log_odds_ratio": -0.654492199420929, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.2646, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.10888671875, |
|
"rewards/margins": 0.014892578125, |
|
"rewards/rejected": -0.12353515625, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 2.6164133362622484, |
|
"learning_rate": 2.1040813896214307e-07, |
|
"log_odds_chosen": 0.150909423828125, |
|
"log_odds_ratio": -0.6998046636581421, |
|
"logits/chosen": -2.0, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -1.1171875, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.263, |
|
"nll_loss": 1.15625, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.11181640625, |
|
"rewards/margins": 0.0118408203125, |
|
"rewards/rejected": -0.12353515625, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 2.507875262223547, |
|
"learning_rate": 2.0438097617515741e-07, |
|
"log_odds_chosen": 0.241455078125, |
|
"log_odds_ratio": -0.6556640863418579, |
|
"logits/chosen": -1.890625, |
|
"logits/rejected": -2.015625, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.2638, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.10888671875, |
|
"rewards/margins": 0.01409912109375, |
|
"rewards/rejected": -0.123046875, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 2.507656226813357, |
|
"learning_rate": 1.9838112164759652e-07, |
|
"log_odds_chosen": 0.12969970703125, |
|
"log_odds_ratio": -0.7134765386581421, |
|
"logits/chosen": -2.046875, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -1.1328125, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.2807, |
|
"nll_loss": 1.15625, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11279296875, |
|
"rewards/margins": 0.0103759765625, |
|
"rewards/rejected": -0.12353515625, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 2.4386366953549508, |
|
"learning_rate": 1.9241216698599202e-07, |
|
"log_odds_chosen": 0.16312256455421448, |
|
"log_odds_ratio": -0.6949218511581421, |
|
"logits/chosen": -2.125, |
|
"logits/rejected": -2.171875, |
|
"logps/chosen": -1.171875, |
|
"logps/rejected": -1.2890625, |
|
"loss": 1.2391, |
|
"nll_loss": 1.234375, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.11669921875, |
|
"rewards/margins": 0.01214599609375, |
|
"rewards/rejected": -0.12890625, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 2.4804086256409157, |
|
"learning_rate": 1.8647768529973423e-07, |
|
"log_odds_chosen": 0.17501220107078552, |
|
"log_odds_ratio": -0.6958984136581421, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.203125, |
|
"loss": 1.227, |
|
"nll_loss": 1.125, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1064453125, |
|
"rewards/margins": 0.0135498046875, |
|
"rewards/rejected": -0.1201171875, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 2.4623419233165986, |
|
"learning_rate": 1.80581229062152e-07, |
|
"log_odds_chosen": 0.17165526747703552, |
|
"log_odds_ratio": -0.6900390386581421, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -1.1640625, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.2806, |
|
"nll_loss": 1.234375, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1162109375, |
|
"rewards/margins": 0.0111083984375, |
|
"rewards/rejected": -0.126953125, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 2.797106319585485, |
|
"learning_rate": 1.7472632798394788e-07, |
|
"log_odds_chosen": 0.102783203125, |
|
"log_odds_ratio": -0.738574206829071, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.1796875, |
|
"loss": 1.2623, |
|
"nll_loss": 1.203125, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.1123046875, |
|
"rewards/margins": 0.0057373046875, |
|
"rewards/rejected": -0.1181640625, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 2.502768284944301, |
|
"learning_rate": 1.6891648690025718e-07, |
|
"log_odds_chosen": 0.20386962592601776, |
|
"log_odds_ratio": -0.687695324420929, |
|
"logits/chosen": -2.046875, |
|
"logits/rejected": -2.09375, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.2421875, |
|
"loss": 1.2348, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.109375, |
|
"rewards/margins": 0.01507568359375, |
|
"rewards/rejected": -0.12451171875, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.656181511294093, |
|
"learning_rate": 1.6315518367259978e-07, |
|
"log_odds_chosen": 0.27000731229782104, |
|
"log_odds_ratio": -0.647265613079071, |
|
"logits/chosen": -1.9765625, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -1.1015625, |
|
"logps/rejected": -1.2890625, |
|
"loss": 1.2463, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1103515625, |
|
"rewards/margins": 0.0185546875, |
|
"rewards/rejected": -0.12890625, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 3.0809025740755227, |
|
"learning_rate": 1.574458671069774e-07, |
|
"log_odds_chosen": 0.10646972805261612, |
|
"log_odds_ratio": -0.731249988079071, |
|
"logits/chosen": -1.9609375, |
|
"logits/rejected": -2.09375, |
|
"logps/chosen": -1.1328125, |
|
"logps/rejected": -1.2109375, |
|
"loss": 1.243, |
|
"nll_loss": 1.171875, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.11279296875, |
|
"rewards/margins": 0.0078125, |
|
"rewards/rejected": -0.12060546875, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 2.5307625663876787, |
|
"learning_rate": 1.5179195488936504e-07, |
|
"log_odds_chosen": 0.22603759169578552, |
|
"log_odds_ratio": -0.6683593988418579, |
|
"logits/chosen": -2.046875, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.109375, |
|
"logps/rejected": -1.265625, |
|
"loss": 1.224, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.1103515625, |
|
"rewards/margins": 0.0162353515625, |
|
"rewards/rejected": -0.126953125, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 2.587513457015136, |
|
"learning_rate": 1.4619683153983016e-07, |
|
"log_odds_chosen": 0.15230712294578552, |
|
"log_odds_ratio": -0.70263671875, |
|
"logits/chosen": -2.0625, |
|
"logits/rejected": -2.171875, |
|
"logps/chosen": -1.1484375, |
|
"logps/rejected": -1.2421875, |
|
"loss": 1.2417, |
|
"nll_loss": 1.1953125, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.11474609375, |
|
"rewards/margins": 0.00909423828125, |
|
"rewards/rejected": -0.1240234375, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.468276881587768, |
|
"learning_rate": 1.4066384638650687e-07, |
|
"log_odds_chosen": 0.26385498046875, |
|
"log_odds_ratio": -0.650195300579071, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -1.15625, |
|
"logps/rejected": -1.3359375, |
|
"loss": 1.2174, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.11572265625, |
|
"rewards/margins": 0.0177001953125, |
|
"rewards/rejected": -0.1337890625, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 2.5705148701270537, |
|
"learning_rate": 1.351963115606354e-07, |
|
"log_odds_chosen": 0.18297119438648224, |
|
"log_odds_ratio": -0.689746081829071, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.171875, |
|
"logps/chosen": -1.109375, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.2253, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.11083984375, |
|
"rewards/margins": 0.011962890625, |
|
"rewards/rejected": -0.123046875, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 2.5100478432366558, |
|
"learning_rate": 1.297975000138688e-07, |
|
"log_odds_chosen": 0.10567627102136612, |
|
"log_odds_ratio": -0.7080078125, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.15625, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.257, |
|
"nll_loss": 1.2109375, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.11572265625, |
|
"rewards/margins": 0.00787353515625, |
|
"rewards/rejected": -0.12353515625, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 2.6847113433850924, |
|
"learning_rate": 1.24470643559033e-07, |
|
"log_odds_chosen": 0.27998048067092896, |
|
"log_odds_ratio": -0.650585949420929, |
|
"logits/chosen": -2.0, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.3125, |
|
"loss": 1.2519, |
|
"nll_loss": 1.1875, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.11279296875, |
|
"rewards/margins": 0.018798828125, |
|
"rewards/rejected": -0.1318359375, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 2.695150048132808, |
|
"learning_rate": 1.1921893093551323e-07, |
|
"log_odds_chosen": 0.21813353896141052, |
|
"log_odds_ratio": -0.673144519329071, |
|
"logits/chosen": -1.9921875, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -1.1171875, |
|
"logps/rejected": -1.265625, |
|
"loss": 1.2468, |
|
"nll_loss": 1.1171875, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.11181640625, |
|
"rewards/margins": 0.0147705078125, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 2.575139032183112, |
|
"learning_rate": 1.140455059004248e-07, |
|
"log_odds_chosen": 0.33759766817092896, |
|
"log_odds_ratio": -0.63525390625, |
|
"logits/chosen": -2.078125, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -1.1171875, |
|
"logps/rejected": -1.359375, |
|
"loss": 1.2473, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.11181640625, |
|
"rewards/margins": 0.0244140625, |
|
"rewards/rejected": -0.13671875, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 2.3412172556068858, |
|
"learning_rate": 1.0895346534671146e-07, |
|
"log_odds_chosen": 0.28547364473342896, |
|
"log_odds_ratio": -0.6714843511581421, |
|
"logits/chosen": -2.0625, |
|
"logits/rejected": -2.171875, |
|
"logps/chosen": -1.109375, |
|
"logps/rejected": -1.296875, |
|
"loss": 1.223, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.111328125, |
|
"rewards/margins": 0.018798828125, |
|
"rewards/rejected": -0.1298828125, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 2.440451621766773, |
|
"learning_rate": 1.0394585744929605e-07, |
|
"log_odds_chosen": 0.09172363579273224, |
|
"log_odds_ratio": -0.706250011920929, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -1.1796875, |
|
"logps/rejected": -1.25, |
|
"loss": 1.2244, |
|
"nll_loss": 1.203125, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1181640625, |
|
"rewards/margins": 0.0067138671875, |
|
"rewards/rejected": -0.125, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 2.7872635135771486, |
|
"learning_rate": 9.902567984039659e-08, |
|
"log_odds_chosen": 0.10517577826976776, |
|
"log_odds_ratio": -0.718554675579071, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.1796875, |
|
"logps/rejected": -1.25, |
|
"loss": 1.2433, |
|
"nll_loss": 1.234375, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1181640625, |
|
"rewards/margins": 0.0067138671875, |
|
"rewards/rejected": -0.125, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 2.6062059656472667, |
|
"learning_rate": 9.419587781509572e-08, |
|
"log_odds_chosen": 0.21335449814796448, |
|
"log_odds_ratio": -0.6875, |
|
"logits/chosen": -1.9296875, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -1.0859375, |
|
"logps/rejected": -1.2421875, |
|
"loss": 1.2339, |
|
"nll_loss": 1.109375, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1083984375, |
|
"rewards/margins": 0.016357421875, |
|
"rewards/rejected": -0.12451171875, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 2.387458644437053, |
|
"learning_rate": 8.945934256824217e-08, |
|
"log_odds_chosen": 0.19075927138328552, |
|
"log_odds_ratio": -0.67236328125, |
|
"logits/chosen": -2.0, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.2428, |
|
"nll_loss": 1.171875, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1123046875, |
|
"rewards/margins": 0.0137939453125, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 2.7020254464929536, |
|
"learning_rate": 8.481890946373562e-08, |
|
"log_odds_chosen": 0.16676025092601776, |
|
"log_odds_ratio": -0.6944335699081421, |
|
"logits/chosen": -1.9921875, |
|
"logits/rejected": -2.09375, |
|
"logps/chosen": -1.1484375, |
|
"logps/rejected": -1.265625, |
|
"loss": 1.2652, |
|
"nll_loss": 1.1953125, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.115234375, |
|
"rewards/margins": 0.01165771484375, |
|
"rewards/rejected": -0.126953125, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 2.5506342978080365, |
|
"learning_rate": 8.027735633723401e-08, |
|
"log_odds_chosen": 0.19404907524585724, |
|
"log_odds_ratio": -0.678906261920929, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -1.15625, |
|
"logps/rejected": -1.2890625, |
|
"loss": 1.2371, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.11572265625, |
|
"rewards/margins": 0.01336669921875, |
|
"rewards/rejected": -0.12890625, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 2.9131209890812215, |
|
"learning_rate": 7.583740183329768e-08, |
|
"log_odds_chosen": 0.25482177734375, |
|
"log_odds_ratio": -0.654589831829071, |
|
"logits/chosen": -2.078125, |
|
"logits/rejected": -2.1875, |
|
"logps/chosen": -1.1328125, |
|
"logps/rejected": -1.3125, |
|
"loss": 1.2099, |
|
"nll_loss": 1.2109375, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.11376953125, |
|
"rewards/margins": 0.017578125, |
|
"rewards/rejected": -0.130859375, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 2.4151795293745417, |
|
"learning_rate": 7.150170377796627e-08, |
|
"log_odds_chosen": 0.09471435844898224, |
|
"log_odds_ratio": -0.719921886920929, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -1.15625, |
|
"logps/rejected": -1.2109375, |
|
"loss": 1.2681, |
|
"nll_loss": 1.1875, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.11572265625, |
|
"rewards/margins": 0.0057373046875, |
|
"rewards/rejected": -0.12109375, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 2.9894142206703074, |
|
"learning_rate": 6.727285758774276e-08, |
|
"log_odds_chosen": 0.20228271186351776, |
|
"log_odds_ratio": -0.686718761920929, |
|
"logits/chosen": -1.9296875, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.2048, |
|
"nll_loss": 1.1171875, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.107421875, |
|
"rewards/margins": 0.01513671875, |
|
"rewards/rejected": -0.12255859375, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 2.292724337045773, |
|
"learning_rate": 6.315339471593645e-08, |
|
"log_odds_chosen": 0.2957824766635895, |
|
"log_odds_ratio": -0.6419922113418579, |
|
"logits/chosen": -1.9921875, |
|
"logits/rejected": -2.171875, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.328125, |
|
"loss": 1.2562, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.11279296875, |
|
"rewards/margins": 0.0201416015625, |
|
"rewards/rejected": -0.1328125, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 2.4858379206596783, |
|
"learning_rate": 5.9145781137295596e-08, |
|
"log_odds_chosen": 0.11273193359375, |
|
"log_odds_ratio": -0.712109386920929, |
|
"logits/chosen": -1.9765625, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.1640625, |
|
"logps/rejected": -1.25, |
|
"loss": 1.2469, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.11669921875, |
|
"rewards/margins": 0.00823974609375, |
|
"rewards/rejected": -0.125, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 2.9554773310288303, |
|
"learning_rate": 5.5252415871836514e-08, |
|
"log_odds_chosen": 0.19149169325828552, |
|
"log_odds_ratio": -0.6700195074081421, |
|
"logits/chosen": -2.078125, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -1.140625, |
|
"logps/rejected": -1.265625, |
|
"loss": 1.2691, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.11376953125, |
|
"rewards/margins": 0.01287841796875, |
|
"rewards/rejected": -0.126953125, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 2.455900615151912, |
|
"learning_rate": 5.147562954875267e-08, |
|
"log_odds_chosen": 0.25535887479782104, |
|
"log_odds_ratio": -0.662304699420929, |
|
"logits/chosen": -1.9765625, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -1.0625, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.2103, |
|
"nll_loss": 1.078125, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.1064453125, |
|
"rewards/margins": 0.01611328125, |
|
"rewards/rejected": -0.12255859375, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 2.6012002451079055, |
|
"learning_rate": 4.781768301126374e-08, |
|
"log_odds_chosen": 0.10560913383960724, |
|
"log_odds_ratio": -0.7105468511581421, |
|
"logits/chosen": -2.0625, |
|
"logits/rejected": -2.09375, |
|
"logps/chosen": -1.171875, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.2439, |
|
"nll_loss": 1.1875, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.11767578125, |
|
"rewards/margins": 0.00823974609375, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 2.2529151127172526, |
|
"learning_rate": 4.428076596323954e-08, |
|
"log_odds_chosen": 0.21270751953125, |
|
"log_odds_ratio": -0.6869140863418579, |
|
"logits/chosen": -1.953125, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.28125, |
|
"loss": 1.2745, |
|
"nll_loss": 1.2265625, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.11279296875, |
|
"rewards/margins": 0.01513671875, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 2.4154203157415512, |
|
"learning_rate": 4.0866995658409145e-08, |
|
"log_odds_chosen": 0.25830078125, |
|
"log_odds_ratio": -0.65966796875, |
|
"logits/chosen": -1.9921875, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.265625, |
|
"loss": 1.2827, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.109375, |
|
"rewards/margins": 0.0174560546875, |
|
"rewards/rejected": -0.126953125, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 2.447937914506964, |
|
"learning_rate": 3.757841563293987e-08, |
|
"log_odds_chosen": 0.24925537407398224, |
|
"log_odds_ratio": -0.6611328125, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.09375, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.296875, |
|
"loss": 1.2345, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1123046875, |
|
"rewards/margins": 0.0167236328125, |
|
"rewards/rejected": -0.12890625, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 2.5349608625933375, |
|
"learning_rate": 3.441699448214433e-08, |
|
"log_odds_chosen": 0.20963135361671448, |
|
"log_odds_ratio": -0.689257800579071, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.109375, |
|
"logps/chosen": -1.140625, |
|
"logps/rejected": -1.28125, |
|
"loss": 1.2633, |
|
"nll_loss": 1.1875, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.11376953125, |
|
"rewards/margins": 0.01409912109375, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 2.6135424725635987, |
|
"learning_rate": 3.138462468204914e-08, |
|
"log_odds_chosen": 0.25098878145217896, |
|
"log_odds_ratio": -0.660449206829071, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.25, |
|
"loss": 1.2263, |
|
"nll_loss": 1.15625, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.10791015625, |
|
"rewards/margins": 0.01708984375, |
|
"rewards/rejected": -0.125, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 2.450199821395702, |
|
"learning_rate": 2.8483121456528454e-08, |
|
"log_odds_chosen": 0.24144287407398224, |
|
"log_odds_ratio": -0.661816418170929, |
|
"logits/chosen": -2.046875, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -1.171875, |
|
"logps/rejected": -1.34375, |
|
"loss": 1.2541, |
|
"nll_loss": 1.2421875, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1171875, |
|
"rewards/margins": 0.016845703125, |
|
"rewards/rejected": -0.1337890625, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 2.4363743869256904, |
|
"learning_rate": 2.5714221690683462e-08, |
|
"log_odds_chosen": 0.122314453125, |
|
"log_odds_ratio": -0.7119140625, |
|
"logits/chosen": -1.9609375, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -1.1484375, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.3106, |
|
"nll_loss": 1.1953125, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.115234375, |
|
"rewards/margins": 0.0079345703125, |
|
"rewards/rejected": -0.123046875, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 2.5756303193348327, |
|
"learning_rate": 2.307958289111514e-08, |
|
"log_odds_chosen": 0.19765624403953552, |
|
"log_odds_ratio": -0.6796875, |
|
"logits/chosen": -2.0, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -1.15625, |
|
"logps/rejected": -1.296875, |
|
"loss": 1.2333, |
|
"nll_loss": 1.1875, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11572265625, |
|
"rewards/margins": 0.01397705078125, |
|
"rewards/rejected": -0.1298828125, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 2.553964198420633, |
|
"learning_rate": 2.0580782193715506e-08, |
|
"log_odds_chosen": 0.15519408881664276, |
|
"log_odds_ratio": -0.704882800579071, |
|
"logits/chosen": -1.9296875, |
|
"logits/rejected": -2.0, |
|
"logps/chosen": -1.15625, |
|
"logps/rejected": -1.2578125, |
|
"loss": 1.2388, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.11572265625, |
|
"rewards/margins": 0.01031494140625, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 2.469702033641708, |
|
"learning_rate": 1.821931541956914e-08, |
|
"log_odds_chosen": 0.4207519590854645, |
|
"log_odds_ratio": -0.582226574420929, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.359375, |
|
"loss": 1.2354, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.109375, |
|
"rewards/margins": 0.027099609375, |
|
"rewards/rejected": -0.1357421875, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 2.4889196799977786, |
|
"learning_rate": 1.5996596179531364e-08, |
|
"log_odds_chosen": 0.12117920070886612, |
|
"log_odds_ratio": -0.710644543170929, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -1.1484375, |
|
"logps/rejected": -1.234375, |
|
"loss": 1.2454, |
|
"nll_loss": 1.21875, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.115234375, |
|
"rewards/margins": 0.0084228515625, |
|
"rewards/rejected": -0.12353515625, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 2.6921341795857088, |
|
"learning_rate": 1.3913955028018925e-08, |
|
"log_odds_chosen": 0.0946044921875, |
|
"log_odds_ratio": -0.734375, |
|
"logits/chosen": -1.9921875, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -1.15625, |
|
"logps/rejected": -1.21875, |
|
"loss": 1.2338, |
|
"nll_loss": 1.203125, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.115234375, |
|
"rewards/margins": 0.00634765625, |
|
"rewards/rejected": -0.12158203125, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 2.9234303938288444, |
|
"learning_rate": 1.1972638666519153e-08, |
|
"log_odds_chosen": 0.30915528535842896, |
|
"log_odds_ratio": -0.6298828125, |
|
"logits/chosen": -1.9453125, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.328125, |
|
"loss": 1.2258, |
|
"nll_loss": 1.140625, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.109375, |
|
"rewards/margins": 0.0230712890625, |
|
"rewards/rejected": -0.1328125, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 2.488742998417297, |
|
"learning_rate": 1.0173809197295074e-08, |
|
"log_odds_chosen": 0.1483154296875, |
|
"log_odds_ratio": -0.6932617425918579, |
|
"logits/chosen": -2.015625, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -1.171875, |
|
"logps/rejected": -1.28125, |
|
"loss": 1.2868, |
|
"nll_loss": 1.234375, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11767578125, |
|
"rewards/margins": 0.01080322265625, |
|
"rewards/rejected": -0.12890625, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 2.930123126278221, |
|
"learning_rate": 8.518543427732949e-09, |
|
"log_odds_chosen": 0.2968383729457855, |
|
"log_odds_ratio": -0.6597656011581421, |
|
"logits/chosen": -1.9609375, |
|
"logits/rejected": -2.0, |
|
"logps/chosen": -1.1015625, |
|
"logps/rejected": -1.3359375, |
|
"loss": 1.2515, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.1103515625, |
|
"rewards/margins": 0.0230712890625, |
|
"rewards/rejected": -0.1337890625, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 2.4910759658116306, |
|
"learning_rate": 7.007832225748356e-09, |
|
"log_odds_chosen": 0.3276123106479645, |
|
"log_odds_ratio": -0.623339831829071, |
|
"logits/chosen": -1.8359375, |
|
"logits/rejected": -1.953125, |
|
"logps/chosen": -1.03125, |
|
"logps/rejected": -1.25, |
|
"loss": 1.2122, |
|
"nll_loss": 1.1015625, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.1025390625, |
|
"rewards/margins": 0.0220947265625, |
|
"rewards/rejected": -0.125, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 2.6952772741810462, |
|
"learning_rate": 5.642579926637553e-09, |
|
"log_odds_chosen": 0.102996826171875, |
|
"log_odds_ratio": -0.718554675579071, |
|
"logits/chosen": -1.953125, |
|
"logits/rejected": -1.984375, |
|
"logps/chosen": -1.1875, |
|
"logps/rejected": -1.265625, |
|
"loss": 1.2679, |
|
"nll_loss": 1.2421875, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.11865234375, |
|
"rewards/margins": 0.007659912109375, |
|
"rewards/rejected": -0.1259765625, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 2.564277781118033, |
|
"learning_rate": 4.4236037917277644e-09, |
|
"log_odds_chosen": 0.17406006157398224, |
|
"log_odds_ratio": -0.697949230670929, |
|
"logits/chosen": -2.078125, |
|
"logits/rejected": -2.171875, |
|
"logps/chosen": -1.1875, |
|
"logps/rejected": -1.3125, |
|
"loss": 1.2736, |
|
"nll_loss": 1.2265625, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11865234375, |
|
"rewards/margins": 0.012939453125, |
|
"rewards/rejected": -0.1318359375, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 2.6044066137402764, |
|
"learning_rate": 3.3516335191523528e-09, |
|
"log_odds_chosen": 0.22431640326976776, |
|
"log_odds_ratio": -0.6830078363418579, |
|
"logits/chosen": -1.9765625, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -1.15625, |
|
"logps/rejected": -1.3046875, |
|
"loss": 1.221, |
|
"nll_loss": 1.1640625, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.115234375, |
|
"rewards/margins": 0.01483154296875, |
|
"rewards/rejected": -0.1298828125, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 2.569982717697751, |
|
"learning_rate": 2.4273108070415605e-09, |
|
"log_odds_chosen": 0.24323730170726776, |
|
"log_odds_ratio": -0.675000011920929, |
|
"logits/chosen": -1.984375, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.3046875, |
|
"loss": 1.2342, |
|
"nll_loss": 1.1953125, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1123046875, |
|
"rewards/margins": 0.017822265625, |
|
"rewards/rejected": -0.130859375, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 2.9474176384005117, |
|
"learning_rate": 1.651188969392564e-09, |
|
"log_odds_chosen": 0.18778076767921448, |
|
"log_odds_ratio": -0.675000011920929, |
|
"logits/chosen": -2.03125, |
|
"logits/rejected": -2.171875, |
|
"logps/chosen": -1.1484375, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.2187, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.11474609375, |
|
"rewards/margins": 0.012939453125, |
|
"rewards/rejected": -0.1279296875, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 2.5311989884303747, |
|
"learning_rate": 1.0237326048466443e-09, |
|
"log_odds_chosen": 0.2604003846645355, |
|
"log_odds_ratio": -0.6591796875, |
|
"logits/chosen": -1.9296875, |
|
"logits/rejected": -1.96875, |
|
"logps/chosen": -1.1328125, |
|
"logps/rejected": -1.3125, |
|
"loss": 1.2465, |
|
"nll_loss": 1.1875, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.11328125, |
|
"rewards/margins": 0.0185546875, |
|
"rewards/rejected": -0.1318359375, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 2.4286187387911005, |
|
"learning_rate": 5.453173185734073e-10, |
|
"log_odds_chosen": 0.158447265625, |
|
"log_odds_ratio": -0.71044921875, |
|
"logits/chosen": -2.0, |
|
"logits/rejected": -2.09375, |
|
"logps/chosen": -1.15625, |
|
"logps/rejected": -1.2734375, |
|
"loss": 1.2469, |
|
"nll_loss": 1.2265625, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1162109375, |
|
"rewards/margins": 0.01123046875, |
|
"rewards/rejected": -0.126953125, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 2.7786302717518256, |
|
"learning_rate": 2.1622949742761133e-10, |
|
"log_odds_chosen": 0.21599121391773224, |
|
"log_odds_ratio": -0.6719726324081421, |
|
"logits/chosen": -1.9609375, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -1.09375, |
|
"logps/rejected": -1.2265625, |
|
"loss": 1.2478, |
|
"nll_loss": 1.1796875, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.109375, |
|
"rewards/margins": 0.0133056640625, |
|
"rewards/rejected": -0.123046875, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.2980939714984134, |
|
"learning_rate": 3.666613851355249e-11, |
|
"log_odds_chosen": 0.22458496689796448, |
|
"log_odds_ratio": -0.666210949420929, |
|
"logits/chosen": -2.078125, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -1.1640625, |
|
"logps/rejected": -1.34375, |
|
"loss": 1.2281, |
|
"nll_loss": 1.2109375, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.1162109375, |
|
"rewards/margins": 0.017822265625, |
|
"rewards/rejected": -0.1337890625, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1427, |
|
"total_flos": 0.0, |
|
"train_loss": 1.2897131164005442, |
|
"train_runtime": 9812.389, |
|
"train_samples_per_second": 6.979, |
|
"train_steps_per_second": 0.145 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1427, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|