|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.489327811404906, |
|
"eval_steps": 500, |
|
"global_step": 24, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02038865880853775, |
|
"grad_norm": 10.073711395263672, |
|
"learning_rate": 1e-05, |
|
"log_odds_chosen": -0.03190982714295387, |
|
"log_odds_ratio": -0.7741295099258423, |
|
"logits/chosen": -2.249119758605957, |
|
"logits/rejected": -2.2597196102142334, |
|
"logps/chosen": -5.181360721588135, |
|
"logps/rejected": -5.149355888366699, |
|
"loss": 5.4553, |
|
"nll_loss": 5.37792444229126, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5181360840797424, |
|
"rewards/margins": -0.0032004904933273792, |
|
"rewards/rejected": -0.5149356126785278, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0407773176170755, |
|
"grad_norm": 9.501357078552246, |
|
"learning_rate": 2e-05, |
|
"log_odds_chosen": -0.044936653226614, |
|
"log_odds_ratio": -0.7695960402488708, |
|
"logits/chosen": -2.2498621940612793, |
|
"logits/rejected": -2.260127067565918, |
|
"logps/chosen": -5.01981258392334, |
|
"logps/rejected": -4.974053382873535, |
|
"loss": 5.3213, |
|
"nll_loss": 5.2443060874938965, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.501981258392334, |
|
"rewards/margins": -0.0045759049244225025, |
|
"rewards/rejected": -0.49740538001060486, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.06116597642561325, |
|
"grad_norm": 9.111624717712402, |
|
"learning_rate": 3e-05, |
|
"log_odds_chosen": -0.008361914195120335, |
|
"log_odds_ratio": -0.7590417265892029, |
|
"logits/chosen": -2.287618398666382, |
|
"logits/rejected": -2.3050174713134766, |
|
"logps/chosen": -4.931052207946777, |
|
"logps/rejected": -4.922361850738525, |
|
"loss": 5.195, |
|
"nll_loss": 5.119076251983643, |
|
"rewards/accuracies": 0.453125, |
|
"rewards/chosen": -0.4931052029132843, |
|
"rewards/margins": -0.0008690543472766876, |
|
"rewards/rejected": -0.4922361373901367, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.081554635234151, |
|
"grad_norm": 9.194869995117188, |
|
"learning_rate": 4e-05, |
|
"log_odds_chosen": -0.06829527020454407, |
|
"log_odds_ratio": -0.7628177404403687, |
|
"logits/chosen": -2.2833240032196045, |
|
"logits/rejected": -2.2870724201202393, |
|
"logps/chosen": -4.916200160980225, |
|
"logps/rejected": -4.847894668579102, |
|
"loss": 5.1872, |
|
"nll_loss": 5.110929489135742, |
|
"rewards/accuracies": 0.421875, |
|
"rewards/chosen": -0.4916200041770935, |
|
"rewards/margins": -0.006830527447164059, |
|
"rewards/rejected": -0.48478949069976807, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.10194329404268876, |
|
"grad_norm": 9.331730842590332, |
|
"learning_rate": 5e-05, |
|
"log_odds_chosen": -0.07571306079626083, |
|
"log_odds_ratio": -0.7841904163360596, |
|
"logits/chosen": -2.3229925632476807, |
|
"logits/rejected": -2.337273120880127, |
|
"logps/chosen": -4.687668323516846, |
|
"logps/rejected": -4.612335205078125, |
|
"loss": 4.9652, |
|
"nll_loss": 4.886806011199951, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.46876680850982666, |
|
"rewards/margins": -0.0075332350097596645, |
|
"rewards/rejected": -0.46123358607292175, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.1223319528512265, |
|
"grad_norm": 9.857681274414062, |
|
"learning_rate": 6e-05, |
|
"log_odds_chosen": -0.19471649825572968, |
|
"log_odds_ratio": -0.8327277302742004, |
|
"logits/chosen": -2.3558108806610107, |
|
"logits/rejected": -2.3881752490997314, |
|
"logps/chosen": -4.344555377960205, |
|
"logps/rejected": -4.152055263519287, |
|
"loss": 4.6379, |
|
"nll_loss": 4.554634094238281, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.4344555735588074, |
|
"rewards/margins": -0.019250018522143364, |
|
"rewards/rejected": -0.41520553827285767, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.14272061165976427, |
|
"grad_norm": 7.811158657073975, |
|
"learning_rate": 7e-05, |
|
"log_odds_chosen": -0.0166391022503376, |
|
"log_odds_ratio": -0.7380757331848145, |
|
"logits/chosen": -2.388141632080078, |
|
"logits/rejected": -2.4195711612701416, |
|
"logps/chosen": -3.969034194946289, |
|
"logps/rejected": -3.951852798461914, |
|
"loss": 4.2265, |
|
"nll_loss": 4.152710914611816, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.39690345525741577, |
|
"rewards/margins": -0.0017181318253278732, |
|
"rewards/rejected": -0.39518529176712036, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.163109270468302, |
|
"grad_norm": 7.66775369644165, |
|
"learning_rate": 8e-05, |
|
"log_odds_chosen": 0.02697194740176201, |
|
"log_odds_ratio": -0.7153270840644836, |
|
"logits/chosen": -2.477844476699829, |
|
"logits/rejected": -2.474121570587158, |
|
"logps/chosen": -3.4541428089141846, |
|
"logps/rejected": -3.4769277572631836, |
|
"loss": 3.6939, |
|
"nll_loss": 3.6224091053009033, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.34541425108909607, |
|
"rewards/margins": 0.0022785186301916838, |
|
"rewards/rejected": -0.3476927876472473, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.18349792927683975, |
|
"grad_norm": 6.6126298904418945, |
|
"learning_rate": 9e-05, |
|
"log_odds_chosen": 0.16790322959423065, |
|
"log_odds_ratio": -0.6347489356994629, |
|
"logits/chosen": -2.5679516792297363, |
|
"logits/rejected": -2.5715692043304443, |
|
"logps/chosen": -2.9835710525512695, |
|
"logps/rejected": -3.140634059906006, |
|
"loss": 3.2274, |
|
"nll_loss": 3.163942813873291, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2983570992946625, |
|
"rewards/margins": 0.015706289559602737, |
|
"rewards/rejected": -0.3140634000301361, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.20388658808537752, |
|
"grad_norm": 6.9558281898498535, |
|
"learning_rate": 0.0001, |
|
"log_odds_chosen": 0.17972886562347412, |
|
"log_odds_ratio": -0.6277650594711304, |
|
"logits/chosen": -2.6125497817993164, |
|
"logits/rejected": -2.6118133068084717, |
|
"logps/chosen": -2.6501834392547607, |
|
"logps/rejected": -2.8179259300231934, |
|
"loss": 2.8684, |
|
"nll_loss": 2.8056435585021973, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.26501837372779846, |
|
"rewards/margins": 0.01677425391972065, |
|
"rewards/rejected": -0.2817925810813904, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.22427524689391526, |
|
"grad_norm": 7.142885684967041, |
|
"learning_rate": 9.743589743589744e-05, |
|
"log_odds_chosen": 0.11056404560804367, |
|
"log_odds_ratio": -0.662803053855896, |
|
"logits/chosen": -2.665982723236084, |
|
"logits/rejected": -2.6722326278686523, |
|
"logps/chosen": -2.1527411937713623, |
|
"logps/rejected": -2.2486300468444824, |
|
"loss": 2.383, |
|
"nll_loss": 2.316676378250122, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2152741551399231, |
|
"rewards/margins": 0.009588859975337982, |
|
"rewards/rejected": -0.2248629927635193, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.244663905702453, |
|
"grad_norm": 4.808487415313721, |
|
"learning_rate": 9.487179487179487e-05, |
|
"log_odds_chosen": 0.09381386637687683, |
|
"log_odds_ratio": -0.6690701842308044, |
|
"logits/chosen": -2.6697304248809814, |
|
"logits/rejected": -2.684809446334839, |
|
"logps/chosen": -1.6216576099395752, |
|
"logps/rejected": -1.695598840713501, |
|
"loss": 1.9113, |
|
"nll_loss": 1.8443692922592163, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.16216576099395752, |
|
"rewards/margins": 0.007394128944724798, |
|
"rewards/rejected": -0.16955989599227905, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.26505256451099074, |
|
"grad_norm": 2.420715093612671, |
|
"learning_rate": 9.230769230769232e-05, |
|
"log_odds_chosen": 0.10657332092523575, |
|
"log_odds_ratio": -0.6556077599525452, |
|
"logits/chosen": -2.683443784713745, |
|
"logits/rejected": -2.6935503482818604, |
|
"logps/chosen": -1.4284594058990479, |
|
"logps/rejected": -1.508366584777832, |
|
"loss": 1.7205, |
|
"nll_loss": 1.654909372329712, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.14284594357013702, |
|
"rewards/margins": 0.007990704849362373, |
|
"rewards/rejected": -0.15083666145801544, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.28544122331952854, |
|
"grad_norm": 2.851985216140747, |
|
"learning_rate": 8.974358974358975e-05, |
|
"log_odds_chosen": 0.156551793217659, |
|
"log_odds_ratio": -0.6308416128158569, |
|
"logits/chosen": -2.6921679973602295, |
|
"logits/rejected": -2.6880078315734863, |
|
"logps/chosen": -1.332141637802124, |
|
"logps/rejected": -1.4488908052444458, |
|
"loss": 1.5952, |
|
"nll_loss": 1.5321555137634277, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.13321417570114136, |
|
"rewards/margins": 0.01167491264641285, |
|
"rewards/rejected": -0.14488908648490906, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.3058298821280663, |
|
"grad_norm": 2.4120404720306396, |
|
"learning_rate": 8.717948717948718e-05, |
|
"log_odds_chosen": 0.0856461226940155, |
|
"log_odds_ratio": -0.6610275506973267, |
|
"logits/chosen": -2.6284494400024414, |
|
"logits/rejected": -2.6575706005096436, |
|
"logps/chosen": -1.342667579650879, |
|
"logps/rejected": -1.4053808450698853, |
|
"loss": 1.5791, |
|
"nll_loss": 1.5130078792572021, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.13426676392555237, |
|
"rewards/margins": 0.006271325517445803, |
|
"rewards/rejected": -0.1405380815267563, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.326218540936604, |
|
"grad_norm": 2.2445013523101807, |
|
"learning_rate": 8.461538461538461e-05, |
|
"log_odds_chosen": 0.18759144842624664, |
|
"log_odds_ratio": -0.617063045501709, |
|
"logits/chosen": -2.574859857559204, |
|
"logits/rejected": -2.5844004154205322, |
|
"logps/chosen": -1.24087393283844, |
|
"logps/rejected": -1.3730320930480957, |
|
"loss": 1.5016, |
|
"nll_loss": 1.439911127090454, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.1240873858332634, |
|
"rewards/margins": 0.013215810991823673, |
|
"rewards/rejected": -0.1373032033443451, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.34660719974514176, |
|
"grad_norm": 2.1423990726470947, |
|
"learning_rate": 8.205128205128205e-05, |
|
"log_odds_chosen": 0.2600902020931244, |
|
"log_odds_ratio": -0.5869597792625427, |
|
"logits/chosen": -2.4825172424316406, |
|
"logits/rejected": -2.4918212890625, |
|
"logps/chosen": -1.1209421157836914, |
|
"logps/rejected": -1.2962286472320557, |
|
"loss": 1.4016, |
|
"nll_loss": 1.3428654670715332, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.1120942085981369, |
|
"rewards/margins": 0.01752866432070732, |
|
"rewards/rejected": -0.12962287664413452, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.3669958585536795, |
|
"grad_norm": 2.0624334812164307, |
|
"learning_rate": 7.948717948717948e-05, |
|
"log_odds_chosen": 0.19398407638072968, |
|
"log_odds_ratio": -0.6082965731620789, |
|
"logits/chosen": -2.403656482696533, |
|
"logits/rejected": -2.4159321784973145, |
|
"logps/chosen": -1.0768545866012573, |
|
"logps/rejected": -1.204436182975769, |
|
"loss": 1.3427, |
|
"nll_loss": 1.2818515300750732, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.10768546164035797, |
|
"rewards/margins": 0.012758150696754456, |
|
"rewards/rejected": -0.12044361233711243, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.38738451736221724, |
|
"grad_norm": 2.0077810287475586, |
|
"learning_rate": 7.692307692307693e-05, |
|
"log_odds_chosen": 0.2774355113506317, |
|
"log_odds_ratio": -0.5771675109863281, |
|
"logits/chosen": -2.32011079788208, |
|
"logits/rejected": -2.309196949005127, |
|
"logps/chosen": -0.982606828212738, |
|
"logps/rejected": -1.1639189720153809, |
|
"loss": 1.272, |
|
"nll_loss": 1.2142971754074097, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.09826068580150604, |
|
"rewards/margins": 0.01813122071325779, |
|
"rewards/rejected": -0.11639191210269928, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.40777317617075504, |
|
"grad_norm": 1.5790979862213135, |
|
"learning_rate": 7.435897435897436e-05, |
|
"log_odds_chosen": 0.3015543520450592, |
|
"log_odds_ratio": -0.571921706199646, |
|
"logits/chosen": -2.204582691192627, |
|
"logits/rejected": -2.2353363037109375, |
|
"logps/chosen": -0.8750602006912231, |
|
"logps/rejected": -1.055103063583374, |
|
"loss": 1.1722, |
|
"nll_loss": 1.1150364875793457, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.08750602602958679, |
|
"rewards/margins": 0.018004287034273148, |
|
"rewards/rejected": -0.10551030933856964, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.4281618349792928, |
|
"grad_norm": 1.4550942182540894, |
|
"learning_rate": 7.17948717948718e-05, |
|
"log_odds_chosen": 0.2803484797477722, |
|
"log_odds_ratio": -0.5891110897064209, |
|
"logits/chosen": -2.177445888519287, |
|
"logits/rejected": -2.1862730979919434, |
|
"logps/chosen": -0.8740922808647156, |
|
"logps/rejected": -1.0376415252685547, |
|
"loss": 1.184, |
|
"nll_loss": 1.125113606452942, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.08740923553705215, |
|
"rewards/margins": 0.01635492593050003, |
|
"rewards/rejected": -0.10376415401697159, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.4485504937878305, |
|
"grad_norm": 1.5131646394729614, |
|
"learning_rate": 6.923076923076924e-05, |
|
"log_odds_chosen": 0.3196752965450287, |
|
"log_odds_ratio": -0.5673432350158691, |
|
"logits/chosen": -2.139277458190918, |
|
"logits/rejected": -2.1643970012664795, |
|
"logps/chosen": -0.8622347116470337, |
|
"logps/rejected": -1.060903549194336, |
|
"loss": 1.1375, |
|
"nll_loss": 1.0807565450668335, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.08622346818447113, |
|
"rewards/margins": 0.019866881892085075, |
|
"rewards/rejected": -0.10609035938978195, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.46893915259636826, |
|
"grad_norm": 1.7129428386688232, |
|
"learning_rate": 6.666666666666667e-05, |
|
"log_odds_chosen": 0.3558296263217926, |
|
"log_odds_ratio": -0.551045298576355, |
|
"logits/chosen": -2.1384575366973877, |
|
"logits/rejected": -2.1461870670318604, |
|
"logps/chosen": -0.8587465286254883, |
|
"logps/rejected": -1.0661779642105103, |
|
"loss": 1.1327, |
|
"nll_loss": 1.0775768756866455, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.08587465435266495, |
|
"rewards/margins": 0.02074313722550869, |
|
"rewards/rejected": -0.10661779344081879, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.489327811404906, |
|
"grad_norm": 1.7440029382705688, |
|
"learning_rate": 6.410256410256412e-05, |
|
"log_odds_chosen": 0.32858026027679443, |
|
"log_odds_ratio": -0.5619024038314819, |
|
"logits/chosen": -2.1546478271484375, |
|
"logits/rejected": -2.1749908924102783, |
|
"logps/chosen": -0.835049033164978, |
|
"logps/rejected": -1.0198912620544434, |
|
"loss": 1.1057, |
|
"nll_loss": 1.0495383739471436, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.08350490033626556, |
|
"rewards/margins": 0.01848422922194004, |
|
"rewards/rejected": -0.10198913514614105, |
|
"step": 24 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 49, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 4, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|