|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.988679245283019, |
|
"eval_steps": 500, |
|
"global_step": 396, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.25e-08, |
|
"logps/chosen": -22.472335815429688, |
|
"logps/rejected": -25.36812400817871, |
|
"loss": 0.6931, |
|
"losses/dpo": 0.6931471824645996, |
|
"losses/sft": 0.7711470723152161, |
|
"losses/total": 0.6931471824645996, |
|
"ref_logps/chosen": -22.472335815429688, |
|
"ref_logps/rejected": -25.36812400817871, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.5e-08, |
|
"logps/chosen": -21.278339385986328, |
|
"logps/rejected": -25.130128860473633, |
|
"loss": 0.6931, |
|
"losses/dpo": 0.6931471824645996, |
|
"losses/sft": 0.8523496985435486, |
|
"losses/total": 0.6931471824645996, |
|
"ref_logps/chosen": -21.278339385986328, |
|
"ref_logps/rejected": -25.130128860473633, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.75e-08, |
|
"logps/chosen": -21.53506851196289, |
|
"logps/rejected": -26.44188690185547, |
|
"loss": 0.693, |
|
"losses/dpo": 0.6928481459617615, |
|
"losses/sft": 0.6631997227668762, |
|
"losses/total": 0.6928481459617615, |
|
"ref_logps/chosen": -21.54958152770996, |
|
"ref_logps/rejected": -26.452028274536133, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.001451290212571621, |
|
"rewards/margins": 0.00043702672701328993, |
|
"rewards/rejected": 0.0010142631363123655, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5e-08, |
|
"logps/chosen": -21.846920013427734, |
|
"logps/rejected": -26.232192993164062, |
|
"loss": 0.6935, |
|
"losses/dpo": 0.6933612823486328, |
|
"losses/sft": 0.819932758808136, |
|
"losses/total": 0.6933612823486328, |
|
"ref_logps/chosen": -21.842269897460938, |
|
"ref_logps/rejected": -26.234174728393555, |
|
"rewards/accuracies": 0.4609375, |
|
"rewards/chosen": -0.00046504498459398746, |
|
"rewards/margins": -0.000663207727484405, |
|
"rewards/rejected": 0.0001981628010980785, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.25e-08, |
|
"logps/chosen": -23.82025146484375, |
|
"logps/rejected": -26.558738708496094, |
|
"loss": 0.693, |
|
"losses/dpo": 0.6929464340209961, |
|
"losses/sft": 0.7624120712280273, |
|
"losses/total": 0.6929464340209961, |
|
"ref_logps/chosen": -23.817665100097656, |
|
"ref_logps/rejected": -26.55132293701172, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -0.00025857496075332165, |
|
"rewards/margins": 0.0004831284750252962, |
|
"rewards/rejected": -0.0007417035521939397, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.5e-08, |
|
"logps/chosen": -25.088871002197266, |
|
"logps/rejected": -29.653806686401367, |
|
"loss": 0.6923, |
|
"losses/dpo": 0.6934427618980408, |
|
"losses/sft": 0.7273141741752625, |
|
"losses/total": 0.6934427618980408, |
|
"ref_logps/chosen": -25.0992431640625, |
|
"ref_logps/rejected": -29.64551544189453, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": 0.0010370061499997973, |
|
"rewards/margins": 0.0018662326037883759, |
|
"rewards/rejected": -0.0008292265702039003, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.75e-08, |
|
"logps/chosen": -23.075027465820312, |
|
"logps/rejected": -27.50556182861328, |
|
"loss": 0.693, |
|
"losses/dpo": 0.6948896646499634, |
|
"losses/sft": 0.6432714462280273, |
|
"losses/total": 0.6948896646499634, |
|
"ref_logps/chosen": -23.066946029663086, |
|
"ref_logps/rejected": -27.4930362701416, |
|
"rewards/accuracies": 0.4765625, |
|
"rewards/chosen": -0.0008082209387794137, |
|
"rewards/margins": 0.0004443599027581513, |
|
"rewards/rejected": -0.001252580899745226, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1e-07, |
|
"logps/chosen": -21.430335998535156, |
|
"logps/rejected": -29.949260711669922, |
|
"loss": 0.6933, |
|
"losses/dpo": 0.6911635398864746, |
|
"losses/sft": 0.8042243123054504, |
|
"losses/total": 0.6911635398864746, |
|
"ref_logps/chosen": -21.44394302368164, |
|
"ref_logps/rejected": -29.96406364440918, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": 0.0013606649590656161, |
|
"rewards/margins": -0.00011985772289335728, |
|
"rewards/rejected": 0.0014805227983742952, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.125e-07, |
|
"logps/chosen": -23.053390502929688, |
|
"logps/rejected": -27.866111755371094, |
|
"loss": 0.6923, |
|
"losses/dpo": 0.6914368271827698, |
|
"losses/sft": 0.8787165284156799, |
|
"losses/total": 0.6914368271827698, |
|
"ref_logps/chosen": -23.060134887695312, |
|
"ref_logps/rejected": -27.85537338256836, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": 0.0006745259161107242, |
|
"rewards/margins": 0.0017485294956713915, |
|
"rewards/rejected": -0.0010740034049376845, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.25e-07, |
|
"logps/chosen": -23.637466430664062, |
|
"logps/rejected": -29.587308883666992, |
|
"loss": 0.6922, |
|
"losses/dpo": 0.690066397190094, |
|
"losses/sft": 1.0419297218322754, |
|
"losses/total": 0.690066397190094, |
|
"ref_logps/chosen": -23.649028778076172, |
|
"ref_logps/rejected": -29.579374313354492, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.0011563875013962388, |
|
"rewards/margins": 0.0019498697947710752, |
|
"rewards/rejected": -0.0007934823515824974, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.375e-07, |
|
"logps/chosen": -22.38899040222168, |
|
"logps/rejected": -24.971160888671875, |
|
"loss": 0.6926, |
|
"losses/dpo": 0.6951523423194885, |
|
"losses/sft": 0.9443475008010864, |
|
"losses/total": 0.6951523423194885, |
|
"ref_logps/chosen": -22.398780822753906, |
|
"ref_logps/rejected": -24.969751358032227, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": 0.000978996278718114, |
|
"rewards/margins": 0.0011200353037565947, |
|
"rewards/rejected": -0.00014103890862315893, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5e-07, |
|
"logps/chosen": -20.165252685546875, |
|
"logps/rejected": -26.619457244873047, |
|
"loss": 0.6946, |
|
"losses/dpo": 0.6987805962562561, |
|
"losses/sft": 0.876471221446991, |
|
"losses/total": 0.6987805962562561, |
|
"ref_logps/chosen": -20.14897918701172, |
|
"ref_logps/rejected": -26.63131332397461, |
|
"rewards/accuracies": 0.421875, |
|
"rewards/chosen": -0.001627539866603911, |
|
"rewards/margins": -0.002813115483149886, |
|
"rewards/rejected": 0.0011855755001306534, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.625e-07, |
|
"logps/chosen": -25.07573699951172, |
|
"logps/rejected": -25.939855575561523, |
|
"loss": 0.6936, |
|
"losses/dpo": 0.6952416896820068, |
|
"losses/sft": 0.9322817325592041, |
|
"losses/total": 0.6952416896820068, |
|
"ref_logps/chosen": -25.075220108032227, |
|
"ref_logps/rejected": -25.947521209716797, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -5.185510963201523e-05, |
|
"rewards/margins": -0.0008183673489838839, |
|
"rewards/rejected": 0.0007665121229365468, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.75e-07, |
|
"logps/chosen": -22.58213233947754, |
|
"logps/rejected": -27.590843200683594, |
|
"loss": 0.692, |
|
"losses/dpo": 0.6901522874832153, |
|
"losses/sft": 0.8234641551971436, |
|
"losses/total": 0.6901522874832153, |
|
"ref_logps/chosen": -22.58617401123047, |
|
"ref_logps/rejected": -27.570602416992188, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.00040407240157946944, |
|
"rewards/margins": 0.002427991945296526, |
|
"rewards/rejected": -0.0020239197183400393, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.875e-07, |
|
"logps/chosen": -23.004196166992188, |
|
"logps/rejected": -25.858173370361328, |
|
"loss": 0.6925, |
|
"losses/dpo": 0.6923660039901733, |
|
"losses/sft": 0.7345502376556396, |
|
"losses/total": 0.6923660039901733, |
|
"ref_logps/chosen": -23.010601043701172, |
|
"ref_logps/rejected": -25.85067367553711, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.0006403709994629025, |
|
"rewards/margins": 0.0013903947547078133, |
|
"rewards/rejected": -0.0007500239298678935, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2e-07, |
|
"logps/chosen": -21.546062469482422, |
|
"logps/rejected": -25.777360916137695, |
|
"loss": 0.6931, |
|
"losses/dpo": 0.6901232004165649, |
|
"losses/sft": 0.8039647936820984, |
|
"losses/total": 0.6901232004165649, |
|
"ref_logps/chosen": -21.53840446472168, |
|
"ref_logps/rejected": -25.766767501831055, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -0.0007656853413209319, |
|
"rewards/margins": 0.0002937153331004083, |
|
"rewards/rejected": -0.0010594006162136793, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.1249999999999998e-07, |
|
"logps/chosen": -22.206989288330078, |
|
"logps/rejected": -27.877731323242188, |
|
"loss": 0.6937, |
|
"losses/dpo": 0.6932737827301025, |
|
"losses/sft": 0.7667961716651917, |
|
"losses/total": 0.6932737827301025, |
|
"ref_logps/chosen": -22.19771957397461, |
|
"ref_logps/rejected": -27.87958335876465, |
|
"rewards/accuracies": 0.4453125, |
|
"rewards/chosen": -0.0009270801674574614, |
|
"rewards/margins": -0.0011123311705887318, |
|
"rewards/rejected": 0.0001852509449236095, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.25e-07, |
|
"logps/chosen": -21.215139389038086, |
|
"logps/rejected": -25.75381088256836, |
|
"loss": 0.693, |
|
"losses/dpo": 0.6932240724563599, |
|
"losses/sft": 0.736687421798706, |
|
"losses/total": 0.6932240724563599, |
|
"ref_logps/chosen": -21.212387084960938, |
|
"ref_logps/rejected": -25.746326446533203, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00027507508639246225, |
|
"rewards/margins": 0.000473553518531844, |
|
"rewards/rejected": -0.0007486287504434586, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.3749999999999998e-07, |
|
"logps/chosen": -22.499832153320312, |
|
"logps/rejected": -26.145751953125, |
|
"loss": 0.6932, |
|
"losses/dpo": 0.6942628622055054, |
|
"losses/sft": 0.7466978430747986, |
|
"losses/total": 0.6942628622055054, |
|
"ref_logps/chosen": -22.496463775634766, |
|
"ref_logps/rejected": -26.141849517822266, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.0003368390607647598, |
|
"rewards/margins": 5.3280091378837824e-05, |
|
"rewards/rejected": -0.0003901191521435976, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5e-07, |
|
"logps/chosen": -21.5505428314209, |
|
"logps/rejected": -25.036113739013672, |
|
"loss": 0.693, |
|
"losses/dpo": 0.688271164894104, |
|
"losses/sft": 0.8725596070289612, |
|
"losses/total": 0.688271164894104, |
|
"ref_logps/chosen": -21.558109283447266, |
|
"ref_logps/rejected": -25.038726806640625, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0007566105341538787, |
|
"rewards/margins": 0.0004949538852088153, |
|
"rewards/rejected": 0.0002616568235680461, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.625e-07, |
|
"logps/chosen": -21.649169921875, |
|
"logps/rejected": -25.741392135620117, |
|
"loss": 0.6917, |
|
"losses/dpo": 0.6939514875411987, |
|
"losses/sft": 0.7525328993797302, |
|
"losses/total": 0.6939514875411987, |
|
"ref_logps/chosen": -21.666126251220703, |
|
"ref_logps/rejected": -25.72817611694336, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.0016953760059550405, |
|
"rewards/margins": 0.003016936592757702, |
|
"rewards/rejected": -0.0013215603539720178, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.75e-07, |
|
"logps/chosen": -21.422496795654297, |
|
"logps/rejected": -26.453773498535156, |
|
"loss": 0.695, |
|
"losses/dpo": 0.699163019657135, |
|
"losses/sft": 0.7248706221580505, |
|
"losses/total": 0.699163019657135, |
|
"ref_logps/chosen": -21.396032333374023, |
|
"ref_logps/rejected": -26.464006423950195, |
|
"rewards/accuracies": 0.4296875, |
|
"rewards/chosen": -0.002646287204697728, |
|
"rewards/margins": -0.003669553902000189, |
|
"rewards/rejected": 0.0010232668137177825, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8749999999999995e-07, |
|
"logps/chosen": -21.21988868713379, |
|
"logps/rejected": -25.13469886779785, |
|
"loss": 0.6929, |
|
"losses/dpo": 0.6908746957778931, |
|
"losses/sft": 0.7899657487869263, |
|
"losses/total": 0.6908746957778931, |
|
"ref_logps/chosen": -21.22311782836914, |
|
"ref_logps/rejected": -25.131580352783203, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.0003229643334634602, |
|
"rewards/margins": 0.0006348754977807403, |
|
"rewards/rejected": -0.00031191116431728005, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3e-07, |
|
"logps/chosen": -24.172225952148438, |
|
"logps/rejected": -27.93877410888672, |
|
"loss": 0.6936, |
|
"losses/dpo": 0.6931849718093872, |
|
"losses/sft": 0.7270597219467163, |
|
"losses/total": 0.6931849718093872, |
|
"ref_logps/chosen": -24.16461944580078, |
|
"ref_logps/rejected": -27.940391540527344, |
|
"rewards/accuracies": 0.4765625, |
|
"rewards/chosen": -0.000760397466365248, |
|
"rewards/margins": -0.0009220357751473784, |
|
"rewards/rejected": 0.00016163833788596094, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logps/chosen": -23.023677825927734, |
|
"logps/rejected": -23.77918243408203, |
|
"loss": 0.6929, |
|
"losses/dpo": 0.6930486559867859, |
|
"losses/sft": 0.779391884803772, |
|
"losses/total": 0.6930486559867859, |
|
"ref_logps/chosen": -23.028684616088867, |
|
"ref_logps/rejected": -23.778560638427734, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.0005005812272429466, |
|
"rewards/margins": 0.0005629429360851645, |
|
"rewards/rejected": -6.23615924268961e-05, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.25e-07, |
|
"logps/chosen": -24.240978240966797, |
|
"logps/rejected": -30.183570861816406, |
|
"loss": 0.6923, |
|
"losses/dpo": 0.6919558644294739, |
|
"losses/sft": 0.8828473091125488, |
|
"losses/total": 0.6919558644294739, |
|
"ref_logps/chosen": -24.253870010375977, |
|
"ref_logps/rejected": -30.17804718017578, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": 0.001289202249608934, |
|
"rewards/margins": 0.0018417320679873228, |
|
"rewards/rejected": -0.0005525298183783889, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.375e-07, |
|
"logps/chosen": -22.371261596679688, |
|
"logps/rejected": -28.10503387451172, |
|
"loss": 0.6911, |
|
"losses/dpo": 0.6919010281562805, |
|
"losses/sft": 0.9361266493797302, |
|
"losses/total": 0.6919010281562805, |
|
"ref_logps/chosen": -22.4020938873291, |
|
"ref_logps/rejected": -28.094257354736328, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0030831946060061455, |
|
"rewards/margins": 0.004160974640399218, |
|
"rewards/rejected": -0.001077780150808394, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.5e-07, |
|
"logps/chosen": -21.107967376708984, |
|
"logps/rejected": -27.053752899169922, |
|
"loss": 0.6921, |
|
"losses/dpo": 0.6916664838790894, |
|
"losses/sft": 0.8491181135177612, |
|
"losses/total": 0.6916664838790894, |
|
"ref_logps/chosen": -21.1080379486084, |
|
"ref_logps/rejected": -27.03229331970215, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": 7.087946869432926e-06, |
|
"rewards/margins": 0.0021530785597860813, |
|
"rewards/rejected": -0.00214599072933197, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.6249999999999997e-07, |
|
"logps/chosen": -23.424461364746094, |
|
"logps/rejected": -27.092483520507812, |
|
"loss": 0.6906, |
|
"losses/dpo": 0.6926239728927612, |
|
"losses/sft": 0.7789149284362793, |
|
"losses/total": 0.6926239728927612, |
|
"ref_logps/chosen": -23.46218490600586, |
|
"ref_logps/rejected": -27.07909393310547, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0037725979927927256, |
|
"rewards/margins": 0.005111560225486755, |
|
"rewards/rejected": -0.0013389625819399953, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.75e-07, |
|
"logps/chosen": -22.859556198120117, |
|
"logps/rejected": -27.201662063598633, |
|
"loss": 0.6933, |
|
"losses/dpo": 0.6948127746582031, |
|
"losses/sft": 0.7969105243682861, |
|
"losses/total": 0.6948127746582031, |
|
"ref_logps/chosen": -22.869096755981445, |
|
"ref_logps/rejected": -27.212430953979492, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.0009542852640151978, |
|
"rewards/margins": -0.00012272456660866737, |
|
"rewards/rejected": 0.001077009947039187, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.875e-07, |
|
"logps/chosen": -22.666168212890625, |
|
"logps/rejected": -25.310596466064453, |
|
"loss": 0.6918, |
|
"losses/dpo": 0.6922581195831299, |
|
"losses/sft": 0.7759775519371033, |
|
"losses/total": 0.6922581195831299, |
|
"ref_logps/chosen": -22.68026351928711, |
|
"ref_logps/rejected": -25.297521591186523, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.001409594900906086, |
|
"rewards/margins": 0.002717201365157962, |
|
"rewards/rejected": -0.001307606347836554, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4e-07, |
|
"logps/chosen": -23.281084060668945, |
|
"logps/rejected": -28.84569549560547, |
|
"loss": 0.693, |
|
"losses/dpo": 0.6980300545692444, |
|
"losses/sft": 0.7636886835098267, |
|
"losses/total": 0.6980300545692444, |
|
"ref_logps/chosen": -23.299869537353516, |
|
"ref_logps/rejected": -28.859834671020508, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": 0.0018782642437145114, |
|
"rewards/margins": 0.000464284501504153, |
|
"rewards/rejected": 0.0014139798004180193, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1249999999999997e-07, |
|
"logps/chosen": -20.922544479370117, |
|
"logps/rejected": -27.139453887939453, |
|
"loss": 0.6914, |
|
"losses/dpo": 0.6892759799957275, |
|
"losses/sft": 0.7832686901092529, |
|
"losses/total": 0.6892759799957275, |
|
"ref_logps/chosen": -20.949806213378906, |
|
"ref_logps/rejected": -27.13178253173828, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": 0.0027261325158178806, |
|
"rewards/margins": 0.003493295982480049, |
|
"rewards/rejected": -0.0007671635248698294, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2499999999999995e-07, |
|
"logps/chosen": -22.535436630249023, |
|
"logps/rejected": -26.6143798828125, |
|
"loss": 0.6926, |
|
"losses/dpo": 0.6938276290893555, |
|
"losses/sft": 0.7895969152450562, |
|
"losses/total": 0.6938276290893555, |
|
"ref_logps/chosen": -22.540180206298828, |
|
"ref_logps/rejected": -26.607288360595703, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0004745282931253314, |
|
"rewards/margins": 0.0011834825854748487, |
|
"rewards/rejected": -0.0007089540013112128, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.375e-07, |
|
"logps/chosen": -21.444934844970703, |
|
"logps/rejected": -27.329378128051758, |
|
"loss": 0.6928, |
|
"losses/dpo": 0.6910836100578308, |
|
"losses/sft": 0.7998620271682739, |
|
"losses/total": 0.6910836100578308, |
|
"ref_logps/chosen": -21.460729598999023, |
|
"ref_logps/rejected": -27.336944580078125, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.0015797324012964964, |
|
"rewards/margins": 0.0008232423570007086, |
|
"rewards/rejected": 0.0007564900442957878, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5e-07, |
|
"logps/chosen": -22.847640991210938, |
|
"logps/rejected": -26.22686195373535, |
|
"loss": 0.6938, |
|
"losses/dpo": 0.6915764808654785, |
|
"losses/sft": 0.7927474975585938, |
|
"losses/total": 0.6915764808654785, |
|
"ref_logps/chosen": -22.84987449645996, |
|
"ref_logps/rejected": -26.240699768066406, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.00022311191423796117, |
|
"rewards/margins": -0.001160716055892408, |
|
"rewards/rejected": 0.0013838279992341995, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.625e-07, |
|
"logps/chosen": -23.097599029541016, |
|
"logps/rejected": -25.179964065551758, |
|
"loss": 0.6925, |
|
"losses/dpo": 0.6903287768363953, |
|
"losses/sft": 0.8005999326705933, |
|
"losses/total": 0.6903287768363953, |
|
"ref_logps/chosen": -23.103515625, |
|
"ref_logps/rejected": -25.171833038330078, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": 0.000591703865211457, |
|
"rewards/margins": 0.001405149232596159, |
|
"rewards/rejected": -0.000813445309177041, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.7499999999999995e-07, |
|
"logps/chosen": -23.07529640197754, |
|
"logps/rejected": -26.14615821838379, |
|
"loss": 0.6912, |
|
"losses/dpo": 0.6978300213813782, |
|
"losses/sft": 0.7380209565162659, |
|
"losses/total": 0.6978300213813782, |
|
"ref_logps/chosen": -23.113910675048828, |
|
"ref_logps/rejected": -26.144914627075195, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.003861566074192524, |
|
"rewards/margins": 0.003985891118645668, |
|
"rewards/rejected": -0.00012432527728378773, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.875e-07, |
|
"logps/chosen": -23.091575622558594, |
|
"logps/rejected": -28.207073211669922, |
|
"loss": 0.6933, |
|
"losses/dpo": 0.6926023960113525, |
|
"losses/sft": 0.7966833710670471, |
|
"losses/total": 0.6926023960113525, |
|
"ref_logps/chosen": -23.102182388305664, |
|
"ref_logps/rejected": -28.21949577331543, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": 0.0010607184376567602, |
|
"rewards/margins": -0.00018126872600987554, |
|
"rewards/rejected": 0.0012419875711202621, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5e-07, |
|
"logps/chosen": -21.683151245117188, |
|
"logps/rejected": -27.111900329589844, |
|
"loss": 0.6903, |
|
"losses/dpo": 0.6866278648376465, |
|
"losses/sft": 0.887488842010498, |
|
"losses/total": 0.6866278648376465, |
|
"ref_logps/chosen": -21.714126586914062, |
|
"ref_logps/rejected": -27.08427619934082, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.003097555134445429, |
|
"rewards/margins": 0.0058600143529474735, |
|
"rewards/rejected": -0.0027624592185020447, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.985955056179775e-07, |
|
"logps/chosen": -23.24443817138672, |
|
"logps/rejected": -24.057823181152344, |
|
"loss": 0.6924, |
|
"losses/dpo": 0.6903232336044312, |
|
"losses/sft": 0.7454457879066467, |
|
"losses/total": 0.6903232336044312, |
|
"ref_logps/chosen": -23.264373779296875, |
|
"ref_logps/rejected": -24.060710906982422, |
|
"rewards/accuracies": 0.4765625, |
|
"rewards/chosen": 0.0019934140145778656, |
|
"rewards/margins": 0.0017045673448592424, |
|
"rewards/rejected": 0.0002888469025492668, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.97191011235955e-07, |
|
"logps/chosen": -22.751291275024414, |
|
"logps/rejected": -23.993690490722656, |
|
"loss": 0.692, |
|
"losses/dpo": 0.6917561292648315, |
|
"losses/sft": 0.8527467846870422, |
|
"losses/total": 0.6917561292648315, |
|
"ref_logps/chosen": -22.75712013244629, |
|
"ref_logps/rejected": -23.975711822509766, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0005830166628584266, |
|
"rewards/margins": 0.0023808996193110943, |
|
"rewards/rejected": -0.0017978833056986332, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.957865168539325e-07, |
|
"logps/chosen": -24.575613021850586, |
|
"logps/rejected": -27.22784996032715, |
|
"loss": 0.6922, |
|
"losses/dpo": 0.6912024021148682, |
|
"losses/sft": 0.8869270086288452, |
|
"losses/total": 0.6912024021148682, |
|
"ref_logps/chosen": -24.60643196105957, |
|
"ref_logps/rejected": -27.23748779296875, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": 0.0030817545484751463, |
|
"rewards/margins": 0.002117899712175131, |
|
"rewards/rejected": 0.0009638546616770327, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.943820224719101e-07, |
|
"logps/chosen": -23.449739456176758, |
|
"logps/rejected": -29.683177947998047, |
|
"loss": 0.6913, |
|
"losses/dpo": 0.690817654132843, |
|
"losses/sft": 0.7518939971923828, |
|
"losses/total": 0.690817654132843, |
|
"ref_logps/chosen": -23.47886848449707, |
|
"ref_logps/rejected": -29.673599243164062, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": 0.0029130401089787483, |
|
"rewards/margins": 0.0038708222564309835, |
|
"rewards/rejected": -0.0009577819146215916, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.929775280898877e-07, |
|
"logps/chosen": -21.53199577331543, |
|
"logps/rejected": -26.939178466796875, |
|
"loss": 0.6923, |
|
"losses/dpo": 0.6913425922393799, |
|
"losses/sft": 0.6940815448760986, |
|
"losses/total": 0.6913425922393799, |
|
"ref_logps/chosen": -21.567256927490234, |
|
"ref_logps/rejected": -26.955793380737305, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": 0.0035258703865110874, |
|
"rewards/margins": 0.0018642698414623737, |
|
"rewards/rejected": 0.0016616008942946792, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.915730337078651e-07, |
|
"logps/chosen": -25.476314544677734, |
|
"logps/rejected": -28.62994956970215, |
|
"loss": 0.6903, |
|
"losses/dpo": 0.6909126043319702, |
|
"losses/sft": 0.9766503572463989, |
|
"losses/total": 0.6909126043319702, |
|
"ref_logps/chosen": -25.53481674194336, |
|
"ref_logps/rejected": -28.629175186157227, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": 0.005850302986800671, |
|
"rewards/margins": 0.005927846767008305, |
|
"rewards/rejected": -7.754407124593854e-05, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.901685393258427e-07, |
|
"logps/chosen": -24.225303649902344, |
|
"logps/rejected": -25.871768951416016, |
|
"loss": 0.6919, |
|
"losses/dpo": 0.6921358704566956, |
|
"losses/sft": 0.8468361496925354, |
|
"losses/total": 0.6921358704566956, |
|
"ref_logps/chosen": -24.27114486694336, |
|
"ref_logps/rejected": -25.892141342163086, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.004584114067256451, |
|
"rewards/margins": 0.0025467565283179283, |
|
"rewards/rejected": 0.0020373575389385223, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.887640449438202e-07, |
|
"logps/chosen": -21.586902618408203, |
|
"logps/rejected": -27.604530334472656, |
|
"loss": 0.6902, |
|
"losses/dpo": 0.6886686086654663, |
|
"losses/sft": 0.7169030904769897, |
|
"losses/total": 0.6886686086654663, |
|
"ref_logps/chosen": -21.621915817260742, |
|
"ref_logps/rejected": -27.578147888183594, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": 0.003501205239444971, |
|
"rewards/margins": 0.006139571778476238, |
|
"rewards/rejected": -0.00263836607336998, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.873595505617978e-07, |
|
"logps/chosen": -24.600910186767578, |
|
"logps/rejected": -30.10862922668457, |
|
"loss": 0.6909, |
|
"losses/dpo": 0.6952996850013733, |
|
"losses/sft": 0.7813842296600342, |
|
"losses/total": 0.6952996850013733, |
|
"ref_logps/chosen": -24.644847869873047, |
|
"ref_logps/rejected": -30.104995727539062, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": 0.004394051153212786, |
|
"rewards/margins": 0.004757395945489407, |
|
"rewards/rejected": -0.0003633448213804513, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.859550561797752e-07, |
|
"logps/chosen": -20.754308700561523, |
|
"logps/rejected": -24.876815795898438, |
|
"loss": 0.6914, |
|
"losses/dpo": 0.6889626979827881, |
|
"losses/sft": 0.8148602843284607, |
|
"losses/total": 0.6889626979827881, |
|
"ref_logps/chosen": -20.81591796875, |
|
"ref_logps/rejected": -24.90121078491211, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.006161023862659931, |
|
"rewards/margins": 0.0037216043565422297, |
|
"rewards/rejected": 0.0024394195061177015, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.845505617977528e-07, |
|
"logps/chosen": -23.585115432739258, |
|
"logps/rejected": -24.949783325195312, |
|
"loss": 0.6915, |
|
"losses/dpo": 0.6886854767799377, |
|
"losses/sft": 0.8582803010940552, |
|
"losses/total": 0.6886854767799377, |
|
"ref_logps/chosen": -23.63630485534668, |
|
"ref_logps/rejected": -24.965686798095703, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": 0.00511885154992342, |
|
"rewards/margins": 0.00352850160561502, |
|
"rewards/rejected": 0.0015903504099696875, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.831460674157303e-07, |
|
"logps/chosen": -20.576318740844727, |
|
"logps/rejected": -24.87842559814453, |
|
"loss": 0.6916, |
|
"losses/dpo": 0.6899633407592773, |
|
"losses/sft": 0.6870510578155518, |
|
"losses/total": 0.6899633407592773, |
|
"ref_logps/chosen": -20.60286521911621, |
|
"ref_logps/rejected": -24.87299346923828, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": 0.0026545142754912376, |
|
"rewards/margins": 0.0031977419275790453, |
|
"rewards/rejected": -0.0005432275356724858, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.817415730337078e-07, |
|
"logps/chosen": -24.051544189453125, |
|
"logps/rejected": -25.128353118896484, |
|
"loss": 0.6887, |
|
"losses/dpo": 0.6841185092926025, |
|
"losses/sft": 0.833280622959137, |
|
"losses/total": 0.6841185092926025, |
|
"ref_logps/chosen": -24.10638427734375, |
|
"ref_logps/rejected": -25.090627670288086, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": 0.005483907647430897, |
|
"rewards/margins": 0.009256447665393353, |
|
"rewards/rejected": -0.003772540483623743, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.803370786516854e-07, |
|
"logps/chosen": -21.564958572387695, |
|
"logps/rejected": -26.20134735107422, |
|
"loss": 0.6914, |
|
"losses/dpo": 0.687272846698761, |
|
"losses/sft": 0.7218018770217896, |
|
"losses/total": 0.687272846698761, |
|
"ref_logps/chosen": -21.621246337890625, |
|
"ref_logps/rejected": -26.221445083618164, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.00562882237136364, |
|
"rewards/margins": 0.003618879709392786, |
|
"rewards/rejected": 0.002009942661970854, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.789325842696629e-07, |
|
"logps/chosen": -23.699432373046875, |
|
"logps/rejected": -26.1567325592041, |
|
"loss": 0.6884, |
|
"losses/dpo": 0.6862033605575562, |
|
"losses/sft": 0.9426325559616089, |
|
"losses/total": 0.6862033605575562, |
|
"ref_logps/chosen": -23.775989532470703, |
|
"ref_logps/rejected": -26.134971618652344, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": 0.007655493449419737, |
|
"rewards/margins": 0.009831697680056095, |
|
"rewards/rejected": -0.0021762042306363583, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.775280898876405e-07, |
|
"logps/chosen": -23.076374053955078, |
|
"logps/rejected": -27.695213317871094, |
|
"loss": 0.6881, |
|
"losses/dpo": 0.6900283098220825, |
|
"losses/sft": 0.8505688905715942, |
|
"losses/total": 0.6900283098220825, |
|
"ref_logps/chosen": -23.134254455566406, |
|
"ref_logps/rejected": -27.64853858947754, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": 0.005787987262010574, |
|
"rewards/margins": 0.010455346666276455, |
|
"rewards/rejected": -0.004667359404265881, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.7612359550561797e-07, |
|
"logps/chosen": -21.54006576538086, |
|
"logps/rejected": -24.36727523803711, |
|
"loss": 0.6911, |
|
"losses/dpo": 0.6942879557609558, |
|
"losses/sft": 0.7311047911643982, |
|
"losses/total": 0.6942879557609558, |
|
"ref_logps/chosen": -21.592029571533203, |
|
"ref_logps/rejected": -24.37733268737793, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": 0.0051962630823254585, |
|
"rewards/margins": 0.004190489184111357, |
|
"rewards/rejected": 0.0010057740146294236, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.747191011235955e-07, |
|
"logps/chosen": -21.678865432739258, |
|
"logps/rejected": -28.501548767089844, |
|
"loss": 0.6906, |
|
"losses/dpo": 0.6889323592185974, |
|
"losses/sft": 0.7590615749359131, |
|
"losses/total": 0.6889323592185974, |
|
"ref_logps/chosen": -21.72535514831543, |
|
"ref_logps/rejected": -28.493976593017578, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": 0.004648969508707523, |
|
"rewards/margins": 0.005405961070209742, |
|
"rewards/rejected": -0.0007569912704639137, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.7331460674157303e-07, |
|
"logps/chosen": -23.9781436920166, |
|
"logps/rejected": -26.515047073364258, |
|
"loss": 0.684, |
|
"losses/dpo": 0.6820257902145386, |
|
"losses/sft": 0.8394409418106079, |
|
"losses/total": 0.6820257902145386, |
|
"ref_logps/chosen": -24.077434539794922, |
|
"ref_logps/rejected": -26.427589416503906, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.009928906336426735, |
|
"rewards/margins": 0.01867445930838585, |
|
"rewards/rejected": -0.008745552971959114, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.7191011235955054e-07, |
|
"logps/chosen": -22.162433624267578, |
|
"logps/rejected": -30.391559600830078, |
|
"loss": 0.6894, |
|
"losses/dpo": 0.6909818053245544, |
|
"losses/sft": 0.7433596253395081, |
|
"losses/total": 0.6909818053245544, |
|
"ref_logps/chosen": -22.250009536743164, |
|
"ref_logps/rejected": -30.40111541748047, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.00875765923410654, |
|
"rewards/margins": 0.007802051026374102, |
|
"rewards/rejected": 0.0009556080331094563, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.705056179775281e-07, |
|
"logps/chosen": -24.088329315185547, |
|
"logps/rejected": -26.851608276367188, |
|
"loss": 0.6888, |
|
"losses/dpo": 0.6858267188072205, |
|
"losses/sft": 0.6961312294006348, |
|
"losses/total": 0.6858267188072205, |
|
"ref_logps/chosen": -24.163042068481445, |
|
"ref_logps/rejected": -26.837688446044922, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.007471038028597832, |
|
"rewards/margins": 0.008863050490617752, |
|
"rewards/rejected": -0.0013920125784352422, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.691011235955056e-07, |
|
"logps/chosen": -23.13729476928711, |
|
"logps/rejected": -28.607454299926758, |
|
"loss": 0.6896, |
|
"losses/dpo": 0.6952353715896606, |
|
"losses/sft": 0.8425909280776978, |
|
"losses/total": 0.6952353715896606, |
|
"ref_logps/chosen": -23.206546783447266, |
|
"ref_logps/rejected": -28.603229522705078, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.006925276480615139, |
|
"rewards/margins": 0.007347787730395794, |
|
"rewards/rejected": -0.00042251107515767217, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.6769662921348315e-07, |
|
"logps/chosen": -22.758800506591797, |
|
"logps/rejected": -25.503629684448242, |
|
"loss": 0.6882, |
|
"losses/dpo": 0.690306544303894, |
|
"losses/sft": 0.7292711734771729, |
|
"losses/total": 0.690306544303894, |
|
"ref_logps/chosen": -22.867115020751953, |
|
"ref_logps/rejected": -25.50885009765625, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.010831332765519619, |
|
"rewards/margins": 0.010309312492609024, |
|
"rewards/rejected": 0.0005220210296101868, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.662921348314606e-07, |
|
"logps/chosen": -22.957290649414062, |
|
"logps/rejected": -27.15595245361328, |
|
"loss": 0.6868, |
|
"losses/dpo": 0.6876275539398193, |
|
"losses/sft": 0.9537997245788574, |
|
"losses/total": 0.6876275539398193, |
|
"ref_logps/chosen": -23.08481788635254, |
|
"ref_logps/rejected": -27.15395736694336, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": 0.012752560898661613, |
|
"rewards/margins": 0.012952261604368687, |
|
"rewards/rejected": -0.0001996997743844986, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6488764044943816e-07, |
|
"logps/chosen": -21.856212615966797, |
|
"logps/rejected": -28.90016746520996, |
|
"loss": 0.688, |
|
"losses/dpo": 0.6866365075111389, |
|
"losses/sft": 0.748786211013794, |
|
"losses/total": 0.6866365075111389, |
|
"ref_logps/chosen": -21.946701049804688, |
|
"ref_logps/rejected": -28.884090423583984, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": 0.009048780426383018, |
|
"rewards/margins": 0.010656429454684258, |
|
"rewards/rejected": -0.0016076482133939862, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.634831460674157e-07, |
|
"logps/chosen": -21.727970123291016, |
|
"logps/rejected": -24.484195709228516, |
|
"loss": 0.6866, |
|
"losses/dpo": 0.6858303546905518, |
|
"losses/sft": 0.7428255677223206, |
|
"losses/total": 0.6858303546905518, |
|
"ref_logps/chosen": -21.827533721923828, |
|
"ref_logps/rejected": -24.44991111755371, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": 0.009956244379281998, |
|
"rewards/margins": 0.013384684920310974, |
|
"rewards/rejected": -0.0034284412395209074, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.620786516853932e-07, |
|
"logps/chosen": -23.145030975341797, |
|
"logps/rejected": -25.03292465209961, |
|
"loss": 0.685, |
|
"losses/dpo": 0.6789939403533936, |
|
"losses/sft": 0.718001127243042, |
|
"losses/total": 0.6789939403533936, |
|
"ref_logps/chosen": -23.27937889099121, |
|
"ref_logps/rejected": -24.999483108520508, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.013434557244181633, |
|
"rewards/margins": 0.016778716817498207, |
|
"rewards/rejected": -0.0033441600389778614, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.606741573033708e-07, |
|
"logps/chosen": -21.208370208740234, |
|
"logps/rejected": -25.74646759033203, |
|
"loss": 0.6852, |
|
"losses/dpo": 0.6921157836914062, |
|
"losses/sft": 0.8621765971183777, |
|
"losses/total": 0.6921157836914062, |
|
"ref_logps/chosen": -21.325489044189453, |
|
"ref_logps/rejected": -25.700342178344727, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.011711984872817993, |
|
"rewards/margins": 0.016324326395988464, |
|
"rewards/rejected": -0.0046123419888317585, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.592696629213483e-07, |
|
"logps/chosen": -22.621421813964844, |
|
"logps/rejected": -28.81465721130371, |
|
"loss": 0.6885, |
|
"losses/dpo": 0.689292848110199, |
|
"losses/sft": 0.7215853929519653, |
|
"losses/total": 0.689292848110199, |
|
"ref_logps/chosen": -22.720117568969727, |
|
"ref_logps/rejected": -28.81524658203125, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.009869576431810856, |
|
"rewards/margins": 0.009810445830225945, |
|
"rewards/rejected": 5.913013592362404e-05, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.5786516853932584e-07, |
|
"logps/chosen": -22.636703491210938, |
|
"logps/rejected": -28.595046997070312, |
|
"loss": 0.6872, |
|
"losses/dpo": 0.6876038312911987, |
|
"losses/sft": 0.7616434097290039, |
|
"losses/total": 0.6876038312911987, |
|
"ref_logps/chosen": -22.73769187927246, |
|
"ref_logps/rejected": -28.57284164428711, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": 0.01009867899119854, |
|
"rewards/margins": 0.01231930311769247, |
|
"rewards/rejected": -0.002220625290647149, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5646067415730334e-07, |
|
"logps/chosen": -23.055517196655273, |
|
"logps/rejected": -28.524490356445312, |
|
"loss": 0.6849, |
|
"losses/dpo": 0.6818934082984924, |
|
"losses/sft": 0.8828948736190796, |
|
"losses/total": 0.6818934082984924, |
|
"ref_logps/chosen": -23.18179702758789, |
|
"ref_logps/rejected": -28.479928970336914, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": 0.012628016993403435, |
|
"rewards/margins": 0.01708414778113365, |
|
"rewards/rejected": -0.004456131719052792, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.550561797752809e-07, |
|
"logps/chosen": -25.802350997924805, |
|
"logps/rejected": -29.403223037719727, |
|
"loss": 0.682, |
|
"losses/dpo": 0.6922101974487305, |
|
"losses/sft": 0.7417640089988708, |
|
"losses/total": 0.6922101974487305, |
|
"ref_logps/chosen": -25.971485137939453, |
|
"ref_logps/rejected": -29.342666625976562, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.016913428902626038, |
|
"rewards/margins": 0.02296869084239006, |
|
"rewards/rejected": -0.006055259145796299, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.536516853932584e-07, |
|
"logps/chosen": -22.979541778564453, |
|
"logps/rejected": -31.861392974853516, |
|
"loss": 0.6849, |
|
"losses/dpo": 0.6843876242637634, |
|
"losses/sft": 0.6335030198097229, |
|
"losses/total": 0.6843876242637634, |
|
"ref_logps/chosen": -23.086105346679688, |
|
"ref_logps/rejected": -31.796035766601562, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.010656386613845825, |
|
"rewards/margins": 0.017192194238305092, |
|
"rewards/rejected": -0.006535808090120554, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.522471910112359e-07, |
|
"logps/chosen": -21.333240509033203, |
|
"logps/rejected": -25.32451629638672, |
|
"loss": 0.6842, |
|
"losses/dpo": 0.6832489967346191, |
|
"losses/sft": 0.8737274408340454, |
|
"losses/total": 0.6832489967346191, |
|
"ref_logps/chosen": -21.456268310546875, |
|
"ref_logps/rejected": -25.263538360595703, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.012302841059863567, |
|
"rewards/margins": 0.01840106211602688, |
|
"rewards/rejected": -0.006098220124840736, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5084269662921347e-07, |
|
"logps/chosen": -21.905548095703125, |
|
"logps/rejected": -25.504837036132812, |
|
"loss": 0.6845, |
|
"losses/dpo": 0.6803750991821289, |
|
"losses/sft": 0.7227590084075928, |
|
"losses/total": 0.6803750991821289, |
|
"ref_logps/chosen": -22.001012802124023, |
|
"ref_logps/rejected": -25.422731399536133, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": 0.009546317160129547, |
|
"rewards/margins": 0.01775689423084259, |
|
"rewards/rejected": -0.008210576139390469, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.4943820224719097e-07, |
|
"logps/chosen": -22.212453842163086, |
|
"logps/rejected": -25.56966209411621, |
|
"loss": 0.6845, |
|
"losses/dpo": 0.6879241466522217, |
|
"losses/sft": 0.936349093914032, |
|
"losses/total": 0.6879241466522217, |
|
"ref_logps/chosen": -22.337223052978516, |
|
"ref_logps/rejected": -25.51331901550293, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.012477071955800056, |
|
"rewards/margins": 0.018111376091837883, |
|
"rewards/rejected": -0.005634305067360401, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.4803370786516853e-07, |
|
"logps/chosen": -20.199138641357422, |
|
"logps/rejected": -26.30996322631836, |
|
"loss": 0.6818, |
|
"losses/dpo": 0.6872521638870239, |
|
"losses/sft": 0.6872013211250305, |
|
"losses/total": 0.6872521638870239, |
|
"ref_logps/chosen": -20.368690490722656, |
|
"ref_logps/rejected": -26.24540138244629, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.016955075785517693, |
|
"rewards/margins": 0.02341129444539547, |
|
"rewards/rejected": -0.006456219125539064, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.4662921348314603e-07, |
|
"logps/chosen": -22.031774520874023, |
|
"logps/rejected": -26.07961082458496, |
|
"loss": 0.6813, |
|
"losses/dpo": 0.6833238005638123, |
|
"losses/sft": 0.7775546312332153, |
|
"losses/total": 0.6833238005638123, |
|
"ref_logps/chosen": -22.163204193115234, |
|
"ref_logps/rejected": -25.965608596801758, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": 0.01314287818968296, |
|
"rewards/margins": 0.02454320341348648, |
|
"rewards/rejected": -0.011400324292480946, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.452247191011236e-07, |
|
"logps/chosen": -22.522083282470703, |
|
"logps/rejected": -26.621906280517578, |
|
"loss": 0.6801, |
|
"losses/dpo": 0.6835525035858154, |
|
"losses/sft": 0.7558909058570862, |
|
"losses/total": 0.6835525035858154, |
|
"ref_logps/chosen": -22.656497955322266, |
|
"ref_logps/rejected": -26.48514747619629, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.013441269285976887, |
|
"rewards/margins": 0.027117114514112473, |
|
"rewards/rejected": -0.013675847090780735, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.438202247191011e-07, |
|
"logps/chosen": -22.05775260925293, |
|
"logps/rejected": -26.428781509399414, |
|
"loss": 0.6836, |
|
"losses/dpo": 0.6767468452453613, |
|
"losses/sft": 0.8101401329040527, |
|
"losses/total": 0.6767468452453613, |
|
"ref_logps/chosen": -22.211511611938477, |
|
"ref_logps/rejected": -26.38385772705078, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.015375516377389431, |
|
"rewards/margins": 0.019868001341819763, |
|
"rewards/rejected": -0.004492484033107758, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4241573033707865e-07, |
|
"logps/chosen": -22.327136993408203, |
|
"logps/rejected": -27.90719985961914, |
|
"loss": 0.6803, |
|
"losses/dpo": 0.6830211281776428, |
|
"losses/sft": 0.7352213263511658, |
|
"losses/total": 0.6830211281776428, |
|
"ref_logps/chosen": -22.457595825195312, |
|
"ref_logps/rejected": -27.77078628540039, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": 0.013045946136116982, |
|
"rewards/margins": 0.026687312871217728, |
|
"rewards/rejected": -0.013641366735100746, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.410112359550562e-07, |
|
"logps/chosen": -23.738140106201172, |
|
"logps/rejected": -26.4810791015625, |
|
"loss": 0.6802, |
|
"losses/dpo": 0.673937976360321, |
|
"losses/sft": 0.7962872385978699, |
|
"losses/total": 0.673937976360321, |
|
"ref_logps/chosen": -23.89459991455078, |
|
"ref_logps/rejected": -26.366804122924805, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.015645721927285194, |
|
"rewards/margins": 0.027073292061686516, |
|
"rewards/rejected": -0.011427570134401321, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3960674157303366e-07, |
|
"logps/chosen": -21.008014678955078, |
|
"logps/rejected": -24.34069061279297, |
|
"loss": 0.6805, |
|
"losses/dpo": 0.6789628863334656, |
|
"losses/sft": 0.9124815464019775, |
|
"losses/total": 0.6789628863334656, |
|
"ref_logps/chosen": -21.115734100341797, |
|
"ref_logps/rejected": -24.184371948242188, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.01077171228826046, |
|
"rewards/margins": 0.026403725147247314, |
|
"rewards/rejected": -0.015632012858986855, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.382022471910112e-07, |
|
"logps/chosen": -20.62143325805664, |
|
"logps/rejected": -26.963245391845703, |
|
"loss": 0.6833, |
|
"losses/dpo": 0.6907744407653809, |
|
"losses/sft": 0.7639827728271484, |
|
"losses/total": 0.6907744407653809, |
|
"ref_logps/chosen": -20.739776611328125, |
|
"ref_logps/rejected": -26.87374496459961, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.011834252625703812, |
|
"rewards/margins": 0.020784219726920128, |
|
"rewards/rejected": -0.008949968963861465, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.367977528089887e-07, |
|
"logps/chosen": -21.591964721679688, |
|
"logps/rejected": -24.5494384765625, |
|
"loss": 0.6846, |
|
"losses/dpo": 0.6878204345703125, |
|
"losses/sft": 0.6917088627815247, |
|
"losses/total": 0.6878204345703125, |
|
"ref_logps/chosen": -21.644916534423828, |
|
"ref_logps/rejected": -24.421239852905273, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": 0.005295174196362495, |
|
"rewards/margins": 0.01811503805220127, |
|
"rewards/rejected": -0.012819863855838776, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.353932584269663e-07, |
|
"logps/chosen": -24.759811401367188, |
|
"logps/rejected": -28.227123260498047, |
|
"loss": 0.6825, |
|
"losses/dpo": 0.6937445402145386, |
|
"losses/sft": 0.9424384832382202, |
|
"losses/total": 0.6937445402145386, |
|
"ref_logps/chosen": -24.891460418701172, |
|
"ref_logps/rejected": -28.136310577392578, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": 0.013164759613573551, |
|
"rewards/margins": 0.022245781496167183, |
|
"rewards/rejected": -0.009081022813916206, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.339887640449438e-07, |
|
"logps/chosen": -22.8006591796875, |
|
"logps/rejected": -26.10009002685547, |
|
"loss": 0.6795, |
|
"losses/dpo": 0.6909404993057251, |
|
"losses/sft": 0.8603497743606567, |
|
"losses/total": 0.6909404993057251, |
|
"ref_logps/chosen": -22.96673583984375, |
|
"ref_logps/rejected": -25.977882385253906, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": 0.01660749316215515, |
|
"rewards/margins": 0.028828214854002, |
|
"rewards/rejected": -0.012220719829201698, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.3258426966292134e-07, |
|
"logps/chosen": -24.15732765197754, |
|
"logps/rejected": -28.13039779663086, |
|
"loss": 0.6752, |
|
"losses/dpo": 0.6638558506965637, |
|
"losses/sft": 0.8455443382263184, |
|
"losses/total": 0.6638558506965637, |
|
"ref_logps/chosen": -24.341087341308594, |
|
"ref_logps/rejected": -27.938106536865234, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.018375899642705917, |
|
"rewards/margins": 0.03760489821434021, |
|
"rewards/rejected": -0.019228998571634293, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.311797752808989e-07, |
|
"logps/chosen": -21.290430068969727, |
|
"logps/rejected": -25.207626342773438, |
|
"loss": 0.6771, |
|
"losses/dpo": 0.6774411797523499, |
|
"losses/sft": 0.9257520437240601, |
|
"losses/total": 0.6774411797523499, |
|
"ref_logps/chosen": -21.449438095092773, |
|
"ref_logps/rejected": -25.032873153686523, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.015900880098342896, |
|
"rewards/margins": 0.033375710248947144, |
|
"rewards/rejected": -0.017474830150604248, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.297752808988764e-07, |
|
"logps/chosen": -24.241390228271484, |
|
"logps/rejected": -27.57483673095703, |
|
"loss": 0.681, |
|
"losses/dpo": 0.6869298219680786, |
|
"losses/sft": 0.8004887104034424, |
|
"losses/total": 0.6869298219680786, |
|
"ref_logps/chosen": -24.399887084960938, |
|
"ref_logps/rejected": -27.475460052490234, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.015849877148866653, |
|
"rewards/margins": 0.02578754723072052, |
|
"rewards/rejected": -0.009937671013176441, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.2837078651685396e-07, |
|
"logps/chosen": -21.290605545043945, |
|
"logps/rejected": -25.188884735107422, |
|
"loss": 0.6843, |
|
"losses/dpo": 0.6896719336509705, |
|
"losses/sft": 0.7865870594978333, |
|
"losses/total": 0.6896719336509705, |
|
"ref_logps/chosen": -21.39483642578125, |
|
"ref_logps/rejected": -25.098407745361328, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": 0.010422902181744576, |
|
"rewards/margins": 0.01947084441781044, |
|
"rewards/rejected": -0.00904794316738844, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.269662921348314e-07, |
|
"logps/chosen": -21.395389556884766, |
|
"logps/rejected": -24.005056381225586, |
|
"loss": 0.6863, |
|
"losses/dpo": 0.6820717453956604, |
|
"losses/sft": 0.8161361813545227, |
|
"losses/total": 0.6820717453956604, |
|
"ref_logps/chosen": -21.495037078857422, |
|
"ref_logps/rejected": -23.9505558013916, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.009964808821678162, |
|
"rewards/margins": 0.015414956025779247, |
|
"rewards/rejected": -0.00545014813542366, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.2556179775280896e-07, |
|
"logps/chosen": -20.948806762695312, |
|
"logps/rejected": -24.735366821289062, |
|
"loss": 0.6786, |
|
"losses/dpo": 0.6868577599525452, |
|
"losses/sft": 0.7177249193191528, |
|
"losses/total": 0.6868577599525452, |
|
"ref_logps/chosen": -21.072193145751953, |
|
"ref_logps/rejected": -24.555286407470703, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": 0.012338603846728802, |
|
"rewards/margins": 0.030346699059009552, |
|
"rewards/rejected": -0.018008096143603325, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.2415730337078647e-07, |
|
"logps/chosen": -24.245830535888672, |
|
"logps/rejected": -28.811023712158203, |
|
"loss": 0.6783, |
|
"losses/dpo": 0.6721839904785156, |
|
"losses/sft": 0.816402018070221, |
|
"losses/total": 0.6721839904785156, |
|
"ref_logps/chosen": -24.40906524658203, |
|
"ref_logps/rejected": -28.65966033935547, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.01632346771657467, |
|
"rewards/margins": 0.03146028146147728, |
|
"rewards/rejected": -0.01513681747019291, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.22752808988764e-07, |
|
"logps/chosen": -22.48372459411621, |
|
"logps/rejected": -29.088359832763672, |
|
"loss": 0.6709, |
|
"losses/dpo": 0.6718644499778748, |
|
"losses/sft": 0.823063313961029, |
|
"losses/total": 0.6718644499778748, |
|
"ref_logps/chosen": -22.634136199951172, |
|
"ref_logps/rejected": -28.77379608154297, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": 0.01504128985106945, |
|
"rewards/margins": 0.04649777710437775, |
|
"rewards/rejected": -0.03145648539066315, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2134831460674153e-07, |
|
"logps/chosen": -20.869436264038086, |
|
"logps/rejected": -27.790451049804688, |
|
"loss": 0.6785, |
|
"losses/dpo": 0.6842025518417358, |
|
"losses/sft": 0.8330531120300293, |
|
"losses/total": 0.6842025518417358, |
|
"ref_logps/chosen": -20.964067459106445, |
|
"ref_logps/rejected": -27.572711944580078, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": 0.009463240392506123, |
|
"rewards/margins": 0.031237438321113586, |
|
"rewards/rejected": -0.02177419885993004, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.199438202247191e-07, |
|
"logps/chosen": -22.02164077758789, |
|
"logps/rejected": -28.644880294799805, |
|
"loss": 0.6772, |
|
"losses/dpo": 0.6933009028434753, |
|
"losses/sft": 0.7342395186424255, |
|
"losses/total": 0.6933009028434753, |
|
"ref_logps/chosen": -22.146129608154297, |
|
"ref_logps/rejected": -28.42925262451172, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.012448801659047604, |
|
"rewards/margins": 0.03401148319244385, |
|
"rewards/rejected": -0.02156267873942852, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1853932584269664e-07, |
|
"logps/chosen": -21.086360931396484, |
|
"logps/rejected": -23.74181365966797, |
|
"loss": 0.6834, |
|
"losses/dpo": 0.7061095833778381, |
|
"losses/sft": 0.6976662278175354, |
|
"losses/total": 0.7061095833778381, |
|
"ref_logps/chosen": -21.240116119384766, |
|
"ref_logps/rejected": -23.683523178100586, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.015375564806163311, |
|
"rewards/margins": 0.021204624325037003, |
|
"rewards/rejected": -0.0058290609158575535, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1713483146067415e-07, |
|
"logps/chosen": -21.535640716552734, |
|
"logps/rejected": -28.555763244628906, |
|
"loss": 0.6749, |
|
"losses/dpo": 0.6546899080276489, |
|
"losses/sft": 0.8132616281509399, |
|
"losses/total": 0.6546899080276489, |
|
"ref_logps/chosen": -21.68370819091797, |
|
"ref_logps/rejected": -28.313589096069336, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": 0.014806646853685379, |
|
"rewards/margins": 0.03902393952012062, |
|
"rewards/rejected": -0.02421729266643524, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.157303370786517e-07, |
|
"logps/chosen": -22.314010620117188, |
|
"logps/rejected": -26.403512954711914, |
|
"loss": 0.6777, |
|
"losses/dpo": 0.6830233931541443, |
|
"losses/sft": 0.7298552393913269, |
|
"losses/total": 0.6830233931541443, |
|
"ref_logps/chosen": -22.442527770996094, |
|
"ref_logps/rejected": -26.1983699798584, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.012851729989051819, |
|
"rewards/margins": 0.033365827053785324, |
|
"rewards/rejected": -0.020514097064733505, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.1432584269662915e-07, |
|
"logps/chosen": -23.65606117248535, |
|
"logps/rejected": -27.6639461517334, |
|
"loss": 0.6787, |
|
"losses/dpo": 0.66861492395401, |
|
"losses/sft": 0.7538549900054932, |
|
"losses/total": 0.66861492395401, |
|
"ref_logps/chosen": -23.742881774902344, |
|
"ref_logps/rejected": -27.43739128112793, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": 0.008682135492563248, |
|
"rewards/margins": 0.03133738413453102, |
|
"rewards/rejected": -0.022655250504612923, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.129213483146067e-07, |
|
"logps/chosen": -21.20174789428711, |
|
"logps/rejected": -27.045516967773438, |
|
"loss": 0.6736, |
|
"losses/dpo": 0.6594799757003784, |
|
"losses/sft": 0.7625675201416016, |
|
"losses/total": 0.6594799757003784, |
|
"ref_logps/chosen": -21.360929489135742, |
|
"ref_logps/rejected": -26.788671493530273, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": 0.015918483957648277, |
|
"rewards/margins": 0.04160304740071297, |
|
"rewards/rejected": -0.02568456158041954, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.115168539325842e-07, |
|
"logps/chosen": -25.287567138671875, |
|
"logps/rejected": -27.158187866210938, |
|
"loss": 0.6789, |
|
"losses/dpo": 0.6871756315231323, |
|
"losses/sft": 0.7897288799285889, |
|
"losses/total": 0.6871756315231323, |
|
"ref_logps/chosen": -25.39737319946289, |
|
"ref_logps/rejected": -26.95665740966797, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": 0.010980643332004547, |
|
"rewards/margins": 0.03113364614546299, |
|
"rewards/rejected": -0.020153000950813293, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.1011235955056177e-07, |
|
"logps/chosen": -20.239051818847656, |
|
"logps/rejected": -27.055557250976562, |
|
"loss": 0.6766, |
|
"losses/dpo": 0.6560062170028687, |
|
"losses/sft": 0.7211654186248779, |
|
"losses/total": 0.6560062170028687, |
|
"ref_logps/chosen": -20.345287322998047, |
|
"ref_logps/rejected": -26.804546356201172, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.010623706504702568, |
|
"rewards/margins": 0.03572461009025574, |
|
"rewards/rejected": -0.02510090172290802, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.0870786516853933e-07, |
|
"logps/chosen": -22.816429138183594, |
|
"logps/rejected": -28.331439971923828, |
|
"loss": 0.6728, |
|
"losses/dpo": 0.6975245475769043, |
|
"losses/sft": 0.8287545442581177, |
|
"losses/total": 0.6975245475769043, |
|
"ref_logps/chosen": -22.96261215209961, |
|
"ref_logps/rejected": -28.04006576538086, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": 0.014618270099163055, |
|
"rewards/margins": 0.043755702674388885, |
|
"rewards/rejected": -0.02913743630051613, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.0730337078651683e-07, |
|
"logps/chosen": -22.864845275878906, |
|
"logps/rejected": -27.868162155151367, |
|
"loss": 0.6776, |
|
"losses/dpo": 0.6524635553359985, |
|
"losses/sft": 0.8967273235321045, |
|
"losses/total": 0.6524635553359985, |
|
"ref_logps/chosen": -22.934465408325195, |
|
"ref_logps/rejected": -27.60092544555664, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.006961943581700325, |
|
"rewards/margins": 0.03368568420410156, |
|
"rewards/rejected": -0.026723740622401237, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.058988764044944e-07, |
|
"logps/chosen": -26.633420944213867, |
|
"logps/rejected": -29.40836524963379, |
|
"loss": 0.6785, |
|
"losses/dpo": 0.6883168816566467, |
|
"losses/sft": 0.9007142782211304, |
|
"losses/total": 0.6883168816566467, |
|
"ref_logps/chosen": -26.658733367919922, |
|
"ref_logps/rejected": -29.11638641357422, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0025312139187008142, |
|
"rewards/margins": 0.031729087233543396, |
|
"rewards/rejected": -0.029197873547673225, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.044943820224719e-07, |
|
"logps/chosen": -21.93716049194336, |
|
"logps/rejected": -26.78734016418457, |
|
"loss": 0.6678, |
|
"losses/dpo": 0.6620572805404663, |
|
"losses/sft": 0.7277075052261353, |
|
"losses/total": 0.6620572805404663, |
|
"ref_logps/chosen": -22.14274787902832, |
|
"ref_logps/rejected": -26.450454711914062, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": 0.02055862732231617, |
|
"rewards/margins": 0.05424723029136658, |
|
"rewards/rejected": -0.03368859738111496, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.0308988764044945e-07, |
|
"logps/chosen": -23.479236602783203, |
|
"logps/rejected": -25.321468353271484, |
|
"loss": 0.6732, |
|
"losses/dpo": 0.6536136865615845, |
|
"losses/sft": 0.793202817440033, |
|
"losses/total": 0.6536136865615845, |
|
"ref_logps/chosen": -23.628402709960938, |
|
"ref_logps/rejected": -25.03476905822754, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": 0.014916517771780491, |
|
"rewards/margins": 0.043586596846580505, |
|
"rewards/rejected": -0.02867007628083229, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0168539325842696e-07, |
|
"logps/chosen": -21.36187744140625, |
|
"logps/rejected": -26.808046340942383, |
|
"loss": 0.6677, |
|
"losses/dpo": 0.658541202545166, |
|
"losses/sft": 0.6240718364715576, |
|
"losses/total": 0.658541202545166, |
|
"ref_logps/chosen": -21.525625228881836, |
|
"ref_logps/rejected": -26.417198181152344, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.01637459173798561, |
|
"rewards/margins": 0.05545924976468086, |
|
"rewards/rejected": -0.03908466175198555, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.0028089887640446e-07, |
|
"logps/chosen": -22.143728256225586, |
|
"logps/rejected": -26.035858154296875, |
|
"loss": 0.6732, |
|
"losses/dpo": 0.6707695126533508, |
|
"losses/sft": 0.8353971838951111, |
|
"losses/total": 0.6707695126533508, |
|
"ref_logps/chosen": -22.292274475097656, |
|
"ref_logps/rejected": -25.744632720947266, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": 0.014854478649795055, |
|
"rewards/margins": 0.043976958841085434, |
|
"rewards/rejected": -0.029122481122612953, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9887640449438196e-07, |
|
"logps/chosen": -22.15041732788086, |
|
"logps/rejected": -24.53826332092285, |
|
"loss": 0.6688, |
|
"losses/dpo": 0.6656994819641113, |
|
"losses/sft": 0.8727293014526367, |
|
"losses/total": 0.6656994819641113, |
|
"ref_logps/chosen": -22.210494995117188, |
|
"ref_logps/rejected": -24.07231330871582, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": 0.0060077933594584465, |
|
"rewards/margins": 0.05260289087891579, |
|
"rewards/rejected": -0.046595096588134766, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.974719101123595e-07, |
|
"logps/chosen": -23.314592361450195, |
|
"logps/rejected": -27.797752380371094, |
|
"loss": 0.675, |
|
"losses/dpo": 0.6690158247947693, |
|
"losses/sft": 0.7370929718017578, |
|
"losses/total": 0.6690158247947693, |
|
"ref_logps/chosen": -23.396080017089844, |
|
"ref_logps/rejected": -27.483016967773438, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.008148876950144768, |
|
"rewards/margins": 0.03962232545018196, |
|
"rewards/rejected": -0.031473446637392044, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.960674157303371e-07, |
|
"logps/chosen": -21.854373931884766, |
|
"logps/rejected": -26.652328491210938, |
|
"loss": 0.6706, |
|
"losses/dpo": 0.645140528678894, |
|
"losses/sft": 0.77164226770401, |
|
"losses/total": 0.645140528678894, |
|
"ref_logps/chosen": -21.949893951416016, |
|
"ref_logps/rejected": -26.255746841430664, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": 0.009551877155900002, |
|
"rewards/margins": 0.0492100827395916, |
|
"rewards/rejected": -0.03965820372104645, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.946629213483146e-07, |
|
"logps/chosen": -23.778413772583008, |
|
"logps/rejected": -28.40381622314453, |
|
"loss": 0.6634, |
|
"losses/dpo": 0.6699668169021606, |
|
"losses/sft": 0.8002771139144897, |
|
"losses/total": 0.6699668169021606, |
|
"ref_logps/chosen": -23.903501510620117, |
|
"ref_logps/rejected": -27.89557647705078, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.0125090591609478, |
|
"rewards/margins": 0.06333282589912415, |
|
"rewards/rejected": -0.050823770463466644, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9325842696629214e-07, |
|
"logps/chosen": -24.59353256225586, |
|
"logps/rejected": -28.184139251708984, |
|
"loss": 0.6658, |
|
"losses/dpo": 0.6745936870574951, |
|
"losses/sft": 0.8017398715019226, |
|
"losses/total": 0.6745936870574951, |
|
"ref_logps/chosen": -24.77825164794922, |
|
"ref_logps/rejected": -27.77378273010254, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.018472209572792053, |
|
"rewards/margins": 0.059507861733436584, |
|
"rewards/rejected": -0.04103565216064453, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9185393258426964e-07, |
|
"logps/chosen": -20.781490325927734, |
|
"logps/rejected": -25.704240798950195, |
|
"loss": 0.6641, |
|
"losses/dpo": 0.6748782396316528, |
|
"losses/sft": 0.6509857177734375, |
|
"losses/total": 0.6748782396316528, |
|
"ref_logps/chosen": -20.93104362487793, |
|
"ref_logps/rejected": -25.223262786865234, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.014955190010368824, |
|
"rewards/margins": 0.06305292248725891, |
|
"rewards/rejected": -0.04809773340821266, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.904494382022472e-07, |
|
"logps/chosen": -22.889171600341797, |
|
"logps/rejected": -28.954145431518555, |
|
"loss": 0.6719, |
|
"losses/dpo": 0.6790695190429688, |
|
"losses/sft": 0.7899962663650513, |
|
"losses/total": 0.6790695190429688, |
|
"ref_logps/chosen": -22.998294830322266, |
|
"ref_logps/rejected": -28.596576690673828, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.010911967605352402, |
|
"rewards/margins": 0.04666893184185028, |
|
"rewards/rejected": -0.03575696796178818, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.890449438202247e-07, |
|
"logps/chosen": -22.229143142700195, |
|
"logps/rejected": -24.892658233642578, |
|
"loss": 0.6713, |
|
"losses/dpo": 0.6665077209472656, |
|
"losses/sft": 0.8753491044044495, |
|
"losses/total": 0.6665077209472656, |
|
"ref_logps/chosen": -22.402416229248047, |
|
"ref_logps/rejected": -24.568809509277344, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": 0.01732712611556053, |
|
"rewards/margins": 0.0497119314968586, |
|
"rewards/rejected": -0.032384805381298065, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.876404494382022e-07, |
|
"logps/chosen": -22.233783721923828, |
|
"logps/rejected": -29.53872299194336, |
|
"loss": 0.6637, |
|
"losses/dpo": 0.6545946002006531, |
|
"losses/sft": 0.8056938052177429, |
|
"losses/total": 0.6545946002006531, |
|
"ref_logps/chosen": -22.328821182250977, |
|
"ref_logps/rejected": -28.996824264526367, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": 0.009503833949565887, |
|
"rewards/margins": 0.06369376927614212, |
|
"rewards/rejected": -0.05418993532657623, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.8623595505617977e-07, |
|
"logps/chosen": -24.073867797851562, |
|
"logps/rejected": -27.632476806640625, |
|
"loss": 0.6778, |
|
"losses/dpo": 0.6500009298324585, |
|
"losses/sft": 0.9210071563720703, |
|
"losses/total": 0.6500009298324585, |
|
"ref_logps/chosen": -24.12955093383789, |
|
"ref_logps/rejected": -27.32662582397461, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": 0.00556858628988266, |
|
"rewards/margins": 0.03615354374051094, |
|
"rewards/rejected": -0.03058495745062828, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8483146067415727e-07, |
|
"logps/chosen": -21.38442039489746, |
|
"logps/rejected": -31.358665466308594, |
|
"loss": 0.6601, |
|
"losses/dpo": 0.6630659103393555, |
|
"losses/sft": 0.8758641481399536, |
|
"losses/total": 0.6630659103393555, |
|
"ref_logps/chosen": -21.540292739868164, |
|
"ref_logps/rejected": -30.79846954345703, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.01558714546263218, |
|
"rewards/margins": 0.07160677015781403, |
|
"rewards/rejected": -0.056019626557826996, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.834269662921348e-07, |
|
"logps/chosen": -21.09262466430664, |
|
"logps/rejected": -25.64166831970215, |
|
"loss": 0.6622, |
|
"losses/dpo": 0.6400080919265747, |
|
"losses/sft": 0.8849148750305176, |
|
"losses/total": 0.6400080919265747, |
|
"ref_logps/chosen": -21.179445266723633, |
|
"ref_logps/rejected": -25.056869506835938, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": 0.00868179090321064, |
|
"rewards/margins": 0.06716156005859375, |
|
"rewards/rejected": -0.05847976729273796, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.8202247191011233e-07, |
|
"logps/chosen": -25.65859603881836, |
|
"logps/rejected": -28.025104522705078, |
|
"loss": 0.6765, |
|
"losses/dpo": 0.6927012205123901, |
|
"losses/sft": 0.8673559427261353, |
|
"losses/total": 0.6927012205123901, |
|
"ref_logps/chosen": -25.61608123779297, |
|
"ref_logps/rejected": -27.600624084472656, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.004251426085829735, |
|
"rewards/margins": 0.038196537643671036, |
|
"rewards/rejected": -0.04244796186685562, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.806179775280899e-07, |
|
"logps/chosen": -23.93341636657715, |
|
"logps/rejected": -29.840375900268555, |
|
"loss": 0.6647, |
|
"losses/dpo": 0.7150436639785767, |
|
"losses/sft": 0.9468034505844116, |
|
"losses/total": 0.7150436639785767, |
|
"ref_logps/chosen": -23.979652404785156, |
|
"ref_logps/rejected": -29.25320816040039, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.004623853601515293, |
|
"rewards/margins": 0.06334076821804047, |
|
"rewards/rejected": -0.058716922998428345, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.792134831460674e-07, |
|
"logps/chosen": -25.031259536743164, |
|
"logps/rejected": -28.292198181152344, |
|
"loss": 0.6559, |
|
"losses/dpo": 0.6770719289779663, |
|
"losses/sft": 0.9255229234695435, |
|
"losses/total": 0.6770719289779663, |
|
"ref_logps/chosen": -25.22754669189453, |
|
"ref_logps/rejected": -27.667905807495117, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": 0.019628863781690598, |
|
"rewards/margins": 0.08205802738666534, |
|
"rewards/rejected": -0.06242916360497475, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.7780898876404495e-07, |
|
"logps/chosen": -21.68558692932129, |
|
"logps/rejected": -26.84676742553711, |
|
"loss": 0.6765, |
|
"losses/dpo": 0.635480523109436, |
|
"losses/sft": 0.7413178086280823, |
|
"losses/total": 0.635480523109436, |
|
"ref_logps/chosen": -21.638694763183594, |
|
"ref_logps/rejected": -26.392860412597656, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -0.004689330700784922, |
|
"rewards/margins": 0.04070135951042175, |
|
"rewards/rejected": -0.04539068788290024, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7640449438202245e-07, |
|
"logps/chosen": -22.910152435302734, |
|
"logps/rejected": -26.53976821899414, |
|
"loss": 0.6587, |
|
"losses/dpo": 0.6835530400276184, |
|
"losses/sft": 0.9732310771942139, |
|
"losses/total": 0.6835530400276184, |
|
"ref_logps/chosen": -23.018016815185547, |
|
"ref_logps/rejected": -25.88375473022461, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.010786494240164757, |
|
"rewards/margins": 0.07638738304376602, |
|
"rewards/rejected": -0.06560088694095612, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.75e-07, |
|
"logps/chosen": -23.20888900756836, |
|
"logps/rejected": -26.875211715698242, |
|
"loss": 0.6617, |
|
"losses/dpo": 0.6463422775268555, |
|
"losses/sft": 0.7454620599746704, |
|
"losses/total": 0.6463422775268555, |
|
"ref_logps/chosen": -23.336442947387695, |
|
"ref_logps/rejected": -26.300058364868164, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.012755412608385086, |
|
"rewards/margins": 0.0702708438038826, |
|
"rewards/rejected": -0.057515427470207214, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.735955056179775e-07, |
|
"logps/chosen": -22.396747589111328, |
|
"logps/rejected": -29.472164154052734, |
|
"loss": 0.6784, |
|
"losses/dpo": 0.6625787019729614, |
|
"losses/sft": 0.7854889631271362, |
|
"losses/total": 0.6625787019729614, |
|
"ref_logps/chosen": -22.277332305908203, |
|
"ref_logps/rejected": -28.998043060302734, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -0.01194157637655735, |
|
"rewards/margins": 0.03547064587473869, |
|
"rewards/rejected": -0.047412216663360596, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.72191011235955e-07, |
|
"logps/chosen": -18.81739044189453, |
|
"logps/rejected": -24.600296020507812, |
|
"loss": 0.6612, |
|
"losses/dpo": 0.6598723530769348, |
|
"losses/sft": 0.8644169569015503, |
|
"losses/total": 0.6598723530769348, |
|
"ref_logps/chosen": -18.89391326904297, |
|
"ref_logps/rejected": -23.96309471130371, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.0076522137969732285, |
|
"rewards/margins": 0.07137227803468704, |
|
"rewards/rejected": -0.06372006982564926, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.707865168539326e-07, |
|
"logps/chosen": -25.24700927734375, |
|
"logps/rejected": -29.2607364654541, |
|
"loss": 0.6576, |
|
"losses/dpo": 0.6264052391052246, |
|
"losses/sft": 0.7484258413314819, |
|
"losses/total": 0.6264052391052246, |
|
"ref_logps/chosen": -25.243091583251953, |
|
"ref_logps/rejected": -28.458097457885742, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.00039180926978588104, |
|
"rewards/margins": 0.07987209409475327, |
|
"rewards/rejected": -0.0802639052271843, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.693820224719101e-07, |
|
"logps/chosen": -24.664264678955078, |
|
"logps/rejected": -29.071331024169922, |
|
"loss": 0.6596, |
|
"losses/dpo": 0.6850643157958984, |
|
"losses/sft": 0.7063156366348267, |
|
"losses/total": 0.6850643157958984, |
|
"ref_logps/chosen": -24.58011245727539, |
|
"ref_logps/rejected": -28.208541870117188, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.008415229618549347, |
|
"rewards/margins": 0.07786377519369125, |
|
"rewards/rejected": -0.08627899736166, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.6797752808988764e-07, |
|
"logps/chosen": -21.803192138671875, |
|
"logps/rejected": -25.79207992553711, |
|
"loss": 0.6529, |
|
"losses/dpo": 0.6567816734313965, |
|
"losses/sft": 0.8528650403022766, |
|
"losses/total": 0.6567816734313965, |
|
"ref_logps/chosen": -21.88966941833496, |
|
"ref_logps/rejected": -24.97705841064453, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.00864771381020546, |
|
"rewards/margins": 0.0901501327753067, |
|
"rewards/rejected": -0.08150242269039154, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.6657303370786514e-07, |
|
"logps/chosen": -20.78626823425293, |
|
"logps/rejected": -27.048810958862305, |
|
"loss": 0.6442, |
|
"losses/dpo": 0.6402660608291626, |
|
"losses/sft": 0.7653439044952393, |
|
"losses/total": 0.6402660608291626, |
|
"ref_logps/chosen": -20.915481567382812, |
|
"ref_logps/rejected": -26.105587005615234, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": 0.012921325862407684, |
|
"rewards/margins": 0.10724389553070068, |
|
"rewards/rejected": -0.0943225771188736, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.651685393258427e-07, |
|
"logps/chosen": -23.661598205566406, |
|
"logps/rejected": -26.884532928466797, |
|
"loss": 0.6563, |
|
"losses/dpo": 0.6588989496231079, |
|
"losses/sft": 0.8334387540817261, |
|
"losses/total": 0.6588989496231079, |
|
"ref_logps/chosen": -23.68170166015625, |
|
"ref_logps/rejected": -26.042449951171875, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.0020102611742913723, |
|
"rewards/margins": 0.08621874451637268, |
|
"rewards/rejected": -0.08420848101377487, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.637640449438202e-07, |
|
"logps/chosen": -21.846914291381836, |
|
"logps/rejected": -26.843595504760742, |
|
"loss": 0.6414, |
|
"losses/dpo": 0.610801100730896, |
|
"losses/sft": 0.6104759573936462, |
|
"losses/total": 0.610801100730896, |
|
"ref_logps/chosen": -21.904037475585938, |
|
"ref_logps/rejected": -25.758628845214844, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": 0.005712391808629036, |
|
"rewards/margins": 0.11420895159244537, |
|
"rewards/rejected": -0.10849656164646149, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6235955056179776e-07, |
|
"logps/chosen": -23.79953384399414, |
|
"logps/rejected": -26.24932861328125, |
|
"loss": 0.6507, |
|
"losses/dpo": 0.6711180806159973, |
|
"losses/sft": 0.8334028720855713, |
|
"losses/total": 0.6711180806159973, |
|
"ref_logps/chosen": -23.89289093017578, |
|
"ref_logps/rejected": -25.393817901611328, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": 0.009335671551525593, |
|
"rewards/margins": 0.09488671272993088, |
|
"rewards/rejected": -0.08555103838443756, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.6095505617977526e-07, |
|
"logps/chosen": -20.413612365722656, |
|
"logps/rejected": -28.091732025146484, |
|
"loss": 0.6393, |
|
"losses/dpo": 0.6086191534996033, |
|
"losses/sft": 0.7045127749443054, |
|
"losses/total": 0.6086191534996033, |
|
"ref_logps/chosen": -20.591529846191406, |
|
"ref_logps/rejected": -27.054677963256836, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.0177919864654541, |
|
"rewards/margins": 0.12149728834629059, |
|
"rewards/rejected": -0.10370529443025589, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5955056179775277e-07, |
|
"logps/chosen": -23.96946907043457, |
|
"logps/rejected": -25.42624282836914, |
|
"loss": 0.6574, |
|
"losses/dpo": 0.6771029233932495, |
|
"losses/sft": 0.8275946378707886, |
|
"losses/total": 0.6771029233932495, |
|
"ref_logps/chosen": -23.84187126159668, |
|
"ref_logps/rejected": -24.474294662475586, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.012759597972035408, |
|
"rewards/margins": 0.0824354737997055, |
|
"rewards/rejected": -0.09519506990909576, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.581460674157303e-07, |
|
"logps/chosen": -20.24493980407715, |
|
"logps/rejected": -26.33192253112793, |
|
"loss": 0.6403, |
|
"losses/dpo": 0.60587477684021, |
|
"losses/sft": 0.7718257904052734, |
|
"losses/total": 0.60587477684021, |
|
"ref_logps/chosen": -20.375638961791992, |
|
"ref_logps/rejected": -25.299020767211914, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.013069930486381054, |
|
"rewards/margins": 0.11636004596948624, |
|
"rewards/rejected": -0.10329011082649231, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.5674157303370783e-07, |
|
"logps/chosen": -22.9414119720459, |
|
"logps/rejected": -28.200380325317383, |
|
"loss": 0.6384, |
|
"losses/dpo": 0.6827423572540283, |
|
"losses/sft": 0.8567611575126648, |
|
"losses/total": 0.6827423572540283, |
|
"ref_logps/chosen": -23.111347198486328, |
|
"ref_logps/rejected": -27.142616271972656, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": 0.01699351891875267, |
|
"rewards/margins": 0.12276984751224518, |
|
"rewards/rejected": -0.10577632486820221, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.553370786516854e-07, |
|
"logps/chosen": -23.226070404052734, |
|
"logps/rejected": -27.77198028564453, |
|
"loss": 0.6624, |
|
"losses/dpo": 0.6864386796951294, |
|
"losses/sft": 0.8041479587554932, |
|
"losses/total": 0.6864386796951294, |
|
"ref_logps/chosen": -22.92740249633789, |
|
"ref_logps/rejected": -26.72946548461914, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.029866419732570648, |
|
"rewards/margins": 0.07438516616821289, |
|
"rewards/rejected": -0.10425157845020294, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.539325842696629e-07, |
|
"logps/chosen": -21.75617027282715, |
|
"logps/rejected": -28.53704833984375, |
|
"loss": 0.6455, |
|
"losses/dpo": 0.6347097158432007, |
|
"losses/sft": 0.6569658517837524, |
|
"losses/total": 0.6347097158432007, |
|
"ref_logps/chosen": -21.872474670410156, |
|
"ref_logps/rejected": -27.540082931518555, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": 0.011630430817604065, |
|
"rewards/margins": 0.11132718622684479, |
|
"rewards/rejected": -0.09969674795866013, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5252808988764045e-07, |
|
"logps/chosen": -24.18975830078125, |
|
"logps/rejected": -29.736862182617188, |
|
"loss": 0.6407, |
|
"losses/dpo": 0.6530706286430359, |
|
"losses/sft": 0.8703383207321167, |
|
"losses/total": 0.6530706286430359, |
|
"ref_logps/chosen": -24.138484954833984, |
|
"ref_logps/rejected": -28.495933532714844, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.005126964300870895, |
|
"rewards/margins": 0.11896562576293945, |
|
"rewards/rejected": -0.12409258633852005, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.51123595505618e-07, |
|
"logps/chosen": -24.84428596496582, |
|
"logps/rejected": -29.576303482055664, |
|
"loss": 0.647, |
|
"losses/dpo": 0.6477080583572388, |
|
"losses/sft": 0.8653473854064941, |
|
"losses/total": 0.6477080583572388, |
|
"ref_logps/chosen": -24.755064010620117, |
|
"ref_logps/rejected": -28.43872833251953, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.008922239765524864, |
|
"rewards/margins": 0.10483534634113312, |
|
"rewards/rejected": -0.11375758051872253, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.497191011235955e-07, |
|
"logps/chosen": -24.983165740966797, |
|
"logps/rejected": -27.753063201904297, |
|
"loss": 0.6095, |
|
"losses/dpo": 0.6273882389068604, |
|
"losses/sft": 0.8987213373184204, |
|
"losses/total": 0.6273882389068604, |
|
"ref_logps/chosen": -25.17366600036621, |
|
"ref_logps/rejected": -26.05366325378418, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.019050076603889465, |
|
"rewards/margins": 0.1889900416135788, |
|
"rewards/rejected": -0.16993993520736694, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.48314606741573e-07, |
|
"logps/chosen": -22.61692237854004, |
|
"logps/rejected": -27.743179321289062, |
|
"loss": 0.6583, |
|
"losses/dpo": 0.6790063381195068, |
|
"losses/sft": 0.7648496627807617, |
|
"losses/total": 0.6790063381195068, |
|
"ref_logps/chosen": -22.40664291381836, |
|
"ref_logps/rejected": -26.67925262451172, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.02102772891521454, |
|
"rewards/margins": 0.08536479622125626, |
|
"rewards/rejected": -0.1063925176858902, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.469101123595505e-07, |
|
"logps/chosen": -22.846782684326172, |
|
"logps/rejected": -29.590002059936523, |
|
"loss": 0.6261, |
|
"losses/dpo": 0.6479306221008301, |
|
"losses/sft": 0.8049210906028748, |
|
"losses/total": 0.6479306221008301, |
|
"ref_logps/chosen": -23.011579513549805, |
|
"ref_logps/rejected": -28.2562198638916, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": 0.016479745507240295, |
|
"rewards/margins": 0.14985813200473785, |
|
"rewards/rejected": -0.13337840139865875, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.4550561797752807e-07, |
|
"logps/chosen": -21.699583053588867, |
|
"logps/rejected": -27.46141815185547, |
|
"loss": 0.6277, |
|
"losses/dpo": 0.6358213424682617, |
|
"losses/sft": 0.8344307541847229, |
|
"losses/total": 0.6358213424682617, |
|
"ref_logps/chosen": -21.698383331298828, |
|
"ref_logps/rejected": -25.974313735961914, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -0.00012012943625450134, |
|
"rewards/margins": 0.14859014749526978, |
|
"rewards/rejected": -0.14871028065681458, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.441011235955056e-07, |
|
"logps/chosen": -20.88718032836914, |
|
"logps/rejected": -25.436817169189453, |
|
"loss": 0.654, |
|
"losses/dpo": 0.6406779289245605, |
|
"losses/sft": 0.8018806576728821, |
|
"losses/total": 0.6406779289245605, |
|
"ref_logps/chosen": -20.712448120117188, |
|
"ref_logps/rejected": -24.37557029724121, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.01747327297925949, |
|
"rewards/margins": 0.08865140378475189, |
|
"rewards/rejected": -0.10612466931343079, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.4269662921348313e-07, |
|
"logps/chosen": -22.312236785888672, |
|
"logps/rejected": -30.142927169799805, |
|
"loss": 0.6355, |
|
"losses/dpo": 0.5932921171188354, |
|
"losses/sft": 0.6528638005256653, |
|
"losses/total": 0.5932921171188354, |
|
"ref_logps/chosen": -22.210554122924805, |
|
"ref_logps/rejected": -28.713180541992188, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.010168392211198807, |
|
"rewards/margins": 0.13280624151229858, |
|
"rewards/rejected": -0.1429746299982071, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.4129213483146064e-07, |
|
"logps/chosen": -26.28810691833496, |
|
"logps/rejected": -29.10406494140625, |
|
"loss": 0.6359, |
|
"losses/dpo": 0.6205468773841858, |
|
"losses/sft": 0.8744308352470398, |
|
"losses/total": 0.6205468773841858, |
|
"ref_logps/chosen": -26.150360107421875, |
|
"ref_logps/rejected": -27.655288696289062, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -0.013774631544947624, |
|
"rewards/margins": 0.1311032772064209, |
|
"rewards/rejected": -0.14487791061401367, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.398876404494382e-07, |
|
"logps/chosen": -22.283679962158203, |
|
"logps/rejected": -26.302614212036133, |
|
"loss": 0.6679, |
|
"losses/dpo": 0.6655905246734619, |
|
"losses/sft": 0.8864909410476685, |
|
"losses/total": 0.6655905246734619, |
|
"ref_logps/chosen": -21.867923736572266, |
|
"ref_logps/rejected": -25.230295181274414, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -0.04157543182373047, |
|
"rewards/margins": 0.0656563863158226, |
|
"rewards/rejected": -0.10723182559013367, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.3848314606741575e-07, |
|
"logps/chosen": -22.68756103515625, |
|
"logps/rejected": -28.45652961730957, |
|
"loss": 0.6559, |
|
"losses/dpo": 0.6645406484603882, |
|
"losses/sft": 0.794353723526001, |
|
"losses/total": 0.6645406484603882, |
|
"ref_logps/chosen": -22.445241928100586, |
|
"ref_logps/rejected": -27.26552963256836, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -0.024232013151049614, |
|
"rewards/margins": 0.09486782550811768, |
|
"rewards/rejected": -0.11909983307123184, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.3707865168539325e-07, |
|
"logps/chosen": -22.336397171020508, |
|
"logps/rejected": -27.09580421447754, |
|
"loss": 0.6194, |
|
"losses/dpo": 0.5837043523788452, |
|
"losses/sft": 0.9716494083404541, |
|
"losses/total": 0.5837043523788452, |
|
"ref_logps/chosen": -22.35472297668457, |
|
"ref_logps/rejected": -25.422115325927734, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": 0.001832372508943081, |
|
"rewards/margins": 0.16920123994350433, |
|
"rewards/rejected": -0.16736885905265808, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.356741573033708e-07, |
|
"logps/chosen": -22.49996566772461, |
|
"logps/rejected": -28.435253143310547, |
|
"loss": 0.6425, |
|
"losses/dpo": 0.6696836948394775, |
|
"losses/sft": 0.773880660533905, |
|
"losses/total": 0.6696836948394775, |
|
"ref_logps/chosen": -22.170368194580078, |
|
"ref_logps/rejected": -26.929363250732422, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.032960131764411926, |
|
"rewards/margins": 0.11762877553701401, |
|
"rewards/rejected": -0.15058889985084534, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.3426966292134826e-07, |
|
"logps/chosen": -22.498619079589844, |
|
"logps/rejected": -30.868057250976562, |
|
"loss": 0.6295, |
|
"losses/dpo": 0.6400988101959229, |
|
"losses/sft": 0.724359929561615, |
|
"losses/total": 0.6400988101959229, |
|
"ref_logps/chosen": -22.199575424194336, |
|
"ref_logps/rejected": -29.1011962890625, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.029904408380389214, |
|
"rewards/margins": 0.1467815339565277, |
|
"rewards/rejected": -0.17668592929840088, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.328651685393258e-07, |
|
"logps/chosen": -24.872241973876953, |
|
"logps/rejected": -29.327089309692383, |
|
"loss": 0.6331, |
|
"losses/dpo": 0.6349748373031616, |
|
"losses/sft": 0.7728020548820496, |
|
"losses/total": 0.6349748373031616, |
|
"ref_logps/chosen": -24.600563049316406, |
|
"ref_logps/rejected": -27.632978439331055, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -0.027167750522494316, |
|
"rewards/margins": 0.14224328100681305, |
|
"rewards/rejected": -0.16941101849079132, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.314606741573033e-07, |
|
"logps/chosen": -25.719676971435547, |
|
"logps/rejected": -28.384960174560547, |
|
"loss": 0.6269, |
|
"losses/dpo": 0.6175022721290588, |
|
"losses/sft": 0.8887324929237366, |
|
"losses/total": 0.6175022721290588, |
|
"ref_logps/chosen": -25.583393096923828, |
|
"ref_logps/rejected": -26.621837615966797, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.013628311455249786, |
|
"rewards/margins": 0.16268408298492432, |
|
"rewards/rejected": -0.1763123720884323, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.300561797752809e-07, |
|
"logps/chosen": -20.547767639160156, |
|
"logps/rejected": -26.39871597290039, |
|
"loss": 0.6418, |
|
"losses/dpo": 0.604182243347168, |
|
"losses/sft": 0.63340824842453, |
|
"losses/total": 0.604182243347168, |
|
"ref_logps/chosen": -20.272342681884766, |
|
"ref_logps/rejected": -24.899860382080078, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.027542442083358765, |
|
"rewards/margins": 0.12234312295913696, |
|
"rewards/rejected": -0.14988556504249573, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.2865168539325844e-07, |
|
"logps/chosen": -22.42629623413086, |
|
"logps/rejected": -27.69287872314453, |
|
"loss": 0.6111, |
|
"losses/dpo": 0.5942946672439575, |
|
"losses/sft": 0.9472201466560364, |
|
"losses/total": 0.5942946672439575, |
|
"ref_logps/chosen": -22.135528564453125, |
|
"ref_logps/rejected": -25.469520568847656, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.029076654464006424, |
|
"rewards/margins": 0.1932588517665863, |
|
"rewards/rejected": -0.22233551740646362, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.2724719101123594e-07, |
|
"logps/chosen": -23.306896209716797, |
|
"logps/rejected": -28.64287567138672, |
|
"loss": 0.6467, |
|
"losses/dpo": 0.6821013689041138, |
|
"losses/sft": 0.9050745368003845, |
|
"losses/total": 0.6821013689041138, |
|
"ref_logps/chosen": -22.86626625061035, |
|
"ref_logps/rejected": -27.0958251953125, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.04406279698014259, |
|
"rewards/margins": 0.11064193397760391, |
|
"rewards/rejected": -0.1547047346830368, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.258426966292135e-07, |
|
"logps/chosen": -24.126543045043945, |
|
"logps/rejected": -26.020713806152344, |
|
"loss": 0.6214, |
|
"losses/dpo": 0.6081950664520264, |
|
"losses/sft": 0.827450692653656, |
|
"losses/total": 0.6081950664520264, |
|
"ref_logps/chosen": -23.934072494506836, |
|
"ref_logps/rejected": -24.092741012573242, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.01924710161983967, |
|
"rewards/margins": 0.17355017364025116, |
|
"rewards/rejected": -0.19279725849628448, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.24438202247191e-07, |
|
"logps/chosen": -23.07083511352539, |
|
"logps/rejected": -29.666513442993164, |
|
"loss": 0.6401, |
|
"losses/dpo": 0.6096771955490112, |
|
"losses/sft": 0.7951339483261108, |
|
"losses/total": 0.6096771955490112, |
|
"ref_logps/chosen": -22.55157470703125, |
|
"ref_logps/rejected": -27.86458396911621, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.05192602425813675, |
|
"rewards/margins": 0.12826718389987946, |
|
"rewards/rejected": -0.1801932007074356, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2303370786516856e-07, |
|
"logps/chosen": -23.97926139831543, |
|
"logps/rejected": -26.387611389160156, |
|
"loss": 0.6543, |
|
"losses/dpo": 0.5806229710578918, |
|
"losses/sft": 0.9021787047386169, |
|
"losses/total": 0.5806229710578918, |
|
"ref_logps/chosen": -23.097957611083984, |
|
"ref_logps/rejected": -24.48831558227539, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.0881301686167717, |
|
"rewards/margins": 0.10179921984672546, |
|
"rewards/rejected": -0.18992936611175537, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.21629213483146e-07, |
|
"logps/chosen": -24.496349334716797, |
|
"logps/rejected": -28.20893669128418, |
|
"loss": 0.6439, |
|
"losses/dpo": 0.5786381959915161, |
|
"losses/sft": 0.9020153284072876, |
|
"losses/total": 0.5786381959915161, |
|
"ref_logps/chosen": -24.026926040649414, |
|
"ref_logps/rejected": -26.54248046875, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -0.04694243520498276, |
|
"rewards/margins": 0.11970352381467819, |
|
"rewards/rejected": -0.16664597392082214, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.2022471910112357e-07, |
|
"logps/chosen": -23.227306365966797, |
|
"logps/rejected": -29.19955825805664, |
|
"loss": 0.6389, |
|
"losses/dpo": 0.6521559953689575, |
|
"losses/sft": 0.9907703399658203, |
|
"losses/total": 0.6521559953689575, |
|
"ref_logps/chosen": -22.804248809814453, |
|
"ref_logps/rejected": -27.36874008178711, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.042305897921323776, |
|
"rewards/margins": 0.14077602326869965, |
|
"rewards/rejected": -0.18308192491531372, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1882022471910107e-07, |
|
"logps/chosen": -22.211841583251953, |
|
"logps/rejected": -27.533721923828125, |
|
"loss": 0.6512, |
|
"losses/dpo": 0.6903020143508911, |
|
"losses/sft": 0.8463045358657837, |
|
"losses/total": 0.6903020143508911, |
|
"ref_logps/chosen": -21.630611419677734, |
|
"ref_logps/rejected": -25.909526824951172, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -0.05812288075685501, |
|
"rewards/margins": 0.10429678112268448, |
|
"rewards/rejected": -0.1624196618795395, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1741573033707863e-07, |
|
"logps/chosen": -22.332489013671875, |
|
"logps/rejected": -28.400074005126953, |
|
"loss": 0.6155, |
|
"losses/dpo": 0.6296464204788208, |
|
"losses/sft": 0.6626120805740356, |
|
"losses/total": 0.6296464204788208, |
|
"ref_logps/chosen": -22.126087188720703, |
|
"ref_logps/rejected": -26.414535522460938, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.020640213042497635, |
|
"rewards/margins": 0.17791378498077393, |
|
"rewards/rejected": -0.19855400919914246, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.160112359550562e-07, |
|
"logps/chosen": -23.771900177001953, |
|
"logps/rejected": -30.088207244873047, |
|
"loss": 0.5971, |
|
"losses/dpo": 0.6422166228294373, |
|
"losses/sft": 0.7472187876701355, |
|
"losses/total": 0.6422166228294373, |
|
"ref_logps/chosen": -23.688966751098633, |
|
"ref_logps/rejected": -27.655853271484375, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -0.008293594233691692, |
|
"rewards/margins": 0.23494186997413635, |
|
"rewards/rejected": -0.24323543906211853, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.146067415730337e-07, |
|
"logps/chosen": -23.348037719726562, |
|
"logps/rejected": -27.53687286376953, |
|
"loss": 0.6459, |
|
"losses/dpo": 0.6455183029174805, |
|
"losses/sft": 0.8395851850509644, |
|
"losses/total": 0.6455183029174805, |
|
"ref_logps/chosen": -22.63860511779785, |
|
"ref_logps/rejected": -25.60868263244629, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -0.0709431990981102, |
|
"rewards/margins": 0.12187594175338745, |
|
"rewards/rejected": -0.19281914830207825, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.1320224719101125e-07, |
|
"logps/chosen": -24.17770767211914, |
|
"logps/rejected": -30.49142074584961, |
|
"loss": 0.627, |
|
"losses/dpo": 0.6627662181854248, |
|
"losses/sft": 0.9079832434654236, |
|
"losses/total": 0.6627662181854248, |
|
"ref_logps/chosen": -23.401166915893555, |
|
"ref_logps/rejected": -28.0411376953125, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.07765418291091919, |
|
"rewards/margins": 0.16737422347068787, |
|
"rewards/rejected": -0.24502840638160706, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.1179775280898875e-07, |
|
"logps/chosen": -24.392324447631836, |
|
"logps/rejected": -27.670101165771484, |
|
"loss": 0.6251, |
|
"losses/dpo": 0.6143248081207275, |
|
"losses/sft": 0.6558141112327576, |
|
"losses/total": 0.6143248081207275, |
|
"ref_logps/chosen": -23.9196720123291, |
|
"ref_logps/rejected": -25.53693389892578, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.047265198081731796, |
|
"rewards/margins": 0.16605158150196075, |
|
"rewards/rejected": -0.21331676840782166, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.103932584269663e-07, |
|
"logps/chosen": -24.742660522460938, |
|
"logps/rejected": -33.37188720703125, |
|
"loss": 0.6157, |
|
"losses/dpo": 0.5933184623718262, |
|
"losses/sft": 0.9941530227661133, |
|
"losses/total": 0.5933184623718262, |
|
"ref_logps/chosen": -24.150442123413086, |
|
"ref_logps/rejected": -30.931093215942383, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.05922209471464157, |
|
"rewards/margins": 0.1848573535680771, |
|
"rewards/rejected": -0.24407947063446045, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.0898876404494376e-07, |
|
"logps/chosen": -23.68863296508789, |
|
"logps/rejected": -28.516223907470703, |
|
"loss": 0.6428, |
|
"losses/dpo": 0.6548395156860352, |
|
"losses/sft": 0.9564076066017151, |
|
"losses/total": 0.6548395156860352, |
|
"ref_logps/chosen": -22.73943519592285, |
|
"ref_logps/rejected": -26.3284912109375, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.09491994976997375, |
|
"rewards/margins": 0.12385320663452148, |
|
"rewards/rejected": -0.21877314150333405, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.075842696629213e-07, |
|
"logps/chosen": -22.527427673339844, |
|
"logps/rejected": -31.241607666015625, |
|
"loss": 0.6179, |
|
"losses/dpo": 0.5700336694717407, |
|
"losses/sft": 0.8869008421897888, |
|
"losses/total": 0.5700336694717407, |
|
"ref_logps/chosen": -21.73688507080078, |
|
"ref_logps/rejected": -28.55950164794922, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.07905411720275879, |
|
"rewards/margins": 0.18915657699108124, |
|
"rewards/rejected": -0.2682107090950012, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.0617977528089887e-07, |
|
"logps/chosen": -22.916969299316406, |
|
"logps/rejected": -26.000946044921875, |
|
"loss": 0.6425, |
|
"losses/dpo": 0.651595413684845, |
|
"losses/sft": 0.8127326369285583, |
|
"losses/total": 0.651595413684845, |
|
"ref_logps/chosen": -22.194671630859375, |
|
"ref_logps/rejected": -23.969348907470703, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.07222998142242432, |
|
"rewards/margins": 0.1309295892715454, |
|
"rewards/rejected": -0.20315957069396973, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.047752808988764e-07, |
|
"logps/chosen": -20.320987701416016, |
|
"logps/rejected": -27.46251106262207, |
|
"loss": 0.6217, |
|
"losses/dpo": 0.7334872484207153, |
|
"losses/sft": 0.9430239200592041, |
|
"losses/total": 0.7334872484207153, |
|
"ref_logps/chosen": -19.424144744873047, |
|
"ref_logps/rejected": -24.763113021850586, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -0.08968427777290344, |
|
"rewards/margins": 0.18025556206703186, |
|
"rewards/rejected": -0.2699398398399353, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0337078651685393e-07, |
|
"logps/chosen": -23.853857040405273, |
|
"logps/rejected": -27.422889709472656, |
|
"loss": 0.6381, |
|
"losses/dpo": 0.6393001079559326, |
|
"losses/sft": 0.766620397567749, |
|
"losses/total": 0.6393001079559326, |
|
"ref_logps/chosen": -22.798233032226562, |
|
"ref_logps/rejected": -24.980205535888672, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.10556241869926453, |
|
"rewards/margins": 0.13870559632778168, |
|
"rewards/rejected": -0.2442680299282074, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.0196629213483144e-07, |
|
"logps/chosen": -23.360549926757812, |
|
"logps/rejected": -27.110477447509766, |
|
"loss": 0.6234, |
|
"losses/dpo": 0.6311055421829224, |
|
"losses/sft": 0.9324018955230713, |
|
"losses/total": 0.6311055421829224, |
|
"ref_logps/chosen": -22.890331268310547, |
|
"ref_logps/rejected": -24.903316497802734, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.04702185466885567, |
|
"rewards/margins": 0.17369432747364044, |
|
"rewards/rejected": -0.2207161784172058, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.00561797752809e-07, |
|
"logps/chosen": -23.004093170166016, |
|
"logps/rejected": -31.04292106628418, |
|
"loss": 0.5926, |
|
"losses/dpo": 0.6243355870246887, |
|
"losses/sft": 0.8456003665924072, |
|
"losses/total": 0.6243355870246887, |
|
"ref_logps/chosen": -22.555362701416016, |
|
"ref_logps/rejected": -28.14826011657715, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.04487309604883194, |
|
"rewards/margins": 0.24459321796894073, |
|
"rewards/rejected": -0.28946632146835327, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.991573033707865e-07, |
|
"logps/chosen": -26.668237686157227, |
|
"logps/rejected": -30.511489868164062, |
|
"loss": 0.6099, |
|
"losses/dpo": 0.6743872761726379, |
|
"losses/sft": 0.836949348449707, |
|
"losses/total": 0.6743872761726379, |
|
"ref_logps/chosen": -25.680599212646484, |
|
"ref_logps/rejected": -27.35342788696289, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.09876400232315063, |
|
"rewards/margins": 0.2170422226190567, |
|
"rewards/rejected": -0.31580623984336853, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.9775280898876406e-07, |
|
"logps/chosen": -23.974590301513672, |
|
"logps/rejected": -28.162975311279297, |
|
"loss": 0.6119, |
|
"losses/dpo": 0.5823447704315186, |
|
"losses/sft": 0.8065779805183411, |
|
"losses/total": 0.5823447704315186, |
|
"ref_logps/chosen": -23.180667877197266, |
|
"ref_logps/rejected": -25.296037673950195, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.07939236611127853, |
|
"rewards/margins": 0.20730134844779968, |
|
"rewards/rejected": -0.2866936922073364, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9634831460674156e-07, |
|
"logps/chosen": -24.01116943359375, |
|
"logps/rejected": -30.05943489074707, |
|
"loss": 0.6203, |
|
"losses/dpo": 0.5889841318130493, |
|
"losses/sft": 0.8877280354499817, |
|
"losses/total": 0.5889841318130493, |
|
"ref_logps/chosen": -22.781108856201172, |
|
"ref_logps/rejected": -27.01274871826172, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.12300599366426468, |
|
"rewards/margins": 0.18166252970695496, |
|
"rewards/rejected": -0.3046685457229614, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9494382022471906e-07, |
|
"logps/chosen": -22.79621124267578, |
|
"logps/rejected": -28.1258544921875, |
|
"loss": 0.6198, |
|
"losses/dpo": 0.6025291681289673, |
|
"losses/sft": 0.93308424949646, |
|
"losses/total": 0.6025291681289673, |
|
"ref_logps/chosen": -21.591278076171875, |
|
"ref_logps/rejected": -25.04897689819336, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.12049318104982376, |
|
"rewards/margins": 0.18719442188739777, |
|
"rewards/rejected": -0.3076876401901245, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.935393258426966e-07, |
|
"logps/chosen": -24.246837615966797, |
|
"logps/rejected": -30.58446502685547, |
|
"loss": 0.6277, |
|
"losses/dpo": 0.5978178977966309, |
|
"losses/sft": 0.7778979539871216, |
|
"losses/total": 0.5978178977966309, |
|
"ref_logps/chosen": -23.127248764038086, |
|
"ref_logps/rejected": -27.7061767578125, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.11195877939462662, |
|
"rewards/margins": 0.17587023973464966, |
|
"rewards/rejected": -0.2878290116786957, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.921348314606741e-07, |
|
"logps/chosen": -24.55533218383789, |
|
"logps/rejected": -29.098743438720703, |
|
"loss": 0.6458, |
|
"losses/dpo": 0.6147331595420837, |
|
"losses/sft": 0.8299495577812195, |
|
"losses/total": 0.6147331595420837, |
|
"ref_logps/chosen": -23.001358032226562, |
|
"ref_logps/rejected": -26.009681701660156, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1553977131843567, |
|
"rewards/margins": 0.15350814163684845, |
|
"rewards/rejected": -0.30890583992004395, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.907303370786517e-07, |
|
"logps/chosen": -22.7973690032959, |
|
"logps/rejected": -30.61502456665039, |
|
"loss": 0.5968, |
|
"losses/dpo": 0.5409806370735168, |
|
"losses/sft": 0.8110998272895813, |
|
"losses/total": 0.5409806370735168, |
|
"ref_logps/chosen": -21.900728225708008, |
|
"ref_logps/rejected": -27.346271514892578, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.08966411650180817, |
|
"rewards/margins": 0.23721098899841309, |
|
"rewards/rejected": -0.32687509059906006, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.893258426966292e-07, |
|
"logps/chosen": -21.656837463378906, |
|
"logps/rejected": -28.09313201904297, |
|
"loss": 0.636, |
|
"losses/dpo": 0.6395488977432251, |
|
"losses/sft": 0.8838689923286438, |
|
"losses/total": 0.6395488977432251, |
|
"ref_logps/chosen": -20.17813491821289, |
|
"ref_logps/rejected": -25.027902603149414, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -0.14787010848522186, |
|
"rewards/margins": 0.15865309536457062, |
|
"rewards/rejected": -0.3065232038497925, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8792134831460674e-07, |
|
"logps/chosen": -23.13861083984375, |
|
"logps/rejected": -32.06410217285156, |
|
"loss": 0.6131, |
|
"losses/dpo": 0.6822565197944641, |
|
"losses/sft": 0.7876338362693787, |
|
"losses/total": 0.6822565197944641, |
|
"ref_logps/chosen": -21.99342918395996, |
|
"ref_logps/rejected": -28.761310577392578, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.11451825499534607, |
|
"rewards/margins": 0.21576061844825745, |
|
"rewards/rejected": -0.33027884364128113, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8651685393258425e-07, |
|
"logps/chosen": -22.36726951599121, |
|
"logps/rejected": -27.791099548339844, |
|
"loss": 0.6132, |
|
"losses/dpo": 0.5694007873535156, |
|
"losses/sft": 0.7940797805786133, |
|
"losses/total": 0.5694007873535156, |
|
"ref_logps/chosen": -21.662071228027344, |
|
"ref_logps/rejected": -25.100269317626953, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.07051999121904373, |
|
"rewards/margins": 0.19856315851211548, |
|
"rewards/rejected": -0.2690831422805786, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.851123595505618e-07, |
|
"logps/chosen": -24.541927337646484, |
|
"logps/rejected": -30.479598999023438, |
|
"loss": 0.6251, |
|
"losses/dpo": 0.6676912307739258, |
|
"losses/sft": 0.8101266026496887, |
|
"losses/total": 0.6676912307739258, |
|
"ref_logps/chosen": -23.375761032104492, |
|
"ref_logps/rejected": -27.565099716186523, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.11661653220653534, |
|
"rewards/margins": 0.1748332977294922, |
|
"rewards/rejected": -0.2914498448371887, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.8370786516853936e-07, |
|
"logps/chosen": -23.954505920410156, |
|
"logps/rejected": -30.262849807739258, |
|
"loss": 0.6289, |
|
"losses/dpo": 0.6359354257583618, |
|
"losses/sft": 0.846460223197937, |
|
"losses/total": 0.6359354257583618, |
|
"ref_logps/chosen": -22.69145965576172, |
|
"ref_logps/rejected": -27.221202850341797, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.12630482017993927, |
|
"rewards/margins": 0.1778600960969925, |
|
"rewards/rejected": -0.30416491627693176, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.823033707865168e-07, |
|
"logps/chosen": -25.615474700927734, |
|
"logps/rejected": -32.26765823364258, |
|
"loss": 0.6017, |
|
"losses/dpo": 0.6264960765838623, |
|
"losses/sft": 0.906339704990387, |
|
"losses/total": 0.6264960765838623, |
|
"ref_logps/chosen": -24.62253189086914, |
|
"ref_logps/rejected": -29.02202796936035, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.09929438680410385, |
|
"rewards/margins": 0.22526855766773224, |
|
"rewards/rejected": -0.3245629370212555, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8089887640449437e-07, |
|
"logps/chosen": -22.84251594543457, |
|
"logps/rejected": -28.347021102905273, |
|
"loss": 0.6191, |
|
"losses/dpo": 0.6483104825019836, |
|
"losses/sft": 0.9074235558509827, |
|
"losses/total": 0.6483104825019836, |
|
"ref_logps/chosen": -21.419048309326172, |
|
"ref_logps/rejected": -25.032745361328125, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.14234672486782074, |
|
"rewards/margins": 0.18908075988292694, |
|
"rewards/rejected": -0.3314274847507477, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.794943820224719e-07, |
|
"logps/chosen": -23.73548126220703, |
|
"logps/rejected": -28.329975128173828, |
|
"loss": 0.6238, |
|
"losses/dpo": 0.6014984250068665, |
|
"losses/sft": 0.773016631603241, |
|
"losses/total": 0.6014984250068665, |
|
"ref_logps/chosen": -22.425506591796875, |
|
"ref_logps/rejected": -25.099872589111328, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.13099724054336548, |
|
"rewards/margins": 0.19201286137104034, |
|
"rewards/rejected": -0.323010116815567, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.7808988764044943e-07, |
|
"logps/chosen": -26.183156967163086, |
|
"logps/rejected": -30.921403884887695, |
|
"loss": 0.625, |
|
"losses/dpo": 0.6309884190559387, |
|
"losses/sft": 0.8918415307998657, |
|
"losses/total": 0.6309884190559387, |
|
"ref_logps/chosen": -24.701202392578125, |
|
"ref_logps/rejected": -27.679357528686523, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -0.14819550514221191, |
|
"rewards/margins": 0.17600935697555542, |
|
"rewards/rejected": -0.32420486211776733, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7668539325842694e-07, |
|
"logps/chosen": -23.88658905029297, |
|
"logps/rejected": -29.73432731628418, |
|
"loss": 0.6156, |
|
"losses/dpo": 0.6188192367553711, |
|
"losses/sft": 0.8410817384719849, |
|
"losses/total": 0.6188192367553711, |
|
"ref_logps/chosen": -22.504894256591797, |
|
"ref_logps/rejected": -26.384294509887695, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -0.13816949725151062, |
|
"rewards/margins": 0.19683387875556946, |
|
"rewards/rejected": -0.3350033462047577, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.752808988764045e-07, |
|
"logps/chosen": -23.145811080932617, |
|
"logps/rejected": -29.627685546875, |
|
"loss": 0.637, |
|
"losses/dpo": 0.6995939612388611, |
|
"losses/sft": 0.9283435344696045, |
|
"losses/total": 0.6995939612388611, |
|
"ref_logps/chosen": -21.45529556274414, |
|
"ref_logps/rejected": -26.336702346801758, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1690514236688614, |
|
"rewards/margins": 0.16004663705825806, |
|
"rewards/rejected": -0.32909804582595825, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.73876404494382e-07, |
|
"logps/chosen": -22.545406341552734, |
|
"logps/rejected": -30.04849624633789, |
|
"loss": 0.608, |
|
"losses/dpo": 0.6513813734054565, |
|
"losses/sft": 0.9403305649757385, |
|
"losses/total": 0.6513813734054565, |
|
"ref_logps/chosen": -21.010854721069336, |
|
"ref_logps/rejected": -26.149032592773438, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.15345513820648193, |
|
"rewards/margins": 0.23649117350578308, |
|
"rewards/rejected": -0.3899462819099426, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.7247191011235955e-07, |
|
"logps/chosen": -22.640438079833984, |
|
"logps/rejected": -28.583681106567383, |
|
"loss": 0.6007, |
|
"losses/dpo": 0.5443820357322693, |
|
"losses/sft": 0.8517413139343262, |
|
"losses/total": 0.5443820357322693, |
|
"ref_logps/chosen": -21.29751968383789, |
|
"ref_logps/rejected": -24.850605010986328, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.1342916190624237, |
|
"rewards/margins": 0.23901620507240295, |
|
"rewards/rejected": -0.37330782413482666, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.710674157303371e-07, |
|
"logps/chosen": -25.259624481201172, |
|
"logps/rejected": -32.96052551269531, |
|
"loss": 0.6029, |
|
"losses/dpo": 0.5749891996383667, |
|
"losses/sft": 0.9417051672935486, |
|
"losses/total": 0.5749891996383667, |
|
"ref_logps/chosen": -23.468887329101562, |
|
"ref_logps/rejected": -28.776565551757812, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.17907381057739258, |
|
"rewards/margins": 0.23932181298732758, |
|
"rewards/rejected": -0.41839560866355896, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.6966292134831456e-07, |
|
"logps/chosen": -24.431142807006836, |
|
"logps/rejected": -31.409852981567383, |
|
"loss": 0.6256, |
|
"losses/dpo": 0.6045551896095276, |
|
"losses/sft": 0.8162484169006348, |
|
"losses/total": 0.6045551896095276, |
|
"ref_logps/chosen": -22.7187442779541, |
|
"ref_logps/rejected": -27.74604606628418, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.1712397187948227, |
|
"rewards/margins": 0.19514092803001404, |
|
"rewards/rejected": -0.36638063192367554, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.682584269662921e-07, |
|
"logps/chosen": -22.776988983154297, |
|
"logps/rejected": -30.418426513671875, |
|
"loss": 0.6093, |
|
"losses/dpo": 0.630817711353302, |
|
"losses/sft": 0.907343327999115, |
|
"losses/total": 0.630817711353302, |
|
"ref_logps/chosen": -21.150266647338867, |
|
"ref_logps/rejected": -26.52399444580078, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.16267219185829163, |
|
"rewards/margins": 0.22677099704742432, |
|
"rewards/rejected": -0.38944315910339355, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.668539325842696e-07, |
|
"logps/chosen": -24.300395965576172, |
|
"logps/rejected": -32.63694763183594, |
|
"loss": 0.5834, |
|
"losses/dpo": 0.5977815389633179, |
|
"losses/sft": 0.8870611190795898, |
|
"losses/total": 0.5977815389633179, |
|
"ref_logps/chosen": -22.923202514648438, |
|
"ref_logps/rejected": -28.30066680908203, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.13771943747997284, |
|
"rewards/margins": 0.29590874910354614, |
|
"rewards/rejected": -0.4336281716823578, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.654494382022472e-07, |
|
"logps/chosen": -25.562063217163086, |
|
"logps/rejected": -28.686279296875, |
|
"loss": 0.6248, |
|
"losses/dpo": 0.593975841999054, |
|
"losses/sft": 0.8298511505126953, |
|
"losses/total": 0.593975841999054, |
|
"ref_logps/chosen": -23.863605499267578, |
|
"ref_logps/rejected": -24.980735778808594, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -0.16984564065933228, |
|
"rewards/margins": 0.2007087767124176, |
|
"rewards/rejected": -0.3705544173717499, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.640449438202247e-07, |
|
"logps/chosen": -24.133087158203125, |
|
"logps/rejected": -32.86896514892578, |
|
"loss": 0.6072, |
|
"losses/dpo": 0.5785881280899048, |
|
"losses/sft": 0.9283973574638367, |
|
"losses/total": 0.5785881280899048, |
|
"ref_logps/chosen": -22.546520233154297, |
|
"ref_logps/rejected": -28.988473892211914, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.158656507730484, |
|
"rewards/margins": 0.2293928861618042, |
|
"rewards/rejected": -0.3880493640899658, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.6264044943820224e-07, |
|
"logps/chosen": -21.610166549682617, |
|
"logps/rejected": -33.77753448486328, |
|
"loss": 0.5743, |
|
"losses/dpo": 0.5111271142959595, |
|
"losses/sft": 0.7807843685150146, |
|
"losses/total": 0.5111271142959595, |
|
"ref_logps/chosen": -20.26101303100586, |
|
"ref_logps/rejected": -29.390432357788086, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.1349155306816101, |
|
"rewards/margins": 0.3037945628166199, |
|
"rewards/rejected": -0.43871009349823, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.612359550561798e-07, |
|
"logps/chosen": -24.600027084350586, |
|
"logps/rejected": -28.993408203125, |
|
"loss": 0.621, |
|
"losses/dpo": 0.6254321336746216, |
|
"losses/sft": 0.7647839188575745, |
|
"losses/total": 0.6254321336746216, |
|
"ref_logps/chosen": -22.838638305664062, |
|
"ref_logps/rejected": -25.172962188720703, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.1761387437582016, |
|
"rewards/margins": 0.20590564608573914, |
|
"rewards/rejected": -0.38204440474510193, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.598314606741573e-07, |
|
"logps/chosen": -25.24309730529785, |
|
"logps/rejected": -32.02477264404297, |
|
"loss": 0.6078, |
|
"losses/dpo": 0.6571998000144958, |
|
"losses/sft": 0.8880329728126526, |
|
"losses/total": 0.6571998000144958, |
|
"ref_logps/chosen": -23.272363662719727, |
|
"ref_logps/rejected": -27.582080841064453, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.19707328081130981, |
|
"rewards/margins": 0.24719560146331787, |
|
"rewards/rejected": -0.4442688822746277, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.5842696629213486e-07, |
|
"logps/chosen": -23.570541381835938, |
|
"logps/rejected": -31.662994384765625, |
|
"loss": 0.5954, |
|
"losses/dpo": 0.6153095960617065, |
|
"losses/sft": 0.7867841720581055, |
|
"losses/total": 0.6153095960617065, |
|
"ref_logps/chosen": -21.58125114440918, |
|
"ref_logps/rejected": -27.1029052734375, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.19892916083335876, |
|
"rewards/margins": 0.2570798993110657, |
|
"rewards/rejected": -0.45600906014442444, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5702247191011236e-07, |
|
"logps/chosen": -26.515090942382812, |
|
"logps/rejected": -33.26690673828125, |
|
"loss": 0.5944, |
|
"losses/dpo": 0.559239387512207, |
|
"losses/sft": 0.8030417561531067, |
|
"losses/total": 0.559239387512207, |
|
"ref_logps/chosen": -24.70389175415039, |
|
"ref_logps/rejected": -28.788631439208984, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.18111974000930786, |
|
"rewards/margins": 0.2667076587677002, |
|
"rewards/rejected": -0.44782739877700806, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5561797752808987e-07, |
|
"logps/chosen": -23.109725952148438, |
|
"logps/rejected": -30.950822830200195, |
|
"loss": 0.6028, |
|
"losses/dpo": 0.6463332772254944, |
|
"losses/sft": 0.867030918598175, |
|
"losses/total": 0.6463332772254944, |
|
"ref_logps/chosen": -21.529489517211914, |
|
"ref_logps/rejected": -26.95291519165039, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.15802377462387085, |
|
"rewards/margins": 0.24176692962646484, |
|
"rewards/rejected": -0.3997907340526581, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5421348314606737e-07, |
|
"logps/chosen": -22.45772933959961, |
|
"logps/rejected": -30.6645450592041, |
|
"loss": 0.548, |
|
"losses/dpo": 0.49787038564682007, |
|
"losses/sft": 0.9076435565948486, |
|
"losses/total": 0.49787038564682007, |
|
"ref_logps/chosen": -21.689294815063477, |
|
"ref_logps/rejected": -26.167482376098633, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.07684363424777985, |
|
"rewards/margins": 0.37286245822906494, |
|
"rewards/rejected": -0.449706107378006, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.5280898876404493e-07, |
|
"logps/chosen": -23.930644989013672, |
|
"logps/rejected": -31.34885597229004, |
|
"loss": 0.5791, |
|
"losses/dpo": 0.6228358745574951, |
|
"losses/sft": 0.894844651222229, |
|
"losses/total": 0.6228358745574951, |
|
"ref_logps/chosen": -22.003002166748047, |
|
"ref_logps/rejected": -26.41282844543457, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.19276437163352966, |
|
"rewards/margins": 0.3008383512496948, |
|
"rewards/rejected": -0.4936027228832245, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5140449438202243e-07, |
|
"logps/chosen": -25.59225082397461, |
|
"logps/rejected": -30.82415199279785, |
|
"loss": 0.5571, |
|
"losses/dpo": 0.5233840942382812, |
|
"losses/sft": 0.8860921263694763, |
|
"losses/total": 0.5233840942382812, |
|
"ref_logps/chosen": -23.89864730834961, |
|
"ref_logps/rejected": -25.53179168701172, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.16936028003692627, |
|
"rewards/margins": 0.35987579822540283, |
|
"rewards/rejected": -0.5292361378669739, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.5e-07, |
|
"logps/chosen": -26.896615982055664, |
|
"logps/rejected": -32.64814376831055, |
|
"loss": 0.5821, |
|
"losses/dpo": 0.5345016121864319, |
|
"losses/sft": 0.9819333553314209, |
|
"losses/total": 0.5345016121864319, |
|
"ref_logps/chosen": -24.95808982849121, |
|
"ref_logps/rejected": -27.606571197509766, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.19385257363319397, |
|
"rewards/margins": 0.3103046417236328, |
|
"rewards/rejected": -0.5041571855545044, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.485955056179775e-07, |
|
"logps/chosen": -21.461519241333008, |
|
"logps/rejected": -29.887657165527344, |
|
"loss": 0.5621, |
|
"losses/dpo": 0.5603345632553101, |
|
"losses/sft": 0.7855640649795532, |
|
"losses/total": 0.5603345632553101, |
|
"ref_logps/chosen": -20.122406005859375, |
|
"ref_logps/rejected": -24.85255241394043, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13391147553920746, |
|
"rewards/margins": 0.3695991039276123, |
|
"rewards/rejected": -0.503510594367981, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4719101123595505e-07, |
|
"logps/chosen": -22.143098831176758, |
|
"logps/rejected": -34.4566764831543, |
|
"loss": 0.5923, |
|
"losses/dpo": 0.5465586185455322, |
|
"losses/sft": 1.051912546157837, |
|
"losses/total": 0.5465586185455322, |
|
"ref_logps/chosen": -20.453866958618164, |
|
"ref_logps/rejected": -30.012981414794922, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.16892319917678833, |
|
"rewards/margins": 0.27544665336608887, |
|
"rewards/rejected": -0.4443698525428772, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.4578651685393255e-07, |
|
"logps/chosen": -24.042566299438477, |
|
"logps/rejected": -29.772445678710938, |
|
"loss": 0.6149, |
|
"losses/dpo": 0.6469910144805908, |
|
"losses/sft": 1.0151987075805664, |
|
"losses/total": 0.6469910144805908, |
|
"ref_logps/chosen": -21.836162567138672, |
|
"ref_logps/rejected": -25.394845962524414, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -0.220640629529953, |
|
"rewards/margins": 0.2171194702386856, |
|
"rewards/rejected": -0.4377601146697998, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.443820224719101e-07, |
|
"logps/chosen": -24.834793090820312, |
|
"logps/rejected": -33.834083557128906, |
|
"loss": 0.5676, |
|
"losses/dpo": 0.6051491498947144, |
|
"losses/sft": 0.8380707502365112, |
|
"losses/total": 0.6051491498947144, |
|
"ref_logps/chosen": -22.940967559814453, |
|
"ref_logps/rejected": -28.49428939819336, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.18938273191452026, |
|
"rewards/margins": 0.3445969223976135, |
|
"rewards/rejected": -0.5339796543121338, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.429775280898876e-07, |
|
"logps/chosen": -25.5327091217041, |
|
"logps/rejected": -30.429113388061523, |
|
"loss": 0.6089, |
|
"losses/dpo": 0.5853685140609741, |
|
"losses/sft": 0.6926910877227783, |
|
"losses/total": 0.5853685140609741, |
|
"ref_logps/chosen": -23.138214111328125, |
|
"ref_logps/rejected": -25.674575805664062, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.23944953083992004, |
|
"rewards/margins": 0.2360040545463562, |
|
"rewards/rejected": -0.47545361518859863, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.4157303370786517e-07, |
|
"logps/chosen": -24.123153686523438, |
|
"logps/rejected": -29.51090431213379, |
|
"loss": 0.6134, |
|
"losses/dpo": 0.7566800117492676, |
|
"losses/sft": 0.9139145612716675, |
|
"losses/total": 0.7566800117492676, |
|
"ref_logps/chosen": -22.065155029296875, |
|
"ref_logps/rejected": -24.94894027709961, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.20579975843429565, |
|
"rewards/margins": 0.250396728515625, |
|
"rewards/rejected": -0.45619648694992065, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.401685393258427e-07, |
|
"logps/chosen": -26.274799346923828, |
|
"logps/rejected": -32.90815734863281, |
|
"loss": 0.6145, |
|
"losses/dpo": 0.6078730225563049, |
|
"losses/sft": 1.1017650365829468, |
|
"losses/total": 0.6078730225563049, |
|
"ref_logps/chosen": -23.55907440185547, |
|
"ref_logps/rejected": -27.880718231201172, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.2715725004673004, |
|
"rewards/margins": 0.23117120563983917, |
|
"rewards/rejected": -0.5027437210083008, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3876404494382023e-07, |
|
"logps/chosen": -25.727689743041992, |
|
"logps/rejected": -30.410335540771484, |
|
"loss": 0.6292, |
|
"losses/dpo": 0.6031284332275391, |
|
"losses/sft": 0.7834776639938354, |
|
"losses/total": 0.6031284332275391, |
|
"ref_logps/chosen": -23.430198669433594, |
|
"ref_logps/rejected": -26.02400779724121, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.22974896430969238, |
|
"rewards/margins": 0.20888389647006989, |
|
"rewards/rejected": -0.4386328458786011, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3735955056179774e-07, |
|
"logps/chosen": -25.917598724365234, |
|
"logps/rejected": -31.14261245727539, |
|
"loss": 0.5928, |
|
"losses/dpo": 0.5714601874351501, |
|
"losses/sft": 0.8888335227966309, |
|
"losses/total": 0.5714601874351501, |
|
"ref_logps/chosen": -23.436574935913086, |
|
"ref_logps/rejected": -25.756431579589844, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.24810227751731873, |
|
"rewards/margins": 0.29051584005355835, |
|
"rewards/rejected": -0.5386180877685547, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3595505617977527e-07, |
|
"logps/chosen": -25.50743865966797, |
|
"logps/rejected": -34.945220947265625, |
|
"loss": 0.5505, |
|
"losses/dpo": 0.5715539455413818, |
|
"losses/sft": 0.8663308620452881, |
|
"losses/total": 0.5715539455413818, |
|
"ref_logps/chosen": -23.417984008789062, |
|
"ref_logps/rejected": -28.690208435058594, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -0.20894566178321838, |
|
"rewards/margins": 0.41655558347702026, |
|
"rewards/rejected": -0.625501275062561, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.345505617977528e-07, |
|
"logps/chosen": -23.620698928833008, |
|
"logps/rejected": -34.89327621459961, |
|
"loss": 0.571, |
|
"losses/dpo": 0.6053493022918701, |
|
"losses/sft": 0.8246825933456421, |
|
"losses/total": 0.6053493022918701, |
|
"ref_logps/chosen": -21.27004623413086, |
|
"ref_logps/rejected": -29.035568237304688, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.23506540060043335, |
|
"rewards/margins": 0.35070547461509705, |
|
"rewards/rejected": -0.585770845413208, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.331460674157303e-07, |
|
"logps/chosen": -21.874225616455078, |
|
"logps/rejected": -34.58841323852539, |
|
"loss": 0.5745, |
|
"losses/dpo": 0.5964910984039307, |
|
"losses/sft": 0.842921793460846, |
|
"losses/total": 0.5964910984039307, |
|
"ref_logps/chosen": -19.500164031982422, |
|
"ref_logps/rejected": -28.771209716796875, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.23740598559379578, |
|
"rewards/margins": 0.34431448578834534, |
|
"rewards/rejected": -0.5817204713821411, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3174157303370786e-07, |
|
"logps/chosen": -24.84224510192871, |
|
"logps/rejected": -32.233497619628906, |
|
"loss": 0.6064, |
|
"losses/dpo": 0.5861349105834961, |
|
"losses/sft": 0.9263943433761597, |
|
"losses/total": 0.5861349105834961, |
|
"ref_logps/chosen": -21.88359832763672, |
|
"ref_logps/rejected": -26.701745986938477, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.29586488008499146, |
|
"rewards/margins": 0.2573099732398987, |
|
"rewards/rejected": -0.5531748533248901, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.303370786516854e-07, |
|
"logps/chosen": -25.4254207611084, |
|
"logps/rejected": -34.96025085449219, |
|
"loss": 0.5747, |
|
"losses/dpo": 0.5563768744468689, |
|
"losses/sft": 0.9355225563049316, |
|
"losses/total": 0.5563768744468689, |
|
"ref_logps/chosen": -22.772850036621094, |
|
"ref_logps/rejected": -28.902484893798828, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.26525697112083435, |
|
"rewards/margins": 0.3405200242996216, |
|
"rewards/rejected": -0.6057769656181335, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.2893258426966292e-07, |
|
"logps/chosen": -26.856834411621094, |
|
"logps/rejected": -34.212364196777344, |
|
"loss": 0.6228, |
|
"losses/dpo": 0.6681157946586609, |
|
"losses/sft": 1.0442770719528198, |
|
"losses/total": 0.6681157946586609, |
|
"ref_logps/chosen": -23.627426147460938, |
|
"ref_logps/rejected": -28.62677001953125, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.32294073700904846, |
|
"rewards/margins": 0.23561875522136688, |
|
"rewards/rejected": -0.5585595369338989, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2752808988764045e-07, |
|
"logps/chosen": -26.366958618164062, |
|
"logps/rejected": -33.41276550292969, |
|
"loss": 0.6217, |
|
"losses/dpo": 0.6866650581359863, |
|
"losses/sft": 0.8693393468856812, |
|
"losses/total": 0.6866650581359863, |
|
"ref_logps/chosen": -23.189382553100586, |
|
"ref_logps/rejected": -27.676807403564453, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.31775763630867004, |
|
"rewards/margins": 0.25583818554878235, |
|
"rewards/rejected": -0.5735958218574524, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2612359550561795e-07, |
|
"logps/chosen": -24.26227569580078, |
|
"logps/rejected": -32.4229736328125, |
|
"loss": 0.604, |
|
"losses/dpo": 0.5642524361610413, |
|
"losses/sft": 0.9980260133743286, |
|
"losses/total": 0.5642524361610413, |
|
"ref_logps/chosen": -21.425315856933594, |
|
"ref_logps/rejected": -26.820331573486328, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.28369593620300293, |
|
"rewards/margins": 0.2765684127807617, |
|
"rewards/rejected": -0.5602643489837646, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2471910112359549e-07, |
|
"logps/chosen": -27.912431716918945, |
|
"logps/rejected": -31.85492706298828, |
|
"loss": 0.6448, |
|
"losses/dpo": 0.5940742492675781, |
|
"losses/sft": 0.969171404838562, |
|
"losses/total": 0.5940742492675781, |
|
"ref_logps/chosen": -24.59956932067871, |
|
"ref_logps/rejected": -26.790037155151367, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.3312861919403076, |
|
"rewards/margins": 0.17520278692245483, |
|
"rewards/rejected": -0.5064890384674072, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.2331460674157302e-07, |
|
"logps/chosen": -27.303508758544922, |
|
"logps/rejected": -37.65882110595703, |
|
"loss": 0.5545, |
|
"losses/dpo": 0.5936781764030457, |
|
"losses/sft": 1.015429139137268, |
|
"losses/total": 0.5936781764030457, |
|
"ref_logps/chosen": -24.510639190673828, |
|
"ref_logps/rejected": -30.55707550048828, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.27928683161735535, |
|
"rewards/margins": 0.4308881163597107, |
|
"rewards/rejected": -0.7101750373840332, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2191011235955055e-07, |
|
"logps/chosen": -24.99541473388672, |
|
"logps/rejected": -30.256423950195312, |
|
"loss": 0.6034, |
|
"losses/dpo": 0.608791172504425, |
|
"losses/sft": 0.9114975929260254, |
|
"losses/total": 0.608791172504425, |
|
"ref_logps/chosen": -22.079914093017578, |
|
"ref_logps/rejected": -24.779722213745117, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -0.2915502190589905, |
|
"rewards/margins": 0.2561199963092804, |
|
"rewards/rejected": -0.5476702451705933, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.205056179775281e-07, |
|
"logps/chosen": -27.542556762695312, |
|
"logps/rejected": -34.17859649658203, |
|
"loss": 0.574, |
|
"losses/dpo": 0.5037014484405518, |
|
"losses/sft": 0.8922078609466553, |
|
"losses/total": 0.5037014484405518, |
|
"ref_logps/chosen": -24.774127960205078, |
|
"ref_logps/rejected": -27.759735107421875, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2768429219722748, |
|
"rewards/margins": 0.3650434911251068, |
|
"rewards/rejected": -0.6418864727020264, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.191011235955056e-07, |
|
"logps/chosen": -25.87149429321289, |
|
"logps/rejected": -34.46807861328125, |
|
"loss": 0.6117, |
|
"losses/dpo": 0.7050824165344238, |
|
"losses/sft": 0.9497538208961487, |
|
"losses/total": 0.7050824165344238, |
|
"ref_logps/chosen": -23.14657211303711, |
|
"ref_logps/rejected": -28.885162353515625, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.27249252796173096, |
|
"rewards/margins": 0.2857990562915802, |
|
"rewards/rejected": -0.5582915544509888, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1769662921348314e-07, |
|
"logps/chosen": -24.89635467529297, |
|
"logps/rejected": -33.622718811035156, |
|
"loss": 0.5808, |
|
"losses/dpo": 0.5883455276489258, |
|
"losses/sft": 0.9948925375938416, |
|
"losses/total": 0.5883455276489258, |
|
"ref_logps/chosen": -21.52194595336914, |
|
"ref_logps/rejected": -26.93505859375, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.3374406695365906, |
|
"rewards/margins": 0.3313255310058594, |
|
"rewards/rejected": -0.66876620054245, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1629213483146067e-07, |
|
"logps/chosen": -24.833309173583984, |
|
"logps/rejected": -30.974327087402344, |
|
"loss": 0.6136, |
|
"losses/dpo": 0.6376237869262695, |
|
"losses/sft": 0.9374114274978638, |
|
"losses/total": 0.6376237869262695, |
|
"ref_logps/chosen": -21.7708683013916, |
|
"ref_logps/rejected": -25.24457359313965, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.30624428391456604, |
|
"rewards/margins": 0.2667309045791626, |
|
"rewards/rejected": -0.5729751586914062, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.148876404494382e-07, |
|
"logps/chosen": -24.04471778869629, |
|
"logps/rejected": -34.610633850097656, |
|
"loss": 0.6133, |
|
"losses/dpo": 0.645912766456604, |
|
"losses/sft": 0.9913955926895142, |
|
"losses/total": 0.645912766456604, |
|
"ref_logps/chosen": -20.834651947021484, |
|
"ref_logps/rejected": -28.77642059326172, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -0.32100653648376465, |
|
"rewards/margins": 0.26241475343704224, |
|
"rewards/rejected": -0.5834212899208069, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.134831460674157e-07, |
|
"logps/chosen": -26.419416427612305, |
|
"logps/rejected": -34.56787109375, |
|
"loss": 0.5713, |
|
"losses/dpo": 0.6227866411209106, |
|
"losses/sft": 0.9809292554855347, |
|
"losses/total": 0.6227866411209106, |
|
"ref_logps/chosen": -23.278644561767578, |
|
"ref_logps/rejected": -27.596946716308594, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.3140770494937897, |
|
"rewards/margins": 0.38301563262939453, |
|
"rewards/rejected": -0.6970926523208618, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.1207865168539323e-07, |
|
"logps/chosen": -26.64739990234375, |
|
"logps/rejected": -33.21559524536133, |
|
"loss": 0.59, |
|
"losses/dpo": 0.6351089477539062, |
|
"losses/sft": 0.9912072420120239, |
|
"losses/total": 0.6351089477539062, |
|
"ref_logps/chosen": -23.281349182128906, |
|
"ref_logps/rejected": -26.577198028564453, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.3366050124168396, |
|
"rewards/margins": 0.32723480463027954, |
|
"rewards/rejected": -0.6638398170471191, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1067415730337076e-07, |
|
"logps/chosen": -27.422582626342773, |
|
"logps/rejected": -35.08824920654297, |
|
"loss": 0.6064, |
|
"losses/dpo": 0.5233859419822693, |
|
"losses/sft": 0.8136109709739685, |
|
"losses/total": 0.5233859419822693, |
|
"ref_logps/chosen": -24.274629592895508, |
|
"ref_logps/rejected": -28.790220260620117, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.3147951364517212, |
|
"rewards/margins": 0.3150079846382141, |
|
"rewards/rejected": -0.6298030614852905, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.0926966292134832e-07, |
|
"logps/chosen": -26.381507873535156, |
|
"logps/rejected": -31.576181411743164, |
|
"loss": 0.5829, |
|
"losses/dpo": 0.5970532894134521, |
|
"losses/sft": 0.8552703261375427, |
|
"losses/total": 0.5970532894134521, |
|
"ref_logps/chosen": -23.155136108398438, |
|
"ref_logps/rejected": -24.94633674621582, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.32263678312301636, |
|
"rewards/margins": 0.34034764766693115, |
|
"rewards/rejected": -0.6629844903945923, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0786516853932585e-07, |
|
"logps/chosen": -24.061811447143555, |
|
"logps/rejected": -29.508312225341797, |
|
"loss": 0.6137, |
|
"losses/dpo": 0.6248607039451599, |
|
"losses/sft": 0.8072177767753601, |
|
"losses/total": 0.6248607039451599, |
|
"ref_logps/chosen": -20.497760772705078, |
|
"ref_logps/rejected": -23.47817611694336, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.35640496015548706, |
|
"rewards/margins": 0.24660846590995789, |
|
"rewards/rejected": -0.6030134558677673, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0646067415730336e-07, |
|
"logps/chosen": -29.165149688720703, |
|
"logps/rejected": -35.16246032714844, |
|
"loss": 0.5826, |
|
"losses/dpo": 0.5271694660186768, |
|
"losses/sft": 1.0120395421981812, |
|
"losses/total": 0.5271694660186768, |
|
"ref_logps/chosen": -25.856834411621094, |
|
"ref_logps/rejected": -28.48740005493164, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.3308315873146057, |
|
"rewards/margins": 0.33667463064193726, |
|
"rewards/rejected": -0.667506217956543, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.0505617977528089e-07, |
|
"logps/chosen": -26.1055965423584, |
|
"logps/rejected": -36.45195770263672, |
|
"loss": 0.5345, |
|
"losses/dpo": 0.5425952076911926, |
|
"losses/sft": 0.9156839847564697, |
|
"losses/total": 0.5425952076911926, |
|
"ref_logps/chosen": -23.200654983520508, |
|
"ref_logps/rejected": -28.661373138427734, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.2904941737651825, |
|
"rewards/margins": 0.48856407403945923, |
|
"rewards/rejected": -0.7790582776069641, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.0365168539325842e-07, |
|
"logps/chosen": -24.59746551513672, |
|
"logps/rejected": -36.00947570800781, |
|
"loss": 0.5622, |
|
"losses/dpo": 0.6595858335494995, |
|
"losses/sft": 0.8320033550262451, |
|
"losses/total": 0.6595858335494995, |
|
"ref_logps/chosen": -21.081745147705078, |
|
"ref_logps/rejected": -28.422481536865234, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.3515721559524536, |
|
"rewards/margins": 0.4071270823478699, |
|
"rewards/rejected": -0.7586992383003235, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0224719101123595e-07, |
|
"logps/chosen": -25.407838821411133, |
|
"logps/rejected": -33.07604217529297, |
|
"loss": 0.5892, |
|
"losses/dpo": 0.5324288606643677, |
|
"losses/sft": 1.0311552286148071, |
|
"losses/total": 0.5324288606643677, |
|
"ref_logps/chosen": -22.188087463378906, |
|
"ref_logps/rejected": -26.633270263671875, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.3219751715660095, |
|
"rewards/margins": 0.322301983833313, |
|
"rewards/rejected": -0.6442771553993225, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0084269662921348e-07, |
|
"logps/chosen": -26.190311431884766, |
|
"logps/rejected": -33.34137725830078, |
|
"loss": 0.5861, |
|
"losses/dpo": 0.6612842082977295, |
|
"losses/sft": 0.8551939129829407, |
|
"losses/total": 0.6612842082977295, |
|
"ref_logps/chosen": -22.73942756652832, |
|
"ref_logps/rejected": -26.702760696411133, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.345088392496109, |
|
"rewards/margins": 0.3187733292579651, |
|
"rewards/rejected": -0.6638616919517517, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.9943820224719098e-07, |
|
"logps/chosen": -27.615928649902344, |
|
"logps/rejected": -33.776695251464844, |
|
"loss": 0.5511, |
|
"losses/dpo": 0.6082693338394165, |
|
"losses/sft": 1.0973209142684937, |
|
"losses/total": 0.6082693338394165, |
|
"ref_logps/chosen": -24.300251007080078, |
|
"ref_logps/rejected": -26.21588897705078, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.3315678834915161, |
|
"rewards/margins": 0.42451295256614685, |
|
"rewards/rejected": -0.7560808658599854, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.9803370786516854e-07, |
|
"logps/chosen": -28.257335662841797, |
|
"logps/rejected": -35.67947769165039, |
|
"loss": 0.5919, |
|
"losses/dpo": 0.6389520168304443, |
|
"losses/sft": 1.087360143661499, |
|
"losses/total": 0.6389520168304443, |
|
"ref_logps/chosen": -24.031015396118164, |
|
"ref_logps/rejected": -28.11650276184082, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.4226321578025818, |
|
"rewards/margins": 0.33366525173187256, |
|
"rewards/rejected": -0.7562973499298096, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9662921348314607e-07, |
|
"logps/chosen": -27.326435089111328, |
|
"logps/rejected": -33.91853713989258, |
|
"loss": 0.5884, |
|
"losses/dpo": 0.5772832632064819, |
|
"losses/sft": 1.0057258605957031, |
|
"losses/total": 0.5772832632064819, |
|
"ref_logps/chosen": -23.13665008544922, |
|
"ref_logps/rejected": -26.448516845703125, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.4189784526824951, |
|
"rewards/margins": 0.32802364230155945, |
|
"rewards/rejected": -0.747002124786377, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.952247191011236e-07, |
|
"logps/chosen": -24.134462356567383, |
|
"logps/rejected": -34.923095703125, |
|
"loss": 0.5245, |
|
"losses/dpo": 0.5826983451843262, |
|
"losses/sft": 0.7670709490776062, |
|
"losses/total": 0.5826983451843262, |
|
"ref_logps/chosen": -21.35719108581543, |
|
"ref_logps/rejected": -27.226768493652344, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.2777270972728729, |
|
"rewards/margins": 0.49190521240234375, |
|
"rewards/rejected": -0.7696323394775391, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.938202247191011e-07, |
|
"logps/chosen": -24.894744873046875, |
|
"logps/rejected": -36.34782791137695, |
|
"loss": 0.5654, |
|
"losses/dpo": 0.5832593441009521, |
|
"losses/sft": 0.8260340094566345, |
|
"losses/total": 0.5832593441009521, |
|
"ref_logps/chosen": -21.5096435546875, |
|
"ref_logps/rejected": -28.905296325683594, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -0.3385101854801178, |
|
"rewards/margins": 0.4057431221008301, |
|
"rewards/rejected": -0.7442533373832703, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9241573033707863e-07, |
|
"logps/chosen": -29.12051773071289, |
|
"logps/rejected": -33.72222900390625, |
|
"loss": 0.6189, |
|
"losses/dpo": 0.5586456060409546, |
|
"losses/sft": 1.1363164186477661, |
|
"losses/total": 0.5586456060409546, |
|
"ref_logps/chosen": -24.818958282470703, |
|
"ref_logps/rejected": -26.637435913085938, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.430155873298645, |
|
"rewards/margins": 0.27832359075546265, |
|
"rewards/rejected": -0.7084795236587524, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.9101123595505617e-07, |
|
"logps/chosen": -25.77654266357422, |
|
"logps/rejected": -32.80144119262695, |
|
"loss": 0.5647, |
|
"losses/dpo": 0.6132915616035461, |
|
"losses/sft": 0.8355939984321594, |
|
"losses/total": 0.6132915616035461, |
|
"ref_logps/chosen": -22.049232482910156, |
|
"ref_logps/rejected": -25.218961715698242, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.37273097038269043, |
|
"rewards/margins": 0.3855169415473938, |
|
"rewards/rejected": -0.758247971534729, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.896067415730337e-07, |
|
"logps/chosen": -27.173105239868164, |
|
"logps/rejected": -33.18789291381836, |
|
"loss": 0.5757, |
|
"losses/dpo": 0.6402326822280884, |
|
"losses/sft": 0.9358000159263611, |
|
"losses/total": 0.6402326822280884, |
|
"ref_logps/chosen": -24.05023956298828, |
|
"ref_logps/rejected": -26.557050704956055, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.3122865557670593, |
|
"rewards/margins": 0.3507978618144989, |
|
"rewards/rejected": -0.6630844473838806, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8820224719101123e-07, |
|
"logps/chosen": -25.127092361450195, |
|
"logps/rejected": -34.0608024597168, |
|
"loss": 0.5844, |
|
"losses/dpo": 0.576771080493927, |
|
"losses/sft": 0.8823024034500122, |
|
"losses/total": 0.576771080493927, |
|
"ref_logps/chosen": -21.552627563476562, |
|
"ref_logps/rejected": -27.01715087890625, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.35744667053222656, |
|
"rewards/margins": 0.346918523311615, |
|
"rewards/rejected": -0.7043651938438416, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8679775280898876e-07, |
|
"logps/chosen": -25.840179443359375, |
|
"logps/rejected": -34.11262893676758, |
|
"loss": 0.5675, |
|
"losses/dpo": 0.5643225312232971, |
|
"losses/sft": 0.7924672365188599, |
|
"losses/total": 0.5643225312232971, |
|
"ref_logps/chosen": -22.366439819335938, |
|
"ref_logps/rejected": -26.873088836669922, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.3473738133907318, |
|
"rewards/margins": 0.37658050656318665, |
|
"rewards/rejected": -0.7239543199539185, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.853932584269663e-07, |
|
"logps/chosen": -24.64289093017578, |
|
"logps/rejected": -30.944011688232422, |
|
"loss": 0.5768, |
|
"losses/dpo": 0.6149911880493164, |
|
"losses/sft": 0.9512190222740173, |
|
"losses/total": 0.6149911880493164, |
|
"ref_logps/chosen": -21.270837783813477, |
|
"ref_logps/rejected": -24.173620223999023, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.33720535039901733, |
|
"rewards/margins": 0.3398338854312897, |
|
"rewards/rejected": -0.6770392656326294, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8398876404494382e-07, |
|
"logps/chosen": -26.05956268310547, |
|
"logps/rejected": -35.905609130859375, |
|
"loss": 0.5407, |
|
"losses/dpo": 0.49823397397994995, |
|
"losses/sft": 0.8145182132720947, |
|
"losses/total": 0.49823397397994995, |
|
"ref_logps/chosen": -23.054027557373047, |
|
"ref_logps/rejected": -28.27884864807129, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.3005535304546356, |
|
"rewards/margins": 0.4621226191520691, |
|
"rewards/rejected": -0.7626761794090271, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.8258426966292135e-07, |
|
"logps/chosen": -24.907108306884766, |
|
"logps/rejected": -33.71357345581055, |
|
"loss": 0.5301, |
|
"losses/dpo": 0.49924543499946594, |
|
"losses/sft": 0.9444026350975037, |
|
"losses/total": 0.49924543499946594, |
|
"ref_logps/chosen": -21.562763214111328, |
|
"ref_logps/rejected": -25.69823455810547, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.3344343304634094, |
|
"rewards/margins": 0.4670996069908142, |
|
"rewards/rejected": -0.8015338778495789, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8117977528089888e-07, |
|
"logps/chosen": -24.61281967163086, |
|
"logps/rejected": -33.178436279296875, |
|
"loss": 0.5843, |
|
"losses/dpo": 0.6827691793441772, |
|
"losses/sft": 0.9820384979248047, |
|
"losses/total": 0.6827691793441772, |
|
"ref_logps/chosen": -20.769065856933594, |
|
"ref_logps/rejected": -25.740190505981445, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -0.3843753933906555, |
|
"rewards/margins": 0.35944926738739014, |
|
"rewards/rejected": -0.7438246607780457, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.7977528089887638e-07, |
|
"logps/chosen": -25.742042541503906, |
|
"logps/rejected": -31.92254638671875, |
|
"loss": 0.5765, |
|
"losses/dpo": 0.48391562700271606, |
|
"losses/sft": 0.9694733619689941, |
|
"losses/total": 0.48391562700271606, |
|
"ref_logps/chosen": -22.27023696899414, |
|
"ref_logps/rejected": -24.869842529296875, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.34718072414398193, |
|
"rewards/margins": 0.3580899238586426, |
|
"rewards/rejected": -0.7052706480026245, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7837078651685391e-07, |
|
"logps/chosen": -24.76668930053711, |
|
"logps/rejected": -33.92596435546875, |
|
"loss": 0.5197, |
|
"losses/dpo": 0.566383957862854, |
|
"losses/sft": 1.056198239326477, |
|
"losses/total": 0.566383957862854, |
|
"ref_logps/chosen": -21.79462432861328, |
|
"ref_logps/rejected": -26.1394100189209, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.2972065806388855, |
|
"rewards/margins": 0.48144853115081787, |
|
"rewards/rejected": -0.7786551713943481, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7696629213483144e-07, |
|
"logps/chosen": -25.022621154785156, |
|
"logps/rejected": -32.83625030517578, |
|
"loss": 0.554, |
|
"losses/dpo": 0.5455434322357178, |
|
"losses/sft": 0.9091237783432007, |
|
"losses/total": 0.5455434322357178, |
|
"ref_logps/chosen": -21.205692291259766, |
|
"ref_logps/rejected": -24.853519439697266, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.3816927969455719, |
|
"rewards/margins": 0.4165803790092468, |
|
"rewards/rejected": -0.7982731461524963, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.75561797752809e-07, |
|
"logps/chosen": -27.038639068603516, |
|
"logps/rejected": -35.007415771484375, |
|
"loss": 0.5526, |
|
"losses/dpo": 0.7876778841018677, |
|
"losses/sft": 1.1023296117782593, |
|
"losses/total": 0.7876778841018677, |
|
"ref_logps/chosen": -23.039878845214844, |
|
"ref_logps/rejected": -26.884708404541016, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.39987578988075256, |
|
"rewards/margins": 0.41239458322525024, |
|
"rewards/rejected": -0.8122704029083252, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.741573033707865e-07, |
|
"logps/chosen": -26.300579071044922, |
|
"logps/rejected": -36.53676223754883, |
|
"loss": 0.5444, |
|
"losses/dpo": 0.4805631637573242, |
|
"losses/sft": 0.8787716031074524, |
|
"losses/total": 0.4805631637573242, |
|
"ref_logps/chosen": -22.55372428894043, |
|
"ref_logps/rejected": -27.711929321289062, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.3746855556964874, |
|
"rewards/margins": 0.5077978372573853, |
|
"rewards/rejected": -0.8824833631515503, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7275280898876404e-07, |
|
"logps/chosen": -27.612911224365234, |
|
"logps/rejected": -39.48854064941406, |
|
"loss": 0.4883, |
|
"losses/dpo": 0.5499591827392578, |
|
"losses/sft": 1.1995720863342285, |
|
"losses/total": 0.5499591827392578, |
|
"ref_logps/chosen": -23.866586685180664, |
|
"ref_logps/rejected": -29.748516082763672, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.37463241815567017, |
|
"rewards/margins": 0.5993699431419373, |
|
"rewards/rejected": -0.9740023612976074, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7134831460674157e-07, |
|
"logps/chosen": -28.848485946655273, |
|
"logps/rejected": -38.46211242675781, |
|
"loss": 0.5223, |
|
"losses/dpo": 0.5853086113929749, |
|
"losses/sft": 0.9450937509536743, |
|
"losses/total": 0.5853086113929749, |
|
"ref_logps/chosen": -24.71368980407715, |
|
"ref_logps/rejected": -29.095096588134766, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.4134795069694519, |
|
"rewards/margins": 0.5232220888137817, |
|
"rewards/rejected": -0.9367015957832336, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.699438202247191e-07, |
|
"logps/chosen": -26.53584861755371, |
|
"logps/rejected": -33.2642707824707, |
|
"loss": 0.5583, |
|
"losses/dpo": 0.6550332307815552, |
|
"losses/sft": 0.844421923160553, |
|
"losses/total": 0.6550332307815552, |
|
"ref_logps/chosen": -22.528621673583984, |
|
"ref_logps/rejected": -25.07666778564453, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.4007226824760437, |
|
"rewards/margins": 0.4180375933647156, |
|
"rewards/rejected": -0.8187602758407593, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.6853932584269663e-07, |
|
"logps/chosen": -26.93305778503418, |
|
"logps/rejected": -36.43919372558594, |
|
"loss": 0.5267, |
|
"losses/dpo": 0.37509262561798096, |
|
"losses/sft": 0.9286944270133972, |
|
"losses/total": 0.37509262561798096, |
|
"ref_logps/chosen": -22.965662002563477, |
|
"ref_logps/rejected": -27.516300201416016, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.39673954248428345, |
|
"rewards/margins": 0.4955495595932007, |
|
"rewards/rejected": -0.8922891616821289, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6713483146067413e-07, |
|
"logps/chosen": -27.517230987548828, |
|
"logps/rejected": -33.23160934448242, |
|
"loss": 0.585, |
|
"losses/dpo": 0.45891374349594116, |
|
"losses/sft": 0.8818660378456116, |
|
"losses/total": 0.45891374349594116, |
|
"ref_logps/chosen": -23.37508773803711, |
|
"ref_logps/rejected": -25.319503784179688, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4142143726348877, |
|
"rewards/margins": 0.37699633836746216, |
|
"rewards/rejected": -0.7912107110023499, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6573033707865166e-07, |
|
"logps/chosen": -29.848949432373047, |
|
"logps/rejected": -37.009605407714844, |
|
"loss": 0.5569, |
|
"losses/dpo": 0.6695871353149414, |
|
"losses/sft": 1.1478632688522339, |
|
"losses/total": 0.6695871353149414, |
|
"ref_logps/chosen": -25.79513168334961, |
|
"ref_logps/rejected": -28.52492332458496, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.4053817391395569, |
|
"rewards/margins": 0.4430864751338959, |
|
"rewards/rejected": -0.8484681844711304, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6432584269662922e-07, |
|
"logps/chosen": -26.847461700439453, |
|
"logps/rejected": -33.84664535522461, |
|
"loss": 0.5853, |
|
"losses/dpo": 0.6266674995422363, |
|
"losses/sft": 0.9419240951538086, |
|
"losses/total": 0.6266674995422363, |
|
"ref_logps/chosen": -23.1273193359375, |
|
"ref_logps/rejected": -26.497238159179688, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.3720143437385559, |
|
"rewards/margins": 0.3629264533519745, |
|
"rewards/rejected": -0.734940767288208, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6292134831460675e-07, |
|
"logps/chosen": -25.227951049804688, |
|
"logps/rejected": -37.79768371582031, |
|
"loss": 0.5277, |
|
"losses/dpo": 0.5965819358825684, |
|
"losses/sft": 1.0364360809326172, |
|
"losses/total": 0.5965819358825684, |
|
"ref_logps/chosen": -20.85896873474121, |
|
"ref_logps/rejected": -28.49602508544922, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -0.43689805269241333, |
|
"rewards/margins": 0.49326756596565247, |
|
"rewards/rejected": -0.9301656484603882, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6151685393258428e-07, |
|
"logps/chosen": -25.945070266723633, |
|
"logps/rejected": -35.28973388671875, |
|
"loss": 0.5305, |
|
"losses/dpo": 0.5456879138946533, |
|
"losses/sft": 0.8692267537117004, |
|
"losses/total": 0.5456879138946533, |
|
"ref_logps/chosen": -22.303909301757812, |
|
"ref_logps/rejected": -26.77994155883789, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.36411628127098083, |
|
"rewards/margins": 0.4868628680706024, |
|
"rewards/rejected": -0.850979208946228, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6011235955056178e-07, |
|
"logps/chosen": -28.660266876220703, |
|
"logps/rejected": -36.41142272949219, |
|
"loss": 0.5766, |
|
"losses/dpo": 0.6054384708404541, |
|
"losses/sft": 0.9599564671516418, |
|
"losses/total": 0.6054384708404541, |
|
"ref_logps/chosen": -24.156015396118164, |
|
"ref_logps/rejected": -28.24047088623047, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.4504254460334778, |
|
"rewards/margins": 0.36666956543922424, |
|
"rewards/rejected": -0.8170950412750244, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5870786516853931e-07, |
|
"logps/chosen": -27.74228858947754, |
|
"logps/rejected": -38.50691604614258, |
|
"loss": 0.5215, |
|
"losses/dpo": 0.463223397731781, |
|
"losses/sft": 1.041387915611267, |
|
"losses/total": 0.463223397731781, |
|
"ref_logps/chosen": -23.603931427001953, |
|
"ref_logps/rejected": -28.803909301757812, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4138358533382416, |
|
"rewards/margins": 0.5564644932746887, |
|
"rewards/rejected": -0.9703004360198975, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5730337078651685e-07, |
|
"logps/chosen": -24.93131446838379, |
|
"logps/rejected": -35.91729736328125, |
|
"loss": 0.5266, |
|
"losses/dpo": 0.6279169321060181, |
|
"losses/sft": 0.8709256052970886, |
|
"losses/total": 0.6279169321060181, |
|
"ref_logps/chosen": -20.768774032592773, |
|
"ref_logps/rejected": -26.83118438720703, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -0.4162542223930359, |
|
"rewards/margins": 0.4923573136329651, |
|
"rewards/rejected": -0.9086115956306458, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5589887640449438e-07, |
|
"logps/chosen": -27.571338653564453, |
|
"logps/rejected": -38.57915496826172, |
|
"loss": 0.5687, |
|
"losses/dpo": 0.5966840386390686, |
|
"losses/sft": 0.9412966966629028, |
|
"losses/total": 0.5966840386390686, |
|
"ref_logps/chosen": -22.682205200195312, |
|
"ref_logps/rejected": -29.58980369567871, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.48891347646713257, |
|
"rewards/margins": 0.4100216031074524, |
|
"rewards/rejected": -0.898935079574585, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5449438202247188e-07, |
|
"logps/chosen": -25.46674346923828, |
|
"logps/rejected": -33.395118713378906, |
|
"loss": 0.5902, |
|
"losses/dpo": 0.7199227213859558, |
|
"losses/sft": 0.9989073276519775, |
|
"losses/total": 0.7199227213859558, |
|
"ref_logps/chosen": -21.129976272583008, |
|
"ref_logps/rejected": -25.61261749267578, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4336766302585602, |
|
"rewards/margins": 0.3445735573768616, |
|
"rewards/rejected": -0.7782501578330994, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5308988764044944e-07, |
|
"logps/chosen": -26.559568405151367, |
|
"logps/rejected": -39.50514221191406, |
|
"loss": 0.5101, |
|
"losses/dpo": 0.42156773805618286, |
|
"losses/sft": 0.824786365032196, |
|
"losses/total": 0.42156773805618286, |
|
"ref_logps/chosen": -21.88895606994629, |
|
"ref_logps/rejected": -29.256093978881836, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -0.46706122159957886, |
|
"rewards/margins": 0.557843804359436, |
|
"rewards/rejected": -1.0249050855636597, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5168539325842697e-07, |
|
"logps/chosen": -27.430574417114258, |
|
"logps/rejected": -35.846214294433594, |
|
"loss": 0.5852, |
|
"losses/dpo": 0.7073966264724731, |
|
"losses/sft": 0.959773600101471, |
|
"losses/total": 0.7073966264724731, |
|
"ref_logps/chosen": -22.33085060119629, |
|
"ref_logps/rejected": -26.85816192626953, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.509972333908081, |
|
"rewards/margins": 0.38883259892463684, |
|
"rewards/rejected": -0.8988049030303955, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.502808988764045e-07, |
|
"logps/chosen": -25.799861907958984, |
|
"logps/rejected": -37.50861358642578, |
|
"loss": 0.5553, |
|
"losses/dpo": 0.5419721603393555, |
|
"losses/sft": 0.940202534198761, |
|
"losses/total": 0.5419721603393555, |
|
"ref_logps/chosen": -21.224933624267578, |
|
"ref_logps/rejected": -28.002174377441406, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4574929475784302, |
|
"rewards/margins": 0.4931509494781494, |
|
"rewards/rejected": -0.9506438970565796, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.4887640449438203e-07, |
|
"logps/chosen": -27.79110336303711, |
|
"logps/rejected": -34.21430206298828, |
|
"loss": 0.5921, |
|
"losses/dpo": 0.6595107913017273, |
|
"losses/sft": 1.0057413578033447, |
|
"losses/total": 0.6595107913017273, |
|
"ref_logps/chosen": -23.073078155517578, |
|
"ref_logps/rejected": -26.093578338623047, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.4718025326728821, |
|
"rewards/margins": 0.34026968479156494, |
|
"rewards/rejected": -0.8120721578598022, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4747191011235953e-07, |
|
"logps/chosen": -26.380355834960938, |
|
"logps/rejected": -37.56932830810547, |
|
"loss": 0.5263, |
|
"losses/dpo": 0.47728973627090454, |
|
"losses/sft": 1.0133030414581299, |
|
"losses/total": 0.47728973627090454, |
|
"ref_logps/chosen": -22.488906860351562, |
|
"ref_logps/rejected": -28.34372329711914, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.38914480805397034, |
|
"rewards/margins": 0.5334160327911377, |
|
"rewards/rejected": -0.9225608110427856, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4606741573033706e-07, |
|
"logps/chosen": -27.006134033203125, |
|
"logps/rejected": -37.092594146728516, |
|
"loss": 0.5417, |
|
"losses/dpo": 0.7257384061813354, |
|
"losses/sft": 1.2120591402053833, |
|
"losses/total": 0.7257384061813354, |
|
"ref_logps/chosen": -22.130794525146484, |
|
"ref_logps/rejected": -27.460662841796875, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.48753368854522705, |
|
"rewards/margins": 0.47565943002700806, |
|
"rewards/rejected": -0.9631930589675903, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.446629213483146e-07, |
|
"logps/chosen": -25.57880210876465, |
|
"logps/rejected": -37.16014099121094, |
|
"loss": 0.5381, |
|
"losses/dpo": 0.6313049793243408, |
|
"losses/sft": 0.9201721549034119, |
|
"losses/total": 0.6313049793243408, |
|
"ref_logps/chosen": -21.904296875, |
|
"ref_logps/rejected": -28.32394027709961, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -0.3674505054950714, |
|
"rewards/margins": 0.5161697268486023, |
|
"rewards/rejected": -0.8836201429367065, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4325842696629212e-07, |
|
"logps/chosen": -24.601829528808594, |
|
"logps/rejected": -37.283538818359375, |
|
"loss": 0.5429, |
|
"losses/dpo": 0.5216307044029236, |
|
"losses/sft": 1.0138075351715088, |
|
"losses/total": 0.5216307044029236, |
|
"ref_logps/chosen": -20.36395263671875, |
|
"ref_logps/rejected": -28.042282104492188, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -0.4237874746322632, |
|
"rewards/margins": 0.5003381967544556, |
|
"rewards/rejected": -0.9241256713867188, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4185393258426968e-07, |
|
"logps/chosen": -28.58258819580078, |
|
"logps/rejected": -36.47199249267578, |
|
"loss": 0.5894, |
|
"losses/dpo": 0.43838924169540405, |
|
"losses/sft": 1.2099077701568604, |
|
"losses/total": 0.43838924169540405, |
|
"ref_logps/chosen": -23.226455688476562, |
|
"ref_logps/rejected": -26.880578994750977, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -0.5356131792068481, |
|
"rewards/margins": 0.423528254032135, |
|
"rewards/rejected": -0.9591414332389832, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4044943820224718e-07, |
|
"logps/chosen": -28.39300537109375, |
|
"logps/rejected": -36.5651741027832, |
|
"loss": 0.5472, |
|
"losses/dpo": 0.6286274790763855, |
|
"losses/sft": 1.1655751466751099, |
|
"losses/total": 0.6286274790763855, |
|
"ref_logps/chosen": -23.700767517089844, |
|
"ref_logps/rejected": -26.989849090576172, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.4692240357398987, |
|
"rewards/margins": 0.48830845952033997, |
|
"rewards/rejected": -0.9575324058532715, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.3904494382022472e-07, |
|
"logps/chosen": -27.01761245727539, |
|
"logps/rejected": -34.147830963134766, |
|
"loss": 0.5581, |
|
"losses/dpo": 0.425361692905426, |
|
"losses/sft": 1.129596471786499, |
|
"losses/total": 0.425361692905426, |
|
"ref_logps/chosen": -22.251991271972656, |
|
"ref_logps/rejected": -24.953710556030273, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.47656214237213135, |
|
"rewards/margins": 0.4428498148918152, |
|
"rewards/rejected": -0.9194119572639465, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3764044943820225e-07, |
|
"logps/chosen": -27.92938995361328, |
|
"logps/rejected": -39.81676483154297, |
|
"loss": 0.5111, |
|
"losses/dpo": 0.5578055381774902, |
|
"losses/sft": 1.1197444200515747, |
|
"losses/total": 0.5578055381774902, |
|
"ref_logps/chosen": -23.314014434814453, |
|
"ref_logps/rejected": -29.507539749145508, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.4615376591682434, |
|
"rewards/margins": 0.5693849325180054, |
|
"rewards/rejected": -1.0309226512908936, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3623595505617978e-07, |
|
"logps/chosen": -27.910144805908203, |
|
"logps/rejected": -35.69133758544922, |
|
"loss": 0.5499, |
|
"losses/dpo": 0.4847102165222168, |
|
"losses/sft": 0.989621639251709, |
|
"losses/total": 0.4847102165222168, |
|
"ref_logps/chosen": -23.326908111572266, |
|
"ref_logps/rejected": -26.762676239013672, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.45832377672195435, |
|
"rewards/margins": 0.43454277515411377, |
|
"rewards/rejected": -0.8928664922714233, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3483146067415728e-07, |
|
"logps/chosen": -28.233020782470703, |
|
"logps/rejected": -37.3542366027832, |
|
"loss": 0.5935, |
|
"losses/dpo": 0.5905570983886719, |
|
"losses/sft": 1.0464057922363281, |
|
"losses/total": 0.5905570983886719, |
|
"ref_logps/chosen": -23.20905303955078, |
|
"ref_logps/rejected": -28.49638557434082, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5023964643478394, |
|
"rewards/margins": 0.38338857889175415, |
|
"rewards/rejected": -0.8857850432395935, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.334269662921348e-07, |
|
"logps/chosen": -29.44438934326172, |
|
"logps/rejected": -36.25569152832031, |
|
"loss": 0.5608, |
|
"losses/dpo": 0.5518324375152588, |
|
"losses/sft": 0.9761526584625244, |
|
"losses/total": 0.5518324375152588, |
|
"ref_logps/chosen": -24.553022384643555, |
|
"ref_logps/rejected": -26.846464157104492, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.4891367554664612, |
|
"rewards/margins": 0.451786071062088, |
|
"rewards/rejected": -0.9409228563308716, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3202247191011234e-07, |
|
"logps/chosen": -29.32979393005371, |
|
"logps/rejected": -37.83529281616211, |
|
"loss": 0.5463, |
|
"losses/dpo": 0.5125599503517151, |
|
"losses/sft": 0.9747940897941589, |
|
"losses/total": 0.5125599503517151, |
|
"ref_logps/chosen": -24.211776733398438, |
|
"ref_logps/rejected": -27.92011260986328, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.5118017196655273, |
|
"rewards/margins": 0.47971609234809875, |
|
"rewards/rejected": -0.9915178418159485, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.306179775280899e-07, |
|
"logps/chosen": -27.11697769165039, |
|
"logps/rejected": -35.418338775634766, |
|
"loss": 0.575, |
|
"losses/dpo": 0.5703020095825195, |
|
"losses/sft": 0.9395530223846436, |
|
"losses/total": 0.5703020095825195, |
|
"ref_logps/chosen": -22.12503433227539, |
|
"ref_logps/rejected": -26.28829574584961, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.49919426441192627, |
|
"rewards/margins": 0.4138101041316986, |
|
"rewards/rejected": -0.9130042791366577, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.2921348314606743e-07, |
|
"logps/chosen": -31.24747085571289, |
|
"logps/rejected": -38.8961181640625, |
|
"loss": 0.5843, |
|
"losses/dpo": 0.4914831221103668, |
|
"losses/sft": 0.9517439603805542, |
|
"losses/total": 0.4914831221103668, |
|
"ref_logps/chosen": -25.351207733154297, |
|
"ref_logps/rejected": -29.048057556152344, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.5896263122558594, |
|
"rewards/margins": 0.3951793909072876, |
|
"rewards/rejected": -0.9848057627677917, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.2780898876404493e-07, |
|
"logps/chosen": -27.757152557373047, |
|
"logps/rejected": -37.481964111328125, |
|
"loss": 0.5261, |
|
"losses/dpo": 0.4620394706726074, |
|
"losses/sft": 1.0134756565093994, |
|
"losses/total": 0.4620394706726074, |
|
"ref_logps/chosen": -23.13431167602539, |
|
"ref_logps/rejected": -27.492977142333984, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4622839689254761, |
|
"rewards/margins": 0.5366144776344299, |
|
"rewards/rejected": -0.9988985061645508, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2640449438202246e-07, |
|
"logps/chosen": -26.990705490112305, |
|
"logps/rejected": -34.95043182373047, |
|
"loss": 0.5636, |
|
"losses/dpo": 0.5714951753616333, |
|
"losses/sft": 0.9859296679496765, |
|
"losses/total": 0.5714951753616333, |
|
"ref_logps/chosen": -22.465744018554688, |
|
"ref_logps/rejected": -26.120864868164062, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.452495813369751, |
|
"rewards/margins": 0.4304611086845398, |
|
"rewards/rejected": -0.882956862449646, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.25e-07, |
|
"logps/chosen": -29.839576721191406, |
|
"logps/rejected": -40.363712310791016, |
|
"loss": 0.5508, |
|
"losses/dpo": 0.5849748253822327, |
|
"losses/sft": 0.9925932288169861, |
|
"losses/total": 0.5849748253822327, |
|
"ref_logps/chosen": -24.343534469604492, |
|
"ref_logps/rejected": -29.49897003173828, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5496042370796204, |
|
"rewards/margins": 0.5368699431419373, |
|
"rewards/rejected": -1.0864741802215576, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.2359550561797752e-07, |
|
"logps/chosen": -26.925609588623047, |
|
"logps/rejected": -35.485931396484375, |
|
"loss": 0.5537, |
|
"losses/dpo": 0.43898260593414307, |
|
"losses/sft": 0.8520787954330444, |
|
"losses/total": 0.43898260593414307, |
|
"ref_logps/chosen": -22.369102478027344, |
|
"ref_logps/rejected": -26.426111221313477, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -0.4556504487991333, |
|
"rewards/margins": 0.450331449508667, |
|
"rewards/rejected": -0.9059818387031555, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2219101123595506e-07, |
|
"logps/chosen": -27.308425903320312, |
|
"logps/rejected": -34.86455535888672, |
|
"loss": 0.6099, |
|
"losses/dpo": 0.6877168416976929, |
|
"losses/sft": 0.8925371766090393, |
|
"losses/total": 0.6877168416976929, |
|
"ref_logps/chosen": -22.015674591064453, |
|
"ref_logps/rejected": -26.024978637695312, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.5292750597000122, |
|
"rewards/margins": 0.3546826243400574, |
|
"rewards/rejected": -0.8839576840400696, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.2078651685393259e-07, |
|
"logps/chosen": -27.23873519897461, |
|
"logps/rejected": -34.355613708496094, |
|
"loss": 0.5451, |
|
"losses/dpo": 0.4608323574066162, |
|
"losses/sft": 1.068372130393982, |
|
"losses/total": 0.4608323574066162, |
|
"ref_logps/chosen": -22.93021011352539, |
|
"ref_logps/rejected": -25.29645538330078, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.4308522939682007, |
|
"rewards/margins": 0.4750638008117676, |
|
"rewards/rejected": -0.9059160947799683, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1938202247191012e-07, |
|
"logps/chosen": -27.410297393798828, |
|
"logps/rejected": -36.60034942626953, |
|
"loss": 0.5435, |
|
"losses/dpo": 0.49991002678871155, |
|
"losses/sft": 0.9416501522064209, |
|
"losses/total": 0.49991002678871155, |
|
"ref_logps/chosen": -22.95693016052246, |
|
"ref_logps/rejected": -27.357826232910156, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.4453369379043579, |
|
"rewards/margins": 0.47891533374786377, |
|
"rewards/rejected": -0.9242523312568665, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1797752808988763e-07, |
|
"logps/chosen": -26.075031280517578, |
|
"logps/rejected": -35.989906311035156, |
|
"loss": 0.5108, |
|
"losses/dpo": 0.49980589747428894, |
|
"losses/sft": 0.8830540776252747, |
|
"losses/total": 0.49980589747428894, |
|
"ref_logps/chosen": -21.97342300415039, |
|
"ref_logps/rejected": -26.20843505859375, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.4101608395576477, |
|
"rewards/margins": 0.5679866075515747, |
|
"rewards/rejected": -0.9781473875045776, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1657303370786515e-07, |
|
"logps/chosen": -27.034866333007812, |
|
"logps/rejected": -36.756141662597656, |
|
"loss": 0.535, |
|
"losses/dpo": 0.5506036281585693, |
|
"losses/sft": 0.842628002166748, |
|
"losses/total": 0.5506036281585693, |
|
"ref_logps/chosen": -22.406818389892578, |
|
"ref_logps/rejected": -26.883403778076172, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -0.4628047049045563, |
|
"rewards/margins": 0.5244689583778381, |
|
"rewards/rejected": -0.9872736930847168, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.151685393258427e-07, |
|
"logps/chosen": -24.555809020996094, |
|
"logps/rejected": -32.58570861816406, |
|
"loss": 0.6002, |
|
"losses/dpo": 0.643724262714386, |
|
"losses/sft": 0.86636883020401, |
|
"losses/total": 0.643724262714386, |
|
"ref_logps/chosen": -20.17746353149414, |
|
"ref_logps/rejected": -24.76491928100586, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -0.4378345012664795, |
|
"rewards/margins": 0.3442443907260895, |
|
"rewards/rejected": -0.7820788621902466, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1376404494382023e-07, |
|
"logps/chosen": -23.88672637939453, |
|
"logps/rejected": -34.9397087097168, |
|
"loss": 0.529, |
|
"losses/dpo": 0.5676740407943726, |
|
"losses/sft": 0.8977797627449036, |
|
"losses/total": 0.5676740407943726, |
|
"ref_logps/chosen": -19.26026153564453, |
|
"ref_logps/rejected": -25.219451904296875, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.4626464247703552, |
|
"rewards/margins": 0.5093792676925659, |
|
"rewards/rejected": -0.9720257520675659, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1235955056179774e-07, |
|
"logps/chosen": -28.308603286743164, |
|
"logps/rejected": -38.62759017944336, |
|
"loss": 0.5376, |
|
"losses/dpo": 0.4975647032260895, |
|
"losses/sft": 1.098832130432129, |
|
"losses/total": 0.4975647032260895, |
|
"ref_logps/chosen": -22.983192443847656, |
|
"ref_logps/rejected": -28.174081802368164, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5325409173965454, |
|
"rewards/margins": 0.5128099918365479, |
|
"rewards/rejected": -1.0453509092330933, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1095505617977527e-07, |
|
"logps/chosen": -29.64281463623047, |
|
"logps/rejected": -34.92308807373047, |
|
"loss": 0.6085, |
|
"losses/dpo": 0.6205140352249146, |
|
"losses/sft": 1.0714130401611328, |
|
"losses/total": 0.6205140352249146, |
|
"ref_logps/chosen": -23.70892333984375, |
|
"ref_logps/rejected": -25.790882110595703, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5933888554573059, |
|
"rewards/margins": 0.31983205676078796, |
|
"rewards/rejected": -0.9132209420204163, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.095505617977528e-07, |
|
"logps/chosen": -27.25480079650879, |
|
"logps/rejected": -35.160831451416016, |
|
"loss": 0.5594, |
|
"losses/dpo": 0.46756136417388916, |
|
"losses/sft": 1.0184146165847778, |
|
"losses/total": 0.46756136417388916, |
|
"ref_logps/chosen": -22.160459518432617, |
|
"ref_logps/rejected": -25.47940444946289, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.5094340443611145, |
|
"rewards/margins": 0.45870864391326904, |
|
"rewards/rejected": -0.9681426882743835, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0814606741573033e-07, |
|
"logps/chosen": -28.060367584228516, |
|
"logps/rejected": -35.89076232910156, |
|
"loss": 0.5957, |
|
"losses/dpo": 0.6158527135848999, |
|
"losses/sft": 0.9492118954658508, |
|
"losses/total": 0.6158527135848999, |
|
"ref_logps/chosen": -22.560768127441406, |
|
"ref_logps/rejected": -26.671247482299805, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.5499600172042847, |
|
"rewards/margins": 0.3719918131828308, |
|
"rewards/rejected": -0.9219518899917603, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0674157303370785e-07, |
|
"logps/chosen": -28.077362060546875, |
|
"logps/rejected": -32.179466247558594, |
|
"loss": 0.579, |
|
"losses/dpo": 0.5990191698074341, |
|
"losses/sft": 1.0173970460891724, |
|
"losses/total": 0.5990191698074341, |
|
"ref_logps/chosen": -23.08903694152832, |
|
"ref_logps/rejected": -23.298954010009766, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.4988323450088501, |
|
"rewards/margins": 0.38921868801116943, |
|
"rewards/rejected": -0.8880510330200195, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0533707865168538e-07, |
|
"logps/chosen": -28.690523147583008, |
|
"logps/rejected": -34.72178268432617, |
|
"loss": 0.5694, |
|
"losses/dpo": 0.6682005524635315, |
|
"losses/sft": 0.9579723477363586, |
|
"losses/total": 0.6682005524635315, |
|
"ref_logps/chosen": -23.495473861694336, |
|
"ref_logps/rejected": -25.300090789794922, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.519504964351654, |
|
"rewards/margins": 0.42266416549682617, |
|
"rewards/rejected": -0.942169189453125, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0393258426966293e-07, |
|
"logps/chosen": -25.57655143737793, |
|
"logps/rejected": -33.412723541259766, |
|
"loss": 0.5471, |
|
"losses/dpo": 0.6572707891464233, |
|
"losses/sft": 1.028795599937439, |
|
"losses/total": 0.6572707891464233, |
|
"ref_logps/chosen": -21.189647674560547, |
|
"ref_logps/rejected": -24.523921966552734, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4386903643608093, |
|
"rewards/margins": 0.45018988847732544, |
|
"rewards/rejected": -0.8888803124427795, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0252808988764044e-07, |
|
"logps/chosen": -29.36073112487793, |
|
"logps/rejected": -33.21003723144531, |
|
"loss": 0.5813, |
|
"losses/dpo": 0.5923129916191101, |
|
"losses/sft": 0.9457908272743225, |
|
"losses/total": 0.5923129916191101, |
|
"ref_logps/chosen": -24.347349166870117, |
|
"ref_logps/rejected": -24.210060119628906, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.5013381838798523, |
|
"rewards/margins": 0.3986593186855316, |
|
"rewards/rejected": -0.8999974727630615, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0112359550561797e-07, |
|
"logps/chosen": -25.595857620239258, |
|
"logps/rejected": -36.034000396728516, |
|
"loss": 0.5458, |
|
"losses/dpo": 0.5657480359077454, |
|
"losses/sft": 0.9646883606910706, |
|
"losses/total": 0.5657480359077454, |
|
"ref_logps/chosen": -21.133930206298828, |
|
"ref_logps/rejected": -26.436534881591797, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.44619300961494446, |
|
"rewards/margins": 0.5135533809661865, |
|
"rewards/rejected": -0.9597463607788086, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.971910112359549e-08, |
|
"logps/chosen": -25.68283462524414, |
|
"logps/rejected": -35.9984130859375, |
|
"loss": 0.5137, |
|
"losses/dpo": 0.5698142051696777, |
|
"losses/sft": 0.9736462235450745, |
|
"losses/total": 0.5698142051696777, |
|
"ref_logps/chosen": -21.976032257080078, |
|
"ref_logps/rejected": -26.659069061279297, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.370680034160614, |
|
"rewards/margins": 0.5632542371749878, |
|
"rewards/rejected": -0.9339342713356018, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.831460674157303e-08, |
|
"logps/chosen": -27.503202438354492, |
|
"logps/rejected": -37.614471435546875, |
|
"loss": 0.5574, |
|
"losses/dpo": 0.6440725326538086, |
|
"losses/sft": 0.963034987449646, |
|
"losses/total": 0.6440725326538086, |
|
"ref_logps/chosen": -22.742584228515625, |
|
"ref_logps/rejected": -28.33091163635254, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.47606179118156433, |
|
"rewards/margins": 0.45229417085647583, |
|
"rewards/rejected": -0.9283559918403625, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.691011235955055e-08, |
|
"logps/chosen": -27.817134857177734, |
|
"logps/rejected": -34.62038803100586, |
|
"loss": 0.5561, |
|
"losses/dpo": 0.6037241816520691, |
|
"losses/sft": 0.9915317296981812, |
|
"losses/total": 0.6037241816520691, |
|
"ref_logps/chosen": -23.01769256591797, |
|
"ref_logps/rejected": -25.366844177246094, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.47994428873062134, |
|
"rewards/margins": 0.44541001319885254, |
|
"rewards/rejected": -0.9253543019294739, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.550561797752808e-08, |
|
"logps/chosen": -29.053791046142578, |
|
"logps/rejected": -36.763641357421875, |
|
"loss": 0.5811, |
|
"losses/dpo": 0.5880983471870422, |
|
"losses/sft": 1.1540213823318481, |
|
"losses/total": 0.5880983471870422, |
|
"ref_logps/chosen": -23.688079833984375, |
|
"ref_logps/rejected": -27.189102172851562, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.5365712642669678, |
|
"rewards/margins": 0.42088285088539124, |
|
"rewards/rejected": -0.9574541449546814, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.410112359550561e-08, |
|
"logps/chosen": -25.49301528930664, |
|
"logps/rejected": -34.453372955322266, |
|
"loss": 0.56, |
|
"losses/dpo": 0.4375653862953186, |
|
"losses/sft": 1.0353739261627197, |
|
"losses/total": 0.4375653862953186, |
|
"ref_logps/chosen": -21.16693115234375, |
|
"ref_logps/rejected": -25.54058837890625, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.43260854482650757, |
|
"rewards/margins": 0.45866984128952026, |
|
"rewards/rejected": -0.8912783861160278, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.269662921348314e-08, |
|
"logps/chosen": -27.841381072998047, |
|
"logps/rejected": -31.897401809692383, |
|
"loss": 0.5787, |
|
"losses/dpo": 0.6422601938247681, |
|
"losses/sft": 0.9122541546821594, |
|
"losses/total": 0.6422601938247681, |
|
"ref_logps/chosen": -23.25330352783203, |
|
"ref_logps/rejected": -23.617647171020508, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.45880773663520813, |
|
"rewards/margins": 0.3691678047180176, |
|
"rewards/rejected": -0.8279755115509033, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.129213483146067e-08, |
|
"logps/chosen": -28.4913330078125, |
|
"logps/rejected": -37.258323669433594, |
|
"loss": 0.5962, |
|
"losses/dpo": 0.7063708901405334, |
|
"losses/sft": 1.0378127098083496, |
|
"losses/total": 0.7063708901405334, |
|
"ref_logps/chosen": -22.901752471923828, |
|
"ref_logps/rejected": -27.694263458251953, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.558958113193512, |
|
"rewards/margins": 0.3974474370479584, |
|
"rewards/rejected": -0.956405520439148, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.988764044943819e-08, |
|
"logps/chosen": -28.342041015625, |
|
"logps/rejected": -36.76737976074219, |
|
"loss": 0.5382, |
|
"losses/dpo": 0.6065940856933594, |
|
"losses/sft": 1.061606526374817, |
|
"losses/total": 0.6065940856933594, |
|
"ref_logps/chosen": -22.77169418334961, |
|
"ref_logps/rejected": -26.355728149414062, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.5570348501205444, |
|
"rewards/margins": 0.48413002490997314, |
|
"rewards/rejected": -1.0411648750305176, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.848314606741572e-08, |
|
"logps/chosen": -28.1925048828125, |
|
"logps/rejected": -36.757789611816406, |
|
"loss": 0.542, |
|
"losses/dpo": 0.5146865844726562, |
|
"losses/sft": 0.82643723487854, |
|
"losses/total": 0.5146865844726562, |
|
"ref_logps/chosen": -22.9779052734375, |
|
"ref_logps/rejected": -26.85318374633789, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5214601755142212, |
|
"rewards/margins": 0.4690002501010895, |
|
"rewards/rejected": -0.9904604554176331, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.707865168539325e-08, |
|
"logps/chosen": -26.62852668762207, |
|
"logps/rejected": -34.3861083984375, |
|
"loss": 0.5288, |
|
"losses/dpo": 0.554874062538147, |
|
"losses/sft": 0.9589724540710449, |
|
"losses/total": 0.554874062538147, |
|
"ref_logps/chosen": -22.37548065185547, |
|
"ref_logps/rejected": -25.256423950195312, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.4253048598766327, |
|
"rewards/margins": 0.4876634180545807, |
|
"rewards/rejected": -0.9129682183265686, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.567415730337078e-08, |
|
"logps/chosen": -30.657304763793945, |
|
"logps/rejected": -35.13941192626953, |
|
"loss": 0.564, |
|
"losses/dpo": 0.5628042817115784, |
|
"losses/sft": 0.9816582202911377, |
|
"losses/total": 0.5628042817115784, |
|
"ref_logps/chosen": -25.568553924560547, |
|
"ref_logps/rejected": -26.13833236694336, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.5088753700256348, |
|
"rewards/margins": 0.3912326395511627, |
|
"rewards/rejected": -0.9001079797744751, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.426966292134831e-08, |
|
"logps/chosen": -27.922801971435547, |
|
"logps/rejected": -35.52781677246094, |
|
"loss": 0.5751, |
|
"losses/dpo": 0.5604207515716553, |
|
"losses/sft": 0.9308174848556519, |
|
"losses/total": 0.5604207515716553, |
|
"ref_logps/chosen": -22.82427978515625, |
|
"ref_logps/rejected": -26.594511032104492, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.5098517537117004, |
|
"rewards/margins": 0.38347893953323364, |
|
"rewards/rejected": -0.8933306932449341, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.286516853932583e-08, |
|
"logps/chosen": -26.58924674987793, |
|
"logps/rejected": -37.19468688964844, |
|
"loss": 0.5092, |
|
"losses/dpo": 0.5189211368560791, |
|
"losses/sft": 0.9888613224029541, |
|
"losses/total": 0.5189211368560791, |
|
"ref_logps/chosen": -22.07261085510254, |
|
"ref_logps/rejected": -26.852371215820312, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.45166367292404175, |
|
"rewards/margins": 0.5825679302215576, |
|
"rewards/rejected": -1.0342316627502441, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.146067415730337e-08, |
|
"logps/chosen": -26.25957489013672, |
|
"logps/rejected": -34.435699462890625, |
|
"loss": 0.5636, |
|
"losses/dpo": 0.5064201951026917, |
|
"losses/sft": 1.0053503513336182, |
|
"losses/total": 0.5064201951026917, |
|
"ref_logps/chosen": -21.541664123535156, |
|
"ref_logps/rejected": -25.595813751220703, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.47179120779037476, |
|
"rewards/margins": 0.41219767928123474, |
|
"rewards/rejected": -0.8839888572692871, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.005617977528089e-08, |
|
"logps/chosen": -28.839031219482422, |
|
"logps/rejected": -37.7884635925293, |
|
"loss": 0.5099, |
|
"losses/dpo": 0.5300096273422241, |
|
"losses/sft": 0.9853606224060059, |
|
"losses/total": 0.5300096273422241, |
|
"ref_logps/chosen": -24.249427795410156, |
|
"ref_logps/rejected": -27.3421688079834, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.4589604139328003, |
|
"rewards/margins": 0.5856689810752869, |
|
"rewards/rejected": -1.0446293354034424, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.865168539325842e-08, |
|
"logps/chosen": -24.501375198364258, |
|
"logps/rejected": -35.306766510009766, |
|
"loss": 0.5489, |
|
"losses/dpo": 0.5961363315582275, |
|
"losses/sft": 1.0056707859039307, |
|
"losses/total": 0.5961363315582275, |
|
"ref_logps/chosen": -19.69840431213379, |
|
"ref_logps/rejected": -26.113300323486328, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4802970886230469, |
|
"rewards/margins": 0.43904954195022583, |
|
"rewards/rejected": -0.9193466305732727, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.724719101123594e-08, |
|
"logps/chosen": -28.589330673217773, |
|
"logps/rejected": -37.83360290527344, |
|
"loss": 0.5047, |
|
"losses/dpo": 0.584464430809021, |
|
"losses/sft": 1.1327065229415894, |
|
"losses/total": 0.584464430809021, |
|
"ref_logps/chosen": -23.609786987304688, |
|
"ref_logps/rejected": -27.048458099365234, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -0.49795451760292053, |
|
"rewards/margins": 0.5805596113204956, |
|
"rewards/rejected": -1.0785142183303833, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.584269662921348e-08, |
|
"logps/chosen": -26.895156860351562, |
|
"logps/rejected": -36.51441192626953, |
|
"loss": 0.5664, |
|
"losses/dpo": 0.5289937257766724, |
|
"losses/sft": 0.9366389513015747, |
|
"losses/total": 0.5289937257766724, |
|
"ref_logps/chosen": -21.687744140625, |
|
"ref_logps/rejected": -27.214290618896484, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -0.5207412838935852, |
|
"rewards/margins": 0.40927091240882874, |
|
"rewards/rejected": -0.9300122261047363, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.443820224719101e-08, |
|
"logps/chosen": -26.770729064941406, |
|
"logps/rejected": -37.010292053222656, |
|
"loss": 0.5614, |
|
"losses/dpo": 0.5385686159133911, |
|
"losses/sft": 1.026196002960205, |
|
"losses/total": 0.5385686159133911, |
|
"ref_logps/chosen": -21.59899139404297, |
|
"ref_logps/rejected": -27.5603084564209, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5171737670898438, |
|
"rewards/margins": 0.4278249144554138, |
|
"rewards/rejected": -0.9449986815452576, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.303370786516853e-08, |
|
"logps/chosen": -28.282766342163086, |
|
"logps/rejected": -37.94123840332031, |
|
"loss": 0.5574, |
|
"losses/dpo": 0.4933924973011017, |
|
"losses/sft": 1.0346543788909912, |
|
"losses/total": 0.4933924973011017, |
|
"ref_logps/chosen": -23.10584831237793, |
|
"ref_logps/rejected": -27.940820693969727, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.5176920294761658, |
|
"rewards/margins": 0.48235008120536804, |
|
"rewards/rejected": -1.0000420808792114, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.162921348314606e-08, |
|
"logps/chosen": -28.72567367553711, |
|
"logps/rejected": -39.72615432739258, |
|
"loss": 0.5095, |
|
"losses/dpo": 0.46729788184165955, |
|
"losses/sft": 1.0185084342956543, |
|
"losses/total": 0.46729788184165955, |
|
"ref_logps/chosen": -23.77487564086914, |
|
"ref_logps/rejected": -29.071794509887695, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -0.4950796663761139, |
|
"rewards/margins": 0.5703563690185547, |
|
"rewards/rejected": -1.0654358863830566, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.022471910112359e-08, |
|
"logps/chosen": -25.868453979492188, |
|
"logps/rejected": -38.04795455932617, |
|
"loss": 0.5312, |
|
"losses/dpo": 0.5006756782531738, |
|
"losses/sft": 0.9233719110488892, |
|
"losses/total": 0.5006756782531738, |
|
"ref_logps/chosen": -21.202287673950195, |
|
"ref_logps/rejected": -28.387357711791992, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.4666164517402649, |
|
"rewards/margins": 0.49944305419921875, |
|
"rewards/rejected": -0.9660595059394836, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.882022471910112e-08, |
|
"logps/chosen": -30.674976348876953, |
|
"logps/rejected": -37.18801498413086, |
|
"loss": 0.5609, |
|
"losses/dpo": 0.53383469581604, |
|
"losses/sft": 1.0966167449951172, |
|
"losses/total": 0.53383469581604, |
|
"ref_logps/chosen": -25.279560089111328, |
|
"ref_logps/rejected": -27.363300323486328, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.5395419001579285, |
|
"rewards/margins": 0.4429297149181366, |
|
"rewards/rejected": -0.9824715852737427, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.741573033707864e-08, |
|
"logps/chosen": -27.533077239990234, |
|
"logps/rejected": -36.4505729675293, |
|
"loss": 0.5459, |
|
"losses/dpo": 0.5303448438644409, |
|
"losses/sft": 1.0059340000152588, |
|
"losses/total": 0.5303448438644409, |
|
"ref_logps/chosen": -23.062650680541992, |
|
"ref_logps/rejected": -27.07624053955078, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.4470424950122833, |
|
"rewards/margins": 0.49039074778556824, |
|
"rewards/rejected": -0.9374332427978516, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.601123595505617e-08, |
|
"logps/chosen": -27.371315002441406, |
|
"logps/rejected": -37.460845947265625, |
|
"loss": 0.5395, |
|
"losses/dpo": 0.46134790778160095, |
|
"losses/sft": 1.0326218605041504, |
|
"losses/total": 0.46134790778160095, |
|
"ref_logps/chosen": -21.61273765563965, |
|
"ref_logps/rejected": -26.598316192626953, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.5758577585220337, |
|
"rewards/margins": 0.5103954076766968, |
|
"rewards/rejected": -1.0862531661987305, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.460674157303371e-08, |
|
"logps/chosen": -26.683940887451172, |
|
"logps/rejected": -35.51959228515625, |
|
"loss": 0.531, |
|
"losses/dpo": 0.5929858684539795, |
|
"losses/sft": 0.8796969056129456, |
|
"losses/total": 0.5929858684539795, |
|
"ref_logps/chosen": -21.84777069091797, |
|
"ref_logps/rejected": -25.34250259399414, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.48361673951148987, |
|
"rewards/margins": 0.5340923070907593, |
|
"rewards/rejected": -1.0177090167999268, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.320224719101123e-08, |
|
"logps/chosen": -27.784767150878906, |
|
"logps/rejected": -36.31642150878906, |
|
"loss": 0.5638, |
|
"losses/dpo": 0.46039754152297974, |
|
"losses/sft": 1.014696478843689, |
|
"losses/total": 0.46039754152297974, |
|
"ref_logps/chosen": -22.830692291259766, |
|
"ref_logps/rejected": -26.99026107788086, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -0.4954075217247009, |
|
"rewards/margins": 0.4372091293334961, |
|
"rewards/rejected": -0.9326165914535522, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.179775280898876e-08, |
|
"logps/chosen": -23.8892765045166, |
|
"logps/rejected": -32.802425384521484, |
|
"loss": 0.5307, |
|
"losses/dpo": 0.5120245218276978, |
|
"losses/sft": 0.9590541124343872, |
|
"losses/total": 0.5120245218276978, |
|
"ref_logps/chosen": -19.922191619873047, |
|
"ref_logps/rejected": -23.754770278930664, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -0.3967083692550659, |
|
"rewards/margins": 0.5080575346946716, |
|
"rewards/rejected": -0.9047658443450928, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.039325842696629e-08, |
|
"logps/chosen": -27.902587890625, |
|
"logps/rejected": -39.759193420410156, |
|
"loss": 0.5216, |
|
"losses/dpo": 0.5157948136329651, |
|
"losses/sft": 0.8797988891601562, |
|
"losses/total": 0.5157948136329651, |
|
"ref_logps/chosen": -22.233232498168945, |
|
"ref_logps/rejected": -28.421966552734375, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.5669355988502502, |
|
"rewards/margins": 0.5667868852615356, |
|
"rewards/rejected": -1.1337225437164307, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.898876404494382e-08, |
|
"logps/chosen": -27.200105667114258, |
|
"logps/rejected": -38.05504608154297, |
|
"loss": 0.5154, |
|
"losses/dpo": 0.6272658705711365, |
|
"losses/sft": 0.901512086391449, |
|
"losses/total": 0.6272658705711365, |
|
"ref_logps/chosen": -22.22509765625, |
|
"ref_logps/rejected": -27.425247192382812, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.49750083684921265, |
|
"rewards/margins": 0.5654786825180054, |
|
"rewards/rejected": -1.0629794597625732, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.758426966292135e-08, |
|
"logps/chosen": -29.658336639404297, |
|
"logps/rejected": -38.597557067871094, |
|
"loss": 0.5507, |
|
"losses/dpo": 0.4642670750617981, |
|
"losses/sft": 1.0486382246017456, |
|
"losses/total": 0.4642670750617981, |
|
"ref_logps/chosen": -23.814481735229492, |
|
"ref_logps/rejected": -27.686166763305664, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.5843857526779175, |
|
"rewards/margins": 0.5067534446716309, |
|
"rewards/rejected": -1.0911391973495483, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.617977528089887e-08, |
|
"logps/chosen": -23.939620971679688, |
|
"logps/rejected": -33.06968688964844, |
|
"loss": 0.5631, |
|
"losses/dpo": 0.5865851640701294, |
|
"losses/sft": 1.1602400541305542, |
|
"losses/total": 0.5865851640701294, |
|
"ref_logps/chosen": -19.01073455810547, |
|
"ref_logps/rejected": -23.945262908935547, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.4928884506225586, |
|
"rewards/margins": 0.4195541441440582, |
|
"rewards/rejected": -0.9124425649642944, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.47752808988764e-08, |
|
"logps/chosen": -27.94991683959961, |
|
"logps/rejected": -36.65930938720703, |
|
"loss": 0.5461, |
|
"losses/dpo": 0.6400465369224548, |
|
"losses/sft": 1.0134565830230713, |
|
"losses/total": 0.6400465369224548, |
|
"ref_logps/chosen": -23.591896057128906, |
|
"ref_logps/rejected": -27.34914779663086, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.43580204248428345, |
|
"rewards/margins": 0.4952143728733063, |
|
"rewards/rejected": -0.9310164451599121, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.3370786516853926e-08, |
|
"logps/chosen": -28.148937225341797, |
|
"logps/rejected": -34.08583450317383, |
|
"loss": 0.561, |
|
"losses/dpo": 0.47015029191970825, |
|
"losses/sft": 0.923213005065918, |
|
"losses/total": 0.47015029191970825, |
|
"ref_logps/chosen": -23.29110336303711, |
|
"ref_logps/rejected": -25.051483154296875, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.4857832193374634, |
|
"rewards/margins": 0.417651891708374, |
|
"rewards/rejected": -0.9034351110458374, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.196629213483146e-08, |
|
"logps/chosen": -26.931848526000977, |
|
"logps/rejected": -35.78190994262695, |
|
"loss": 0.5196, |
|
"losses/dpo": 0.4919354021549225, |
|
"losses/sft": 0.9875601530075073, |
|
"losses/total": 0.4919354021549225, |
|
"ref_logps/chosen": -22.220352172851562, |
|
"ref_logps/rejected": -25.88389778137207, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.4711495637893677, |
|
"rewards/margins": 0.51865154504776, |
|
"rewards/rejected": -0.9898011684417725, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.056179775280899e-08, |
|
"logps/chosen": -26.860830307006836, |
|
"logps/rejected": -36.244728088378906, |
|
"loss": 0.5141, |
|
"losses/dpo": 0.5143895745277405, |
|
"losses/sft": 0.8888437747955322, |
|
"losses/total": 0.5143895745277405, |
|
"ref_logps/chosen": -22.12276840209961, |
|
"ref_logps/rejected": -25.870086669921875, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.4738062918186188, |
|
"rewards/margins": 0.5636579394340515, |
|
"rewards/rejected": -1.0374642610549927, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.915730337078652e-08, |
|
"logps/chosen": -27.645713806152344, |
|
"logps/rejected": -35.34681701660156, |
|
"loss": 0.5612, |
|
"losses/dpo": 0.5186240077018738, |
|
"losses/sft": 1.109127402305603, |
|
"losses/total": 0.5186240077018738, |
|
"ref_logps/chosen": -22.531267166137695, |
|
"ref_logps/rejected": -25.30887222290039, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5114448070526123, |
|
"rewards/margins": 0.4923498034477234, |
|
"rewards/rejected": -1.0037946701049805, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.775280898876404e-08, |
|
"logps/chosen": -29.528343200683594, |
|
"logps/rejected": -37.111507415771484, |
|
"loss": 0.5701, |
|
"losses/dpo": 0.5167029500007629, |
|
"losses/sft": 1.1346383094787598, |
|
"losses/total": 0.5167029500007629, |
|
"ref_logps/chosen": -23.850698471069336, |
|
"ref_logps/rejected": -27.338424682617188, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.5677646398544312, |
|
"rewards/margins": 0.40954357385635376, |
|
"rewards/rejected": -0.9773082137107849, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.634831460674157e-08, |
|
"logps/chosen": -27.099462509155273, |
|
"logps/rejected": -38.734046936035156, |
|
"loss": 0.5367, |
|
"losses/dpo": 0.6075611114501953, |
|
"losses/sft": 1.0922847986221313, |
|
"losses/total": 0.6075611114501953, |
|
"ref_logps/chosen": -21.647756576538086, |
|
"ref_logps/rejected": -27.990768432617188, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.5451704859733582, |
|
"rewards/margins": 0.5291576385498047, |
|
"rewards/rejected": -1.0743281841278076, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.4943820224719096e-08, |
|
"logps/chosen": -28.804433822631836, |
|
"logps/rejected": -38.87983703613281, |
|
"loss": 0.5448, |
|
"losses/dpo": 0.5679644346237183, |
|
"losses/sft": 1.123085618019104, |
|
"losses/total": 0.5679644346237183, |
|
"ref_logps/chosen": -23.055761337280273, |
|
"ref_logps/rejected": -27.842578887939453, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5748672485351562, |
|
"rewards/margins": 0.5288586020469666, |
|
"rewards/rejected": -1.1037259101867676, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.3539325842696626e-08, |
|
"logps/chosen": -29.942031860351562, |
|
"logps/rejected": -37.742164611816406, |
|
"loss": 0.5544, |
|
"losses/dpo": 0.4389882981777191, |
|
"losses/sft": 0.9757397174835205, |
|
"losses/total": 0.4389882981777191, |
|
"ref_logps/chosen": -24.796215057373047, |
|
"ref_logps/rejected": -27.602325439453125, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -0.5145817995071411, |
|
"rewards/margins": 0.4994018077850342, |
|
"rewards/rejected": -1.0139836072921753, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.213483146067416e-08, |
|
"logps/chosen": -30.154991149902344, |
|
"logps/rejected": -35.81608581542969, |
|
"loss": 0.57, |
|
"losses/dpo": 0.571212887763977, |
|
"losses/sft": 0.8268208503723145, |
|
"losses/total": 0.571212887763977, |
|
"ref_logps/chosen": -24.421096801757812, |
|
"ref_logps/rejected": -25.88280487060547, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5733895897865295, |
|
"rewards/margins": 0.4199383854866028, |
|
"rewards/rejected": -0.9933279752731323, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.073033707865169e-08, |
|
"logps/chosen": -27.25971794128418, |
|
"logps/rejected": -33.205955505371094, |
|
"loss": 0.5874, |
|
"losses/dpo": 0.4875527620315552, |
|
"losses/sft": 0.8703315854072571, |
|
"losses/total": 0.4875527620315552, |
|
"ref_logps/chosen": -22.749954223632812, |
|
"ref_logps/rejected": -24.781803131103516, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.45097634196281433, |
|
"rewards/margins": 0.3914392292499542, |
|
"rewards/rejected": -0.8424156308174133, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.932584269662921e-08, |
|
"logps/chosen": -28.230928421020508, |
|
"logps/rejected": -37.86750030517578, |
|
"loss": 0.508, |
|
"losses/dpo": 0.4668968617916107, |
|
"losses/sft": 1.1078698635101318, |
|
"losses/total": 0.4668968617916107, |
|
"ref_logps/chosen": -23.454715728759766, |
|
"ref_logps/rejected": -27.212678909301758, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.47762107849121094, |
|
"rewards/margins": 0.5878612995147705, |
|
"rewards/rejected": -1.0654823780059814, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.792134831460674e-08, |
|
"logps/chosen": -28.5417423248291, |
|
"logps/rejected": -39.07720184326172, |
|
"loss": 0.5722, |
|
"losses/dpo": 0.5119404196739197, |
|
"losses/sft": 1.0701940059661865, |
|
"losses/total": 0.5119404196739197, |
|
"ref_logps/chosen": -22.97249984741211, |
|
"ref_logps/rejected": -29.236312866210938, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5569244623184204, |
|
"rewards/margins": 0.42716455459594727, |
|
"rewards/rejected": -0.9840888977050781, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.6516853932584266e-08, |
|
"logps/chosen": -24.37343406677246, |
|
"logps/rejected": -35.577354431152344, |
|
"loss": 0.5144, |
|
"losses/dpo": 0.39502987265586853, |
|
"losses/sft": 1.0756311416625977, |
|
"losses/total": 0.39502987265586853, |
|
"ref_logps/chosen": -19.630115509033203, |
|
"ref_logps/rejected": -25.026439666748047, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.474331796169281, |
|
"rewards/margins": 0.5807597041130066, |
|
"rewards/rejected": -1.055091381072998, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.5112359550561796e-08, |
|
"logps/chosen": -25.75430679321289, |
|
"logps/rejected": -35.49622344970703, |
|
"loss": 0.5757, |
|
"losses/dpo": 0.5865879058837891, |
|
"losses/sft": 1.0159986019134521, |
|
"losses/total": 0.5865879058837891, |
|
"ref_logps/chosen": -21.11154556274414, |
|
"ref_logps/rejected": -26.56639862060547, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4642760157585144, |
|
"rewards/margins": 0.42870670557022095, |
|
"rewards/rejected": -0.8929827213287354, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.370786516853932e-08, |
|
"logps/chosen": -28.015663146972656, |
|
"logps/rejected": -36.934810638427734, |
|
"loss": 0.507, |
|
"losses/dpo": 0.6495200395584106, |
|
"losses/sft": 1.097916841506958, |
|
"losses/total": 0.6495200395584106, |
|
"ref_logps/chosen": -23.48037338256836, |
|
"ref_logps/rejected": -26.453636169433594, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -0.45352903008461, |
|
"rewards/margins": 0.5945882797241211, |
|
"rewards/rejected": -1.0481172800064087, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.230337078651686e-08, |
|
"logps/chosen": -29.112939834594727, |
|
"logps/rejected": -35.190895080566406, |
|
"loss": 0.5557, |
|
"losses/dpo": 0.3998969793319702, |
|
"losses/sft": 0.9329382181167603, |
|
"losses/total": 0.3998969793319702, |
|
"ref_logps/chosen": -23.576570510864258, |
|
"ref_logps/rejected": -24.735258102416992, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5536371469497681, |
|
"rewards/margins": 0.49192649126052856, |
|
"rewards/rejected": -1.0455635786056519, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.089887640449438e-08, |
|
"logps/chosen": -28.713830947875977, |
|
"logps/rejected": -37.038963317871094, |
|
"loss": 0.5536, |
|
"losses/dpo": 0.548796534538269, |
|
"losses/sft": 1.0410091876983643, |
|
"losses/total": 0.548796534538269, |
|
"ref_logps/chosen": -23.76250457763672, |
|
"ref_logps/rejected": -27.5520076751709, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.49513280391693115, |
|
"rewards/margins": 0.45356276631355286, |
|
"rewards/rejected": -0.9486956000328064, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.949438202247191e-08, |
|
"logps/chosen": -29.465068817138672, |
|
"logps/rejected": -39.406578063964844, |
|
"loss": 0.5343, |
|
"losses/dpo": 0.6983579397201538, |
|
"losses/sft": 1.0986469984054565, |
|
"losses/total": 0.6983579397201538, |
|
"ref_logps/chosen": -24.418071746826172, |
|
"ref_logps/rejected": -28.89803695678711, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5046992897987366, |
|
"rewards/margins": 0.5461547374725342, |
|
"rewards/rejected": -1.050853967666626, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.8089887640449436e-08, |
|
"logps/chosen": -27.72464370727539, |
|
"logps/rejected": -35.225887298583984, |
|
"loss": 0.5838, |
|
"losses/dpo": 0.615436851978302, |
|
"losses/sft": 1.064025640487671, |
|
"losses/total": 0.615436851978302, |
|
"ref_logps/chosen": -22.502582550048828, |
|
"ref_logps/rejected": -26.32878875732422, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.5222063660621643, |
|
"rewards/margins": 0.36750373244285583, |
|
"rewards/rejected": -0.8897100687026978, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.6685393258426963e-08, |
|
"logps/chosen": -26.368640899658203, |
|
"logps/rejected": -35.305564880371094, |
|
"loss": 0.5351, |
|
"losses/dpo": 0.5552591681480408, |
|
"losses/sft": 0.8796924352645874, |
|
"losses/total": 0.5552591681480408, |
|
"ref_logps/chosen": -21.371036529541016, |
|
"ref_logps/rejected": -25.149438858032227, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.49976038932800293, |
|
"rewards/margins": 0.5158523917198181, |
|
"rewards/rejected": -1.0156128406524658, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.5280898876404493e-08, |
|
"logps/chosen": -29.838565826416016, |
|
"logps/rejected": -38.05325698852539, |
|
"loss": 0.5338, |
|
"losses/dpo": 0.4755927324295044, |
|
"losses/sft": 0.9763241410255432, |
|
"losses/total": 0.4755927324295044, |
|
"ref_logps/chosen": -24.918655395507812, |
|
"ref_logps/rejected": -28.04724884033203, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.4919911026954651, |
|
"rewards/margins": 0.5086094737052917, |
|
"rewards/rejected": -1.0006005764007568, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.387640449438202e-08, |
|
"logps/chosen": -29.036991119384766, |
|
"logps/rejected": -35.4906005859375, |
|
"loss": 0.5335, |
|
"losses/dpo": 0.5385127067565918, |
|
"losses/sft": 1.245056390762329, |
|
"losses/total": 0.5385127067565918, |
|
"ref_logps/chosen": -23.929513931274414, |
|
"ref_logps/rejected": -24.878217697143555, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.5107479095458984, |
|
"rewards/margins": 0.5504903793334961, |
|
"rewards/rejected": -1.0612382888793945, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.2471910112359548e-08, |
|
"logps/chosen": -29.392702102661133, |
|
"logps/rejected": -38.68418884277344, |
|
"loss": 0.545, |
|
"losses/dpo": 0.43943360447883606, |
|
"losses/sft": 1.023887276649475, |
|
"losses/total": 0.43943360447883606, |
|
"ref_logps/chosen": -24.07533073425293, |
|
"ref_logps/rejected": -28.427806854248047, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5317370891571045, |
|
"rewards/margins": 0.49390077590942383, |
|
"rewards/rejected": -1.0256378650665283, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.106741573033708e-08, |
|
"logps/chosen": -25.038589477539062, |
|
"logps/rejected": -32.384376525878906, |
|
"loss": 0.56, |
|
"losses/dpo": 0.6935892701148987, |
|
"losses/sft": 1.0011663436889648, |
|
"losses/total": 0.6935892701148987, |
|
"ref_logps/chosen": -21.044326782226562, |
|
"ref_logps/rejected": -23.86334991455078, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.3994261920452118, |
|
"rewards/margins": 0.45267629623413086, |
|
"rewards/rejected": -0.8521024584770203, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.9662921348314606e-08, |
|
"logps/chosen": -30.07229995727539, |
|
"logps/rejected": -37.0654411315918, |
|
"loss": 0.5936, |
|
"losses/dpo": 0.547340989112854, |
|
"losses/sft": 1.0020110607147217, |
|
"losses/total": 0.547340989112854, |
|
"ref_logps/chosen": -23.930465698242188, |
|
"ref_logps/rejected": -27.052518844604492, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -0.6141834259033203, |
|
"rewards/margins": 0.38710883259773254, |
|
"rewards/rejected": -1.0012922286987305, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.8258426966292133e-08, |
|
"logps/chosen": -30.305606842041016, |
|
"logps/rejected": -40.710792541503906, |
|
"loss": 0.537, |
|
"losses/dpo": 0.5175353288650513, |
|
"losses/sft": 0.8916615843772888, |
|
"losses/total": 0.5175353288650513, |
|
"ref_logps/chosen": -25.279661178588867, |
|
"ref_logps/rejected": -29.970672607421875, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.5025948286056519, |
|
"rewards/margins": 0.5714170932769775, |
|
"rewards/rejected": -1.0740119218826294, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.685393258426966e-08, |
|
"logps/chosen": -29.87887191772461, |
|
"logps/rejected": -39.89691162109375, |
|
"loss": 0.5598, |
|
"losses/dpo": 0.4781198799610138, |
|
"losses/sft": 1.0425841808319092, |
|
"losses/total": 0.4781198799610138, |
|
"ref_logps/chosen": -23.869295120239258, |
|
"ref_logps/rejected": -29.154647827148438, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.6009576916694641, |
|
"rewards/margins": 0.4732685387134552, |
|
"rewards/rejected": -1.0742262601852417, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.544943820224719e-08, |
|
"logps/chosen": -26.600048065185547, |
|
"logps/rejected": -39.657188415527344, |
|
"loss": 0.5186, |
|
"losses/dpo": 0.5135948657989502, |
|
"losses/sft": 0.9224843978881836, |
|
"losses/total": 0.5135948657989502, |
|
"ref_logps/chosen": -21.754756927490234, |
|
"ref_logps/rejected": -29.07170867919922, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.48452913761138916, |
|
"rewards/margins": 0.5740190744400024, |
|
"rewards/rejected": -1.0585482120513916, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.4044943820224718e-08, |
|
"logps/chosen": -27.77488136291504, |
|
"logps/rejected": -37.88126754760742, |
|
"loss": 0.5551, |
|
"losses/dpo": 0.5367317199707031, |
|
"losses/sft": 1.0271828174591064, |
|
"losses/total": 0.5367317199707031, |
|
"ref_logps/chosen": -22.3087158203125, |
|
"ref_logps/rejected": -27.471187591552734, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.5466164350509644, |
|
"rewards/margins": 0.4943912625312805, |
|
"rewards/rejected": -1.0410076379776, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.2640449438202247e-08, |
|
"logps/chosen": -28.72400665283203, |
|
"logps/rejected": -36.061241149902344, |
|
"loss": 0.5438, |
|
"losses/dpo": 0.5493422746658325, |
|
"losses/sft": 0.9023943543434143, |
|
"losses/total": 0.5493422746658325, |
|
"ref_logps/chosen": -23.460235595703125, |
|
"ref_logps/rejected": -26.00853729248047, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.526377260684967, |
|
"rewards/margins": 0.47889336943626404, |
|
"rewards/rejected": -1.0052706003189087, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.1235955056179774e-08, |
|
"logps/chosen": -27.819026947021484, |
|
"logps/rejected": -37.490928649902344, |
|
"loss": 0.5852, |
|
"losses/dpo": 0.5147813558578491, |
|
"losses/sft": 0.8766761422157288, |
|
"losses/total": 0.5147813558578491, |
|
"ref_logps/chosen": -21.90268898010254, |
|
"ref_logps/rejected": -27.36888885498047, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5916341543197632, |
|
"rewards/margins": 0.4205697774887085, |
|
"rewards/rejected": -1.0122039318084717, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 9.831460674157303e-09, |
|
"logps/chosen": -26.303754806518555, |
|
"logps/rejected": -37.83194351196289, |
|
"loss": 0.524, |
|
"losses/dpo": 0.5489503741264343, |
|
"losses/sft": 0.9560513496398926, |
|
"losses/total": 0.5489503741264343, |
|
"ref_logps/chosen": -21.29248046875, |
|
"ref_logps/rejected": -27.238914489746094, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.5011276006698608, |
|
"rewards/margins": 0.5581751465797424, |
|
"rewards/rejected": -1.059302806854248, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 8.42696629213483e-09, |
|
"logps/chosen": -26.287546157836914, |
|
"logps/rejected": -38.258975982666016, |
|
"loss": 0.5441, |
|
"losses/dpo": 0.5884628295898438, |
|
"losses/sft": 0.9961035251617432, |
|
"losses/total": 0.5884628295898438, |
|
"ref_logps/chosen": -21.199317932128906, |
|
"ref_logps/rejected": -27.755794525146484, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.5088227391242981, |
|
"rewards/margins": 0.5414952635765076, |
|
"rewards/rejected": -1.0503180027008057, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 7.022471910112359e-09, |
|
"logps/chosen": -29.260208129882812, |
|
"logps/rejected": -35.26235580444336, |
|
"loss": 0.5711, |
|
"losses/dpo": 0.6062160730361938, |
|
"losses/sft": 0.9891349673271179, |
|
"losses/total": 0.6062160730361938, |
|
"ref_logps/chosen": -24.228797912597656, |
|
"ref_logps/rejected": -25.79244613647461, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5031411051750183, |
|
"rewards/margins": 0.44384992122650146, |
|
"rewards/rejected": -0.9469910264015198, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 5.617977528089887e-09, |
|
"logps/chosen": -26.954505920410156, |
|
"logps/rejected": -38.197296142578125, |
|
"loss": 0.5188, |
|
"losses/dpo": 0.48179134726524353, |
|
"losses/sft": 1.0057315826416016, |
|
"losses/total": 0.48179134726524353, |
|
"ref_logps/chosen": -22.015995025634766, |
|
"ref_logps/rejected": -27.583335876464844, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.49385106563568115, |
|
"rewards/margins": 0.5675452351570129, |
|
"rewards/rejected": -1.0613962411880493, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.213483146067415e-09, |
|
"logps/chosen": -25.941349029541016, |
|
"logps/rejected": -37.711891174316406, |
|
"loss": 0.5126, |
|
"losses/dpo": 0.47302547097206116, |
|
"losses/sft": 1.0042707920074463, |
|
"losses/total": 0.47302547097206116, |
|
"ref_logps/chosen": -21.42403793334961, |
|
"ref_logps/rejected": -27.090473175048828, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.45173099637031555, |
|
"rewards/margins": 0.610410749912262, |
|
"rewards/rejected": -1.0621416568756104, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.8089887640449435e-09, |
|
"logps/chosen": -27.533342361450195, |
|
"logps/rejected": -40.14276123046875, |
|
"loss": 0.5282, |
|
"losses/dpo": 0.47439950704574585, |
|
"losses/sft": 1.004162073135376, |
|
"losses/total": 0.47439950704574585, |
|
"ref_logps/chosen": -22.44705581665039, |
|
"ref_logps/rejected": -29.271793365478516, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.5086286067962646, |
|
"rewards/margins": 0.5784677267074585, |
|
"rewards/rejected": -1.0870963335037231, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.4044943820224717e-09, |
|
"logps/chosen": -27.44398307800293, |
|
"logps/rejected": -38.508323669433594, |
|
"loss": 0.5377, |
|
"losses/dpo": 0.5113502740859985, |
|
"losses/sft": 1.0710563659667969, |
|
"losses/total": 0.5113502740859985, |
|
"ref_logps/chosen": -22.568340301513672, |
|
"ref_logps/rejected": -28.23776626586914, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.4875642657279968, |
|
"rewards/margins": 0.5394913554191589, |
|
"rewards/rejected": -1.0270556211471558, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.0, |
|
"logps/chosen": -28.845203399658203, |
|
"logps/rejected": -36.77953338623047, |
|
"loss": 0.5692, |
|
"losses/dpo": 0.7008877992630005, |
|
"losses/sft": 1.1200252771377563, |
|
"losses/total": 0.7008877992630005, |
|
"ref_logps/chosen": -23.59469985961914, |
|
"ref_logps/rejected": -27.211450576782227, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.5250504612922668, |
|
"rewards/margins": 0.4317581057548523, |
|
"rewards/rejected": -0.9568085670471191, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"step": 396, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6152852120423558, |
|
"train_runtime": 11562.7876, |
|
"train_samples_per_second": 4.4, |
|
"train_steps_per_second": 0.034 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 396, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 200, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|