|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 200, |
|
"global_step": 1563, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0006397952655150352, |
|
"grad_norm": 60.118304941939414, |
|
"learning_rate": 3.1847133757961784e-09, |
|
"logits/chosen": -2.853665351867676, |
|
"logits/rejected": -2.8379149436950684, |
|
"logps/chosen": -83.49566650390625, |
|
"logps/rejected": -123.54679870605469, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.006397952655150352, |
|
"grad_norm": 57.81423019053645, |
|
"learning_rate": 3.184713375796178e-08, |
|
"logits/chosen": -2.902895927429199, |
|
"logits/rejected": -2.875051259994507, |
|
"logps/chosen": -115.33470153808594, |
|
"logps/rejected": -92.90689086914062, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.0009852921357378364, |
|
"rewards/margins": -0.0015405109152197838, |
|
"rewards/rejected": 0.0025258036330342293, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.012795905310300703, |
|
"grad_norm": 61.55427334185566, |
|
"learning_rate": 6.369426751592356e-08, |
|
"logits/chosen": -2.903585433959961, |
|
"logits/rejected": -2.8909051418304443, |
|
"logps/chosen": -133.5170440673828, |
|
"logps/rejected": -110.08155822753906, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.00143242790363729, |
|
"rewards/margins": -0.0002988163323607296, |
|
"rewards/rejected": 0.0017312444979324937, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.019193857965451054, |
|
"grad_norm": 55.02555810267581, |
|
"learning_rate": 9.554140127388536e-08, |
|
"logits/chosen": -2.892906904220581, |
|
"logits/rejected": -2.8784232139587402, |
|
"logps/chosen": -114.21958923339844, |
|
"logps/rejected": -97.69152069091797, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.012303625233471394, |
|
"rewards/margins": 0.007176184095442295, |
|
"rewards/rejected": 0.0051274425350129604, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.025591810620601407, |
|
"grad_norm": 57.294821146884644, |
|
"learning_rate": 1.2738853503184713e-07, |
|
"logits/chosen": -2.911865711212158, |
|
"logits/rejected": -2.896728992462158, |
|
"logps/chosen": -124.02873229980469, |
|
"logps/rejected": -107.5560073852539, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.03221520036458969, |
|
"rewards/margins": 0.018920911476016045, |
|
"rewards/rejected": 0.013294287025928497, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03198976327575176, |
|
"grad_norm": 66.3217272293019, |
|
"learning_rate": 1.592356687898089e-07, |
|
"logits/chosen": -2.908454656600952, |
|
"logits/rejected": -2.884701728820801, |
|
"logps/chosen": -121.34477233886719, |
|
"logps/rejected": -102.1689453125, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.05464054271578789, |
|
"rewards/margins": 0.035086970776319504, |
|
"rewards/rejected": 0.019553570076823235, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03838771593090211, |
|
"grad_norm": 58.62876538394977, |
|
"learning_rate": 1.9108280254777072e-07, |
|
"logits/chosen": -2.9217209815979004, |
|
"logits/rejected": -2.8922061920166016, |
|
"logps/chosen": -124.75311279296875, |
|
"logps/rejected": -81.85745239257812, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.08774559199810028, |
|
"rewards/margins": 0.069425567984581, |
|
"rewards/rejected": 0.01832001842558384, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.044785668586052464, |
|
"grad_norm": 56.19565905301092, |
|
"learning_rate": 2.2292993630573247e-07, |
|
"logits/chosen": -2.9313912391662598, |
|
"logits/rejected": -2.890962839126587, |
|
"logps/chosen": -114.58404541015625, |
|
"logps/rejected": -89.98262023925781, |
|
"loss": 0.6635, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.08911927044391632, |
|
"rewards/margins": 0.06764236837625504, |
|
"rewards/rejected": 0.02147689089179039, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05118362124120281, |
|
"grad_norm": 57.42643526688929, |
|
"learning_rate": 2.5477707006369425e-07, |
|
"logits/chosen": -2.9421534538269043, |
|
"logits/rejected": -2.9394068717956543, |
|
"logps/chosen": -108.8486328125, |
|
"logps/rejected": -106.7107162475586, |
|
"loss": 0.6684, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.09960101544857025, |
|
"rewards/margins": 0.029676537960767746, |
|
"rewards/rejected": 0.06992447376251221, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05758157389635317, |
|
"grad_norm": 65.93546292866482, |
|
"learning_rate": 2.86624203821656e-07, |
|
"logits/chosen": -2.9601008892059326, |
|
"logits/rejected": -2.928912401199341, |
|
"logps/chosen": -135.4389190673828, |
|
"logps/rejected": -104.19834899902344, |
|
"loss": 0.6681, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.17293739318847656, |
|
"rewards/margins": 0.07813060283660889, |
|
"rewards/rejected": 0.09480679780244827, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06397952655150352, |
|
"grad_norm": 57.01131226596072, |
|
"learning_rate": 3.184713375796178e-07, |
|
"logits/chosen": -2.96305513381958, |
|
"logits/rejected": -2.9574124813079834, |
|
"logps/chosen": -130.2837677001953, |
|
"logps/rejected": -111.2293701171875, |
|
"loss": 0.6608, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.18434445559978485, |
|
"rewards/margins": 0.12251557409763336, |
|
"rewards/rejected": 0.06182890012860298, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07037747920665387, |
|
"grad_norm": 69.70550779478351, |
|
"learning_rate": 3.5031847133757957e-07, |
|
"logits/chosen": -2.9655303955078125, |
|
"logits/rejected": -2.951481342315674, |
|
"logps/chosen": -111.79109954833984, |
|
"logps/rejected": -109.75225830078125, |
|
"loss": 0.6721, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.1715824156999588, |
|
"rewards/margins": 0.13271735608577728, |
|
"rewards/rejected": 0.03886505961418152, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07677543186180422, |
|
"grad_norm": 52.38866527542888, |
|
"learning_rate": 3.8216560509554143e-07, |
|
"logits/chosen": -2.903899908065796, |
|
"logits/rejected": -2.881450653076172, |
|
"logps/chosen": -97.67478942871094, |
|
"logps/rejected": -84.29422760009766, |
|
"loss": 0.6516, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.12506189942359924, |
|
"rewards/margins": 0.13894985616207123, |
|
"rewards/rejected": -0.013887954875826836, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08317338451695458, |
|
"grad_norm": 64.70820481988301, |
|
"learning_rate": 4.140127388535032e-07, |
|
"logits/chosen": -2.895112991333008, |
|
"logits/rejected": -2.88991641998291, |
|
"logps/chosen": -96.93889617919922, |
|
"logps/rejected": -90.70915222167969, |
|
"loss": 0.6585, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.038154274225234985, |
|
"rewards/margins": 0.07399366050958633, |
|
"rewards/rejected": -0.03583937883377075, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08957133717210493, |
|
"grad_norm": 60.15429861597433, |
|
"learning_rate": 4.4585987261146494e-07, |
|
"logits/chosen": -2.8921263217926025, |
|
"logits/rejected": -2.878185749053955, |
|
"logps/chosen": -98.92501068115234, |
|
"logps/rejected": -91.44964599609375, |
|
"loss": 0.6562, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.04788198322057724, |
|
"rewards/margins": 0.14094644784927368, |
|
"rewards/rejected": -0.18882843852043152, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09596928982725528, |
|
"grad_norm": 61.72005334408183, |
|
"learning_rate": 4.777070063694267e-07, |
|
"logits/chosen": -2.8982043266296387, |
|
"logits/rejected": -2.8801465034484863, |
|
"logps/chosen": -109.93717193603516, |
|
"logps/rejected": -105.29100036621094, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.11187714338302612, |
|
"rewards/margins": 0.06109604984521866, |
|
"rewards/rejected": -0.17297318577766418, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.10236724248240563, |
|
"grad_norm": 50.464722527852004, |
|
"learning_rate": 4.989331436699858e-07, |
|
"logits/chosen": -2.876605749130249, |
|
"logits/rejected": -2.869777202606201, |
|
"logps/chosen": -114.76756286621094, |
|
"logps/rejected": -95.31561279296875, |
|
"loss": 0.6387, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.06972712278366089, |
|
"rewards/margins": 0.17382851243019104, |
|
"rewards/rejected": -0.24355562031269073, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.10876519513755598, |
|
"grad_norm": 52.998328288429626, |
|
"learning_rate": 4.953769559032717e-07, |
|
"logits/chosen": -2.877915859222412, |
|
"logits/rejected": -2.8578855991363525, |
|
"logps/chosen": -127.77665710449219, |
|
"logps/rejected": -94.31978607177734, |
|
"loss": 0.6357, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.042665015906095505, |
|
"rewards/margins": 0.3181106448173523, |
|
"rewards/rejected": -0.3607756495475769, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.11516314779270634, |
|
"grad_norm": 53.51042626016048, |
|
"learning_rate": 4.918207681365576e-07, |
|
"logits/chosen": -2.865325450897217, |
|
"logits/rejected": -2.8459763526916504, |
|
"logps/chosen": -107.5126724243164, |
|
"logps/rejected": -95.08032989501953, |
|
"loss": 0.6438, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.12875112891197205, |
|
"rewards/margins": 0.29623284935951233, |
|
"rewards/rejected": -0.4249839782714844, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12156110044785669, |
|
"grad_norm": 61.69592094178514, |
|
"learning_rate": 4.882645803698435e-07, |
|
"logits/chosen": -2.884171724319458, |
|
"logits/rejected": -2.863605499267578, |
|
"logps/chosen": -120.2016372680664, |
|
"logps/rejected": -112.73602294921875, |
|
"loss": 0.661, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.348448246717453, |
|
"rewards/margins": 0.15380175411701202, |
|
"rewards/rejected": -0.502250075340271, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.12795905310300704, |
|
"grad_norm": 47.21307657695218, |
|
"learning_rate": 4.847083926031294e-07, |
|
"logits/chosen": -2.8833072185516357, |
|
"logits/rejected": -2.864515542984009, |
|
"logps/chosen": -120.51539611816406, |
|
"logps/rejected": -123.11246490478516, |
|
"loss": 0.6556, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.2995172441005707, |
|
"rewards/margins": 0.23192158341407776, |
|
"rewards/rejected": -0.5314388275146484, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12795905310300704, |
|
"eval_logits/chosen": -2.822009325027466, |
|
"eval_logits/rejected": -2.808537006378174, |
|
"eval_logps/chosen": -114.53223419189453, |
|
"eval_logps/rejected": -98.75383758544922, |
|
"eval_loss": 0.6150196194648743, |
|
"eval_rewards/accuracies": 0.6831210255622864, |
|
"eval_rewards/chosen": -0.34572336077690125, |
|
"eval_rewards/margins": 0.25221893191337585, |
|
"eval_rewards/rejected": -0.5979422926902771, |
|
"eval_runtime": 755.1997, |
|
"eval_samples_per_second": 6.621, |
|
"eval_steps_per_second": 0.208, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1343570057581574, |
|
"grad_norm": 56.52759183086139, |
|
"learning_rate": 4.811522048364154e-07, |
|
"logits/chosen": -2.849454402923584, |
|
"logits/rejected": -2.8368048667907715, |
|
"logps/chosen": -103.74124908447266, |
|
"logps/rejected": -101.4257583618164, |
|
"loss": 0.6547, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.31585240364074707, |
|
"rewards/margins": 0.26868245005607605, |
|
"rewards/rejected": -0.5845348238945007, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14075495841330773, |
|
"grad_norm": 47.14356880425196, |
|
"learning_rate": 4.775960170697012e-07, |
|
"logits/chosen": -2.882763147354126, |
|
"logits/rejected": -2.871001958847046, |
|
"logps/chosen": -118.808349609375, |
|
"logps/rejected": -114.5535888671875, |
|
"loss": 0.6077, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2729986011981964, |
|
"rewards/margins": 0.38306480646133423, |
|
"rewards/rejected": -0.656063437461853, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.1471529110684581, |
|
"grad_norm": 63.22883397819913, |
|
"learning_rate": 4.7403982930298717e-07, |
|
"logits/chosen": -2.855159282684326, |
|
"logits/rejected": -2.837705135345459, |
|
"logps/chosen": -101.80978393554688, |
|
"logps/rejected": -90.93101501464844, |
|
"loss": 0.6211, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5196329951286316, |
|
"rewards/margins": 0.19388818740844727, |
|
"rewards/rejected": -0.7135211229324341, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15355086372360843, |
|
"grad_norm": 66.64354781660717, |
|
"learning_rate": 4.7048364153627306e-07, |
|
"logits/chosen": -2.851243734359741, |
|
"logits/rejected": -2.8454842567443848, |
|
"logps/chosen": -121.05877685546875, |
|
"logps/rejected": -106.16410827636719, |
|
"loss": 0.6517, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.46725529432296753, |
|
"rewards/margins": 0.14034488797187805, |
|
"rewards/rejected": -0.607600212097168, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1599488163787588, |
|
"grad_norm": 57.16801182208312, |
|
"learning_rate": 4.66927453769559e-07, |
|
"logits/chosen": -2.8734335899353027, |
|
"logits/rejected": -2.847668409347534, |
|
"logps/chosen": -123.86625671386719, |
|
"logps/rejected": -109.57325744628906, |
|
"loss": 0.6281, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3798033595085144, |
|
"rewards/margins": 0.3836090564727783, |
|
"rewards/rejected": -0.7634124755859375, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.16634676903390916, |
|
"grad_norm": 54.517039547772974, |
|
"learning_rate": 4.633712660028449e-07, |
|
"logits/chosen": -2.876328706741333, |
|
"logits/rejected": -2.8625760078430176, |
|
"logps/chosen": -130.20523071289062, |
|
"logps/rejected": -112.2127685546875, |
|
"loss": 0.67, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.47367653250694275, |
|
"rewards/margins": 0.38414520025253296, |
|
"rewards/rejected": -0.8578217625617981, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.1727447216890595, |
|
"grad_norm": 54.60395925488424, |
|
"learning_rate": 4.5981507823613085e-07, |
|
"logits/chosen": -2.844329357147217, |
|
"logits/rejected": -2.8402137756347656, |
|
"logps/chosen": -114.29241943359375, |
|
"logps/rejected": -105.92488861083984, |
|
"loss": 0.6194, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6592072248458862, |
|
"rewards/margins": 0.2703477740287781, |
|
"rewards/rejected": -0.92955482006073, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.17914267434420986, |
|
"grad_norm": 56.55213803108218, |
|
"learning_rate": 4.562588904694168e-07, |
|
"logits/chosen": -2.8192648887634277, |
|
"logits/rejected": -2.8061060905456543, |
|
"logps/chosen": -111.86820220947266, |
|
"logps/rejected": -115.575439453125, |
|
"loss": 0.6544, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5377305746078491, |
|
"rewards/margins": 0.6384528875350952, |
|
"rewards/rejected": -1.1761833429336548, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1855406269993602, |
|
"grad_norm": 69.17771143675574, |
|
"learning_rate": 4.5270270270270264e-07, |
|
"logits/chosen": -2.843867778778076, |
|
"logits/rejected": -2.829209804534912, |
|
"logps/chosen": -110.8249740600586, |
|
"logps/rejected": -103.67692565917969, |
|
"loss": 0.6367, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5854028463363647, |
|
"rewards/margins": 0.6495189666748047, |
|
"rewards/rejected": -1.2349218130111694, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19193857965451055, |
|
"grad_norm": 47.35052032879965, |
|
"learning_rate": 4.491465149359886e-07, |
|
"logits/chosen": -2.8120689392089844, |
|
"logits/rejected": -2.7911698818206787, |
|
"logps/chosen": -119.08061218261719, |
|
"logps/rejected": -112.31675720214844, |
|
"loss": 0.57, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.45342451333999634, |
|
"rewards/margins": 0.7334454655647278, |
|
"rewards/rejected": -1.1868698596954346, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.19833653230966092, |
|
"grad_norm": 49.203663039796716, |
|
"learning_rate": 4.4559032716927454e-07, |
|
"logits/chosen": -2.821526527404785, |
|
"logits/rejected": -2.8225045204162598, |
|
"logps/chosen": -119.77931213378906, |
|
"logps/rejected": -109.47914123535156, |
|
"loss": 0.6554, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7418456077575684, |
|
"rewards/margins": 0.45222848653793335, |
|
"rewards/rejected": -1.194074273109436, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.20473448496481125, |
|
"grad_norm": 49.58718436216588, |
|
"learning_rate": 4.420341394025605e-07, |
|
"logits/chosen": -2.785693883895874, |
|
"logits/rejected": -2.7701306343078613, |
|
"logps/chosen": -107.3865966796875, |
|
"logps/rejected": -106.08250427246094, |
|
"loss": 0.6065, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7089810371398926, |
|
"rewards/margins": 0.6241555213928223, |
|
"rewards/rejected": -1.3331366777420044, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.21113243761996162, |
|
"grad_norm": 60.39033430088021, |
|
"learning_rate": 4.384779516358463e-07, |
|
"logits/chosen": -2.828508138656616, |
|
"logits/rejected": -2.8167166709899902, |
|
"logps/chosen": -122.96031188964844, |
|
"logps/rejected": -115.10658264160156, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6257882118225098, |
|
"rewards/margins": 0.5289193391799927, |
|
"rewards/rejected": -1.154707670211792, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.21753039027511195, |
|
"grad_norm": 63.14689712905548, |
|
"learning_rate": 4.3492176386913227e-07, |
|
"logits/chosen": -2.8433403968811035, |
|
"logits/rejected": -2.8479769229888916, |
|
"logps/chosen": -121.58296203613281, |
|
"logps/rejected": -115.6580810546875, |
|
"loss": 0.6481, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6279059648513794, |
|
"rewards/margins": 0.3463929295539856, |
|
"rewards/rejected": -0.9742989540100098, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.22392834293026231, |
|
"grad_norm": 53.84331868502145, |
|
"learning_rate": 4.313655761024182e-07, |
|
"logits/chosen": -2.8341145515441895, |
|
"logits/rejected": -2.8294851779937744, |
|
"logps/chosen": -114.5806884765625, |
|
"logps/rejected": -103.96630859375, |
|
"loss": 0.6084, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.6928296089172363, |
|
"rewards/margins": 0.44785672426223755, |
|
"rewards/rejected": -1.140686273574829, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23032629558541268, |
|
"grad_norm": 48.0087088492426, |
|
"learning_rate": 4.278093883357041e-07, |
|
"logits/chosen": -2.853909969329834, |
|
"logits/rejected": -2.8232522010803223, |
|
"logps/chosen": -110.68212890625, |
|
"logps/rejected": -98.71046447753906, |
|
"loss": 0.607, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7764779925346375, |
|
"rewards/margins": 0.4268825054168701, |
|
"rewards/rejected": -1.2033603191375732, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.236724248240563, |
|
"grad_norm": 49.094230475270784, |
|
"learning_rate": 4.2425320056899e-07, |
|
"logits/chosen": -2.831592559814453, |
|
"logits/rejected": -2.821815013885498, |
|
"logps/chosen": -106.56488037109375, |
|
"logps/rejected": -108.5312728881836, |
|
"loss": 0.6504, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.4470910429954529, |
|
"rewards/margins": 0.3594434857368469, |
|
"rewards/rejected": -0.8065345883369446, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.24312220089571338, |
|
"grad_norm": 66.63628934256334, |
|
"learning_rate": 4.2069701280227595e-07, |
|
"logits/chosen": -2.824704170227051, |
|
"logits/rejected": -2.8049395084381104, |
|
"logps/chosen": -120.53758239746094, |
|
"logps/rejected": -106.18167877197266, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.8338532447814941, |
|
"rewards/margins": 0.6111718416213989, |
|
"rewards/rejected": -1.4450252056121826, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2495201535508637, |
|
"grad_norm": 69.12388513034483, |
|
"learning_rate": 4.1714082503556185e-07, |
|
"logits/chosen": -2.8500986099243164, |
|
"logits/rejected": -2.826770544052124, |
|
"logps/chosen": -131.38690185546875, |
|
"logps/rejected": -106.4390869140625, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8073829412460327, |
|
"rewards/margins": 0.36057430505752563, |
|
"rewards/rejected": -1.167957067489624, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2559181062060141, |
|
"grad_norm": 57.108856581509585, |
|
"learning_rate": 4.135846372688478e-07, |
|
"logits/chosen": -2.832038164138794, |
|
"logits/rejected": -2.8184292316436768, |
|
"logps/chosen": -119.92472839355469, |
|
"logps/rejected": -125.78714752197266, |
|
"loss": 0.6305, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.4277513921260834, |
|
"rewards/margins": 0.7275630235671997, |
|
"rewards/rejected": -1.1553144454956055, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2559181062060141, |
|
"eval_logits/chosen": -2.8320508003234863, |
|
"eval_logits/rejected": -2.8121678829193115, |
|
"eval_logps/chosen": -117.8738021850586, |
|
"eval_logps/rejected": -104.50364685058594, |
|
"eval_loss": 0.5884435772895813, |
|
"eval_rewards/accuracies": 0.6958598494529724, |
|
"eval_rewards/chosen": -0.6798812747001648, |
|
"eval_rewards/margins": 0.4930422306060791, |
|
"eval_rewards/rejected": -1.1729233264923096, |
|
"eval_runtime": 739.6751, |
|
"eval_samples_per_second": 6.76, |
|
"eval_steps_per_second": 0.212, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26231605886116444, |
|
"grad_norm": 59.148448424766435, |
|
"learning_rate": 4.100284495021337e-07, |
|
"logits/chosen": -2.8497817516326904, |
|
"logits/rejected": -2.8282692432403564, |
|
"logps/chosen": -121.6697998046875, |
|
"logps/rejected": -120.14678955078125, |
|
"loss": 0.6345, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6451729536056519, |
|
"rewards/margins": 0.7675411701202393, |
|
"rewards/rejected": -1.4127142429351807, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.2687140115163148, |
|
"grad_norm": 41.06134554396416, |
|
"learning_rate": 4.064722617354196e-07, |
|
"logits/chosen": -2.8894846439361572, |
|
"logits/rejected": -2.861454486846924, |
|
"logps/chosen": -125.02449035644531, |
|
"logps/rejected": -121.65775299072266, |
|
"loss": 0.6217, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6182137727737427, |
|
"rewards/margins": 0.7963579893112183, |
|
"rewards/rejected": -1.41457200050354, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2751119641714651, |
|
"grad_norm": 52.35881081541796, |
|
"learning_rate": 4.0291607396870553e-07, |
|
"logits/chosen": -2.8551623821258545, |
|
"logits/rejected": -2.815028667449951, |
|
"logps/chosen": -109.940673828125, |
|
"logps/rejected": -97.78315734863281, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6586909890174866, |
|
"rewards/margins": 0.7408519387245178, |
|
"rewards/rejected": -1.3995428085327148, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.28150991682661547, |
|
"grad_norm": 67.91564328860713, |
|
"learning_rate": 3.993598862019915e-07, |
|
"logits/chosen": -2.813927412033081, |
|
"logits/rejected": -2.8176522254943848, |
|
"logps/chosen": -99.02717590332031, |
|
"logps/rejected": -111.13435363769531, |
|
"loss": 0.6412, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6925631761550903, |
|
"rewards/margins": 0.6947155594825745, |
|
"rewards/rejected": -1.38727867603302, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.28790786948176583, |
|
"grad_norm": 46.303651808982835, |
|
"learning_rate": 3.9580369843527737e-07, |
|
"logits/chosen": -2.8682820796966553, |
|
"logits/rejected": -2.860414981842041, |
|
"logps/chosen": -119.4373779296875, |
|
"logps/rejected": -110.44520568847656, |
|
"loss": 0.6108, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5994283556938171, |
|
"rewards/margins": 0.7199314832687378, |
|
"rewards/rejected": -1.3193597793579102, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2943058221369162, |
|
"grad_norm": 56.48779597669934, |
|
"learning_rate": 3.9224751066856327e-07, |
|
"logits/chosen": -2.839963912963867, |
|
"logits/rejected": -2.8135039806365967, |
|
"logps/chosen": -132.95506286621094, |
|
"logps/rejected": -108.5090103149414, |
|
"loss": 0.696, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6798344254493713, |
|
"rewards/margins": 0.549295961856842, |
|
"rewards/rejected": -1.2291303873062134, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.30070377479206656, |
|
"grad_norm": 45.638457628682765, |
|
"learning_rate": 3.886913229018492e-07, |
|
"logits/chosen": -2.8314132690429688, |
|
"logits/rejected": -2.822693109512329, |
|
"logps/chosen": -117.86392974853516, |
|
"logps/rejected": -123.7062759399414, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8428860902786255, |
|
"rewards/margins": 0.7259670495986938, |
|
"rewards/rejected": -1.5688531398773193, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.30710172744721687, |
|
"grad_norm": 48.11242331520591, |
|
"learning_rate": 3.851351351351351e-07, |
|
"logits/chosen": -2.8257322311401367, |
|
"logits/rejected": -2.8105177879333496, |
|
"logps/chosen": -112.5777359008789, |
|
"logps/rejected": -107.22886657714844, |
|
"loss": 0.6627, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.941847026348114, |
|
"rewards/margins": 0.46391814947128296, |
|
"rewards/rejected": -1.405765175819397, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.31349968010236723, |
|
"grad_norm": 47.98414298526401, |
|
"learning_rate": 3.8157894736842105e-07, |
|
"logits/chosen": -2.854234218597412, |
|
"logits/rejected": -2.8304595947265625, |
|
"logps/chosen": -114.91446685791016, |
|
"logps/rejected": -112.93721008300781, |
|
"loss": 0.6314, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6863322257995605, |
|
"rewards/margins": 0.7260019183158875, |
|
"rewards/rejected": -1.4123342037200928, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3198976327575176, |
|
"grad_norm": 54.45893948180426, |
|
"learning_rate": 3.7802275960170695e-07, |
|
"logits/chosen": -2.821240186691284, |
|
"logits/rejected": -2.813072443008423, |
|
"logps/chosen": -115.79627990722656, |
|
"logps/rejected": -108.83988952636719, |
|
"loss": 0.6302, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6848796010017395, |
|
"rewards/margins": 0.7942295670509338, |
|
"rewards/rejected": -1.4791094064712524, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.32629558541266795, |
|
"grad_norm": 72.78110036828252, |
|
"learning_rate": 3.7446657183499284e-07, |
|
"logits/chosen": -2.8358187675476074, |
|
"logits/rejected": -2.8056414127349854, |
|
"logps/chosen": -125.2596664428711, |
|
"logps/rejected": -122.6445083618164, |
|
"loss": 0.6721, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0303077697753906, |
|
"rewards/margins": 0.3377246856689453, |
|
"rewards/rejected": -1.368032693862915, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3326935380678183, |
|
"grad_norm": 51.57380900020349, |
|
"learning_rate": 3.709103840682788e-07, |
|
"logits/chosen": -2.8579823970794678, |
|
"logits/rejected": -2.8210737705230713, |
|
"logps/chosen": -123.88114166259766, |
|
"logps/rejected": -107.84675598144531, |
|
"loss": 0.628, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7567764520645142, |
|
"rewards/margins": 0.8902345895767212, |
|
"rewards/rejected": -1.647011160850525, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3390914907229686, |
|
"grad_norm": 45.41853425648929, |
|
"learning_rate": 3.6735419630156474e-07, |
|
"logits/chosen": -2.8426403999328613, |
|
"logits/rejected": -2.808004856109619, |
|
"logps/chosen": -115.7242202758789, |
|
"logps/rejected": -105.14561462402344, |
|
"loss": 0.6144, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.744358241558075, |
|
"rewards/margins": 0.7397834658622742, |
|
"rewards/rejected": -1.4841415882110596, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.345489443378119, |
|
"grad_norm": 45.6068502491896, |
|
"learning_rate": 3.637980085348506e-07, |
|
"logits/chosen": -2.8041722774505615, |
|
"logits/rejected": -2.7811694145202637, |
|
"logps/chosen": -116.31886291503906, |
|
"logps/rejected": -101.0390625, |
|
"loss": 0.6406, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.816309928894043, |
|
"rewards/margins": 0.7303619384765625, |
|
"rewards/rejected": -1.5466718673706055, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.35188739603326935, |
|
"grad_norm": 54.038270911291995, |
|
"learning_rate": 3.602418207681365e-07, |
|
"logits/chosen": -2.8286209106445312, |
|
"logits/rejected": -2.8093667030334473, |
|
"logps/chosen": -125.18721008300781, |
|
"logps/rejected": -119.35871887207031, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.797341525554657, |
|
"rewards/margins": 0.7269377708435059, |
|
"rewards/rejected": -1.5242793560028076, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3582853486884197, |
|
"grad_norm": 37.61754758300332, |
|
"learning_rate": 3.5668563300142247e-07, |
|
"logits/chosen": -2.7906124591827393, |
|
"logits/rejected": -2.7900514602661133, |
|
"logps/chosen": -104.17930603027344, |
|
"logps/rejected": -117.13945007324219, |
|
"loss": 0.596, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8746695518493652, |
|
"rewards/margins": 0.798784613609314, |
|
"rewards/rejected": -1.6734540462493896, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3646833013435701, |
|
"grad_norm": 51.519397153489784, |
|
"learning_rate": 3.5312944523470837e-07, |
|
"logits/chosen": -2.760509490966797, |
|
"logits/rejected": -2.745539665222168, |
|
"logps/chosen": -108.7869644165039, |
|
"logps/rejected": -102.55323791503906, |
|
"loss": 0.6569, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.0966747999191284, |
|
"rewards/margins": 0.5053921937942505, |
|
"rewards/rejected": -1.602066993713379, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3710812539987204, |
|
"grad_norm": 54.888184782180275, |
|
"learning_rate": 3.495732574679943e-07, |
|
"logits/chosen": -2.764152765274048, |
|
"logits/rejected": -2.763406991958618, |
|
"logps/chosen": -113.4003677368164, |
|
"logps/rejected": -114.9386215209961, |
|
"loss": 0.6024, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9524062871932983, |
|
"rewards/margins": 0.6822084188461304, |
|
"rewards/rejected": -1.6346147060394287, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.37747920665387075, |
|
"grad_norm": 43.031105911892546, |
|
"learning_rate": 3.460170697012802e-07, |
|
"logits/chosen": -2.7501633167266846, |
|
"logits/rejected": -2.722970485687256, |
|
"logps/chosen": -116.67124938964844, |
|
"logps/rejected": -100.81819152832031, |
|
"loss": 0.567, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8928259015083313, |
|
"rewards/margins": 0.7573403120040894, |
|
"rewards/rejected": -1.6501661539077759, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.3838771593090211, |
|
"grad_norm": 50.40927225064406, |
|
"learning_rate": 3.424608819345661e-07, |
|
"logits/chosen": -2.751669406890869, |
|
"logits/rejected": -2.730020523071289, |
|
"logps/chosen": -112.72367095947266, |
|
"logps/rejected": -104.26304626464844, |
|
"loss": 0.6374, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5136244297027588, |
|
"rewards/margins": 0.48020678758621216, |
|
"rewards/rejected": -1.9938310384750366, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3838771593090211, |
|
"eval_logits/chosen": -2.7680461406707764, |
|
"eval_logits/rejected": -2.7502799034118652, |
|
"eval_logps/chosen": -122.29470825195312, |
|
"eval_logps/rejected": -110.04557800292969, |
|
"eval_loss": 0.5879228711128235, |
|
"eval_rewards/accuracies": 0.6799362897872925, |
|
"eval_rewards/chosen": -1.1219713687896729, |
|
"eval_rewards/margins": 0.6051455736160278, |
|
"eval_rewards/rejected": -1.7271168231964111, |
|
"eval_runtime": 286.7339, |
|
"eval_samples_per_second": 17.438, |
|
"eval_steps_per_second": 0.548, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3902751119641715, |
|
"grad_norm": 38.22901412601261, |
|
"learning_rate": 3.3890469416785205e-07, |
|
"logits/chosen": -2.7812764644622803, |
|
"logits/rejected": -2.7706708908081055, |
|
"logps/chosen": -121.8727798461914, |
|
"logps/rejected": -115.7394027709961, |
|
"loss": 0.6536, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3321597576141357, |
|
"rewards/margins": 0.34968990087509155, |
|
"rewards/rejected": -1.681849479675293, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.39667306461932184, |
|
"grad_norm": 62.8449168285338, |
|
"learning_rate": 3.35348506401138e-07, |
|
"logits/chosen": -2.7748451232910156, |
|
"logits/rejected": -2.7738356590270996, |
|
"logps/chosen": -139.68206787109375, |
|
"logps/rejected": -123.91287994384766, |
|
"loss": 0.6531, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9537972211837769, |
|
"rewards/margins": 0.5792536735534668, |
|
"rewards/rejected": -1.5330508947372437, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.40307101727447214, |
|
"grad_norm": 55.7199501098711, |
|
"learning_rate": 3.3179231863442384e-07, |
|
"logits/chosen": -2.766322612762451, |
|
"logits/rejected": -2.7525458335876465, |
|
"logps/chosen": -115.6636734008789, |
|
"logps/rejected": -115.79632568359375, |
|
"loss": 0.6742, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.1982686519622803, |
|
"rewards/margins": 0.32977309823036194, |
|
"rewards/rejected": -1.5280416011810303, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4094689699296225, |
|
"grad_norm": 59.627044623955385, |
|
"learning_rate": 3.282361308677098e-07, |
|
"logits/chosen": -2.752450942993164, |
|
"logits/rejected": -2.7423059940338135, |
|
"logps/chosen": -117.9424057006836, |
|
"logps/rejected": -100.12724304199219, |
|
"loss": 0.6591, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.099172592163086, |
|
"rewards/margins": 0.4547777771949768, |
|
"rewards/rejected": -1.553950548171997, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.41586692258477287, |
|
"grad_norm": 39.78897483264296, |
|
"learning_rate": 3.2467994310099573e-07, |
|
"logits/chosen": -2.7846992015838623, |
|
"logits/rejected": -2.7701334953308105, |
|
"logps/chosen": -120.6253890991211, |
|
"logps/rejected": -114.74141693115234, |
|
"loss": 0.6499, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.135921835899353, |
|
"rewards/margins": 0.39883118867874146, |
|
"rewards/rejected": -1.5347530841827393, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.42226487523992323, |
|
"grad_norm": 46.01875825095158, |
|
"learning_rate": 3.211237553342817e-07, |
|
"logits/chosen": -2.7716829776763916, |
|
"logits/rejected": -2.7672672271728516, |
|
"logps/chosen": -121.64005279541016, |
|
"logps/rejected": -125.25276947021484, |
|
"loss": 0.6072, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1352884769439697, |
|
"rewards/margins": 0.6041684746742249, |
|
"rewards/rejected": -1.7394568920135498, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4286628278950736, |
|
"grad_norm": 59.01505064312942, |
|
"learning_rate": 3.175675675675675e-07, |
|
"logits/chosen": -2.712557315826416, |
|
"logits/rejected": -2.709873914718628, |
|
"logps/chosen": -107.31956481933594, |
|
"logps/rejected": -107.1248779296875, |
|
"loss": 0.6156, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3347444534301758, |
|
"rewards/margins": 0.6245480179786682, |
|
"rewards/rejected": -1.9592926502227783, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4350607805502239, |
|
"grad_norm": 46.021209760495935, |
|
"learning_rate": 3.1401137980085347e-07, |
|
"logits/chosen": -2.7640795707702637, |
|
"logits/rejected": -2.759766101837158, |
|
"logps/chosen": -113.60371398925781, |
|
"logps/rejected": -111.9162826538086, |
|
"loss": 0.5924, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1441354751586914, |
|
"rewards/margins": 0.737005352973938, |
|
"rewards/rejected": -1.881140947341919, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.44145873320537427, |
|
"grad_norm": 34.614349860027566, |
|
"learning_rate": 3.104551920341394e-07, |
|
"logits/chosen": -2.7723183631896973, |
|
"logits/rejected": -2.7740321159362793, |
|
"logps/chosen": -133.4069366455078, |
|
"logps/rejected": -120.85469055175781, |
|
"loss": 0.6382, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.1162710189819336, |
|
"rewards/margins": 0.729344367980957, |
|
"rewards/rejected": -1.8456153869628906, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.44785668586052463, |
|
"grad_norm": 61.67528158068899, |
|
"learning_rate": 3.068990042674253e-07, |
|
"logits/chosen": -2.770324468612671, |
|
"logits/rejected": -2.756533145904541, |
|
"logps/chosen": -116.19869232177734, |
|
"logps/rejected": -124.49647521972656, |
|
"loss": 0.7084, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2367490530014038, |
|
"rewards/margins": 0.4493141770362854, |
|
"rewards/rejected": -1.6860634088516235, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.454254638515675, |
|
"grad_norm": 63.971216074190245, |
|
"learning_rate": 3.033428165007112e-07, |
|
"logits/chosen": -2.7677056789398193, |
|
"logits/rejected": -2.7645699977874756, |
|
"logps/chosen": -121.53895568847656, |
|
"logps/rejected": -111.52986145019531, |
|
"loss": 0.729, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.3768677711486816, |
|
"rewards/margins": 0.27729541063308716, |
|
"rewards/rejected": -1.654163122177124, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.46065259117082535, |
|
"grad_norm": 47.22353574035586, |
|
"learning_rate": 2.9978662873399715e-07, |
|
"logits/chosen": -2.8085451126098633, |
|
"logits/rejected": -2.788102388381958, |
|
"logps/chosen": -130.08607482910156, |
|
"logps/rejected": -116.80802917480469, |
|
"loss": 0.6264, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9673389196395874, |
|
"rewards/margins": 0.9441471099853516, |
|
"rewards/rejected": -1.911486029624939, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.46705054382597566, |
|
"grad_norm": 61.77806719375072, |
|
"learning_rate": 2.9623044096728305e-07, |
|
"logits/chosen": -2.7899954319000244, |
|
"logits/rejected": -2.7709438800811768, |
|
"logps/chosen": -124.1104507446289, |
|
"logps/rejected": -125.25813293457031, |
|
"loss": 0.7038, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.1099923849105835, |
|
"rewards/margins": 0.7608176469802856, |
|
"rewards/rejected": -1.8708101511001587, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.473448496481126, |
|
"grad_norm": 50.86047987086807, |
|
"learning_rate": 2.92674253200569e-07, |
|
"logits/chosen": -2.7680702209472656, |
|
"logits/rejected": -2.7549118995666504, |
|
"logps/chosen": -121.9208755493164, |
|
"logps/rejected": -110.9996566772461, |
|
"loss": 0.5918, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2252216339111328, |
|
"rewards/margins": 0.7127790451049805, |
|
"rewards/rejected": -1.9380006790161133, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"grad_norm": 58.045381028548476, |
|
"learning_rate": 2.8911806543385494e-07, |
|
"logits/chosen": -2.819340229034424, |
|
"logits/rejected": -2.803678035736084, |
|
"logps/chosen": -137.2626495361328, |
|
"logps/rejected": -124.6344985961914, |
|
"loss": 0.6366, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.0756101608276367, |
|
"rewards/margins": 0.8651407957077026, |
|
"rewards/rejected": -1.940751075744629, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.48624440179142675, |
|
"grad_norm": 45.24460591639927, |
|
"learning_rate": 2.855618776671408e-07, |
|
"logits/chosen": -2.787562847137451, |
|
"logits/rejected": -2.766334056854248, |
|
"logps/chosen": -122.2945556640625, |
|
"logps/rejected": -107.06929779052734, |
|
"loss": 0.6259, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0217363834381104, |
|
"rewards/margins": 0.8248605728149414, |
|
"rewards/rejected": -1.8465969562530518, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4926423544465771, |
|
"grad_norm": 51.01901234924279, |
|
"learning_rate": 2.8200568990042673e-07, |
|
"logits/chosen": -2.773236036300659, |
|
"logits/rejected": -2.7518670558929443, |
|
"logps/chosen": -122.63824462890625, |
|
"logps/rejected": -108.55671691894531, |
|
"loss": 0.5488, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9058364629745483, |
|
"rewards/margins": 0.8456705212593079, |
|
"rewards/rejected": -1.7515071630477905, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.4990403071017274, |
|
"grad_norm": 47.059317214322036, |
|
"learning_rate": 2.784495021337127e-07, |
|
"logits/chosen": -2.7841098308563232, |
|
"logits/rejected": -2.765838146209717, |
|
"logps/chosen": -132.35830688476562, |
|
"logps/rejected": -108.7806396484375, |
|
"loss": 0.6505, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1347086429595947, |
|
"rewards/margins": 0.5946494340896606, |
|
"rewards/rejected": -1.7293580770492554, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5054382597568778, |
|
"grad_norm": 55.81431596800419, |
|
"learning_rate": 2.7489331436699857e-07, |
|
"logits/chosen": -2.7832138538360596, |
|
"logits/rejected": -2.7748045921325684, |
|
"logps/chosen": -129.96388244628906, |
|
"logps/rejected": -128.643798828125, |
|
"loss": 0.6315, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.3096433877944946, |
|
"rewards/margins": 0.460153192281723, |
|
"rewards/rejected": -1.76979660987854, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5118362124120281, |
|
"grad_norm": 47.88527721947066, |
|
"learning_rate": 2.7133712660028446e-07, |
|
"logits/chosen": -2.747264862060547, |
|
"logits/rejected": -2.73887300491333, |
|
"logps/chosen": -118.75581359863281, |
|
"logps/rejected": -112.53265380859375, |
|
"loss": 0.5953, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1123216152191162, |
|
"rewards/margins": 0.8585169911384583, |
|
"rewards/rejected": -1.9708385467529297, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5118362124120281, |
|
"eval_logits/chosen": -2.7703402042388916, |
|
"eval_logits/rejected": -2.7539730072021484, |
|
"eval_logps/chosen": -123.07151794433594, |
|
"eval_logps/rejected": -110.77460479736328, |
|
"eval_loss": 0.5856689214706421, |
|
"eval_rewards/accuracies": 0.6958598494529724, |
|
"eval_rewards/chosen": -1.199651837348938, |
|
"eval_rewards/margins": 0.6003690361976624, |
|
"eval_rewards/rejected": -1.8000208139419556, |
|
"eval_runtime": 282.6345, |
|
"eval_samples_per_second": 17.691, |
|
"eval_steps_per_second": 0.555, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5182341650671785, |
|
"grad_norm": 53.88804382450492, |
|
"learning_rate": 2.677809388335704e-07, |
|
"logits/chosen": -2.7724366188049316, |
|
"logits/rejected": -2.7628140449523926, |
|
"logps/chosen": -115.2289810180664, |
|
"logps/rejected": -118.9228744506836, |
|
"loss": 0.6289, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.2024407386779785, |
|
"rewards/margins": 0.8105290532112122, |
|
"rewards/rejected": -2.012969970703125, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.5246321177223289, |
|
"grad_norm": 64.97852446899729, |
|
"learning_rate": 2.642247510668563e-07, |
|
"logits/chosen": -2.7826988697052, |
|
"logits/rejected": -2.7776927947998047, |
|
"logps/chosen": -128.9366455078125, |
|
"logps/rejected": -126.57633209228516, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0592586994171143, |
|
"rewards/margins": 0.7127590179443359, |
|
"rewards/rejected": -1.7720177173614502, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5310300703774792, |
|
"grad_norm": 44.76851798380462, |
|
"learning_rate": 2.6066856330014225e-07, |
|
"logits/chosen": -2.7971854209899902, |
|
"logits/rejected": -2.7861063480377197, |
|
"logps/chosen": -130.8908233642578, |
|
"logps/rejected": -124.34150695800781, |
|
"loss": 0.6262, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9852834939956665, |
|
"rewards/margins": 0.6130325198173523, |
|
"rewards/rejected": -1.5983158349990845, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.5374280230326296, |
|
"grad_norm": 46.52784154303764, |
|
"learning_rate": 2.5711237553342815e-07, |
|
"logits/chosen": -2.8150525093078613, |
|
"logits/rejected": -2.800579071044922, |
|
"logps/chosen": -136.0585479736328, |
|
"logps/rejected": -105.21810150146484, |
|
"loss": 0.568, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.169297218322754, |
|
"rewards/margins": 0.7267537713050842, |
|
"rewards/rejected": -1.8960508108139038, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5438259756877799, |
|
"grad_norm": 53.63407392324102, |
|
"learning_rate": 2.5355618776671404e-07, |
|
"logits/chosen": -2.833627223968506, |
|
"logits/rejected": -2.8091251850128174, |
|
"logps/chosen": -131.2395477294922, |
|
"logps/rejected": -124.14871978759766, |
|
"loss": 0.6241, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.223509430885315, |
|
"rewards/margins": 0.6891213059425354, |
|
"rewards/rejected": -1.9126307964324951, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5502239283429302, |
|
"grad_norm": 40.739420757399046, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -2.804734468460083, |
|
"logits/rejected": -2.7905805110931396, |
|
"logps/chosen": -117.0075454711914, |
|
"logps/rejected": -106.9182357788086, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.2571464776992798, |
|
"rewards/margins": 0.5797263383865356, |
|
"rewards/rejected": -1.8368728160858154, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.5566218809980806, |
|
"grad_norm": 52.20343272589441, |
|
"learning_rate": 2.4644381223328594e-07, |
|
"logits/chosen": -2.8013713359832764, |
|
"logits/rejected": -2.7786829471588135, |
|
"logps/chosen": -120.1400375366211, |
|
"logps/rejected": -105.8758773803711, |
|
"loss": 0.6688, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.250685453414917, |
|
"rewards/margins": 0.7675702571868896, |
|
"rewards/rejected": -2.0182557106018066, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.5630198336532309, |
|
"grad_norm": 55.78019195317067, |
|
"learning_rate": 2.4288762446657183e-07, |
|
"logits/chosen": -2.824733257293701, |
|
"logits/rejected": -2.8199667930603027, |
|
"logps/chosen": -123.25453186035156, |
|
"logps/rejected": -134.382080078125, |
|
"loss": 0.6199, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.1268641948699951, |
|
"rewards/margins": 0.8908056020736694, |
|
"rewards/rejected": -2.017669916152954, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.5694177863083814, |
|
"grad_norm": 46.8913181947373, |
|
"learning_rate": 2.393314366998578e-07, |
|
"logits/chosen": -2.7837607860565186, |
|
"logits/rejected": -2.783323049545288, |
|
"logps/chosen": -118.7066421508789, |
|
"logps/rejected": -119.07723236083984, |
|
"loss": 0.7139, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0711259841918945, |
|
"rewards/margins": 0.7609738707542419, |
|
"rewards/rejected": -1.8321001529693604, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.5758157389635317, |
|
"grad_norm": 61.90420729087464, |
|
"learning_rate": 2.3577524893314365e-07, |
|
"logits/chosen": -2.7695021629333496, |
|
"logits/rejected": -2.761353015899658, |
|
"logps/chosen": -111.3558578491211, |
|
"logps/rejected": -111.15797424316406, |
|
"loss": 0.6603, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.1308844089508057, |
|
"rewards/margins": 0.5882295966148376, |
|
"rewards/rejected": -1.7191137075424194, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.582213691618682, |
|
"grad_norm": 48.59525480797925, |
|
"learning_rate": 2.322190611664296e-07, |
|
"logits/chosen": -2.789656639099121, |
|
"logits/rejected": -2.781515121459961, |
|
"logps/chosen": -118.76673889160156, |
|
"logps/rejected": -105.30081939697266, |
|
"loss": 0.6481, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.1197543144226074, |
|
"rewards/margins": 0.6071382761001587, |
|
"rewards/rejected": -1.7268924713134766, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.5886116442738324, |
|
"grad_norm": 43.472580466054765, |
|
"learning_rate": 2.2866287339971549e-07, |
|
"logits/chosen": -2.789944887161255, |
|
"logits/rejected": -2.771533250808716, |
|
"logps/chosen": -120.75154113769531, |
|
"logps/rejected": -119.8617935180664, |
|
"loss": 0.6377, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0260751247406006, |
|
"rewards/margins": 0.7392138242721558, |
|
"rewards/rejected": -1.7652889490127563, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.5950095969289827, |
|
"grad_norm": 55.2411167534148, |
|
"learning_rate": 2.251066856330014e-07, |
|
"logits/chosen": -2.7701098918914795, |
|
"logits/rejected": -2.7492525577545166, |
|
"logps/chosen": -114.12837219238281, |
|
"logps/rejected": -100.77635192871094, |
|
"loss": 0.6102, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.210095763206482, |
|
"rewards/margins": 0.505190372467041, |
|
"rewards/rejected": -1.7152862548828125, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6014075495841331, |
|
"grad_norm": 50.0185028587488, |
|
"learning_rate": 2.2155049786628733e-07, |
|
"logits/chosen": -2.7873411178588867, |
|
"logits/rejected": -2.7700142860412598, |
|
"logps/chosen": -114.91324615478516, |
|
"logps/rejected": -105.86214447021484, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.211742639541626, |
|
"rewards/margins": 0.5054360628128052, |
|
"rewards/rejected": -1.7171787023544312, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6078055022392834, |
|
"grad_norm": 42.87415326293721, |
|
"learning_rate": 2.1799431009957325e-07, |
|
"logits/chosen": -2.78757381439209, |
|
"logits/rejected": -2.776582717895508, |
|
"logps/chosen": -128.70608520507812, |
|
"logps/rejected": -124.4533920288086, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.146447777748108, |
|
"rewards/margins": 0.6257702112197876, |
|
"rewards/rejected": -1.7722179889678955, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6142034548944337, |
|
"grad_norm": 49.65392226267386, |
|
"learning_rate": 2.1443812233285914e-07, |
|
"logits/chosen": -2.7512404918670654, |
|
"logits/rejected": -2.744755983352661, |
|
"logps/chosen": -110.56729888916016, |
|
"logps/rejected": -121.34492492675781, |
|
"loss": 0.6406, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2666919231414795, |
|
"rewards/margins": 0.49605482816696167, |
|
"rewards/rejected": -1.762746810913086, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6206014075495841, |
|
"grad_norm": 47.93077072841145, |
|
"learning_rate": 2.108819345661451e-07, |
|
"logits/chosen": -2.764608144760132, |
|
"logits/rejected": -2.764833927154541, |
|
"logps/chosen": -124.5059585571289, |
|
"logps/rejected": -116.89808654785156, |
|
"loss": 0.597, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0087820291519165, |
|
"rewards/margins": 0.6207214593887329, |
|
"rewards/rejected": -1.6295034885406494, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.6269993602047345, |
|
"grad_norm": 52.39545922578806, |
|
"learning_rate": 2.0732574679943098e-07, |
|
"logits/chosen": -2.7816779613494873, |
|
"logits/rejected": -2.761172294616699, |
|
"logps/chosen": -137.40052795410156, |
|
"logps/rejected": -122.988525390625, |
|
"loss": 0.6108, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0072863101959229, |
|
"rewards/margins": 0.8142153024673462, |
|
"rewards/rejected": -1.8215014934539795, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.6333973128598849, |
|
"grad_norm": 44.290985976089594, |
|
"learning_rate": 2.0376955903271693e-07, |
|
"logits/chosen": -2.774445056915283, |
|
"logits/rejected": -2.763420343399048, |
|
"logps/chosen": -126.58064270019531, |
|
"logps/rejected": -120.07356262207031, |
|
"loss": 0.5508, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9515323638916016, |
|
"rewards/margins": 0.677230179309845, |
|
"rewards/rejected": -1.6287622451782227, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.6397952655150352, |
|
"grad_norm": 47.67634608134284, |
|
"learning_rate": 2.0021337126600283e-07, |
|
"logits/chosen": -2.772531747817993, |
|
"logits/rejected": -2.749040365219116, |
|
"logps/chosen": -130.54287719726562, |
|
"logps/rejected": -118.84379577636719, |
|
"loss": 0.5874, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.2028357982635498, |
|
"rewards/margins": 0.8163717985153198, |
|
"rewards/rejected": -2.01920747756958, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6397952655150352, |
|
"eval_logits/chosen": -2.754683494567871, |
|
"eval_logits/rejected": -2.738701581954956, |
|
"eval_logps/chosen": -123.66197204589844, |
|
"eval_logps/rejected": -111.75140380859375, |
|
"eval_loss": 0.5864265561103821, |
|
"eval_rewards/accuracies": 0.6918789744377136, |
|
"eval_rewards/chosen": -1.2586979866027832, |
|
"eval_rewards/margins": 0.6390010714530945, |
|
"eval_rewards/rejected": -1.897699236869812, |
|
"eval_runtime": 280.4436, |
|
"eval_samples_per_second": 17.829, |
|
"eval_steps_per_second": 0.56, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6461932181701855, |
|
"grad_norm": 63.64218572486143, |
|
"learning_rate": 1.9665718349928875e-07, |
|
"logits/chosen": -2.737196445465088, |
|
"logits/rejected": -2.734534978866577, |
|
"logps/chosen": -121.40058898925781, |
|
"logps/rejected": -127.97169494628906, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.1952520608901978, |
|
"rewards/margins": 0.8720852136611938, |
|
"rewards/rejected": -2.0673370361328125, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.6525911708253359, |
|
"grad_norm": 48.03592117722671, |
|
"learning_rate": 1.931009957325747e-07, |
|
"logits/chosen": -2.7495694160461426, |
|
"logits/rejected": -2.735703229904175, |
|
"logps/chosen": -141.25796508789062, |
|
"logps/rejected": -112.2705078125, |
|
"loss": 0.5639, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8717496991157532, |
|
"rewards/margins": 1.0097849369049072, |
|
"rewards/rejected": -1.8815345764160156, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.6589891234804862, |
|
"grad_norm": 50.405047652842626, |
|
"learning_rate": 1.895448079658606e-07, |
|
"logits/chosen": -2.769981861114502, |
|
"logits/rejected": -2.7553834915161133, |
|
"logps/chosen": -117.5353012084961, |
|
"logps/rejected": -118.06101989746094, |
|
"loss": 0.6584, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4604730606079102, |
|
"rewards/margins": 0.6957337856292725, |
|
"rewards/rejected": -2.1562066078186035, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.6653870761356366, |
|
"grad_norm": 42.71526690235821, |
|
"learning_rate": 1.859886201991465e-07, |
|
"logits/chosen": -2.753505229949951, |
|
"logits/rejected": -2.739760637283325, |
|
"logps/chosen": -132.32034301757812, |
|
"logps/rejected": -117.8974838256836, |
|
"loss": 0.6377, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3013321161270142, |
|
"rewards/margins": 0.5227604508399963, |
|
"rewards/rejected": -1.8240925073623657, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.6717850287907869, |
|
"grad_norm": 41.397749630534875, |
|
"learning_rate": 1.8243243243243243e-07, |
|
"logits/chosen": -2.703765869140625, |
|
"logits/rejected": -2.709289789199829, |
|
"logps/chosen": -113.61067199707031, |
|
"logps/rejected": -124.3532943725586, |
|
"loss": 0.6313, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3022973537445068, |
|
"rewards/margins": 0.5214705467224121, |
|
"rewards/rejected": -1.823767900466919, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6781829814459372, |
|
"grad_norm": 47.2302545472036, |
|
"learning_rate": 1.7887624466571835e-07, |
|
"logits/chosen": -2.7339816093444824, |
|
"logits/rejected": -2.7247262001037598, |
|
"logps/chosen": -113.62435150146484, |
|
"logps/rejected": -124.93162536621094, |
|
"loss": 0.6671, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.3559902906417847, |
|
"rewards/margins": 0.7308332920074463, |
|
"rewards/rejected": -2.0868237018585205, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.6845809341010877, |
|
"grad_norm": 45.97787540053498, |
|
"learning_rate": 1.7532005689900424e-07, |
|
"logits/chosen": -2.726437568664551, |
|
"logits/rejected": -2.7229714393615723, |
|
"logps/chosen": -121.73079681396484, |
|
"logps/rejected": -122.3262939453125, |
|
"loss": 0.6183, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.3220767974853516, |
|
"rewards/margins": 0.7562096118927002, |
|
"rewards/rejected": -2.0782861709594727, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.690978886756238, |
|
"grad_norm": 36.28143478337335, |
|
"learning_rate": 1.717638691322902e-07, |
|
"logits/chosen": -2.753014326095581, |
|
"logits/rejected": -2.7368969917297363, |
|
"logps/chosen": -130.1658172607422, |
|
"logps/rejected": -130.8894805908203, |
|
"loss": 0.6223, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.1469093561172485, |
|
"rewards/margins": 0.7361122369766235, |
|
"rewards/rejected": -1.883021593093872, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.6973768394113884, |
|
"grad_norm": 43.630489091326666, |
|
"learning_rate": 1.6820768136557609e-07, |
|
"logits/chosen": -2.729057788848877, |
|
"logits/rejected": -2.717261791229248, |
|
"logps/chosen": -125.09024810791016, |
|
"logps/rejected": -125.5413589477539, |
|
"loss": 0.6573, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.042244791984558, |
|
"rewards/margins": 0.36305585503578186, |
|
"rewards/rejected": -1.4053006172180176, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7037747920665387, |
|
"grad_norm": 48.19222828508351, |
|
"learning_rate": 1.64651493598862e-07, |
|
"logits/chosen": -2.7365171909332275, |
|
"logits/rejected": -2.712463140487671, |
|
"logps/chosen": -125.68775939941406, |
|
"logps/rejected": -114.6905746459961, |
|
"loss": 0.5656, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0582258701324463, |
|
"rewards/margins": 0.8331489562988281, |
|
"rewards/rejected": -1.891374945640564, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.710172744721689, |
|
"grad_norm": 45.80775187080158, |
|
"learning_rate": 1.6109530583214793e-07, |
|
"logits/chosen": -2.7182765007019043, |
|
"logits/rejected": -2.712038040161133, |
|
"logps/chosen": -118.6636962890625, |
|
"logps/rejected": -127.0698471069336, |
|
"loss": 0.5943, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.082397222518921, |
|
"rewards/margins": 1.1772416830062866, |
|
"rewards/rejected": -2.259639263153076, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.7165706973768394, |
|
"grad_norm": 49.2718796270582, |
|
"learning_rate": 1.5753911806543385e-07, |
|
"logits/chosen": -2.7429823875427246, |
|
"logits/rejected": -2.735952854156494, |
|
"logps/chosen": -139.99502563476562, |
|
"logps/rejected": -133.4441680908203, |
|
"loss": 0.6082, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9839005470275879, |
|
"rewards/margins": 0.8153827786445618, |
|
"rewards/rejected": -1.7992833852767944, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.7229686500319897, |
|
"grad_norm": 53.63575414905778, |
|
"learning_rate": 1.5398293029871974e-07, |
|
"logits/chosen": -2.6896634101867676, |
|
"logits/rejected": -2.6739554405212402, |
|
"logps/chosen": -132.13693237304688, |
|
"logps/rejected": -115.6174087524414, |
|
"loss": 0.615, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.495951771736145, |
|
"rewards/margins": 0.6560246348381042, |
|
"rewards/rejected": -2.1519765853881836, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.7293666026871402, |
|
"grad_norm": 45.459330351613914, |
|
"learning_rate": 1.504267425320057e-07, |
|
"logits/chosen": -2.747023820877075, |
|
"logits/rejected": -2.7279767990112305, |
|
"logps/chosen": -131.20159912109375, |
|
"logps/rejected": -118.8502426147461, |
|
"loss": 0.6432, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0559256076812744, |
|
"rewards/margins": 0.745880126953125, |
|
"rewards/rejected": -1.801805853843689, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.7357645553422905, |
|
"grad_norm": 50.69242455839191, |
|
"learning_rate": 1.4687055476529158e-07, |
|
"logits/chosen": -2.724719524383545, |
|
"logits/rejected": -2.696193218231201, |
|
"logps/chosen": -132.23805236816406, |
|
"logps/rejected": -116.33622741699219, |
|
"loss": 0.5897, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.1425752639770508, |
|
"rewards/margins": 1.2291043996810913, |
|
"rewards/rejected": -2.3716797828674316, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.7421625079974408, |
|
"grad_norm": 48.260915633915204, |
|
"learning_rate": 1.4331436699857753e-07, |
|
"logits/chosen": -2.7609190940856934, |
|
"logits/rejected": -2.751678466796875, |
|
"logps/chosen": -132.62606811523438, |
|
"logps/rejected": -131.4741668701172, |
|
"loss": 0.6021, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3398916721343994, |
|
"rewards/margins": 0.5659030079841614, |
|
"rewards/rejected": -1.9057947397232056, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.7485604606525912, |
|
"grad_norm": 51.70354571255556, |
|
"learning_rate": 1.3975817923186345e-07, |
|
"logits/chosen": -2.748525619506836, |
|
"logits/rejected": -2.731635570526123, |
|
"logps/chosen": -148.0665740966797, |
|
"logps/rejected": -112.72142028808594, |
|
"loss": 0.6329, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.066410779953003, |
|
"rewards/margins": 0.9416675567626953, |
|
"rewards/rejected": -2.0080783367156982, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.7549584133077415, |
|
"grad_norm": 46.631060529374324, |
|
"learning_rate": 1.3620199146514935e-07, |
|
"logits/chosen": -2.715355396270752, |
|
"logits/rejected": -2.7042181491851807, |
|
"logps/chosen": -122.81558990478516, |
|
"logps/rejected": -123.2553482055664, |
|
"loss": 0.6433, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.463372826576233, |
|
"rewards/margins": 0.7959567308425903, |
|
"rewards/rejected": -2.2593295574188232, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.7613563659628919, |
|
"grad_norm": 48.30565774738731, |
|
"learning_rate": 1.326458036984353e-07, |
|
"logits/chosen": -2.703979253768921, |
|
"logits/rejected": -2.711151599884033, |
|
"logps/chosen": -123.97117614746094, |
|
"logps/rejected": -118.76310729980469, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5227792263031006, |
|
"rewards/margins": 0.5576712489128113, |
|
"rewards/rejected": -2.0804507732391357, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.7677543186180422, |
|
"grad_norm": 42.393577486160744, |
|
"learning_rate": 1.290896159317212e-07, |
|
"logits/chosen": -2.7326602935791016, |
|
"logits/rejected": -2.719825267791748, |
|
"logps/chosen": -139.4118194580078, |
|
"logps/rejected": -122.456787109375, |
|
"loss": 0.5937, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.2478922605514526, |
|
"rewards/margins": 0.7356584668159485, |
|
"rewards/rejected": -1.983550786972046, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7677543186180422, |
|
"eval_logits/chosen": -2.724303960800171, |
|
"eval_logits/rejected": -2.710904359817505, |
|
"eval_logps/chosen": -125.66478729248047, |
|
"eval_logps/rejected": -114.08828735351562, |
|
"eval_loss": 0.5853144526481628, |
|
"eval_rewards/accuracies": 0.6942675113677979, |
|
"eval_rewards/chosen": -1.4589799642562866, |
|
"eval_rewards/margins": 0.672407329082489, |
|
"eval_rewards/rejected": -2.131387233734131, |
|
"eval_runtime": 282.866, |
|
"eval_samples_per_second": 17.676, |
|
"eval_steps_per_second": 0.555, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7741522712731925, |
|
"grad_norm": 44.50545746661839, |
|
"learning_rate": 1.255334281650071e-07, |
|
"logits/chosen": -2.72662615776062, |
|
"logits/rejected": -2.701281785964966, |
|
"logps/chosen": -126.24546813964844, |
|
"logps/rejected": -121.22220611572266, |
|
"loss": 0.6014, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2705129384994507, |
|
"rewards/margins": 0.8755936622619629, |
|
"rewards/rejected": -2.146106719970703, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.780550223928343, |
|
"grad_norm": 40.447524019572015, |
|
"learning_rate": 1.2197724039829303e-07, |
|
"logits/chosen": -2.7539114952087402, |
|
"logits/rejected": -2.744138717651367, |
|
"logps/chosen": -126.99958801269531, |
|
"logps/rejected": -123.30317687988281, |
|
"loss": 0.7122, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.4938628673553467, |
|
"rewards/margins": 0.9415397644042969, |
|
"rewards/rejected": -2.4354023933410645, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.7869481765834933, |
|
"grad_norm": 52.15736150053683, |
|
"learning_rate": 1.1842105263157894e-07, |
|
"logits/chosen": -2.709707736968994, |
|
"logits/rejected": -2.7100932598114014, |
|
"logps/chosen": -107.58488464355469, |
|
"logps/rejected": -112.5229263305664, |
|
"loss": 0.6585, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.4826072454452515, |
|
"rewards/margins": 0.853145956993103, |
|
"rewards/rejected": -2.3357534408569336, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.7933461292386437, |
|
"grad_norm": 50.42046207889911, |
|
"learning_rate": 1.1486486486486487e-07, |
|
"logits/chosen": -2.7156357765197754, |
|
"logits/rejected": -2.7060704231262207, |
|
"logps/chosen": -114.21882629394531, |
|
"logps/rejected": -110.34465026855469, |
|
"loss": 0.5955, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1574304103851318, |
|
"rewards/margins": 0.7427719831466675, |
|
"rewards/rejected": -1.9002023935317993, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.799744081893794, |
|
"grad_norm": 42.01442320837617, |
|
"learning_rate": 1.1130867709815078e-07, |
|
"logits/chosen": -2.7430567741394043, |
|
"logits/rejected": -2.7422475814819336, |
|
"logps/chosen": -128.494140625, |
|
"logps/rejected": -127.20819091796875, |
|
"loss": 0.6143, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1710187196731567, |
|
"rewards/margins": 0.7917193174362183, |
|
"rewards/rejected": -1.962738037109375, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.8061420345489443, |
|
"grad_norm": 49.68905015741029, |
|
"learning_rate": 1.077524893314367e-07, |
|
"logits/chosen": -2.7214901447296143, |
|
"logits/rejected": -2.7370707988739014, |
|
"logps/chosen": -114.65711975097656, |
|
"logps/rejected": -123.6860580444336, |
|
"loss": 0.5802, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.3035097122192383, |
|
"rewards/margins": 0.888281524181366, |
|
"rewards/rejected": -2.19179105758667, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.8125399872040947, |
|
"grad_norm": 53.09725997704241, |
|
"learning_rate": 1.0419630156472262e-07, |
|
"logits/chosen": -2.741344928741455, |
|
"logits/rejected": -2.726548671722412, |
|
"logps/chosen": -141.3468017578125, |
|
"logps/rejected": -131.9232940673828, |
|
"loss": 0.6122, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1199138164520264, |
|
"rewards/margins": 1.1132014989852905, |
|
"rewards/rejected": -2.2331154346466064, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.818937939859245, |
|
"grad_norm": 40.62862249158643, |
|
"learning_rate": 1.0064011379800854e-07, |
|
"logits/chosen": -2.7529187202453613, |
|
"logits/rejected": -2.7343528270721436, |
|
"logps/chosen": -133.5608367919922, |
|
"logps/rejected": -124.2785415649414, |
|
"loss": 0.5805, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.192963719367981, |
|
"rewards/margins": 0.6357627511024475, |
|
"rewards/rejected": -1.8287265300750732, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.8253358925143954, |
|
"grad_norm": 41.682064792926646, |
|
"learning_rate": 9.708392603129445e-08, |
|
"logits/chosen": -2.738985538482666, |
|
"logits/rejected": -2.735153913497925, |
|
"logps/chosen": -128.6671600341797, |
|
"logps/rejected": -118.43110656738281, |
|
"loss": 0.5913, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1868826150894165, |
|
"rewards/margins": 0.8519124984741211, |
|
"rewards/rejected": -2.038794994354248, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.8317338451695457, |
|
"grad_norm": 39.13726550603149, |
|
"learning_rate": 9.352773826458037e-08, |
|
"logits/chosen": -2.7326302528381348, |
|
"logits/rejected": -2.7126924991607666, |
|
"logps/chosen": -116.5313949584961, |
|
"logps/rejected": -110.55419921875, |
|
"loss": 0.6194, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4761756658554077, |
|
"rewards/margins": 0.7760677933692932, |
|
"rewards/rejected": -2.2522435188293457, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.838131797824696, |
|
"grad_norm": 45.15762786397904, |
|
"learning_rate": 8.997155049786629e-08, |
|
"logits/chosen": -2.722672462463379, |
|
"logits/rejected": -2.709507703781128, |
|
"logps/chosen": -118.71284484863281, |
|
"logps/rejected": -112.42694091796875, |
|
"loss": 0.5624, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.477581262588501, |
|
"rewards/margins": 0.778394341468811, |
|
"rewards/rejected": -2.2559754848480225, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.8445297504798465, |
|
"grad_norm": 53.01149491791956, |
|
"learning_rate": 8.64153627311522e-08, |
|
"logits/chosen": -2.7539539337158203, |
|
"logits/rejected": -2.742306709289551, |
|
"logps/chosen": -140.6981201171875, |
|
"logps/rejected": -123.42921447753906, |
|
"loss": 0.6231, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.2546924352645874, |
|
"rewards/margins": 0.8097953796386719, |
|
"rewards/rejected": -2.064487934112549, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.8509277031349968, |
|
"grad_norm": 50.72776697356608, |
|
"learning_rate": 8.285917496443812e-08, |
|
"logits/chosen": -2.7258553504943848, |
|
"logits/rejected": -2.7142205238342285, |
|
"logps/chosen": -112.5505142211914, |
|
"logps/rejected": -115.3934097290039, |
|
"loss": 0.5864, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.3138844966888428, |
|
"rewards/margins": 0.8471376299858093, |
|
"rewards/rejected": -2.1610217094421387, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.8573256557901472, |
|
"grad_norm": 42.82498871290927, |
|
"learning_rate": 7.930298719772404e-08, |
|
"logits/chosen": -2.7396187782287598, |
|
"logits/rejected": -2.723381757736206, |
|
"logps/chosen": -125.97160339355469, |
|
"logps/rejected": -136.47666931152344, |
|
"loss": 0.6425, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.326712727546692, |
|
"rewards/margins": 0.7852845788002014, |
|
"rewards/rejected": -2.111997365951538, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.8637236084452975, |
|
"grad_norm": 45.714252062937796, |
|
"learning_rate": 7.574679943100994e-08, |
|
"logits/chosen": -2.7322983741760254, |
|
"logits/rejected": -2.7127528190612793, |
|
"logps/chosen": -136.2902374267578, |
|
"logps/rejected": -122.64701843261719, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2270106077194214, |
|
"rewards/margins": 0.9102425575256348, |
|
"rewards/rejected": -2.1372532844543457, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.8701215611004478, |
|
"grad_norm": 38.60709881607122, |
|
"learning_rate": 7.219061166429587e-08, |
|
"logits/chosen": -2.743222713470459, |
|
"logits/rejected": -2.7290613651275635, |
|
"logps/chosen": -124.32318115234375, |
|
"logps/rejected": -121.3166732788086, |
|
"loss": 0.6216, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4048868417739868, |
|
"rewards/margins": 0.6789718866348267, |
|
"rewards/rejected": -2.0838589668273926, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.8765195137555982, |
|
"grad_norm": 51.70983027706398, |
|
"learning_rate": 6.863442389758179e-08, |
|
"logits/chosen": -2.730867862701416, |
|
"logits/rejected": -2.7255940437316895, |
|
"logps/chosen": -134.34658813476562, |
|
"logps/rejected": -125.96586608886719, |
|
"loss": 0.6424, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6426973342895508, |
|
"rewards/margins": 0.5250149965286255, |
|
"rewards/rejected": -2.167712450027466, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.8829174664107485, |
|
"grad_norm": 37.61931616704097, |
|
"learning_rate": 6.507823613086771e-08, |
|
"logits/chosen": -2.7112784385681152, |
|
"logits/rejected": -2.694859027862549, |
|
"logps/chosen": -123.34944915771484, |
|
"logps/rejected": -104.432861328125, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2228862047195435, |
|
"rewards/margins": 0.8305751085281372, |
|
"rewards/rejected": -2.0534613132476807, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.889315419065899, |
|
"grad_norm": 54.48021270096627, |
|
"learning_rate": 6.152204836415363e-08, |
|
"logits/chosen": -2.7345829010009766, |
|
"logits/rejected": -2.72003436088562, |
|
"logps/chosen": -136.1989288330078, |
|
"logps/rejected": -122.9295654296875, |
|
"loss": 0.562, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.347959280014038, |
|
"rewards/margins": 1.0466934442520142, |
|
"rewards/rejected": -2.3946526050567627, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.8957133717210493, |
|
"grad_norm": 54.21936478355526, |
|
"learning_rate": 5.796586059743954e-08, |
|
"logits/chosen": -2.7425692081451416, |
|
"logits/rejected": -2.720557928085327, |
|
"logps/chosen": -122.23038482666016, |
|
"logps/rejected": -130.52120971679688, |
|
"loss": 0.6276, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1752973794937134, |
|
"rewards/margins": 1.030903935432434, |
|
"rewards/rejected": -2.2062013149261475, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8957133717210493, |
|
"eval_logits/chosen": -2.724804639816284, |
|
"eval_logits/rejected": -2.712906837463379, |
|
"eval_logps/chosen": -125.66414642333984, |
|
"eval_logps/rejected": -114.13500213623047, |
|
"eval_loss": 0.584474503993988, |
|
"eval_rewards/accuracies": 0.699840784072876, |
|
"eval_rewards/chosen": -1.4589147567749023, |
|
"eval_rewards/margins": 0.6771440505981445, |
|
"eval_rewards/rejected": -2.136058807373047, |
|
"eval_runtime": 279.8917, |
|
"eval_samples_per_second": 17.864, |
|
"eval_steps_per_second": 0.561, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9021113243761996, |
|
"grad_norm": 55.42656529498578, |
|
"learning_rate": 5.4409672830725456e-08, |
|
"logits/chosen": -2.757150888442993, |
|
"logits/rejected": -2.7437245845794678, |
|
"logps/chosen": -148.27255249023438, |
|
"logps/rejected": -129.71078491210938, |
|
"loss": 0.5897, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3937550783157349, |
|
"rewards/margins": 1.0089712142944336, |
|
"rewards/rejected": -2.402726173400879, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.90850927703135, |
|
"grad_norm": 44.831480768616196, |
|
"learning_rate": 5.0853485064011376e-08, |
|
"logits/chosen": -2.7586827278137207, |
|
"logits/rejected": -2.747013568878174, |
|
"logps/chosen": -141.61642456054688, |
|
"logps/rejected": -140.4078826904297, |
|
"loss": 0.5571, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5571249723434448, |
|
"rewards/margins": 0.6771610975265503, |
|
"rewards/rejected": -2.234286069869995, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.9149072296865003, |
|
"grad_norm": 45.6706384991516, |
|
"learning_rate": 4.72972972972973e-08, |
|
"logits/chosen": -2.7541661262512207, |
|
"logits/rejected": -2.741849899291992, |
|
"logps/chosen": -131.68832397460938, |
|
"logps/rejected": -112.1330795288086, |
|
"loss": 0.5596, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.3573007583618164, |
|
"rewards/margins": 0.9988776445388794, |
|
"rewards/rejected": -2.356178045272827, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.9213051823416507, |
|
"grad_norm": 32.91552794038842, |
|
"learning_rate": 4.374110953058322e-08, |
|
"logits/chosen": -2.7465932369232178, |
|
"logits/rejected": -2.7333266735076904, |
|
"logps/chosen": -130.47242736816406, |
|
"logps/rejected": -122.72697448730469, |
|
"loss": 0.5869, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6356592178344727, |
|
"rewards/margins": 1.0344393253326416, |
|
"rewards/rejected": -2.6700987815856934, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.927703134996801, |
|
"grad_norm": 46.88062796504381, |
|
"learning_rate": 4.018492176386913e-08, |
|
"logits/chosen": -2.76434326171875, |
|
"logits/rejected": -2.746072769165039, |
|
"logps/chosen": -130.2028350830078, |
|
"logps/rejected": -122.24436950683594, |
|
"loss": 0.6267, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.112995982170105, |
|
"rewards/margins": 1.0192458629608154, |
|
"rewards/rejected": -2.132241725921631, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.9341010876519513, |
|
"grad_norm": 52.44065789666525, |
|
"learning_rate": 3.6628733997155046e-08, |
|
"logits/chosen": -2.751476287841797, |
|
"logits/rejected": -2.7347888946533203, |
|
"logps/chosen": -133.9870147705078, |
|
"logps/rejected": -123.64642333984375, |
|
"loss": 0.669, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.618487000465393, |
|
"rewards/margins": 0.8043287992477417, |
|
"rewards/rejected": -2.4228157997131348, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.9404990403071017, |
|
"grad_norm": 46.529272074784444, |
|
"learning_rate": 3.3072546230440967e-08, |
|
"logits/chosen": -2.7366256713867188, |
|
"logits/rejected": -2.7187893390655518, |
|
"logps/chosen": -123.87571716308594, |
|
"logps/rejected": -120.4697265625, |
|
"loss": 0.5889, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5764787197113037, |
|
"rewards/margins": 0.6577258110046387, |
|
"rewards/rejected": -2.2342045307159424, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.946896992962252, |
|
"grad_norm": 43.88442923670593, |
|
"learning_rate": 2.9516358463726884e-08, |
|
"logits/chosen": -2.7498910427093506, |
|
"logits/rejected": -2.748452663421631, |
|
"logps/chosen": -133.49234008789062, |
|
"logps/rejected": -140.14096069335938, |
|
"loss": 0.6418, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.693602204322815, |
|
"rewards/margins": 0.790652871131897, |
|
"rewards/rejected": -2.484255075454712, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.9532949456174025, |
|
"grad_norm": 44.15596171124334, |
|
"learning_rate": 2.59601706970128e-08, |
|
"logits/chosen": -2.7611732482910156, |
|
"logits/rejected": -2.7567832469940186, |
|
"logps/chosen": -127.27906799316406, |
|
"logps/rejected": -123.48602294921875, |
|
"loss": 0.6792, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2605165243148804, |
|
"rewards/margins": 0.9137457013130188, |
|
"rewards/rejected": -2.174262285232544, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"grad_norm": 54.18735827602704, |
|
"learning_rate": 2.240398293029872e-08, |
|
"logits/chosen": -2.7405667304992676, |
|
"logits/rejected": -2.7390694618225098, |
|
"logps/chosen": -130.6249237060547, |
|
"logps/rejected": -117.32889556884766, |
|
"loss": 0.6408, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.4110792875289917, |
|
"rewards/margins": 0.7672127485275269, |
|
"rewards/rejected": -2.1782920360565186, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9660908509277031, |
|
"grad_norm": 36.9566011502463, |
|
"learning_rate": 1.8847795163584636e-08, |
|
"logits/chosen": -2.7048280239105225, |
|
"logits/rejected": -2.7038590908050537, |
|
"logps/chosen": -109.9410400390625, |
|
"logps/rejected": -115.59232330322266, |
|
"loss": 0.5897, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5398355722427368, |
|
"rewards/margins": 0.9320799112319946, |
|
"rewards/rejected": -2.4719154834747314, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.9724888035828535, |
|
"grad_norm": 34.807994778606435, |
|
"learning_rate": 1.5291607396870554e-08, |
|
"logits/chosen": -2.7435240745544434, |
|
"logits/rejected": -2.730076551437378, |
|
"logps/chosen": -131.5994873046875, |
|
"logps/rejected": -111.15098571777344, |
|
"loss": 0.5825, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3073338270187378, |
|
"rewards/margins": 0.9064651727676392, |
|
"rewards/rejected": -2.213798999786377, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.9788867562380038, |
|
"grad_norm": 54.31697188600043, |
|
"learning_rate": 1.1735419630156473e-08, |
|
"logits/chosen": -2.7345783710479736, |
|
"logits/rejected": -2.7173001766204834, |
|
"logps/chosen": -122.76127624511719, |
|
"logps/rejected": -112.13035583496094, |
|
"loss": 0.6551, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.4825352430343628, |
|
"rewards/margins": 0.7752220034599304, |
|
"rewards/rejected": -2.2577571868896484, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.9852847088931542, |
|
"grad_norm": 40.57564149859545, |
|
"learning_rate": 8.179231863442388e-09, |
|
"logits/chosen": -2.741443157196045, |
|
"logits/rejected": -2.7301018238067627, |
|
"logps/chosen": -129.3629608154297, |
|
"logps/rejected": -118.81050109863281, |
|
"loss": 0.5773, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.336868405342102, |
|
"rewards/margins": 0.9008957743644714, |
|
"rewards/rejected": -2.237764358520508, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.9916826615483045, |
|
"grad_norm": 52.957380548005816, |
|
"learning_rate": 4.623044096728307e-09, |
|
"logits/chosen": -2.7427573204040527, |
|
"logits/rejected": -2.7327628135681152, |
|
"logps/chosen": -130.18255615234375, |
|
"logps/rejected": -125.54536437988281, |
|
"loss": 0.6057, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.218837857246399, |
|
"rewards/margins": 0.9455093145370483, |
|
"rewards/rejected": -2.1643471717834473, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.9980806142034548, |
|
"grad_norm": 45.45734607577736, |
|
"learning_rate": 1.0668563300142248e-09, |
|
"logits/chosen": -2.7248375415802, |
|
"logits/rejected": -2.705608367919922, |
|
"logps/chosen": -140.51812744140625, |
|
"logps/rejected": -118.7292251586914, |
|
"loss": 0.6287, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6361656188964844, |
|
"rewards/margins": 0.8488451838493347, |
|
"rewards/rejected": -2.4850106239318848, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1563, |
|
"total_flos": 0.0, |
|
"train_loss": 0.424483765719872, |
|
"train_runtime": 7188.1877, |
|
"train_samples_per_second": 6.956, |
|
"train_steps_per_second": 0.217 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1563, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |