diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7213 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9998741980123286, + "eval_steps": 100, + "global_step": 3974, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "grad_norm": 0.3828125, + "learning_rate": 5.025125628140703e-09, + "logits/chosen": 0.2628047466278076, + "logits/rejected": 0.7914568185806274, + "logps/chosen": -183.46725463867188, + "logps/rejected": -164.62379455566406, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/margins_max": 0.0, + "rewards/margins_min": 0.0, + "rewards/margins_std": 0.0, + "rewards/rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.0, + "grad_norm": 0.341796875, + "learning_rate": 5.0251256281407036e-08, + "logits/chosen": 0.22027336061000824, + "logits/rejected": 0.3840646743774414, + "logps/chosen": -209.14871215820312, + "logps/rejected": -223.64410400390625, + "loss": 0.6933, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0007058419287204742, + "rewards/margins": 0.00020709568343590945, + "rewards/margins_max": 0.002087921602651477, + "rewards/margins_min": -0.0016737302066758275, + "rewards/margins_std": 0.0026598896365612745, + "rewards/rejected": -0.0009129376267082989, + "step": 10 + }, + { + "epoch": 0.01, + "grad_norm": 0.380859375, + "learning_rate": 1.0050251256281407e-07, + "logits/chosen": 0.1058058962225914, + "logits/rejected": 0.4912484288215637, + "logps/chosen": -212.02420043945312, + "logps/rejected": -206.0525360107422, + "loss": 0.6929, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.00032332129194401205, + "rewards/margins": 0.0009101700270548463, + "rewards/margins_max": 0.003948894329369068, + "rewards/margins_min": -0.002128554042428732, + "rewards/margins_std": 0.004297405481338501, + "rewards/rejected": -0.0012334914645180106, + "step": 20 + }, + { + "epoch": 0.01, + "grad_norm": 0.458984375, + "learning_rate": 1.507537688442211e-07, + "logits/chosen": 0.18870362639427185, + "logits/rejected": 0.577911376953125, + "logps/chosen": -234.39236450195312, + "logps/rejected": -218.83242797851562, + "loss": 0.6928, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.00037711235927417874, + "rewards/margins": 0.0012008370831608772, + "rewards/margins_max": 0.003616205183789134, + "rewards/margins_min": -0.0012145310174673796, + "rewards/margins_std": 0.0034158460330218077, + "rewards/rejected": -0.0015779495006427169, + "step": 30 + }, + { + "epoch": 0.01, + "grad_norm": 0.40234375, + "learning_rate": 2.0100502512562815e-07, + "logits/chosen": 0.06429781764745712, + "logits/rejected": 0.31291159987449646, + "logps/chosen": -229.8105926513672, + "logps/rejected": -213.0727996826172, + "loss": 0.6928, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.0004698133561760187, + "rewards/margins": 0.0012018651468679309, + "rewards/margins_max": 0.004088181536644697, + "rewards/margins_min": -0.00168445089366287, + "rewards/margins_std": 0.004081867169588804, + "rewards/rejected": -0.0016716786194592714, + "step": 40 + }, + { + "epoch": 0.01, + "grad_norm": 0.38671875, + "learning_rate": 2.5125628140703517e-07, + "logits/chosen": 0.2478822022676468, + "logits/rejected": 0.3307963013648987, + "logps/chosen": -208.3394317626953, + "logps/rejected": -244.5113067626953, + "loss": 0.6924, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": 0.0009044799953699112, + "rewards/margins": 0.0017498359084129333, + "rewards/margins_max": 0.003947221674025059, + "rewards/margins_min": -0.00044754979899153113, + "rewards/margins_std": 0.003107572440057993, + "rewards/rejected": -0.0008453559130430222, + "step": 50 + }, + { + "epoch": 0.02, + "grad_norm": 0.435546875, + "learning_rate": 3.015075376884422e-07, + "logits/chosen": 0.17191682755947113, + "logits/rejected": 0.508013129234314, + "logps/chosen": -227.90115356445312, + "logps/rejected": -224.430908203125, + "loss": 0.6922, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.00039130254299379885, + "rewards/margins": 0.0019925818778574467, + "rewards/margins_max": 0.0045981681905686855, + "rewards/margins_min": -0.0006130046676844358, + "rewards/margins_std": 0.0036848559975624084, + "rewards/rejected": -0.0023838842753320932, + "step": 60 + }, + { + "epoch": 0.02, + "grad_norm": 0.42578125, + "learning_rate": 3.5175879396984927e-07, + "logits/chosen": 0.17003652453422546, + "logits/rejected": 0.3985624313354492, + "logps/chosen": -211.16152954101562, + "logps/rejected": -210.9799041748047, + "loss": 0.6922, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.00022076326422393322, + "rewards/margins": 0.0016923131188377738, + "rewards/margins_max": 0.004520035348832607, + "rewards/margins_min": -0.0011354093439877033, + "rewards/margins_std": 0.003999003209173679, + "rewards/rejected": -0.0019130764994770288, + "step": 70 + }, + { + "epoch": 0.02, + "grad_norm": 0.427734375, + "learning_rate": 4.020100502512563e-07, + "logits/chosen": 0.1190398707985878, + "logits/rejected": 0.36623337864875793, + "logps/chosen": -212.3631591796875, + "logps/rejected": -220.9187469482422, + "loss": 0.6916, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.00043441675370559096, + "rewards/margins": 0.0035046630073338747, + "rewards/margins_max": 0.00651139859110117, + "rewards/margins_min": 0.0004979277146048844, + "rewards/margins_std": 0.004252166021615267, + "rewards/rejected": -0.003070246195420623, + "step": 80 + }, + { + "epoch": 0.02, + "grad_norm": 0.361328125, + "learning_rate": 4.522613065326633e-07, + "logits/chosen": 0.06567513197660446, + "logits/rejected": 0.43274015188217163, + "logps/chosen": -222.13961791992188, + "logps/rejected": -201.4839630126953, + "loss": 0.6913, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -2.6206960683339275e-05, + "rewards/margins": 0.0037318530958145857, + "rewards/margins_max": 0.00678494805470109, + "rewards/margins_min": 0.0006787586025893688, + "rewards/margins_std": 0.004317727871239185, + "rewards/rejected": -0.0037580605130642653, + "step": 90 + }, + { + "epoch": 0.03, + "grad_norm": 0.384765625, + "learning_rate": 5.025125628140703e-07, + "logits/chosen": 0.1317283809185028, + "logits/rejected": 0.39888468384742737, + "logps/chosen": -195.3096923828125, + "logps/rejected": -211.8949432373047, + "loss": 0.6907, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.0017639435827732086, + "rewards/margins": 0.00431477464735508, + "rewards/margins_max": 0.008146543055772781, + "rewards/margins_min": 0.00048300548223778605, + "rewards/margins_std": 0.005418939981609583, + "rewards/rejected": -0.0025508308317512274, + "step": 100 + }, + { + "epoch": 0.03, + "grad_norm": 0.400390625, + "learning_rate": 5.527638190954773e-07, + "logits/chosen": 0.10737421363592148, + "logits/rejected": 0.32433614134788513, + "logps/chosen": -205.3096160888672, + "logps/rejected": -220.96994018554688, + "loss": 0.6898, + "rewards/accuracies": 0.862500011920929, + "rewards/chosen": 0.0030672824941575527, + "rewards/margins": 0.007319621741771698, + "rewards/margins_max": 0.01079073641449213, + "rewards/margins_min": 0.0038485073018819094, + "rewards/margins_std": 0.004908897448331118, + "rewards/rejected": -0.004252338781952858, + "step": 110 + }, + { + "epoch": 0.03, + "grad_norm": 0.400390625, + "learning_rate": 6.030150753768844e-07, + "logits/chosen": 0.15490484237670898, + "logits/rejected": 0.6465431451797485, + "logps/chosen": -217.82894897460938, + "logps/rejected": -197.4770050048828, + "loss": 0.6896, + "rewards/accuracies": 0.887499988079071, + "rewards/chosen": 0.0026336044538766146, + "rewards/margins": 0.007411560509353876, + "rewards/margins_max": 0.011540110222995281, + "rewards/margins_min": 0.0032830112613737583, + "rewards/margins_std": 0.005838650278747082, + "rewards/rejected": -0.004777955822646618, + "step": 120 + }, + { + "epoch": 0.03, + "grad_norm": 0.412109375, + "learning_rate": 6.532663316582915e-07, + "logits/chosen": 0.05787094682455063, + "logits/rejected": 0.5067285299301147, + "logps/chosen": -230.8343963623047, + "logps/rejected": -220.9256591796875, + "loss": 0.6881, + "rewards/accuracies": 0.8374999761581421, + "rewards/chosen": 0.002234609331935644, + "rewards/margins": 0.008958352729678154, + "rewards/margins_max": 0.014376277104020119, + "rewards/margins_min": 0.003540429752320051, + "rewards/margins_std": 0.007662100251764059, + "rewards/rejected": -0.006723743863403797, + "step": 130 + }, + { + "epoch": 0.04, + "grad_norm": 0.330078125, + "learning_rate": 7.035175879396985e-07, + "logits/chosen": 0.13236010074615479, + "logits/rejected": 0.47717732191085815, + "logps/chosen": -219.61264038085938, + "logps/rejected": -228.51260375976562, + "loss": 0.6868, + "rewards/accuracies": 0.887499988079071, + "rewards/chosen": 0.005320298485457897, + "rewards/margins": 0.013128049671649933, + "rewards/margins_max": 0.020256798714399338, + "rewards/margins_min": 0.005999299697577953, + "rewards/margins_std": 0.010081576183438301, + "rewards/rejected": -0.007807752583175898, + "step": 140 + }, + { + "epoch": 0.04, + "grad_norm": 0.361328125, + "learning_rate": 7.537688442211055e-07, + "logits/chosen": 0.21956713497638702, + "logits/rejected": 0.5885453820228577, + "logps/chosen": -224.57754516601562, + "logps/rejected": -218.06106567382812, + "loss": 0.6853, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.007667540106922388, + "rewards/margins": 0.01637618988752365, + "rewards/margins_max": 0.022643666714429855, + "rewards/margins_min": 0.010108711197972298, + "rewards/margins_std": 0.008863553404808044, + "rewards/rejected": -0.008708649314939976, + "step": 150 + }, + { + "epoch": 0.04, + "grad_norm": 0.37890625, + "learning_rate": 8.040201005025126e-07, + "logits/chosen": 0.00294627551920712, + "logits/rejected": 0.3304385542869568, + "logps/chosen": -224.15292358398438, + "logps/rejected": -223.5465087890625, + "loss": 0.6845, + "rewards/accuracies": 0.9375, + "rewards/chosen": 0.00907582975924015, + "rewards/margins": 0.018309107050299644, + "rewards/margins_max": 0.025850627571344376, + "rewards/margins_min": 0.010767589323222637, + "rewards/margins_std": 0.010665318928658962, + "rewards/rejected": -0.009233278222382069, + "step": 160 + }, + { + "epoch": 0.04, + "grad_norm": 0.439453125, + "learning_rate": 8.542713567839196e-07, + "logits/chosen": 0.1823168247938156, + "logits/rejected": 0.43500009179115295, + "logps/chosen": -210.53060913085938, + "logps/rejected": -216.46182250976562, + "loss": 0.6835, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.009239943698048592, + "rewards/margins": 0.01894932985305786, + "rewards/margins_max": 0.027668584138154984, + "rewards/margins_min": 0.01023007184267044, + "rewards/margins_std": 0.012330890633165836, + "rewards/rejected": -0.009709383361041546, + "step": 170 + }, + { + "epoch": 0.05, + "grad_norm": 0.353515625, + "learning_rate": 9.045226130653266e-07, + "logits/chosen": 0.12103636562824249, + "logits/rejected": 0.3777307868003845, + "logps/chosen": -195.93931579589844, + "logps/rejected": -200.99417114257812, + "loss": 0.6822, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.010400387458503246, + "rewards/margins": 0.02166915312409401, + "rewards/margins_max": 0.03126353397965431, + "rewards/margins_min": 0.012074774131178856, + "rewards/margins_std": 0.013568502850830555, + "rewards/rejected": -0.011268765665590763, + "step": 180 + }, + { + "epoch": 0.05, + "grad_norm": 0.35546875, + "learning_rate": 9.547738693467337e-07, + "logits/chosen": 0.07193199545145035, + "logits/rejected": 0.3750324845314026, + "logps/chosen": -228.74118041992188, + "logps/rejected": -230.8755340576172, + "loss": 0.6797, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.008367463946342468, + "rewards/margins": 0.028456291183829308, + "rewards/margins_max": 0.038965143263339996, + "rewards/margins_min": 0.017947440966963768, + "rewards/margins_std": 0.014861756935715675, + "rewards/rejected": -0.02008882723748684, + "step": 190 + }, + { + "epoch": 0.05, + "grad_norm": 0.390625, + "learning_rate": 1.0050251256281407e-06, + "logits/chosen": 0.02257654443383217, + "logits/rejected": 0.5656744241714478, + "logps/chosen": -222.1704559326172, + "logps/rejected": -204.72787475585938, + "loss": 0.6782, + "rewards/accuracies": 0.9375, + "rewards/chosen": 0.011104286648333073, + "rewards/margins": 0.031201040372252464, + "rewards/margins_max": 0.044583261013031006, + "rewards/margins_min": 0.017818817868828773, + "rewards/margins_std": 0.018925320357084274, + "rewards/rejected": -0.020096752792596817, + "step": 200 + }, + { + "epoch": 0.05, + "grad_norm": 0.388671875, + "learning_rate": 1.0552763819095476e-06, + "logits/chosen": 0.21097414195537567, + "logits/rejected": 0.4384271204471588, + "logps/chosen": -186.71658325195312, + "logps/rejected": -218.23806762695312, + "loss": 0.6771, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.011779891327023506, + "rewards/margins": 0.0341356061398983, + "rewards/margins_max": 0.04913010075688362, + "rewards/margins_min": 0.019141118973493576, + "rewards/margins_std": 0.021205410361289978, + "rewards/rejected": -0.022355718538165092, + "step": 210 + }, + { + "epoch": 0.06, + "grad_norm": 0.3984375, + "learning_rate": 1.1055276381909546e-06, + "logits/chosen": 0.12355975806713104, + "logits/rejected": 0.5098804235458374, + "logps/chosen": -224.91552734375, + "logps/rejected": -234.9082489013672, + "loss": 0.6752, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.015334665775299072, + "rewards/margins": 0.03818144276738167, + "rewards/margins_max": 0.05416835471987724, + "rewards/margins_min": 0.022194528952240944, + "rewards/margins_std": 0.022608909755945206, + "rewards/rejected": -0.022846775129437447, + "step": 220 + }, + { + "epoch": 0.06, + "grad_norm": 0.439453125, + "learning_rate": 1.1557788944723616e-06, + "logits/chosen": 0.19827620685100555, + "logits/rejected": 0.44844430685043335, + "logps/chosen": -176.3722381591797, + "logps/rejected": -183.7699432373047, + "loss": 0.6742, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011357043869793415, + "rewards/margins": 0.037941962480545044, + "rewards/margins_max": 0.053594231605529785, + "rewards/margins_min": 0.022289691492915154, + "rewards/margins_std": 0.022135648876428604, + "rewards/rejected": -0.026584917679429054, + "step": 230 + }, + { + "epoch": 0.06, + "grad_norm": 0.390625, + "learning_rate": 1.2060301507537688e-06, + "logits/chosen": 0.13188159465789795, + "logits/rejected": 0.5466545820236206, + "logps/chosen": -225.99484252929688, + "logps/rejected": -218.8096160888672, + "loss": 0.67, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.013194072060286999, + "rewards/margins": 0.044676605612039566, + "rewards/margins_max": 0.05990206450223923, + "rewards/margins_min": 0.02945113554596901, + "rewards/margins_std": 0.02153206057846546, + "rewards/rejected": -0.031482525169849396, + "step": 240 + }, + { + "epoch": 0.06, + "grad_norm": 0.404296875, + "learning_rate": 1.256281407035176e-06, + "logits/chosen": 0.14512896537780762, + "logits/rejected": 0.5733065605163574, + "logps/chosen": -217.5274658203125, + "logps/rejected": -214.5115203857422, + "loss": 0.6711, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.011558527126908302, + "rewards/margins": 0.047826338559389114, + "rewards/margins_max": 0.06687624752521515, + "rewards/margins_min": 0.02877642773091793, + "rewards/margins_std": 0.026940640062093735, + "rewards/rejected": -0.03626781329512596, + "step": 250 + }, + { + "epoch": 0.07, + "grad_norm": 0.3828125, + "learning_rate": 1.306532663316583e-06, + "logits/chosen": 0.1416536569595337, + "logits/rejected": 0.4681627154350281, + "logps/chosen": -217.2357940673828, + "logps/rejected": -215.43777465820312, + "loss": 0.6677, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.01843900792300701, + "rewards/margins": 0.058879125863313675, + "rewards/margins_max": 0.08180561661720276, + "rewards/margins_min": 0.03595263510942459, + "rewards/margins_std": 0.032422952353954315, + "rewards/rejected": -0.04044011980295181, + "step": 260 + }, + { + "epoch": 0.07, + "grad_norm": 0.421875, + "learning_rate": 1.3567839195979899e-06, + "logits/chosen": 0.22732439637184143, + "logits/rejected": 0.4276302456855774, + "logps/chosen": -198.55441284179688, + "logps/rejected": -224.4716796875, + "loss": 0.6628, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.017024602741003036, + "rewards/margins": 0.06379345059394836, + "rewards/margins_max": 0.09190671890974045, + "rewards/margins_min": 0.03568018227815628, + "rewards/margins_std": 0.039758164435625076, + "rewards/rejected": -0.04676884785294533, + "step": 270 + }, + { + "epoch": 0.07, + "grad_norm": 0.37109375, + "learning_rate": 1.407035175879397e-06, + "logits/chosen": 0.03006916679441929, + "logits/rejected": 0.2829376757144928, + "logps/chosen": -197.93682861328125, + "logps/rejected": -201.83853149414062, + "loss": 0.6595, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.018216144293546677, + "rewards/margins": 0.06814040243625641, + "rewards/margins_max": 0.09602198749780655, + "rewards/margins_min": 0.04025881737470627, + "rewards/margins_std": 0.03943051025271416, + "rewards/rejected": -0.049924250692129135, + "step": 280 + }, + { + "epoch": 0.07, + "grad_norm": 0.447265625, + "learning_rate": 1.457286432160804e-06, + "logits/chosen": 0.03733636066317558, + "logits/rejected": 0.49974188208580017, + "logps/chosen": -225.4219207763672, + "logps/rejected": -195.27247619628906, + "loss": 0.6537, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": 0.020579595118761063, + "rewards/margins": 0.0742858499288559, + "rewards/margins_max": 0.108786940574646, + "rewards/margins_min": 0.03978477045893669, + "rewards/margins_std": 0.04879189655184746, + "rewards/rejected": -0.05370625853538513, + "step": 290 + }, + { + "epoch": 0.08, + "grad_norm": 0.40234375, + "learning_rate": 1.507537688442211e-06, + "logits/chosen": 0.13124307990074158, + "logits/rejected": 0.43372398614883423, + "logps/chosen": -188.13446044921875, + "logps/rejected": -202.31063842773438, + "loss": 0.6521, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.02380152978003025, + "rewards/margins": 0.0889906957745552, + "rewards/margins_max": 0.11839659512042999, + "rewards/margins_min": 0.059584807604551315, + "rewards/margins_std": 0.041586220264434814, + "rewards/rejected": -0.0651891678571701, + "step": 300 + }, + { + "epoch": 0.08, + "grad_norm": 0.400390625, + "learning_rate": 1.5577889447236182e-06, + "logits/chosen": 0.16254135966300964, + "logits/rejected": 0.4572983682155609, + "logps/chosen": -217.7134552001953, + "logps/rejected": -235.12612915039062, + "loss": 0.6464, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.02339380793273449, + "rewards/margins": 0.09931546449661255, + "rewards/margins_max": 0.13573993742465973, + "rewards/margins_min": 0.06289096921682358, + "rewards/margins_std": 0.05151200294494629, + "rewards/rejected": -0.07592164725065231, + "step": 310 + }, + { + "epoch": 0.08, + "grad_norm": 0.4296875, + "learning_rate": 1.6080402010050252e-06, + "logits/chosen": 0.21073463559150696, + "logits/rejected": 0.5910454988479614, + "logps/chosen": -217.4332733154297, + "logps/rejected": -214.1886444091797, + "loss": 0.6436, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.02802230790257454, + "rewards/margins": 0.11192785203456879, + "rewards/margins_max": 0.1627815067768097, + "rewards/margins_min": 0.061074189841747284, + "rewards/margins_std": 0.07191795110702515, + "rewards/rejected": -0.08390556275844574, + "step": 320 + }, + { + "epoch": 0.08, + "grad_norm": 0.435546875, + "learning_rate": 1.6582914572864321e-06, + "logits/chosen": 0.14464020729064941, + "logits/rejected": 0.46793508529663086, + "logps/chosen": -230.2141876220703, + "logps/rejected": -231.2537078857422, + "loss": 0.6431, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.031488679349422455, + "rewards/margins": 0.1129346638917923, + "rewards/margins_max": 0.15848883986473083, + "rewards/margins_min": 0.06738051772117615, + "rewards/margins_std": 0.06442330777645111, + "rewards/rejected": -0.08144598454236984, + "step": 330 + }, + { + "epoch": 0.09, + "grad_norm": 0.431640625, + "learning_rate": 1.708542713567839e-06, + "logits/chosen": 0.3066442608833313, + "logits/rejected": 0.6389753818511963, + "logps/chosen": -194.7659912109375, + "logps/rejected": -201.19326782226562, + "loss": 0.64, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": 0.01703776977956295, + "rewards/margins": 0.09856927394866943, + "rewards/margins_max": 0.13916271924972534, + "rewards/margins_min": 0.057975828647613525, + "rewards/margins_std": 0.05740780755877495, + "rewards/rejected": -0.08153150975704193, + "step": 340 + }, + { + "epoch": 0.09, + "grad_norm": 0.431640625, + "learning_rate": 1.7587939698492463e-06, + "logits/chosen": 0.11799661815166473, + "logits/rejected": 0.49029532074928284, + "logps/chosen": -191.6995086669922, + "logps/rejected": -209.32369995117188, + "loss": 0.6303, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.018647244200110435, + "rewards/margins": 0.13601182401180267, + "rewards/margins_max": 0.19829824566841125, + "rewards/margins_min": 0.0737253949046135, + "rewards/margins_std": 0.08808630704879761, + "rewards/rejected": -0.11736458539962769, + "step": 350 + }, + { + "epoch": 0.09, + "grad_norm": 0.390625, + "learning_rate": 1.8090452261306533e-06, + "logits/chosen": 0.13119210302829742, + "logits/rejected": 0.2840971350669861, + "logps/chosen": -199.85696411132812, + "logps/rejected": -258.749755859375, + "loss": 0.6292, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.012986330315470695, + "rewards/margins": 0.14114083349704742, + "rewards/margins_max": 0.19774450361728668, + "rewards/margins_min": 0.08453711867332458, + "rewards/margins_std": 0.08004971593618393, + "rewards/rejected": -0.12815448641777039, + "step": 360 + }, + { + "epoch": 0.09, + "grad_norm": 0.40625, + "learning_rate": 1.8592964824120602e-06, + "logits/chosen": 0.2754780650138855, + "logits/rejected": 0.5169572830200195, + "logps/chosen": -207.1035919189453, + "logps/rejected": -230.28738403320312, + "loss": 0.6258, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 0.014916675165295601, + "rewards/margins": 0.16153986752033234, + "rewards/margins_max": 0.23447349667549133, + "rewards/margins_min": 0.08860625326633453, + "rewards/margins_std": 0.10314369201660156, + "rewards/rejected": -0.14662319421768188, + "step": 370 + }, + { + "epoch": 0.1, + "grad_norm": 0.498046875, + "learning_rate": 1.9095477386934674e-06, + "logits/chosen": 0.062197744846343994, + "logits/rejected": 0.3439430892467499, + "logps/chosen": -222.99612426757812, + "logps/rejected": -234.5655517578125, + "loss": 0.62, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.0016535300528630614, + "rewards/margins": 0.15255677700042725, + "rewards/margins_max": 0.23381371796131134, + "rewards/margins_min": 0.07129983603954315, + "rewards/margins_std": 0.1149146556854248, + "rewards/rejected": -0.15421029925346375, + "step": 380 + }, + { + "epoch": 0.1, + "grad_norm": 0.51171875, + "learning_rate": 1.959798994974874e-06, + "logits/chosen": 0.3776804804801941, + "logits/rejected": 0.6220484972000122, + "logps/chosen": -225.0041961669922, + "logps/rejected": -237.2688446044922, + "loss": 0.6203, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.009254536591470242, + "rewards/margins": 0.14196929335594177, + "rewards/margins_max": 0.20804783701896667, + "rewards/margins_min": 0.07589074224233627, + "rewards/margins_std": 0.0934491753578186, + "rewards/rejected": -0.151223823428154, + "step": 390 + }, + { + "epoch": 0.1, + "grad_norm": 0.515625, + "learning_rate": 1.9999984564005714e-06, + "logits/chosen": 0.17335475981235504, + "logits/rejected": 0.6286818385124207, + "logps/chosen": -251.3433380126953, + "logps/rejected": -244.0115509033203, + "loss": 0.6029, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.004750807769596577, + "rewards/margins": 0.19618940353393555, + "rewards/margins_max": 0.28234678506851196, + "rewards/margins_min": 0.11003203690052032, + "rewards/margins_std": 0.12184491008520126, + "rewards/rejected": -0.20094020664691925, + "step": 400 + }, + { + "epoch": 0.1, + "grad_norm": 0.5078125, + "learning_rate": 1.999944430920943e-06, + "logits/chosen": 0.2944129705429077, + "logits/rejected": 0.6106816530227661, + "logps/chosen": -209.9373321533203, + "logps/rejected": -256.11431884765625, + "loss": 0.5943, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.014288604259490967, + "rewards/margins": 0.23369868099689484, + "rewards/margins_max": 0.3245174288749695, + "rewards/margins_min": 0.1428799331188202, + "rewards/margins_std": 0.1284371018409729, + "rewards/rejected": -0.2479872703552246, + "step": 410 + }, + { + "epoch": 0.11, + "grad_norm": 0.439453125, + "learning_rate": 1.9998132302352276e-06, + "logits/chosen": 0.10406245291233063, + "logits/rejected": 0.4271799921989441, + "logps/chosen": -219.8295135498047, + "logps/rejected": -235.6389617919922, + "loss": 0.5968, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027161872014403343, + "rewards/margins": 0.20511429011821747, + "rewards/margins_max": 0.27854466438293457, + "rewards/margins_min": 0.13168397545814514, + "rewards/margins_std": 0.10384617000818253, + "rewards/rejected": -0.23227617144584656, + "step": 420 + }, + { + "epoch": 0.11, + "grad_norm": 0.48046875, + "learning_rate": 1.999604864469428e-06, + "logits/chosen": 0.22821100056171417, + "logits/rejected": 0.5613245964050293, + "logps/chosen": -220.06796264648438, + "logps/rejected": -239.36196899414062, + "loss": 0.5837, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.05030583217740059, + "rewards/margins": 0.23767979443073273, + "rewards/margins_max": 0.35419517755508423, + "rewards/margins_min": 0.12116440385580063, + "rewards/margins_std": 0.16477763652801514, + "rewards/rejected": -0.287985622882843, + "step": 430 + }, + { + "epoch": 0.11, + "grad_norm": 0.423828125, + "learning_rate": 1.999319349705108e-06, + "logits/chosen": 0.2373732626438141, + "logits/rejected": 0.5678123831748962, + "logps/chosen": -253.2532196044922, + "logps/rejected": -260.73516845703125, + "loss": 0.5869, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.06434480845928192, + "rewards/margins": 0.2248760461807251, + "rewards/margins_max": 0.3302004635334015, + "rewards/margins_min": 0.1195516362786293, + "rewards/margins_std": 0.1489512026309967, + "rewards/rejected": -0.2892208695411682, + "step": 440 + }, + { + "epoch": 0.11, + "grad_norm": 0.423828125, + "learning_rate": 1.9989567079781537e-06, + "logits/chosen": 0.2335653007030487, + "logits/rejected": 0.5320082902908325, + "logps/chosen": -208.51205444335938, + "logps/rejected": -247.15762329101562, + "loss": 0.5566, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.060688622295856476, + "rewards/margins": 0.31464827060699463, + "rewards/margins_max": 0.43832287192344666, + "rewards/margins_min": 0.1909736841917038, + "rewards/margins_std": 0.1749022752046585, + "rewards/rejected": -0.3753369152545929, + "step": 450 + }, + { + "epoch": 0.12, + "grad_norm": 0.462890625, + "learning_rate": 1.9985169672770702e-06, + "logits/chosen": -0.06091824918985367, + "logits/rejected": 0.25912588834762573, + "logps/chosen": -213.4940185546875, + "logps/rejected": -248.5473175048828, + "loss": 0.5665, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.10004855692386627, + "rewards/margins": 0.2793845236301422, + "rewards/margins_max": 0.3991774916648865, + "rewards/margins_min": 0.15959155559539795, + "rewards/margins_std": 0.16941285133361816, + "rewards/rejected": -0.3794330954551697, + "step": 460 + }, + { + "epoch": 0.12, + "grad_norm": 0.5078125, + "learning_rate": 1.9980001615408227e-06, + "logits/chosen": 0.12755416333675385, + "logits/rejected": 0.4592605233192444, + "logps/chosen": -226.99948120117188, + "logps/rejected": -252.36849975585938, + "loss": 0.5626, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.12666812539100647, + "rewards/margins": 0.2744174599647522, + "rewards/margins_max": 0.4089486598968506, + "rewards/margins_min": 0.1398862898349762, + "rewards/margins_std": 0.19025583565235138, + "rewards/rejected": -0.4010855555534363, + "step": 470 + }, + { + "epoch": 0.12, + "grad_norm": 0.58984375, + "learning_rate": 1.9974063306562163e-06, + "logits/chosen": 0.04675767198204994, + "logits/rejected": 0.2735728919506073, + "logps/chosen": -219.89529418945312, + "logps/rejected": -272.9166564941406, + "loss": 0.5521, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.1341877430677414, + "rewards/margins": 0.32467249035835266, + "rewards/margins_max": 0.4808884263038635, + "rewards/margins_min": 0.1684565544128418, + "rewards/margins_std": 0.22092270851135254, + "rewards/rejected": -0.45886021852493286, + "step": 480 + }, + { + "epoch": 0.12, + "grad_norm": 0.54296875, + "learning_rate": 1.99673552045482e-06, + "logits/chosen": 0.021597793325781822, + "logits/rejected": 0.5157625675201416, + "logps/chosen": -227.2969512939453, + "logps/rejected": -255.79684448242188, + "loss": 0.5397, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.13789792358875275, + "rewards/margins": 0.34923315048217773, + "rewards/margins_max": 0.5173725485801697, + "rewards/margins_min": 0.18109369277954102, + "rewards/margins_std": 0.23778510093688965, + "rewards/rejected": -0.4871310293674469, + "step": 490 + }, + { + "epoch": 0.13, + "grad_norm": 0.625, + "learning_rate": 1.995987782709425e-06, + "logits/chosen": 0.35428065061569214, + "logits/rejected": 0.7805494070053101, + "logps/chosen": -254.2764129638672, + "logps/rejected": -262.0751953125, + "loss": 0.5326, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.14267601072788239, + "rewards/margins": 0.37824535369873047, + "rewards/margins_max": 0.5719924569129944, + "rewards/margins_min": 0.18449831008911133, + "rewards/margins_std": 0.2739996910095215, + "rewards/rejected": -0.5209213495254517, + "step": 500 + }, + { + "epoch": 0.13, + "grad_norm": 0.47265625, + "learning_rate": 1.995163175130053e-06, + "logits/chosen": 0.13442710041999817, + "logits/rejected": 0.5977517366409302, + "logps/chosen": -269.36590576171875, + "logps/rejected": -279.8051452636719, + "loss": 0.5318, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.20593896508216858, + "rewards/margins": 0.36720719933509827, + "rewards/margins_max": 0.5583890676498413, + "rewards/margins_min": 0.1760253608226776, + "rewards/margins_std": 0.2703719735145569, + "rewards/rejected": -0.5731461644172668, + "step": 510 + }, + { + "epoch": 0.13, + "grad_norm": 0.5546875, + "learning_rate": 1.994261761359501e-06, + "logits/chosen": 0.10652659833431244, + "logits/rejected": 0.6973064541816711, + "logps/chosen": -262.9113464355469, + "logps/rejected": -267.61590576171875, + "loss": 0.5143, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.19917282462120056, + "rewards/margins": 0.3482286036014557, + "rewards/margins_max": 0.5081428289413452, + "rewards/margins_min": 0.18831434845924377, + "rewards/margins_std": 0.2261529266834259, + "rewards/rejected": -0.5474014282226562, + "step": 520 + }, + { + "epoch": 0.13, + "grad_norm": 0.5625, + "learning_rate": 1.9932836109684285e-06, + "logits/chosen": 0.023062556982040405, + "logits/rejected": 0.35867422819137573, + "logps/chosen": -217.8906707763672, + "logps/rejected": -279.52618408203125, + "loss": 0.5197, + "rewards/accuracies": 0.887499988079071, + "rewards/chosen": -0.2201465368270874, + "rewards/margins": 0.4889785647392273, + "rewards/margins_max": 0.7714502215385437, + "rewards/margins_min": 0.20650680363178253, + "rewards/margins_std": 0.3994753360748291, + "rewards/rejected": -0.7091250419616699, + "step": 530 + }, + { + "epoch": 0.14, + "grad_norm": 0.5390625, + "learning_rate": 1.9922287994499877e-06, + "logits/chosen": 0.2635014057159424, + "logits/rejected": 0.6844016313552856, + "logps/chosen": -242.1995849609375, + "logps/rejected": -261.4162902832031, + "loss": 0.5346, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.2828103303909302, + "rewards/margins": 0.3755984306335449, + "rewards/margins_max": 0.5684477686882019, + "rewards/margins_min": 0.18274910748004913, + "rewards/margins_std": 0.27273014187812805, + "rewards/rejected": -0.6584087610244751, + "step": 540 + }, + { + "epoch": 0.14, + "grad_norm": 0.62109375, + "learning_rate": 1.991097408214e-06, + "logits/chosen": 0.07120836526155472, + "logits/rejected": 0.4711441099643707, + "logps/chosen": -283.8448791503906, + "logps/rejected": -323.3854064941406, + "loss": 0.4654, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.2944994866847992, + "rewards/margins": 0.6885162591934204, + "rewards/margins_max": 0.9488954544067383, + "rewards/margins_min": 0.4281369745731354, + "rewards/margins_std": 0.3682318329811096, + "rewards/rejected": -0.9830157160758972, + "step": 550 + }, + { + "epoch": 0.14, + "grad_norm": 0.578125, + "learning_rate": 1.989889524580669e-06, + "logits/chosen": 0.2516458034515381, + "logits/rejected": 0.6511009335517883, + "logps/chosen": -238.0609588623047, + "logps/rejected": -280.14019775390625, + "loss": 0.486, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.3175020217895508, + "rewards/margins": 0.5095471739768982, + "rewards/margins_max": 0.7277418375015259, + "rewards/margins_min": 0.29135242104530334, + "rewards/margins_std": 0.30857396125793457, + "rewards/rejected": -0.8270492553710938, + "step": 560 + }, + { + "epoch": 0.14, + "grad_norm": 0.5546875, + "learning_rate": 1.988605241773843e-06, + "logits/chosen": 0.23482546210289001, + "logits/rejected": 0.39776262640953064, + "logps/chosen": -211.8990020751953, + "logps/rejected": -277.9550476074219, + "loss": 0.4832, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.3305775821208954, + "rewards/margins": 0.5754931569099426, + "rewards/margins_max": 0.837389349937439, + "rewards/margins_min": 0.31359678506851196, + "rewards/margins_std": 0.3703773319721222, + "rewards/rejected": -0.9060707092285156, + "step": 570 + }, + { + "epoch": 0.15, + "grad_norm": 0.84375, + "learning_rate": 1.987244658913821e-06, + "logits/chosen": 0.2136719673871994, + "logits/rejected": 0.5631103515625, + "logps/chosen": -263.46173095703125, + "logps/rejected": -335.8443908691406, + "loss": 0.4707, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.3896404504776001, + "rewards/margins": 0.6542876958847046, + "rewards/margins_max": 1.0539064407348633, + "rewards/margins_min": 0.2546689510345459, + "rewards/margins_std": 0.5651463270187378, + "rewards/rejected": -1.0439281463623047, + "step": 580 + }, + { + "epoch": 0.15, + "grad_norm": 0.609375, + "learning_rate": 1.9858078810097e-06, + "logits/chosen": 0.2974611520767212, + "logits/rejected": 0.5850492715835571, + "logps/chosen": -250.642578125, + "logps/rejected": -302.98162841796875, + "loss": 0.4955, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.4073031544685364, + "rewards/margins": 0.4717481732368469, + "rewards/margins_max": 0.761227548122406, + "rewards/margins_min": 0.18226870894432068, + "rewards/margins_std": 0.40938568115234375, + "rewards/rejected": -0.8790512084960938, + "step": 590 + }, + { + "epoch": 0.15, + "grad_norm": 0.66796875, + "learning_rate": 1.984295018951274e-06, + "logits/chosen": 0.09430913627147675, + "logits/rejected": 0.49069744348526, + "logps/chosen": -251.55856323242188, + "logps/rejected": -317.350341796875, + "loss": 0.4458, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.41190361976623535, + "rewards/margins": 0.6539624333381653, + "rewards/margins_max": 0.9886572957038879, + "rewards/margins_min": 0.3192675709724426, + "rewards/margins_std": 0.47332993149757385, + "rewards/rejected": -1.0658659934997559, + "step": 600 + }, + { + "epoch": 0.15, + "grad_norm": 0.59765625, + "learning_rate": 1.9827061895004715e-06, + "logits/chosen": 0.17028877139091492, + "logits/rejected": 0.4926506578922272, + "logps/chosen": -252.2837371826172, + "logps/rejected": -298.1224670410156, + "loss": 0.4782, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.438131719827652, + "rewards/margins": 0.5629655122756958, + "rewards/margins_max": 0.9025141596794128, + "rewards/margins_min": 0.22341683506965637, + "rewards/margins_std": 0.4801942706108093, + "rewards/rejected": -1.0010972023010254, + "step": 610 + }, + { + "epoch": 0.16, + "grad_norm": 0.60546875, + "learning_rate": 1.9810415152823475e-06, + "logits/chosen": 0.10140929371118546, + "logits/rejected": 0.21094012260437012, + "logps/chosen": -253.6886749267578, + "logps/rejected": -349.69720458984375, + "loss": 0.4399, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.5137056112289429, + "rewards/margins": 0.7384149432182312, + "rewards/margins_max": 1.147780179977417, + "rewards/margins_min": 0.3290497958660126, + "rewards/margins_std": 0.5789297819137573, + "rewards/rejected": -1.2521207332611084, + "step": 620 + }, + { + "epoch": 0.16, + "grad_norm": 0.498046875, + "learning_rate": 1.979301124775617e-06, + "logits/chosen": 0.21277904510498047, + "logits/rejected": 0.5488343834877014, + "logps/chosen": -275.15899658203125, + "logps/rejected": -342.88812255859375, + "loss": 0.4532, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.4999760687351227, + "rewards/margins": 0.7143586874008179, + "rewards/margins_max": 1.0617420673370361, + "rewards/margins_min": 0.3669753670692444, + "rewards/margins_std": 0.4912742078304291, + "rewards/rejected": -1.2143347263336182, + "step": 630 + }, + { + "epoch": 0.16, + "grad_norm": 0.6171875, + "learning_rate": 1.977485152302741e-06, + "logits/chosen": 0.20225989818572998, + "logits/rejected": 0.380338191986084, + "logps/chosen": -240.4453887939453, + "logps/rejected": -322.8797912597656, + "loss": 0.4514, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.5157987475395203, + "rewards/margins": 0.7272204756736755, + "rewards/margins_max": 1.0614551305770874, + "rewards/margins_min": 0.39298567175865173, + "rewards/margins_std": 0.47267937660217285, + "rewards/rejected": -1.2430192232131958, + "step": 640 + }, + { + "epoch": 0.16, + "grad_norm": 0.55859375, + "learning_rate": 1.9755937380195564e-06, + "logits/chosen": -0.05190020799636841, + "logits/rejected": 0.5600059628486633, + "logps/chosen": -293.57666015625, + "logps/rejected": -305.3736267089844, + "loss": 0.4481, + "rewards/accuracies": 0.887499988079071, + "rewards/chosen": -0.5161153674125671, + "rewards/margins": 0.6342784762382507, + "rewards/margins_max": 1.0201352834701538, + "rewards/margins_min": 0.2484218180179596, + "rewards/margins_std": 0.5456838011741638, + "rewards/rejected": -1.1503938436508179, + "step": 650 + }, + { + "epoch": 0.17, + "grad_norm": 0.703125, + "learning_rate": 1.9736270279044634e-06, + "logits/chosen": 0.014571094885468483, + "logits/rejected": 0.4248642027378082, + "logps/chosen": -266.79010009765625, + "logps/rejected": -353.534912109375, + "loss": 0.4127, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.5665841102600098, + "rewards/margins": 0.798658013343811, + "rewards/margins_max": 1.0735851526260376, + "rewards/margins_min": 0.5237309336662292, + "rewards/margins_std": 0.3888055682182312, + "rewards/rejected": -1.3652422428131104, + "step": 660 + }, + { + "epoch": 0.17, + "grad_norm": 0.58984375, + "learning_rate": 1.9715851737471544e-06, + "logits/chosen": 0.051493000239133835, + "logits/rejected": 0.347175657749176, + "logps/chosen": -256.3532409667969, + "logps/rejected": -362.8037414550781, + "loss": 0.4129, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.5985296368598938, + "rewards/margins": 0.8330098986625671, + "rewards/margins_max": 1.1987718343734741, + "rewards/margins_min": 0.467247873544693, + "rewards/margins_std": 0.5172656178474426, + "rewards/rejected": -1.4315392971038818, + "step": 670 + }, + { + "epoch": 0.17, + "grad_norm": 0.640625, + "learning_rate": 1.969468333136902e-06, + "logits/chosen": 0.10662545263767242, + "logits/rejected": 0.5305906534194946, + "logps/chosen": -277.83624267578125, + "logps/rejected": -321.1910095214844, + "loss": 0.4247, + "rewards/accuracies": 0.862500011920929, + "rewards/chosen": -0.6851547956466675, + "rewards/margins": 0.7354794144630432, + "rewards/margins_max": 1.1970973014831543, + "rewards/margins_min": 0.2738614082336426, + "rewards/margins_std": 0.6528264284133911, + "rewards/rejected": -1.4206342697143555, + "step": 680 + }, + { + "epoch": 0.17, + "grad_norm": 0.66015625, + "learning_rate": 1.9672766694503955e-06, + "logits/chosen": 0.130225270986557, + "logits/rejected": 0.47270625829696655, + "logps/chosen": -272.90985107421875, + "logps/rejected": -354.52911376953125, + "loss": 0.3923, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.6151808500289917, + "rewards/margins": 0.8642423748970032, + "rewards/margins_max": 1.2572708129882812, + "rewards/margins_min": 0.4712139964103699, + "rewards/margins_std": 0.5558260679244995, + "rewards/rejected": -1.4794232845306396, + "step": 690 + }, + { + "epoch": 0.18, + "grad_norm": 0.640625, + "learning_rate": 1.9650103518391316e-06, + "logits/chosen": -0.07168503105640411, + "logits/rejected": 0.35873326659202576, + "logps/chosen": -279.2594909667969, + "logps/rejected": -358.3661193847656, + "loss": 0.3894, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.6471285820007324, + "rewards/margins": 0.8690595626831055, + "rewards/margins_max": 1.3681957721710205, + "rewards/margins_min": 0.36992329359054565, + "rewards/margins_std": 0.7058852910995483, + "rewards/rejected": -1.5161882638931274, + "step": 700 + }, + { + "epoch": 0.18, + "grad_norm": 0.59375, + "learning_rate": 1.9626695552163577e-06, + "logits/chosen": 0.1328928917646408, + "logits/rejected": 0.5320017337799072, + "logps/chosen": -294.1138610839844, + "logps/rejected": -380.0039367675781, + "loss": 0.407, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.8441578149795532, + "rewards/margins": 0.9135202169418335, + "rewards/margins_max": 1.4895973205566406, + "rewards/margins_min": 0.33744320273399353, + "rewards/margins_std": 0.8146958351135254, + "rewards/rejected": -1.7576780319213867, + "step": 710 + }, + { + "epoch": 0.18, + "grad_norm": 1.015625, + "learning_rate": 1.9602544602435754e-06, + "logits/chosen": 0.0703146755695343, + "logits/rejected": 0.5812051892280579, + "logps/chosen": -351.3577575683594, + "logps/rejected": -401.6908874511719, + "loss": 0.4347, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.7561784386634827, + "rewards/margins": 1.0206066370010376, + "rewards/margins_max": 1.6420679092407227, + "rewards/margins_min": 0.3991455137729645, + "rewards/margins_std": 0.8788787722587585, + "rewards/rejected": -1.776785135269165, + "step": 720 + }, + { + "epoch": 0.18, + "grad_norm": 0.76953125, + "learning_rate": 1.957765253316595e-06, + "logits/chosen": -0.03158079460263252, + "logits/rejected": 0.36648237705230713, + "logps/chosen": -288.8924865722656, + "logps/rejected": -408.21453857421875, + "loss": 0.3707, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.7478488683700562, + "rewards/margins": 1.2099758386611938, + "rewards/margins_max": 1.8198902606964111, + "rewards/margins_min": 0.6000615358352661, + "rewards/margins_std": 0.8625491857528687, + "rewards/rejected": -1.95782470703125, + "step": 730 + }, + { + "epoch": 0.19, + "grad_norm": 0.609375, + "learning_rate": 1.955202126551149e-06, + "logits/chosen": 0.01123755145817995, + "logits/rejected": 0.3031242787837982, + "logps/chosen": -283.08087158203125, + "logps/rejected": -442.47216796875, + "loss": 0.3474, + "rewards/accuracies": 0.9375, + "rewards/chosen": -0.7709259986877441, + "rewards/margins": 1.405474305152893, + "rewards/margins_max": 2.1374449729919434, + "rewards/margins_min": 0.6735036969184875, + "rewards/margins_std": 1.0351628065109253, + "rewards/rejected": -2.1764004230499268, + "step": 740 + }, + { + "epoch": 0.19, + "grad_norm": 0.69921875, + "learning_rate": 1.9525652777680673e-06, + "logits/chosen": 0.17332817614078522, + "logits/rejected": 0.511985182762146, + "logps/chosen": -313.7992248535156, + "logps/rejected": -420.5000915527344, + "loss": 0.4075, + "rewards/accuracies": 0.887499988079071, + "rewards/chosen": -0.8556423187255859, + "rewards/margins": 1.1546481847763062, + "rewards/margins_max": 1.9178001880645752, + "rewards/margins_min": 0.3914966285228729, + "rewards/margins_std": 1.0792595148086548, + "rewards/rejected": -2.0102906227111816, + "step": 750 + }, + { + "epoch": 0.19, + "grad_norm": 0.58984375, + "learning_rate": 1.949854910478007e-06, + "logits/chosen": 0.16492195427417755, + "logits/rejected": 0.468805730342865, + "logps/chosen": -279.9993591308594, + "logps/rejected": -441.51409912109375, + "loss": 0.3282, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -0.8717137575149536, + "rewards/margins": 1.3921509981155396, + "rewards/margins_max": 2.1553454399108887, + "rewards/margins_min": 0.6289564967155457, + "rewards/margins_std": 1.079319953918457, + "rewards/rejected": -2.263864517211914, + "step": 760 + }, + { + "epoch": 0.19, + "grad_norm": 0.546875, + "learning_rate": 1.9470712338657457e-06, + "logits/chosen": -0.0090141287073493, + "logits/rejected": 0.4108152985572815, + "logps/chosen": -303.95452880859375, + "logps/rejected": -422.049072265625, + "loss": 0.3649, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -0.8306927680969238, + "rewards/margins": 1.1475027799606323, + "rewards/margins_max": 1.8583225011825562, + "rewards/margins_min": 0.4366832375526428, + "rewards/margins_std": 1.0052506923675537, + "rewards/rejected": -1.9781955480575562, + "step": 770 + }, + { + "epoch": 0.2, + "grad_norm": 0.8515625, + "learning_rate": 1.9442144627740387e-06, + "logits/chosen": 0.2017272412776947, + "logits/rejected": 0.3989468812942505, + "logps/chosen": -296.7005310058594, + "logps/rejected": -446.35650634765625, + "loss": 0.3255, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.8615614175796509, + "rewards/margins": 1.4004117250442505, + "rewards/margins_max": 2.0196797847747803, + "rewards/margins_min": 0.7811434864997864, + "rewards/margins_std": 0.8757774233818054, + "rewards/rejected": -2.2619731426239014, + "step": 780 + }, + { + "epoch": 0.2, + "grad_norm": 0.54296875, + "learning_rate": 1.9412848176870363e-06, + "logits/chosen": 0.06361217796802521, + "logits/rejected": 0.45090895891189575, + "logps/chosen": -299.48175048828125, + "logps/rejected": -419.0980529785156, + "loss": 0.3474, + "rewards/accuracies": 0.887499988079071, + "rewards/chosen": -0.9132553339004517, + "rewards/margins": 1.1059119701385498, + "rewards/margins_max": 1.6222988367080688, + "rewards/margins_min": 0.5895251035690308, + "rewards/margins_std": 0.7302813529968262, + "rewards/rejected": -2.019167423248291, + "step": 790 + }, + { + "epoch": 0.2, + "grad_norm": 0.63671875, + "learning_rate": 1.938282524713266e-06, + "logits/chosen": 0.14790871739387512, + "logits/rejected": 0.6091148257255554, + "logps/chosen": -302.5926818847656, + "logps/rejected": -411.4151916503906, + "loss": 0.3637, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.8289756774902344, + "rewards/margins": 1.2672948837280273, + "rewards/margins_max": 1.9323132038116455, + "rewards/margins_min": 0.6022766828536987, + "rewards/margins_std": 0.9404776692390442, + "rewards/rejected": -2.0962705612182617, + "step": 800 + }, + { + "epoch": 0.2, + "grad_norm": 0.83203125, + "learning_rate": 1.935207815568183e-06, + "logits/chosen": 0.12243340164422989, + "logits/rejected": 0.3599459230899811, + "logps/chosen": -336.83197021484375, + "logps/rejected": -477.29736328125, + "loss": 0.3501, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -1.0968286991119385, + "rewards/margins": 1.5700525045394897, + "rewards/margins_max": 2.5577869415283203, + "rewards/margins_min": 0.582318127155304, + "rewards/margins_std": 1.3968675136566162, + "rewards/rejected": -2.6668813228607178, + "step": 810 + }, + { + "epoch": 0.21, + "grad_norm": 0.6640625, + "learning_rate": 1.9320609275562863e-06, + "logits/chosen": -0.0032353117130696774, + "logits/rejected": 0.4075491428375244, + "logps/chosen": -307.46099853515625, + "logps/rejected": -445.69635009765625, + "loss": 0.3292, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -1.015716791152954, + "rewards/margins": 1.3720118999481201, + "rewards/margins_max": 2.1683993339538574, + "rewards/margins_min": 0.575624406337738, + "rewards/margins_std": 1.1262620687484741, + "rewards/rejected": -2.387728691101074, + "step": 820 + }, + { + "epoch": 0.21, + "grad_norm": 1.421875, + "learning_rate": 1.9288421035528025e-06, + "logits/chosen": 0.007567564491182566, + "logits/rejected": 0.45127448439598083, + "logps/chosen": -362.3955078125, + "logps/rejected": -487.38250732421875, + "loss": 0.371, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -1.1685550212860107, + "rewards/margins": 1.307808518409729, + "rewards/margins_max": 2.092421531677246, + "rewards/margins_min": 0.5231954455375671, + "rewards/margins_std": 1.1096104383468628, + "rewards/rejected": -2.47636342048645, + "step": 830 + }, + { + "epoch": 0.21, + "grad_norm": 0.734375, + "learning_rate": 1.925551591984943e-06, + "logits/chosen": 0.11853794753551483, + "logits/rejected": 0.392129123210907, + "logps/chosen": -341.79779052734375, + "logps/rejected": -501.59930419921875, + "loss": 0.3212, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -1.0101326704025269, + "rewards/margins": 1.5283323526382446, + "rewards/margins_max": 2.2559328079223633, + "rewards/margins_min": 0.8007319569587708, + "rewards/margins_std": 1.028982400894165, + "rewards/rejected": -2.5384650230407715, + "step": 840 + }, + { + "epoch": 0.21, + "grad_norm": 0.83984375, + "learning_rate": 1.9221896468127285e-06, + "logits/chosen": 0.03412569314241409, + "logits/rejected": 0.4624078869819641, + "logps/chosen": -316.32684326171875, + "logps/rejected": -468.22705078125, + "loss": 0.334, + "rewards/accuracies": 0.9375, + "rewards/chosen": -1.012904405593872, + "rewards/margins": 1.5719475746154785, + "rewards/margins_max": 2.157794713973999, + "rewards/margins_min": 0.9861001968383789, + "rewards/margins_std": 0.8285131454467773, + "rewards/rejected": -2.5848519802093506, + "step": 850 + }, + { + "epoch": 0.22, + "grad_norm": 0.7734375, + "learning_rate": 1.918756527509389e-06, + "logits/chosen": -0.004495727829635143, + "logits/rejected": 0.5306761860847473, + "logps/chosen": -349.19427490234375, + "logps/rejected": -435.3349609375, + "loss": 0.329, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -1.1305290460586548, + "rewards/margins": 1.2037010192871094, + "rewards/margins_max": 1.8855326175689697, + "rewards/margins_min": 0.5218694806098938, + "rewards/margins_std": 0.9642555117607117, + "rewards/rejected": -2.3342299461364746, + "step": 860 + }, + { + "epoch": 0.22, + "grad_norm": 0.84375, + "learning_rate": 1.9152524990413376e-06, + "logits/chosen": 0.07604047656059265, + "logits/rejected": 0.3435381054878235, + "logps/chosen": -312.3734436035156, + "logps/rejected": -463.84552001953125, + "loss": 0.3341, + "rewards/accuracies": 0.9375, + "rewards/chosen": -1.0629851818084717, + "rewards/margins": 1.5512994527816772, + "rewards/margins_max": 2.33793306350708, + "rewards/margins_min": 0.7646657824516296, + "rewards/margins_std": 1.1124681234359741, + "rewards/rejected": -2.6142849922180176, + "step": 870 + }, + { + "epoch": 0.22, + "grad_norm": 1.1328125, + "learning_rate": 1.9116778318477224e-06, + "logits/chosen": 0.017501067370176315, + "logits/rejected": 0.3349132537841797, + "logps/chosen": -367.47442626953125, + "logps/rejected": -500.0546875, + "loss": 0.371, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.3893836736679077, + "rewards/margins": 1.4116103649139404, + "rewards/margins_max": 2.3770699501037598, + "rewards/margins_min": 0.44615092873573303, + "rewards/margins_std": 1.3653658628463745, + "rewards/rejected": -2.8009941577911377, + "step": 880 + }, + { + "epoch": 0.22, + "grad_norm": 0.671875, + "learning_rate": 1.908032801819551e-06, + "logits/chosen": 0.09761302173137665, + "logits/rejected": 0.6039578318595886, + "logps/chosen": -371.1180725097656, + "logps/rejected": -442.41339111328125, + "loss": 0.3455, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -1.2109535932540894, + "rewards/margins": 1.2490062713623047, + "rewards/margins_max": 1.8362785577774048, + "rewards/margins_min": 0.6617340445518494, + "rewards/margins_std": 0.8305282592773438, + "rewards/rejected": -2.4599597454071045, + "step": 890 + }, + { + "epoch": 0.23, + "grad_norm": 1.2578125, + "learning_rate": 1.9043176902784006e-06, + "logits/chosen": 0.029796432703733444, + "logits/rejected": 0.5161929726600647, + "logps/chosen": -374.39520263671875, + "logps/rejected": -520.0931396484375, + "loss": 0.3237, + "rewards/accuracies": 0.9375, + "rewards/chosen": -1.319215178489685, + "rewards/margins": 1.6767199039459229, + "rewards/margins_max": 2.5083844661712646, + "rewards/margins_min": 0.8450548052787781, + "rewards/margins_std": 1.1761517524719238, + "rewards/rejected": -2.9959349632263184, + "step": 900 + }, + { + "epoch": 0.23, + "grad_norm": 0.734375, + "learning_rate": 1.900532783954703e-06, + "logits/chosen": -0.1830468475818634, + "logits/rejected": 0.16268977522850037, + "logps/chosen": -327.41705322265625, + "logps/rejected": -516.1043090820312, + "loss": 0.3022, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -1.2103191614151, + "rewards/margins": 1.746766448020935, + "rewards/margins_max": 2.5591721534729004, + "rewards/margins_min": 0.934360682964325, + "rewards/margins_std": 1.1489155292510986, + "rewards/rejected": -2.9570858478546143, + "step": 910 + }, + { + "epoch": 0.23, + "grad_norm": 0.8203125, + "learning_rate": 1.8966783749656162e-06, + "logits/chosen": 0.15995833277702332, + "logits/rejected": 0.3903830647468567, + "logps/chosen": -336.82269287109375, + "logps/rejected": -550.3900146484375, + "loss": 0.305, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -1.3295611143112183, + "rewards/margins": 1.906531572341919, + "rewards/margins_max": 3.0635104179382324, + "rewards/margins_min": 0.7495523691177368, + "rewards/margins_std": 1.6362155675888062, + "rewards/rejected": -3.2360923290252686, + "step": 920 + }, + { + "epoch": 0.23, + "grad_norm": 0.9296875, + "learning_rate": 1.8927547607924793e-06, + "logits/chosen": 0.11276821792125702, + "logits/rejected": 0.4435056149959564, + "logps/chosen": -350.63641357421875, + "logps/rejected": -520.3906860351562, + "loss": 0.2768, + "rewards/accuracies": 0.887499988079071, + "rewards/chosen": -1.1965409517288208, + "rewards/margins": 1.8161017894744873, + "rewards/margins_max": 2.594125986099243, + "rewards/margins_min": 1.0380772352218628, + "rewards/margins_std": 1.1002928018569946, + "rewards/rejected": -3.0126426219940186, + "step": 930 + }, + { + "epoch": 0.24, + "grad_norm": 1.0078125, + "learning_rate": 1.8887622442578524e-06, + "logits/chosen": 0.11966486275196075, + "logits/rejected": 0.5965573191642761, + "logps/chosen": -324.5442810058594, + "logps/rejected": -518.1434936523438, + "loss": 0.3189, + "rewards/accuracies": 0.887499988079071, + "rewards/chosen": -1.1375452280044556, + "rewards/margins": 1.926458716392517, + "rewards/margins_max": 3.091047763824463, + "rewards/margins_min": 0.7618700861930847, + "rewards/margins_std": 1.646977186203003, + "rewards/rejected": -3.0640041828155518, + "step": 940 + }, + { + "epoch": 0.24, + "grad_norm": 0.6953125, + "learning_rate": 1.8847011335021445e-06, + "logits/chosen": 0.18524505198001862, + "logits/rejected": 0.6330695152282715, + "logps/chosen": -354.59686279296875, + "logps/rejected": -523.9287719726562, + "loss": 0.2714, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -1.3136844635009766, + "rewards/margins": 1.8904693126678467, + "rewards/margins_max": 2.8665313720703125, + "rewards/margins_min": 0.9144073724746704, + "rewards/margins_std": 1.3803602457046509, + "rewards/rejected": -3.2041537761688232, + "step": 950 + }, + { + "epoch": 0.24, + "grad_norm": 1.03125, + "learning_rate": 1.8805717419598329e-06, + "logits/chosen": 0.10084180533885956, + "logits/rejected": 0.5015174746513367, + "logps/chosen": -342.0030822753906, + "logps/rejected": -544.0067749023438, + "loss": 0.2753, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -1.1676990985870361, + "rewards/margins": 2.014329195022583, + "rewards/margins_max": 3.015362501144409, + "rewards/margins_min": 1.0132955312728882, + "rewards/margins_std": 1.415675163269043, + "rewards/rejected": -3.182028293609619, + "step": 960 + }, + { + "epoch": 0.24, + "grad_norm": 1.0078125, + "learning_rate": 1.8763743883352707e-06, + "logits/chosen": 0.1762905865907669, + "logits/rejected": 0.6730665564537048, + "logps/chosen": -350.9500427246094, + "logps/rejected": -550.13916015625, + "loss": 0.3047, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.2952146530151367, + "rewards/margins": 2.0972981452941895, + "rewards/margins_max": 3.471599578857422, + "rewards/margins_min": 0.7229966521263123, + "rewards/margins_std": 1.9435558319091797, + "rewards/rejected": -3.392512798309326, + "step": 970 + }, + { + "epoch": 0.25, + "grad_norm": 0.78125, + "learning_rate": 1.8721093965780905e-06, + "logits/chosen": 0.21470198035240173, + "logits/rejected": 0.5289596319198608, + "logps/chosen": -344.1557922363281, + "logps/rejected": -550.0116577148438, + "loss": 0.2839, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.366784930229187, + "rewards/margins": 2.0145416259765625, + "rewards/margins_max": 3.2122111320495605, + "rewards/margins_min": 0.816872239112854, + "rewards/margins_std": 1.693760633468628, + "rewards/rejected": -3.3813271522521973, + "step": 980 + }, + { + "epoch": 0.25, + "grad_norm": 0.7109375, + "learning_rate": 1.8677770958582019e-06, + "logits/chosen": 0.17914500832557678, + "logits/rejected": 0.4978371262550354, + "logps/chosen": -343.85107421875, + "logps/rejected": -522.5852661132812, + "loss": 0.296, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -1.3435138463974, + "rewards/margins": 1.8682317733764648, + "rewards/margins_max": 2.7188496589660645, + "rewards/margins_min": 1.0176142454147339, + "rewards/margins_std": 1.202954888343811, + "rewards/rejected": -3.2117457389831543, + "step": 990 + }, + { + "epoch": 0.25, + "grad_norm": 1.171875, + "learning_rate": 1.863377820540386e-06, + "logits/chosen": 0.09994121640920639, + "logits/rejected": 0.48022064566612244, + "logps/chosen": -373.71710205078125, + "logps/rejected": -529.862548828125, + "loss": 0.29, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -1.4669697284698486, + "rewards/margins": 1.7658771276474, + "rewards/margins_max": 2.542285680770874, + "rewards/margins_min": 0.9894682765007019, + "rewards/margins_std": 1.0980077981948853, + "rewards/rejected": -3.232846736907959, + "step": 1000 + }, + { + "epoch": 0.25, + "grad_norm": 1.0078125, + "learning_rate": 1.8589119101584897e-06, + "logits/chosen": 0.08443330228328705, + "logits/rejected": 0.3289525806903839, + "logps/chosen": -340.7434997558594, + "logps/rejected": -567.8606567382812, + "loss": 0.267, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -1.3939064741134644, + "rewards/margins": 2.0518834590911865, + "rewards/margins_max": 3.2109062671661377, + "rewards/margins_min": 0.8928610682487488, + "rewards/margins_std": 1.6391054391860962, + "rewards/rejected": -3.4457900524139404, + "step": 1010 + }, + { + "epoch": 0.26, + "grad_norm": 0.73046875, + "learning_rate": 1.854379709389221e-06, + "logits/chosen": -0.020468706265091896, + "logits/rejected": 0.5041080713272095, + "logps/chosen": -358.7152404785156, + "logps/rejected": -573.8004150390625, + "loss": 0.3059, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -1.3497676849365234, + "rewards/margins": 2.207468032836914, + "rewards/margins_max": 3.2299110889434814, + "rewards/margins_min": 1.185024619102478, + "rewards/margins_std": 1.445953130722046, + "rewards/rejected": -3.5572357177734375, + "step": 1020 + }, + { + "epoch": 0.26, + "grad_norm": 0.74609375, + "learning_rate": 1.849781568025545e-06, + "logits/chosen": 0.17804110050201416, + "logits/rejected": 0.613066554069519, + "logps/chosen": -373.60882568359375, + "logps/rejected": -549.121337890625, + "loss": 0.2861, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -1.433725118637085, + "rewards/margins": 2.0137925148010254, + "rewards/margins_max": 3.150330066680908, + "rewards/margins_min": 0.8772546648979187, + "rewards/margins_std": 1.6073071956634521, + "rewards/rejected": -3.4475178718566895, + "step": 1030 + }, + { + "epoch": 0.26, + "grad_norm": 1.015625, + "learning_rate": 1.84511784094969e-06, + "logits/chosen": -0.03766552731394768, + "logits/rejected": 0.4089323580265045, + "logps/chosen": -367.4641418457031, + "logps/rejected": -541.2486572265625, + "loss": 0.2689, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -1.2735382318496704, + "rewards/margins": 1.9268970489501953, + "rewards/margins_max": 2.815758228302002, + "rewards/margins_min": 1.0380356311798096, + "rewards/margins_std": 1.2570399045944214, + "rewards/rejected": -3.200435161590576, + "step": 1040 + }, + { + "epoch": 0.26, + "grad_norm": 1.359375, + "learning_rate": 1.8403888881057558e-06, + "logits/chosen": 0.13449151813983917, + "logits/rejected": 0.6226879954338074, + "logps/chosen": -366.2633056640625, + "logps/rejected": -512.7859497070312, + "loss": 0.2869, + "rewards/accuracies": 0.875, + "rewards/chosen": -1.3161754608154297, + "rewards/margins": 1.7274010181427002, + "rewards/margins_max": 2.7379233837127686, + "rewards/margins_min": 0.7168782353401184, + "rewards/margins_std": 1.429094672203064, + "rewards/rejected": -3.043576240539551, + "step": 1050 + }, + { + "epoch": 0.27, + "grad_norm": 0.97265625, + "learning_rate": 1.8355950744719345e-06, + "logits/chosen": 0.23932485282421112, + "logits/rejected": 0.5507219433784485, + "logps/chosen": -357.52130126953125, + "logps/rejected": -589.6912841796875, + "loss": 0.2619, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -1.4837455749511719, + "rewards/margins": 2.199106216430664, + "rewards/margins_max": 3.2102742195129395, + "rewards/margins_min": 1.1879384517669678, + "rewards/margins_std": 1.430006980895996, + "rewards/rejected": -3.682851791381836, + "step": 1060 + }, + { + "epoch": 0.27, + "grad_norm": 1.1953125, + "learning_rate": 1.830736770032341e-06, + "logits/chosen": 0.2617644965648651, + "logits/rejected": 0.5150817632675171, + "logps/chosen": -347.5115661621094, + "logps/rejected": -615.1715087890625, + "loss": 0.2594, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -1.4821890592575073, + "rewards/margins": 2.4075777530670166, + "rewards/margins_max": 3.7134640216827393, + "rewards/margins_min": 1.1016911268234253, + "rewards/margins_std": 1.8468024730682373, + "rewards/rejected": -3.8897671699523926, + "step": 1070 + }, + { + "epoch": 0.27, + "grad_norm": 0.87109375, + "learning_rate": 1.8258143497484578e-06, + "logits/chosen": 0.00525292893871665, + "logits/rejected": 0.4925769865512848, + "logps/chosen": -371.81378173828125, + "logps/rejected": -569.6729736328125, + "loss": 0.2117, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -1.4877418279647827, + "rewards/margins": 2.2526438236236572, + "rewards/margins_max": 3.327693462371826, + "rewards/margins_min": 1.1775938272476196, + "rewards/margins_std": 1.5203502178192139, + "rewards/rejected": -3.7403857707977295, + "step": 1080 + }, + { + "epoch": 0.27, + "grad_norm": 0.9921875, + "learning_rate": 1.8208281935301955e-06, + "logits/chosen": 0.2466718703508377, + "logits/rejected": 0.6609460711479187, + "logps/chosen": -388.27032470703125, + "logps/rejected": -604.4417724609375, + "loss": 0.2834, + "rewards/accuracies": 0.887499988079071, + "rewards/chosen": -1.688057541847229, + "rewards/margins": 2.2050793170928955, + "rewards/margins_max": 3.456709384918213, + "rewards/margins_min": 0.9534494280815125, + "rewards/margins_std": 1.7700719833374023, + "rewards/rejected": -3.893136501312256, + "step": 1090 + }, + { + "epoch": 0.28, + "grad_norm": 1.0703125, + "learning_rate": 1.8157786862065731e-06, + "logits/chosen": 0.21708440780639648, + "logits/rejected": 0.6412609219551086, + "logps/chosen": -420.9542541503906, + "logps/rejected": -639.0484008789062, + "loss": 0.3048, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -1.730591058731079, + "rewards/margins": 2.4535133838653564, + "rewards/margins_max": 3.991995334625244, + "rewards/margins_min": 0.9150320291519165, + "rewards/margins_std": 2.175741672515869, + "rewards/rejected": -4.1841044425964355, + "step": 1100 + }, + { + "epoch": 0.28, + "grad_norm": 1.5234375, + "learning_rate": 1.810666217496015e-06, + "logits/chosen": 0.30106106400489807, + "logits/rejected": 0.7213363647460938, + "logps/chosen": -380.4069519042969, + "logps/rejected": -642.5003051757812, + "loss": 0.2797, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.6323025226593018, + "rewards/margins": 2.6721444129943848, + "rewards/margins_max": 3.9874916076660156, + "rewards/margins_min": 1.3567968606948853, + "rewards/margins_std": 1.8601821660995483, + "rewards/rejected": -4.304447174072266, + "step": 1110 + }, + { + "epoch": 0.28, + "grad_norm": 1.1015625, + "learning_rate": 1.8054911819762739e-06, + "logits/chosen": 0.11988552659749985, + "logits/rejected": 0.576012372970581, + "logps/chosen": -327.7829284667969, + "logps/rejected": -495.76678466796875, + "loss": 0.2594, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -1.3872520923614502, + "rewards/margins": 1.7396234273910522, + "rewards/margins_max": 2.6414241790771484, + "rewards/margins_min": 0.8378230929374695, + "rewards/margins_std": 1.2753384113311768, + "rewards/rejected": -3.126875877380371, + "step": 1120 + }, + { + "epoch": 0.28, + "grad_norm": 0.87890625, + "learning_rate": 1.800253979053977e-06, + "logits/chosen": 0.15926051139831543, + "logits/rejected": 0.5235914587974548, + "logps/chosen": -384.0852355957031, + "logps/rejected": -643.6329956054688, + "loss": 0.2519, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -1.570711612701416, + "rewards/margins": 2.6680593490600586, + "rewards/margins_max": 3.8719935417175293, + "rewards/margins_min": 1.4641246795654297, + "rewards/margins_std": 1.702620506286621, + "rewards/rejected": -4.238770961761475, + "step": 1130 + }, + { + "epoch": 0.29, + "grad_norm": 0.9375, + "learning_rate": 1.7949550129338005e-06, + "logits/chosen": 0.06529082357883453, + "logits/rejected": 0.5438031554222107, + "logps/chosen": -414.1963806152344, + "logps/rejected": -667.8323364257812, + "loss": 0.2537, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -1.781734824180603, + "rewards/margins": 2.5534157752990723, + "rewards/margins_max": 3.9235243797302246, + "rewards/margins_min": 1.1833075284957886, + "rewards/margins_std": 1.9376258850097656, + "rewards/rejected": -4.335150718688965, + "step": 1140 + }, + { + "epoch": 0.29, + "grad_norm": 0.94140625, + "learning_rate": 1.7895946925872731e-06, + "logits/chosen": 0.261190265417099, + "logits/rejected": 0.5999152660369873, + "logps/chosen": -390.030029296875, + "logps/rejected": -711.2191162109375, + "loss": 0.2192, + "rewards/accuracies": 0.9375, + "rewards/chosen": -1.877661108970642, + "rewards/margins": 3.1061835289001465, + "rewards/margins_max": 4.556717872619629, + "rewards/margins_min": 1.6556494235992432, + "rewards/margins_std": 2.0513651371002197, + "rewards/rejected": -4.98384428024292, + "step": 1150 + }, + { + "epoch": 0.29, + "grad_norm": 1.2734375, + "learning_rate": 1.7841734317212116e-06, + "logits/chosen": 0.1313336342573166, + "logits/rejected": 0.593550443649292, + "logps/chosen": -413.43701171875, + "logps/rejected": -669.3980102539062, + "loss": 0.2464, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.9083925485610962, + "rewards/margins": 2.6244053840637207, + "rewards/margins_max": 4.089666843414307, + "rewards/margins_min": 1.1591440439224243, + "rewards/margins_std": 2.072192668914795, + "rewards/rejected": -4.5327982902526855, + "step": 1160 + }, + { + "epoch": 0.29, + "grad_norm": 0.859375, + "learning_rate": 1.7786916487457911e-06, + "logits/chosen": 0.10810734331607819, + "logits/rejected": 0.658301055431366, + "logps/chosen": -426.03692626953125, + "logps/rejected": -653.3524169921875, + "loss": 0.2693, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.0395500659942627, + "rewards/margins": 2.416887044906616, + "rewards/margins_max": 3.8193812370300293, + "rewards/margins_min": 1.0143930912017822, + "rewards/margins_std": 1.9834257364273071, + "rewards/rejected": -4.456437110900879, + "step": 1170 + }, + { + "epoch": 0.3, + "grad_norm": 1.1171875, + "learning_rate": 1.7731497667422526e-06, + "logits/chosen": 0.18602201342582703, + "logits/rejected": 0.5325266718864441, + "logps/chosen": -397.35009765625, + "logps/rejected": -679.3014526367188, + "loss": 0.2432, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -1.8916324377059937, + "rewards/margins": 2.8336009979248047, + "rewards/margins_max": 4.271034240722656, + "rewards/margins_min": 1.3961678743362427, + "rewards/margins_std": 2.032837390899658, + "rewards/rejected": -4.725234031677246, + "step": 1180 + }, + { + "epoch": 0.3, + "grad_norm": 0.9765625, + "learning_rate": 1.7675482134302499e-06, + "logits/chosen": 0.25429344177246094, + "logits/rejected": 0.5315398573875427, + "logps/chosen": -383.9230651855469, + "logps/rejected": -631.6602172851562, + "loss": 0.223, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -1.8132168054580688, + "rewards/margins": 2.535676956176758, + "rewards/margins_max": 3.828932523727417, + "rewards/margins_min": 1.2424218654632568, + "rewards/margins_std": 1.828939437866211, + "rewards/rejected": -4.3488945960998535, + "step": 1190 + }, + { + "epoch": 0.3, + "grad_norm": 0.73046875, + "learning_rate": 1.7618874211348381e-06, + "logits/chosen": 0.23039917647838593, + "logits/rejected": 0.6885030269622803, + "logps/chosen": -433.42205810546875, + "logps/rejected": -697.4190063476562, + "loss": 0.2455, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.1791675090789795, + "rewards/margins": 2.756437301635742, + "rewards/margins_max": 4.101160049438477, + "rewards/margins_min": 1.4117141962051392, + "rewards/margins_std": 1.9017255306243896, + "rewards/rejected": -4.935604572296143, + "step": 1200 + }, + { + "epoch": 0.3, + "grad_norm": 0.69140625, + "learning_rate": 1.7561678267531078e-06, + "logits/chosen": 0.25268083810806274, + "logits/rejected": 0.638781726360321, + "logps/chosen": -411.7583923339844, + "logps/rejected": -672.2666625976562, + "loss": 0.2394, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -1.9081943035125732, + "rewards/margins": 2.7916979789733887, + "rewards/margins_max": 4.282321929931641, + "rewards/margins_min": 1.3010739088058472, + "rewards/margins_std": 2.108060598373413, + "rewards/rejected": -4.699892520904541, + "step": 1210 + }, + { + "epoch": 0.31, + "grad_norm": 0.828125, + "learning_rate": 1.7503898717204631e-06, + "logits/chosen": 0.1927916258573532, + "logits/rejected": 0.6581898927688599, + "logps/chosen": -395.4380798339844, + "logps/rejected": -690.7493896484375, + "loss": 0.2031, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -1.8340580463409424, + "rewards/margins": 3.096101760864258, + "rewards/margins_max": 4.713414192199707, + "rewards/margins_min": 1.4787895679473877, + "rewards/margins_std": 2.287224531173706, + "rewards/rejected": -4.930159568786621, + "step": 1220 + }, + { + "epoch": 0.31, + "grad_norm": 1.109375, + "learning_rate": 1.7445540019765558e-06, + "logits/chosen": 0.1801643818616867, + "logits/rejected": 0.595844030380249, + "logps/chosen": -403.8412170410156, + "logps/rejected": -678.00146484375, + "loss": 0.2743, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -2.0799314975738525, + "rewards/margins": 2.5960934162139893, + "rewards/margins_max": 4.042551517486572, + "rewards/margins_min": 1.1496355533599854, + "rewards/margins_std": 2.045600414276123, + "rewards/rejected": -4.676024436950684, + "step": 1230 + }, + { + "epoch": 0.31, + "grad_norm": 1.171875, + "learning_rate": 1.7386606679308648e-06, + "logits/chosen": 0.27586087584495544, + "logits/rejected": 0.6709119081497192, + "logps/chosen": -427.78265380859375, + "logps/rejected": -727.4019775390625, + "loss": 0.2212, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -1.961285948753357, + "rewards/margins": 2.957775592803955, + "rewards/margins_max": 4.439484119415283, + "rewards/margins_min": 1.4760667085647583, + "rewards/margins_std": 2.0954525470733643, + "rewards/rejected": -4.919060707092285, + "step": 1240 + }, + { + "epoch": 0.31, + "grad_norm": 1.234375, + "learning_rate": 1.7327103244279347e-06, + "logits/chosen": 0.21624751389026642, + "logits/rejected": 0.5476531386375427, + "logps/chosen": -414.1004943847656, + "logps/rejected": -790.1434326171875, + "loss": 0.209, + "rewards/accuracies": 0.9375, + "rewards/chosen": -1.9725745916366577, + "rewards/margins": 3.613555431365967, + "rewards/margins_max": 5.268401145935059, + "rewards/margins_min": 1.9587090015411377, + "rewards/margins_std": 2.340306520462036, + "rewards/rejected": -5.586129665374756, + "step": 1250 + }, + { + "epoch": 0.32, + "grad_norm": 1.5078125, + "learning_rate": 1.7267034307122716e-06, + "logits/chosen": 0.21748849749565125, + "logits/rejected": 0.5897720456123352, + "logps/chosen": -445.33135986328125, + "logps/rejected": -714.2196044921875, + "loss": 0.2039, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.2084898948669434, + "rewards/margins": 2.672968864440918, + "rewards/margins_max": 4.1215972900390625, + "rewards/margins_min": 1.2243406772613525, + "rewards/margins_std": 2.0486698150634766, + "rewards/rejected": -4.881458759307861, + "step": 1260 + }, + { + "epoch": 0.32, + "grad_norm": 0.55078125, + "learning_rate": 1.720640450392898e-06, + "logits/chosen": 0.3318621516227722, + "logits/rejected": 0.7322698831558228, + "logps/chosen": -412.41571044921875, + "logps/rejected": -811.5214233398438, + "loss": 0.2346, + "rewards/accuracies": 0.9375, + "rewards/chosen": -1.9910354614257812, + "rewards/margins": 4.049851417541504, + "rewards/margins_max": 6.252103805541992, + "rewards/margins_min": 1.8475990295410156, + "rewards/margins_std": 3.114454984664917, + "rewards/rejected": -6.040886402130127, + "step": 1270 + }, + { + "epoch": 0.32, + "grad_norm": 0.5625, + "learning_rate": 1.7145218514075728e-06, + "logits/chosen": 0.07924878597259521, + "logits/rejected": 0.4982023239135742, + "logps/chosen": -447.25238037109375, + "logps/rejected": -724.427490234375, + "loss": 0.237, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -2.447425365447998, + "rewards/margins": 2.800419330596924, + "rewards/margins_max": 4.151153087615967, + "rewards/margins_min": 1.4496856927871704, + "rewards/margins_std": 1.9102258682250977, + "rewards/rejected": -5.247844219207764, + "step": 1280 + }, + { + "epoch": 0.32, + "grad_norm": 0.984375, + "learning_rate": 1.7083481059866747e-06, + "logits/chosen": 0.213484525680542, + "logits/rejected": 0.7719516158103943, + "logps/chosen": -416.8954162597656, + "logps/rejected": -753.9082641601562, + "loss": 0.195, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -2.0533287525177, + "rewards/margins": 3.40852689743042, + "rewards/margins_max": 5.488340377807617, + "rewards/margins_min": 1.3287138938903809, + "rewards/margins_std": 2.9412999153137207, + "rewards/rejected": -5.461855888366699, + "step": 1290 + }, + { + "epoch": 0.33, + "grad_norm": 5.21875, + "learning_rate": 1.7021196906167571e-06, + "logits/chosen": 0.24803981184959412, + "logits/rejected": 0.8145266771316528, + "logps/chosen": -478.02178955078125, + "logps/rejected": -837.2579345703125, + "loss": 0.2184, + "rewards/accuracies": 0.9375, + "rewards/chosen": -2.494419813156128, + "rewards/margins": 3.7387890815734863, + "rewards/margins_max": 5.558200836181641, + "rewards/margins_min": 1.9193763732910156, + "rewards/margins_std": 2.573037624359131, + "rewards/rejected": -6.233208656311035, + "step": 1300 + }, + { + "epoch": 0.33, + "grad_norm": 2.90625, + "learning_rate": 1.6958370860037716e-06, + "logits/chosen": 0.11850683391094208, + "logits/rejected": 0.5389954447746277, + "logps/chosen": -446.10235595703125, + "logps/rejected": -709.673828125, + "loss": 0.2605, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -2.3752357959747314, + "rewards/margins": 2.7870852947235107, + "rewards/margins_max": 4.720024108886719, + "rewards/margins_min": 0.8541472554206848, + "rewards/margins_std": 2.7335875034332275, + "rewards/rejected": -5.162322044372559, + "step": 1310 + }, + { + "epoch": 0.33, + "grad_norm": 2.21875, + "learning_rate": 1.6895007770359697e-06, + "logits/chosen": 0.3192082941532135, + "logits/rejected": 0.6527734994888306, + "logps/chosen": -487.09027099609375, + "logps/rejected": -853.08154296875, + "loss": 0.2319, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -2.527470111846924, + "rewards/margins": 3.651360273361206, + "rewards/margins_max": 5.692448616027832, + "rewards/margins_min": 1.6102720499038696, + "rewards/margins_std": 2.8865349292755127, + "rewards/rejected": -6.178830146789551, + "step": 1320 + }, + { + "epoch": 0.33, + "grad_norm": 1.5390625, + "learning_rate": 1.6831112527464763e-06, + "logits/chosen": 0.322293221950531, + "logits/rejected": 0.581436276435852, + "logps/chosen": -464.16619873046875, + "logps/rejected": -799.5835571289062, + "loss": 0.1916, + "rewards/accuracies": 0.9375, + "rewards/chosen": -2.4754815101623535, + "rewards/margins": 3.4213860034942627, + "rewards/margins_max": 5.288193225860596, + "rewards/margins_min": 1.5545791387557983, + "rewards/margins_std": 2.640063762664795, + "rewards/rejected": -5.8968682289123535, + "step": 1330 + }, + { + "epoch": 0.34, + "grad_norm": 4.03125, + "learning_rate": 1.6766690062755487e-06, + "logits/chosen": 0.253692090511322, + "logits/rejected": 0.5565173029899597, + "logps/chosen": -449.30072021484375, + "logps/rejected": -752.14794921875, + "loss": 0.2402, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.4055395126342773, + "rewards/margins": 3.2085556983947754, + "rewards/margins_max": 5.205798625946045, + "rewards/margins_min": 1.2113126516342163, + "rewards/margins_std": 2.824528455734253, + "rewards/rejected": -5.614095211029053, + "step": 1340 + }, + { + "epoch": 0.34, + "grad_norm": 1.390625, + "learning_rate": 1.6701745348325153e-06, + "logits/chosen": 0.3277135491371155, + "logits/rejected": 0.6626953482627869, + "logps/chosen": -425.80291748046875, + "logps/rejected": -829.0848388671875, + "loss": 0.2112, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.336625337600708, + "rewards/margins": 3.792543888092041, + "rewards/margins_max": 5.489853858947754, + "rewards/margins_min": 2.09523344039917, + "rewards/margins_std": 2.4003586769104004, + "rewards/rejected": -6.129168510437012, + "step": 1350 + }, + { + "epoch": 0.34, + "grad_norm": 3.53125, + "learning_rate": 1.6636283396574018e-06, + "logits/chosen": 0.19394713640213013, + "logits/rejected": 0.7184884548187256, + "logps/chosen": -458.70538330078125, + "logps/rejected": -763.5852661132812, + "loss": 0.2317, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -2.3784217834472656, + "rewards/margins": 3.29382061958313, + "rewards/margins_max": 4.978985786437988, + "rewards/margins_min": 1.6086561679840088, + "rewards/margins_std": 2.3831827640533447, + "rewards/rejected": -5.672242164611816, + "step": 1360 + }, + { + "epoch": 0.34, + "grad_norm": 0.8515625, + "learning_rate": 1.6570309259822453e-06, + "logits/chosen": 0.2924334406852722, + "logits/rejected": 0.6883147358894348, + "logps/chosen": -427.87127685546875, + "logps/rejected": -780.0853881835938, + "loss": 0.1848, + "rewards/accuracies": 0.9375, + "rewards/chosen": -2.2181785106658936, + "rewards/margins": 3.3977344036102295, + "rewards/margins_max": 5.132598876953125, + "rewards/margins_min": 1.662870168685913, + "rewards/margins_std": 2.4534687995910645, + "rewards/rejected": -5.615913391113281, + "step": 1370 + }, + { + "epoch": 0.35, + "grad_norm": 0.921875, + "learning_rate": 1.6503828029921002e-06, + "logits/chosen": 0.5088449716567993, + "logits/rejected": 0.8754922151565552, + "logps/chosen": -491.39111328125, + "logps/rejected": -809.8668212890625, + "loss": 0.1959, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.6552605628967285, + "rewards/margins": 3.4456450939178467, + "rewards/margins_max": 5.258717060089111, + "rewards/margins_min": 1.632573127746582, + "rewards/margins_std": 2.5640709400177, + "rewards/rejected": -6.100905418395996, + "step": 1380 + }, + { + "epoch": 0.35, + "grad_norm": 1.4375, + "learning_rate": 1.6436844837857416e-06, + "logits/chosen": 0.2816401422023773, + "logits/rejected": 0.5672039985656738, + "logps/chosen": -441.32769775390625, + "logps/rejected": -788.40576171875, + "loss": 0.1852, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.3083629608154297, + "rewards/margins": 3.4626381397247314, + "rewards/margins_max": 5.100627899169922, + "rewards/margins_min": 1.8246475458145142, + "rewards/margins_std": 2.316467761993408, + "rewards/rejected": -5.771000862121582, + "step": 1390 + }, + { + "epoch": 0.35, + "grad_norm": 1.1484375, + "learning_rate": 1.6369364853360619e-06, + "logits/chosen": 0.39103689789772034, + "logits/rejected": 0.6895217299461365, + "logps/chosen": -496.9537658691406, + "logps/rejected": -941.9112548828125, + "loss": 0.2644, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -2.8370614051818848, + "rewards/margins": 4.051840782165527, + "rewards/margins_max": 6.340351104736328, + "rewards/margins_min": 1.7633311748504639, + "rewards/margins_std": 3.2364420890808105, + "rewards/rejected": -6.8889031410217285, + "step": 1400 + }, + { + "epoch": 0.35, + "grad_norm": 1.78125, + "learning_rate": 1.630139328450173e-06, + "logits/chosen": 0.29026108980178833, + "logits/rejected": 0.6609446406364441, + "logps/chosen": -475.28985595703125, + "logps/rejected": -914.2362060546875, + "loss": 0.2142, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -2.75921368598938, + "rewards/margins": 4.294146537780762, + "rewards/margins_max": 6.507538795471191, + "rewards/margins_min": 2.0807533264160156, + "rewards/margins_std": 3.1302103996276855, + "rewards/rejected": -7.0533599853515625, + "step": 1410 + }, + { + "epoch": 0.36, + "grad_norm": 2.5625, + "learning_rate": 1.6232935377292098e-06, + "logits/chosen": 0.09786330163478851, + "logits/rejected": 0.5836361646652222, + "logps/chosen": -473.8296813964844, + "logps/rejected": -825.7180786132812, + "loss": 0.2472, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.630255699157715, + "rewards/margins": 3.6065337657928467, + "rewards/margins_max": 5.6653971672058105, + "rewards/margins_min": 1.547670602798462, + "rewards/margins_std": 2.911672830581665, + "rewards/rejected": -6.236789703369141, + "step": 1420 + }, + { + "epoch": 0.36, + "grad_norm": 1.5625, + "learning_rate": 1.6163996415278423e-06, + "logits/chosen": 0.42069101333618164, + "logits/rejected": 0.6874132752418518, + "logps/chosen": -422.8811950683594, + "logps/rejected": -810.5554809570312, + "loss": 0.1972, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -2.2699074745178223, + "rewards/margins": 3.803889036178589, + "rewards/margins_max": 5.551349639892578, + "rewards/margins_min": 2.0564279556274414, + "rewards/margins_std": 2.471282482147217, + "rewards/rejected": -6.073796272277832, + "step": 1430 + }, + { + "epoch": 0.36, + "grad_norm": 1.9140625, + "learning_rate": 1.6094581719134973e-06, + "logits/chosen": 0.23529568314552307, + "logits/rejected": 0.7506182789802551, + "logps/chosen": -488.1771545410156, + "logps/rejected": -945.7060546875, + "loss": 0.2158, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -2.3911712169647217, + "rewards/margins": 4.839472770690918, + "rewards/margins_max": 7.3261566162109375, + "rewards/margins_min": 2.3527889251708984, + "rewards/margins_std": 3.516702175140381, + "rewards/rejected": -7.230644226074219, + "step": 1440 + }, + { + "epoch": 0.36, + "grad_norm": 0.859375, + "learning_rate": 1.602469664625293e-06, + "logits/chosen": 0.31949982047080994, + "logits/rejected": 0.5621960759162903, + "logps/chosen": -475.3267517089844, + "logps/rejected": -1032.7867431640625, + "loss": 0.1546, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -2.7003226280212402, + "rewards/margins": 5.414786338806152, + "rewards/margins_max": 8.179932594299316, + "rewards/margins_min": 2.6496407985687256, + "rewards/margins_std": 3.9105067253112793, + "rewards/rejected": -8.115108489990234, + "step": 1450 + }, + { + "epoch": 0.37, + "grad_norm": 0.9609375, + "learning_rate": 1.5954346590326923e-06, + "logits/chosen": 0.22190162539482117, + "logits/rejected": 0.5015226602554321, + "logps/chosen": -465.91943359375, + "logps/rejected": -911.0699462890625, + "loss": 0.184, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -2.6864209175109863, + "rewards/margins": 4.381344318389893, + "rewards/margins_max": 6.3844804763793945, + "rewards/margins_min": 2.3782083988189697, + "rewards/margins_std": 2.832862377166748, + "rewards/rejected": -7.067765712738037, + "step": 1460 + }, + { + "epoch": 0.37, + "grad_norm": 2.140625, + "learning_rate": 1.5883536980938731e-06, + "logits/chosen": 0.37031736969947815, + "logits/rejected": 0.7043700218200684, + "logps/chosen": -489.9381408691406, + "logps/rejected": -984.1287841796875, + "loss": 0.1951, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.8816380500793457, + "rewards/margins": 4.811006546020508, + "rewards/margins_max": 7.4277215003967285, + "rewards/margins_min": 2.1942927837371826, + "rewards/margins_std": 3.7005927562713623, + "rewards/rejected": -7.6926445960998535, + "step": 1470 + }, + { + "epoch": 0.37, + "grad_norm": 2.0, + "learning_rate": 1.5812273283138238e-06, + "logits/chosen": 0.5258148908615112, + "logits/rejected": 0.7043691873550415, + "logps/chosen": -522.13134765625, + "logps/rejected": -1048.264892578125, + "loss": 0.2019, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.000220775604248, + "rewards/margins": 4.934444427490234, + "rewards/margins_max": 7.410369873046875, + "rewards/margins_min": 2.458519458770752, + "rewards/margins_std": 3.5014865398406982, + "rewards/rejected": -7.934664726257324, + "step": 1480 + }, + { + "epoch": 0.37, + "grad_norm": 0.859375, + "learning_rate": 1.5740560997021647e-06, + "logits/chosen": 0.4362607002258301, + "logits/rejected": 0.8238092660903931, + "logps/chosen": -533.056640625, + "logps/rejected": -982.03564453125, + "loss": 0.2047, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.158689022064209, + "rewards/margins": 4.55390739440918, + "rewards/margins_max": 6.570149898529053, + "rewards/margins_min": 2.5376639366149902, + "rewards/margins_std": 2.851398229598999, + "rewards/rejected": -7.7125959396362305, + "step": 1490 + }, + { + "epoch": 0.38, + "grad_norm": 0.79296875, + "learning_rate": 1.5668405657306973e-06, + "logits/chosen": 0.5168190598487854, + "logits/rejected": 0.8230735659599304, + "logps/chosen": -525.788818359375, + "logps/rejected": -1038.3076171875, + "loss": 0.1925, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.3270175457000732, + "rewards/margins": 4.954278469085693, + "rewards/margins_max": 7.399777889251709, + "rewards/margins_min": 2.5087785720825195, + "rewards/margins_std": 3.4584591388702393, + "rewards/rejected": -8.281296730041504, + "step": 1500 + }, + { + "epoch": 0.38, + "grad_norm": 0.90625, + "learning_rate": 1.559581283290689e-06, + "logits/chosen": 0.3661649823188782, + "logits/rejected": 0.813243567943573, + "logps/chosen": -504.9390563964844, + "logps/rejected": -1073.157470703125, + "loss": 0.2528, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -2.903505325317383, + "rewards/margins": 5.695023536682129, + "rewards/margins_max": 8.787598609924316, + "rewards/margins_min": 2.6024482250213623, + "rewards/margins_std": 4.373561859130859, + "rewards/rejected": -8.598528861999512, + "step": 1510 + }, + { + "epoch": 0.38, + "grad_norm": 1.171875, + "learning_rate": 1.5522788126498915e-06, + "logits/chosen": 0.28599125146865845, + "logits/rejected": 0.6875888109207153, + "logps/chosen": -599.96484375, + "logps/rejected": -924.0895385742188, + "loss": 0.3888, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.4606220722198486, + "rewards/margins": 3.549129009246826, + "rewards/margins_max": 6.038578033447266, + "rewards/margins_min": 1.0596802234649658, + "rewards/margins_std": 3.5206127166748047, + "rewards/rejected": -7.0097503662109375, + "step": 1520 + }, + { + "epoch": 0.38, + "grad_norm": 2.21875, + "learning_rate": 1.544933717409301e-06, + "logits/chosen": 0.3157169818878174, + "logits/rejected": 0.866075336933136, + "logps/chosen": -495.86260986328125, + "logps/rejected": -990.1038208007812, + "loss": 0.1955, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -2.8637332916259766, + "rewards/margins": 4.898533344268799, + "rewards/margins_max": 7.39690637588501, + "rewards/margins_min": 2.4001593589782715, + "rewards/margins_std": 3.533233642578125, + "rewards/rejected": -7.762265682220459, + "step": 1530 + }, + { + "epoch": 0.39, + "grad_norm": 1.9296875, + "learning_rate": 1.537546564459657e-06, + "logits/chosen": 0.3015773594379425, + "logits/rejected": 0.8017538785934448, + "logps/chosen": -490.8206481933594, + "logps/rejected": -873.3521728515625, + "loss": 0.2619, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -2.775757074356079, + "rewards/margins": 4.02579402923584, + "rewards/margins_max": 6.456332206726074, + "rewards/margins_min": 1.5952569246292114, + "rewards/margins_std": 3.4372992515563965, + "rewards/rejected": -6.80155086517334, + "step": 1540 + }, + { + "epoch": 0.39, + "grad_norm": 1.984375, + "learning_rate": 1.5301179239376935e-06, + "logits/chosen": 0.19896200299263, + "logits/rejected": 0.48105502128601074, + "logps/chosen": -472.67767333984375, + "logps/rejected": -867.4581909179688, + "loss": 0.2304, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -2.7534565925598145, + "rewards/margins": 3.8812403678894043, + "rewards/margins_max": 5.89428186416626, + "rewards/margins_min": 1.8681997060775757, + "rewards/margins_std": 2.846869707107544, + "rewards/rejected": -6.634696960449219, + "step": 1550 + }, + { + "epoch": 0.39, + "grad_norm": 2.234375, + "learning_rate": 1.5226483691821335e-06, + "logits/chosen": 0.43792515993118286, + "logits/rejected": 0.809437096118927, + "logps/chosen": -493.06121826171875, + "logps/rejected": -883.27392578125, + "loss": 0.2847, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -2.7731716632843018, + "rewards/margins": 3.9591903686523438, + "rewards/margins_max": 5.8009843826293945, + "rewards/margins_min": 2.117396116256714, + "rewards/margins_std": 2.6046900749206543, + "rewards/rejected": -6.732362270355225, + "step": 1560 + }, + { + "epoch": 0.4, + "grad_norm": 0.859375, + "learning_rate": 1.5151384766894394e-06, + "logits/chosen": 0.25252875685691833, + "logits/rejected": 0.705254852771759, + "logps/chosen": -461.9195861816406, + "logps/rejected": -954.2716674804688, + "loss": 0.1947, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -2.399007797241211, + "rewards/margins": 4.97990608215332, + "rewards/margins_max": 7.502171516418457, + "rewards/margins_min": 2.4576408863067627, + "rewards/margins_std": 3.567021608352661, + "rewards/rejected": -7.378913879394531, + "step": 1570 + }, + { + "epoch": 0.4, + "grad_norm": 2.984375, + "learning_rate": 1.5075888260693213e-06, + "logits/chosen": 0.20744235813617706, + "logits/rejected": 0.490752637386322, + "logps/chosen": -477.570068359375, + "logps/rejected": -938.7899169921875, + "loss": 0.1527, + "rewards/accuracies": 0.9375, + "rewards/chosen": -2.9296581745147705, + "rewards/margins": 4.446514129638672, + "rewards/margins_max": 6.75622034072876, + "rewards/margins_min": 2.136807441711426, + "rewards/margins_std": 3.2664177417755127, + "rewards/rejected": -7.376172065734863, + "step": 1580 + }, + { + "epoch": 0.4, + "grad_norm": 0.66015625, + "learning_rate": 1.5e-06, + "logits/chosen": 0.25578054785728455, + "logits/rejected": 0.7139034867286682, + "logps/chosen": -460.7802734375, + "logps/rejected": -875.15283203125, + "loss": 0.1833, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.5101754665374756, + "rewards/margins": 4.372230052947998, + "rewards/margins_max": 6.4557671546936035, + "rewards/margins_min": 2.2886929512023926, + "rewards/margins_std": 2.9465668201446533, + "rewards/rejected": -6.882405757904053, + "step": 1590 + }, + { + "epoch": 0.4, + "grad_norm": 0.9609375, + "learning_rate": 1.4923725841832382e-06, + "logits/chosen": 0.2641240358352661, + "logits/rejected": 0.716410756111145, + "logps/chosen": -516.456298828125, + "logps/rejected": -952.1373901367188, + "loss": 0.2241, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -2.865016460418701, + "rewards/margins": 4.353453636169434, + "rewards/margins_max": 6.641819953918457, + "rewards/margins_min": 2.0650863647460938, + "rewards/margins_std": 3.236238956451416, + "rewards/rejected": -7.218469142913818, + "step": 1600 + }, + { + "epoch": 0.41, + "grad_norm": 1.7265625, + "learning_rate": 1.4847071672991365e-06, + "logits/chosen": 0.38563448190689087, + "logits/rejected": 0.6463780999183655, + "logps/chosen": -488.828125, + "logps/rejected": -1118.4166259765625, + "loss": 0.1518, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -2.9519083499908447, + "rewards/margins": 5.86095666885376, + "rewards/margins_max": 7.7536725997924805, + "rewards/margins_min": 3.9682400226593018, + "rewards/margins_std": 2.6767053604125977, + "rewards/rejected": -8.812864303588867, + "step": 1610 + }, + { + "epoch": 0.41, + "grad_norm": 1.4765625, + "learning_rate": 1.4770043409606979e-06, + "logits/chosen": 0.47096341848373413, + "logits/rejected": 0.6747244596481323, + "logps/chosen": -484.9869689941406, + "logps/rejected": -1008.4423828125, + "loss": 0.1929, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -2.9664974212646484, + "rewards/margins": 5.106780529022217, + "rewards/margins_max": 7.504878997802734, + "rewards/margins_min": 2.7086825370788574, + "rewards/margins_std": 3.391422748565674, + "rewards/rejected": -8.073277473449707, + "step": 1620 + }, + { + "epoch": 0.41, + "grad_norm": 0.98828125, + "learning_rate": 1.4692646996681678e-06, + "logits/chosen": 0.47422710061073303, + "logits/rejected": 0.685745358467102, + "logps/chosen": -458.32696533203125, + "logps/rejected": -1029.868408203125, + "loss": 0.1466, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6799213886260986, + "rewards/margins": 5.579705715179443, + "rewards/margins_max": 8.32711410522461, + "rewards/margins_min": 2.8322973251342773, + "rewards/margins_std": 3.885422945022583, + "rewards/rejected": -8.259626388549805, + "step": 1630 + }, + { + "epoch": 0.41, + "grad_norm": 1.3359375, + "learning_rate": 1.4614888407631518e-06, + "logits/chosen": 0.27054479718208313, + "logits/rejected": 0.8626736402511597, + "logps/chosen": -516.4133911132812, + "logps/rejected": -979.1773681640625, + "loss": 0.1787, + "rewards/accuracies": 0.9375, + "rewards/chosen": -2.8786580562591553, + "rewards/margins": 4.838742733001709, + "rewards/margins_max": 7.334200382232666, + "rewards/margins_min": 2.3432841300964355, + "rewards/margins_std": 3.5291106700897217, + "rewards/rejected": -7.717400550842285, + "step": 1640 + }, + { + "epoch": 0.42, + "grad_norm": 1.265625, + "learning_rate": 1.4536773643825129e-06, + "logits/chosen": 0.35027459263801575, + "logits/rejected": 0.6745755076408386, + "logps/chosen": -515.3684692382812, + "logps/rejected": -903.1793823242188, + "loss": 0.1713, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -2.938141107559204, + "rewards/margins": 3.8568992614746094, + "rewards/margins_max": 5.576117038726807, + "rewards/margins_min": 2.137681484222412, + "rewards/margins_std": 2.4313409328460693, + "rewards/rejected": -6.795041084289551, + "step": 1650 + }, + { + "epoch": 0.42, + "grad_norm": 0.90234375, + "learning_rate": 1.4458308734120524e-06, + "logits/chosen": 0.308353990316391, + "logits/rejected": 0.7953172922134399, + "logps/chosen": -468.75897216796875, + "logps/rejected": -853.1901245117188, + "loss": 0.2044, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -2.7174980640411377, + "rewards/margins": 3.790794849395752, + "rewards/margins_max": 5.873719215393066, + "rewards/margins_min": 1.7078701257705688, + "rewards/margins_std": 2.9457004070281982, + "rewards/rejected": -6.508293151855469, + "step": 1660 + }, + { + "epoch": 0.42, + "grad_norm": 0.58203125, + "learning_rate": 1.4379499734399796e-06, + "logits/chosen": 0.3180529773235321, + "logits/rejected": 0.6785213351249695, + "logps/chosen": -446.6173400878906, + "logps/rejected": -992.4054565429688, + "loss": 0.1215, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -2.490750789642334, + "rewards/margins": 5.268213748931885, + "rewards/margins_max": 7.856361389160156, + "rewards/margins_min": 2.680065631866455, + "rewards/margins_std": 3.660193681716919, + "rewards/rejected": -7.758963584899902, + "step": 1670 + }, + { + "epoch": 0.42, + "grad_norm": 3.234375, + "learning_rate": 1.4300352727101737e-06, + "logits/chosen": 0.39259445667266846, + "logits/rejected": 0.7314284443855286, + "logps/chosen": -519.9503173828125, + "logps/rejected": -1025.9241943359375, + "loss": 0.1885, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.1284713745117188, + "rewards/margins": 4.959225654602051, + "rewards/margins_max": 7.500026702880859, + "rewards/margins_min": 2.418423891067505, + "rewards/margins_std": 3.593236207962036, + "rewards/rejected": -8.08769702911377, + "step": 1680 + }, + { + "epoch": 0.43, + "grad_norm": 3.90625, + "learning_rate": 1.4220873820752395e-06, + "logits/chosen": 0.3535314202308655, + "logits/rejected": 0.8503448367118835, + "logps/chosen": -514.1683349609375, + "logps/rejected": -1090.859619140625, + "loss": 0.233, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.791045665740967, + "rewards/margins": 5.699390411376953, + "rewards/margins_max": 9.158174514770508, + "rewards/margins_min": 2.240605115890503, + "rewards/margins_std": 4.891460418701172, + "rewards/rejected": -8.490435600280762, + "step": 1690 + }, + { + "epoch": 0.43, + "grad_norm": 1.25, + "learning_rate": 1.414106914949361e-06, + "logits/chosen": 0.2840239107608795, + "logits/rejected": 0.7125069499015808, + "logps/chosen": -540.17822265625, + "logps/rejected": -1097.022216796875, + "loss": 0.228, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.084819793701172, + "rewards/margins": 5.610352516174316, + "rewards/margins_max": 9.078147888183594, + "rewards/margins_min": 2.1425588130950928, + "rewards/margins_std": 4.904201984405518, + "rewards/rejected": -8.695172309875488, + "step": 1700 + }, + { + "epoch": 0.43, + "grad_norm": 5.59375, + "learning_rate": 1.4060944872609605e-06, + "logits/chosen": 0.32603517174720764, + "logits/rejected": 0.8470407724380493, + "logps/chosen": -518.9697875976562, + "logps/rejected": -984.0426635742188, + "loss": 0.2196, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.9778990745544434, + "rewards/margins": 4.964657783508301, + "rewards/margins_max": 7.731484889984131, + "rewards/margins_min": 2.1978302001953125, + "rewards/margins_std": 3.912884473800659, + "rewards/rejected": -7.942556858062744, + "step": 1710 + }, + { + "epoch": 0.43, + "grad_norm": 0.88671875, + "learning_rate": 1.3980507174051592e-06, + "logits/chosen": 0.2727965712547302, + "logits/rejected": 0.8068740963935852, + "logps/chosen": -499.65667724609375, + "logps/rejected": -935.8679809570312, + "loss": 0.1572, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -2.829961061477661, + "rewards/margins": 4.54538631439209, + "rewards/margins_max": 6.558957099914551, + "rewards/margins_min": 2.531816005706787, + "rewards/margins_std": 2.847618579864502, + "rewards/rejected": -7.375347137451172, + "step": 1720 + }, + { + "epoch": 0.44, + "grad_norm": 3.578125, + "learning_rate": 1.3899762261960517e-06, + "logits/chosen": 0.456474244594574, + "logits/rejected": 0.7277069091796875, + "logps/chosen": -548.7547607421875, + "logps/rejected": -1049.939208984375, + "loss": 0.1818, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.2771522998809814, + "rewards/margins": 4.879497051239014, + "rewards/margins_max": 7.429345607757568, + "rewards/margins_min": 2.329648017883301, + "rewards/margins_std": 3.6060307025909424, + "rewards/rejected": -8.156648635864258, + "step": 1730 + }, + { + "epoch": 0.44, + "grad_norm": 0.71484375, + "learning_rate": 1.381871636818791e-06, + "logits/chosen": 0.24610686302185059, + "logits/rejected": 0.7779833078384399, + "logps/chosen": -458.67041015625, + "logps/rejected": -814.8113403320312, + "loss": 0.234, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.4150824546813965, + "rewards/margins": 3.6235358715057373, + "rewards/margins_max": 5.524462699890137, + "rewards/margins_min": 1.7226091623306274, + "rewards/margins_std": 2.6883163452148438, + "rewards/rejected": -6.0386176109313965, + "step": 1740 + }, + { + "epoch": 0.44, + "grad_norm": 1.0234375, + "learning_rate": 1.3737375747814914e-06, + "logits/chosen": 0.33012324571609497, + "logits/rejected": 0.7673249244689941, + "logps/chosen": -503.94842529296875, + "logps/rejected": -943.15380859375, + "loss": 0.2264, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -3.0407915115356445, + "rewards/margins": 4.353100776672363, + "rewards/margins_max": 6.474888801574707, + "rewards/margins_min": 2.2313132286071777, + "rewards/margins_std": 3.0006611347198486, + "rewards/rejected": -7.39389181137085, + "step": 1750 + }, + { + "epoch": 0.44, + "grad_norm": 0.6328125, + "learning_rate": 1.3655746678669524e-06, + "logits/chosen": 0.44528093934059143, + "logits/rejected": 0.9088476300239563, + "logps/chosen": -535.5958251953125, + "logps/rejected": -997.3358154296875, + "loss": 0.1982, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.2844367027282715, + "rewards/margins": 4.700140476226807, + "rewards/margins_max": 6.8690080642700195, + "rewards/margins_min": 2.53127384185791, + "rewards/margins_std": 3.0672411918640137, + "rewards/rejected": -7.984577178955078, + "step": 1760 + }, + { + "epoch": 0.45, + "grad_norm": 3.5, + "learning_rate": 1.3573835460842062e-06, + "logits/chosen": 0.30346041917800903, + "logits/rejected": 0.7271562814712524, + "logps/chosen": -472.663330078125, + "logps/rejected": -1005.3018798828125, + "loss": 0.1877, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -2.7229371070861816, + "rewards/margins": 5.189479351043701, + "rewards/margins_max": 8.325895309448242, + "rewards/margins_min": 2.0530643463134766, + "rewards/margins_std": 4.435561180114746, + "rewards/rejected": -7.912416934967041, + "step": 1770 + }, + { + "epoch": 0.45, + "grad_norm": 13.8125, + "learning_rate": 1.3491648416198947e-06, + "logits/chosen": 0.3526113033294678, + "logits/rejected": 0.6005972027778625, + "logps/chosen": -493.38714599609375, + "logps/rejected": -1039.467041015625, + "loss": 0.1729, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9820990562438965, + "rewards/margins": 5.418988227844238, + "rewards/margins_max": 8.14158821105957, + "rewards/margins_min": 2.6963882446289062, + "rewards/margins_std": 3.850337505340576, + "rewards/rejected": -8.401086807250977, + "step": 1780 + }, + { + "epoch": 0.45, + "grad_norm": 3.421875, + "learning_rate": 1.340919188789477e-06, + "logits/chosen": 0.4165642261505127, + "logits/rejected": 0.8380780220031738, + "logps/chosen": -529.6197509765625, + "logps/rejected": -929.248046875, + "loss": 0.1725, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.2881054878234863, + "rewards/margins": 4.095848083496094, + "rewards/margins_max": 6.220312118530273, + "rewards/margins_min": 1.971383810043335, + "rewards/margins_std": 3.004446029663086, + "rewards/rejected": -7.383954048156738, + "step": 1790 + }, + { + "epoch": 0.45, + "grad_norm": 1.34375, + "learning_rate": 1.3326472239882734e-06, + "logits/chosen": 0.43543314933776855, + "logits/rejected": 0.9925807118415833, + "logps/chosen": -525.8448486328125, + "logps/rejected": -1062.7294921875, + "loss": 0.191, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.225259780883789, + "rewards/margins": 5.37452507019043, + "rewards/margins_max": 8.14264965057373, + "rewards/margins_min": 2.6064014434814453, + "rewards/margins_std": 3.9147191047668457, + "rewards/rejected": -8.599784851074219, + "step": 1800 + }, + { + "epoch": 0.46, + "grad_norm": 0.75390625, + "learning_rate": 1.3243495856423489e-06, + "logits/chosen": 0.36167892813682556, + "logits/rejected": 0.8087556958198547, + "logps/chosen": -553.3516845703125, + "logps/rejected": -1192.406005859375, + "loss": 0.1608, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.324446439743042, + "rewards/margins": 6.299948692321777, + "rewards/margins_max": 9.06318187713623, + "rewards/margins_min": 3.5367157459259033, + "rewards/margins_std": 3.907802104949951, + "rewards/rejected": -9.624395370483398, + "step": 1810 + }, + { + "epoch": 0.46, + "grad_norm": 2.03125, + "learning_rate": 1.3160269141592396e-06, + "logits/chosen": 0.39735549688339233, + "logits/rejected": 0.7091315388679504, + "logps/chosen": -510.31158447265625, + "logps/rejected": -1065.9410400390625, + "loss": 0.1815, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -2.9605770111083984, + "rewards/margins": 5.331042289733887, + "rewards/margins_max": 8.254611015319824, + "rewards/margins_min": 2.407473087310791, + "rewards/margins_std": 4.134551525115967, + "rewards/rejected": -8.291619300842285, + "step": 1820 + }, + { + "epoch": 0.46, + "grad_norm": 1.1328125, + "learning_rate": 1.3076798518785272e-06, + "logits/chosen": 0.4008331298828125, + "logits/rejected": 0.8075596690177917, + "logps/chosen": -503.36688232421875, + "logps/rejected": -1024.9261474609375, + "loss": 0.1532, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.090235948562622, + "rewards/margins": 5.229609966278076, + "rewards/margins_max": 8.532186508178711, + "rewards/margins_min": 1.9270336627960205, + "rewards/margins_std": 4.670548439025879, + "rewards/rejected": -8.319845199584961, + "step": 1830 + }, + { + "epoch": 0.46, + "grad_norm": 0.84375, + "learning_rate": 1.2993090430222618e-06, + "logits/chosen": 0.4138672351837158, + "logits/rejected": 0.7416144013404846, + "logps/chosen": -577.0935668945312, + "logps/rejected": -1252.086181640625, + "loss": 0.2346, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.5649490356445312, + "rewards/margins": 6.544081211090088, + "rewards/margins_max": 10.0579833984375, + "rewards/margins_min": 3.030177593231201, + "rewards/margins_std": 4.969409465789795, + "rewards/rejected": -10.109029769897461, + "step": 1840 + }, + { + "epoch": 0.47, + "grad_norm": 0.90625, + "learning_rate": 1.2909151336452427e-06, + "logits/chosen": 0.3605644702911377, + "logits/rejected": 0.9392998814582825, + "logps/chosen": -585.3292236328125, + "logps/rejected": -1179.452392578125, + "loss": 0.1969, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.3506827354431152, + "rewards/margins": 6.266888618469238, + "rewards/margins_max": 10.083221435546875, + "rewards/margins_min": 2.4505550861358643, + "rewards/margins_std": 5.3971099853515625, + "rewards/rejected": -9.617570877075195, + "step": 1850 + }, + { + "epoch": 0.47, + "grad_norm": 3.59375, + "learning_rate": 1.2824987715851559e-06, + "logits/chosen": 0.371305912733078, + "logits/rejected": 0.8649128675460815, + "logps/chosen": -520.8292846679688, + "logps/rejected": -995.4801025390625, + "loss": 0.1632, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.0674500465393066, + "rewards/margins": 4.9427289962768555, + "rewards/margins_max": 7.798059940338135, + "rewards/margins_min": 2.0873985290527344, + "rewards/margins_std": 4.038046836853027, + "rewards/rejected": -8.01017951965332, + "step": 1860 + }, + { + "epoch": 0.47, + "grad_norm": 3.03125, + "learning_rate": 1.2740606064125737e-06, + "logits/chosen": 0.24925783276557922, + "logits/rejected": 0.7453621029853821, + "logps/chosen": -577.8242797851562, + "logps/rejected": -1344.7510986328125, + "loss": 0.1198, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.645860195159912, + "rewards/margins": 7.597962856292725, + "rewards/margins_max": 11.720430374145508, + "rewards/margins_min": 3.475494384765625, + "rewards/margins_std": 5.830049991607666, + "rewards/rejected": -11.243823051452637, + "step": 1870 + }, + { + "epoch": 0.47, + "grad_norm": 0.81640625, + "learning_rate": 1.265601289380822e-06, + "logits/chosen": 0.44502177834510803, + "logits/rejected": 0.7797173261642456, + "logps/chosen": -554.5269775390625, + "logps/rejected": -1116.17919921875, + "loss": 0.1529, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.5217156410217285, + "rewards/margins": 5.7031121253967285, + "rewards/margins_max": 8.348360061645508, + "rewards/margins_min": 3.057863712310791, + "rewards/margins_std": 3.7409462928771973, + "rewards/rejected": -9.224828720092773, + "step": 1880 + }, + { + "epoch": 0.48, + "grad_norm": 1.21875, + "learning_rate": 1.257121473375716e-06, + "logits/chosen": 0.41753944754600525, + "logits/rejected": 0.9086526036262512, + "logps/chosen": -539.7106323242188, + "logps/rejected": -1138.9730224609375, + "loss": 0.2151, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.3240904808044434, + "rewards/margins": 5.920177459716797, + "rewards/margins_max": 9.132664680480957, + "rewards/margins_min": 2.7076900005340576, + "rewards/margins_std": 4.543143272399902, + "rewards/rejected": -9.244268417358398, + "step": 1890 + }, + { + "epoch": 0.48, + "grad_norm": 0.82421875, + "learning_rate": 1.248621812865172e-06, + "logits/chosen": 0.5087807774543762, + "logits/rejected": 0.8646427989006042, + "logps/chosen": -657.2662353515625, + "logps/rejected": -1365.0345458984375, + "loss": 0.2192, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -4.301126480102539, + "rewards/margins": 7.023177146911621, + "rewards/margins_max": 10.254752159118652, + "rewards/margins_min": 3.7916018962860107, + "rewards/margins_std": 4.570137977600098, + "rewards/rejected": -11.324304580688477, + "step": 1900 + }, + { + "epoch": 0.48, + "grad_norm": 0.75390625, + "learning_rate": 1.240102963848695e-06, + "logits/chosen": 0.4806975722312927, + "logits/rejected": 0.7998193502426147, + "logps/chosen": -539.7196044921875, + "logps/rejected": -1049.60205078125, + "loss": 0.2559, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.533198595046997, + "rewards/margins": 5.0815324783325195, + "rewards/margins_max": 8.007471084594727, + "rewards/margins_min": 2.1555933952331543, + "rewards/margins_std": 4.137903213500977, + "rewards/rejected": -8.614730834960938, + "step": 1910 + }, + { + "epoch": 0.48, + "grad_norm": 3.328125, + "learning_rate": 1.2315655838067487e-06, + "logits/chosen": 0.4073428511619568, + "logits/rejected": 0.8953601121902466, + "logps/chosen": -563.6649169921875, + "logps/rejected": -1113.478515625, + "loss": 0.2925, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -3.5817649364471436, + "rewards/margins": 5.543329238891602, + "rewards/margins_max": 8.386140823364258, + "rewards/margins_min": 2.700516700744629, + "rewards/margins_std": 4.02034330368042, + "rewards/rejected": -9.125093460083008, + "step": 1920 + }, + { + "epoch": 0.49, + "grad_norm": 1.078125, + "learning_rate": 1.2230103316500127e-06, + "logits/chosen": 0.4126254916191101, + "logits/rejected": 0.8263294100761414, + "logps/chosen": -555.5484008789062, + "logps/rejected": -1175.513671875, + "loss": 0.1711, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.3831818103790283, + "rewards/margins": 6.290456295013428, + "rewards/margins_max": 9.095232009887695, + "rewards/margins_min": 3.4856808185577393, + "rewards/margins_std": 3.9665520191192627, + "rewards/rejected": -9.673639297485352, + "step": 1930 + }, + { + "epoch": 0.49, + "grad_norm": 18.625, + "learning_rate": 1.2144378676685263e-06, + "logits/chosen": 0.40424099564552307, + "logits/rejected": 0.7649755477905273, + "logps/chosen": -560.0888061523438, + "logps/rejected": -1269.2265625, + "loss": 0.2382, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.489391326904297, + "rewards/margins": 7.084428310394287, + "rewards/margins_max": 10.971991539001465, + "rewards/margins_min": 3.196864604949951, + "rewards/margins_std": 5.497844696044922, + "rewards/rejected": -10.573820114135742, + "step": 1940 + }, + { + "epoch": 0.49, + "grad_norm": 2.25, + "learning_rate": 1.2058488534807302e-06, + "logits/chosen": 0.4380221366882324, + "logits/rejected": 0.858269989490509, + "logps/chosen": -622.8287353515625, + "logps/rejected": -1164.4425048828125, + "loss": 0.1878, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.897209882736206, + "rewards/margins": 5.580199241638184, + "rewards/margins_max": 8.42861270904541, + "rewards/margins_min": 2.731786012649536, + "rewards/margins_std": 4.028264045715332, + "rewards/rejected": -9.477409362792969, + "step": 1950 + }, + { + "epoch": 0.49, + "grad_norm": 1.3359375, + "learning_rate": 1.197243951982401e-06, + "logits/chosen": 0.3885877728462219, + "logits/rejected": 0.9749298095703125, + "logps/chosen": -560.0235595703125, + "logps/rejected": -1132.405029296875, + "loss": 0.1304, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.391322374343872, + "rewards/margins": 5.737778186798096, + "rewards/margins_max": 8.883737564086914, + "rewards/margins_min": 2.591817617416382, + "rewards/margins_std": 4.449059009552002, + "rewards/rejected": -9.12909984588623, + "step": 1960 + }, + { + "epoch": 0.5, + "grad_norm": 2.234375, + "learning_rate": 1.1886238272954896e-06, + "logits/chosen": 0.45476874709129333, + "logits/rejected": 0.8959047198295593, + "logps/chosen": -620.4302368164062, + "logps/rejected": -1258.3804931640625, + "loss": 0.1997, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.8833556175231934, + "rewards/margins": 6.6274213790893555, + "rewards/margins_max": 10.6688232421875, + "rewards/margins_min": 2.58601975440979, + "rewards/margins_std": 5.7154059410095215, + "rewards/rejected": -10.510777473449707, + "step": 1970 + }, + { + "epoch": 0.5, + "grad_norm": 0.79296875, + "learning_rate": 1.1799891447168647e-06, + "logits/chosen": 0.5257728695869446, + "logits/rejected": 0.8648616671562195, + "logps/chosen": -681.9531860351562, + "logps/rejected": -1429.496826171875, + "loss": 0.1605, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -4.491623878479004, + "rewards/margins": 7.457159519195557, + "rewards/margins_max": 10.727521896362305, + "rewards/margins_min": 4.186797142028809, + "rewards/margins_std": 4.624989986419678, + "rewards/rejected": -11.948783874511719, + "step": 1980 + }, + { + "epoch": 0.5, + "grad_norm": 1.3125, + "learning_rate": 1.1713405706669666e-06, + "logits/chosen": 0.39060765504837036, + "logits/rejected": 0.8693227767944336, + "logps/chosen": -638.9912109375, + "logps/rejected": -1159.7806396484375, + "loss": 0.3408, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -3.9858803749084473, + "rewards/margins": 5.464795112609863, + "rewards/margins_max": 9.650136947631836, + "rewards/margins_min": 1.2794535160064697, + "rewards/margins_std": 5.918967247009277, + "rewards/rejected": -9.450675964355469, + "step": 1990 + }, + { + "epoch": 0.5, + "grad_norm": 2.625, + "learning_rate": 1.162678772638372e-06, + "logits/chosen": 0.3979375958442688, + "logits/rejected": 0.8895372152328491, + "logps/chosen": -626.7593994140625, + "logps/rejected": -1275.314697265625, + "loss": 0.2618, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.9609382152557373, + "rewards/margins": 6.693270683288574, + "rewards/margins_max": 10.132143020629883, + "rewards/margins_min": 3.2543983459472656, + "rewards/margins_std": 4.86329984664917, + "rewards/rejected": -10.654208183288574, + "step": 2000 + }, + { + "epoch": 0.51, + "grad_norm": 1.125, + "learning_rate": 1.1540044191442776e-06, + "logits/chosen": 0.43077486753463745, + "logits/rejected": 0.9984035491943359, + "logps/chosen": -551.9407958984375, + "logps/rejected": -1064.71728515625, + "loss": 0.1814, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.184940814971924, + "rewards/margins": 5.308284759521484, + "rewards/margins_max": 8.101531982421875, + "rewards/margins_min": 2.5150370597839355, + "rewards/margins_std": 3.9502487182617188, + "rewards/rejected": -8.493226051330566, + "step": 2010 + }, + { + "epoch": 0.51, + "grad_norm": 3.375, + "learning_rate": 1.145318179666904e-06, + "logits/chosen": 0.3742697238922119, + "logits/rejected": 0.9613549113273621, + "logps/chosen": -551.4102172851562, + "logps/rejected": -1192.2840576171875, + "loss": 0.1466, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.417285203933716, + "rewards/margins": 6.326827049255371, + "rewards/margins_max": 9.474775314331055, + "rewards/margins_min": 3.1788787841796875, + "rewards/margins_std": 4.451870918273926, + "rewards/rejected": -9.744112014770508, + "step": 2020 + }, + { + "epoch": 0.51, + "grad_norm": 1.1953125, + "learning_rate": 1.1366207246058268e-06, + "logits/chosen": 0.580926775932312, + "logits/rejected": 1.0238367319107056, + "logps/chosen": -597.9246826171875, + "logps/rejected": -1226.0614013671875, + "loss": 0.1785, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.605300188064575, + "rewards/margins": 6.226903915405273, + "rewards/margins_max": 9.348957061767578, + "rewards/margins_min": 3.1048502922058105, + "rewards/margins_std": 4.415249824523926, + "rewards/rejected": -9.832204818725586, + "step": 2030 + }, + { + "epoch": 0.51, + "grad_norm": 5.21875, + "learning_rate": 1.1279127252262344e-06, + "logits/chosen": 0.36743634939193726, + "logits/rejected": 0.7529619336128235, + "logps/chosen": -611.38037109375, + "logps/rejected": -1260.2279052734375, + "loss": 0.1646, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.913753032684326, + "rewards/margins": 6.523721218109131, + "rewards/margins_max": 10.163423538208008, + "rewards/margins_min": 2.884019136428833, + "rewards/margins_std": 5.147315502166748, + "rewards/rejected": -10.43747329711914, + "step": 2040 + }, + { + "epoch": 0.52, + "grad_norm": 1.34375, + "learning_rate": 1.11919485360712e-06, + "logits/chosen": 0.4193040728569031, + "logits/rejected": 0.7446034550666809, + "logps/chosen": -641.4575805664062, + "logps/rejected": -1331.7291259765625, + "loss": 0.1542, + "rewards/accuracies": 0.9375, + "rewards/chosen": -4.2941741943359375, + "rewards/margins": 6.686350345611572, + "rewards/margins_max": 10.751882553100586, + "rewards/margins_min": 2.620816707611084, + "rewards/margins_std": 5.7495317459106445, + "rewards/rejected": -10.980524063110352, + "step": 2050 + }, + { + "epoch": 0.52, + "grad_norm": 1.6171875, + "learning_rate": 1.110467782589412e-06, + "logits/chosen": 0.37651658058166504, + "logits/rejected": 0.9150172472000122, + "logps/chosen": -641.582275390625, + "logps/rejected": -1308.950927734375, + "loss": 0.2451, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -3.919959306716919, + "rewards/margins": 6.766678810119629, + "rewards/margins_max": 10.914981842041016, + "rewards/margins_min": 2.618375778198242, + "rewards/margins_std": 5.866586208343506, + "rewards/rejected": -10.686636924743652, + "step": 2060 + }, + { + "epoch": 0.52, + "grad_norm": 0.69140625, + "learning_rate": 1.101732185724043e-06, + "logits/chosen": 0.602503776550293, + "logits/rejected": 0.9572169184684753, + "logps/chosen": -564.55126953125, + "logps/rejected": -1175.539794921875, + "loss": 0.161, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.7203216552734375, + "rewards/margins": 5.9916558265686035, + "rewards/margins_max": 9.613600730895996, + "rewards/margins_min": 2.3697094917297363, + "rewards/margins_std": 5.122204780578613, + "rewards/rejected": -9.711977005004883, + "step": 2070 + }, + { + "epoch": 0.52, + "grad_norm": 0.6953125, + "learning_rate": 1.0929887372199673e-06, + "logits/chosen": 0.4709581434726715, + "logits/rejected": 0.9506624937057495, + "logps/chosen": -559.4562377929688, + "logps/rejected": -1188.647216796875, + "loss": 0.1344, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.6829051971435547, + "rewards/margins": 6.341403007507324, + "rewards/margins_max": 9.901277542114258, + "rewards/margins_min": 2.7815279960632324, + "rewards/margins_std": 5.034422874450684, + "rewards/rejected": -10.024307250976562, + "step": 2080 + }, + { + "epoch": 0.53, + "grad_norm": 2.265625, + "learning_rate": 1.084238111892123e-06, + "logits/chosen": 0.5924087762832642, + "logits/rejected": 0.9477392435073853, + "logps/chosen": -567.0867309570312, + "logps/rejected": -1217.984130859375, + "loss": 0.1593, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.6363062858581543, + "rewards/margins": 6.4515509605407715, + "rewards/margins_max": 9.455270767211914, + "rewards/margins_min": 3.44783091545105, + "rewards/margins_std": 4.24790096282959, + "rewards/rejected": -10.087857246398926, + "step": 2090 + }, + { + "epoch": 0.53, + "grad_norm": 1.015625, + "learning_rate": 1.075480985109353e-06, + "logits/chosen": 0.4340541958808899, + "logits/rejected": 0.8853395581245422, + "logps/chosen": -648.7272338867188, + "logps/rejected": -1269.769775390625, + "loss": 0.1406, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -4.123702049255371, + "rewards/margins": 6.3718366622924805, + "rewards/margins_max": 9.116823196411133, + "rewards/margins_min": 3.6268508434295654, + "rewards/margins_std": 3.8819961547851562, + "rewards/rejected": -10.495538711547852, + "step": 2100 + }, + { + "epoch": 0.53, + "grad_norm": 0.74609375, + "learning_rate": 1.0667180327422796e-06, + "logits/chosen": 0.4427351951599121, + "logits/rejected": 0.8773029446601868, + "logps/chosen": -652.7124633789062, + "logps/rejected": -1093.98046875, + "loss": 0.2191, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -4.100010871887207, + "rewards/margins": 4.69572639465332, + "rewards/margins_max": 7.104989528656006, + "rewards/margins_min": 2.2864630222320557, + "rewards/margins_std": 3.4072136878967285, + "rewards/rejected": -8.795738220214844, + "step": 2110 + }, + { + "epoch": 0.53, + "grad_norm": 3.796875, + "learning_rate": 1.0579499311111394e-06, + "logits/chosen": 0.4106171727180481, + "logits/rejected": 0.8539530038833618, + "logps/chosen": -598.7738037109375, + "logps/rejected": -1391.320068359375, + "loss": 0.1609, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.6164703369140625, + "rewards/margins": 8.027566909790039, + "rewards/margins_max": 12.555456161499023, + "rewards/margins_min": 3.4996769428253174, + "rewards/margins_std": 6.403402805328369, + "rewards/rejected": -11.644036293029785, + "step": 2120 + }, + { + "epoch": 0.54, + "grad_norm": 0.77734375, + "learning_rate": 1.0491773569335877e-06, + "logits/chosen": 0.4420396685600281, + "logits/rejected": 0.9396398663520813, + "logps/chosen": -626.4326782226562, + "logps/rejected": -1140.4166259765625, + "loss": 0.2575, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -4.183171272277832, + "rewards/margins": 5.311387062072754, + "rewards/margins_max": 8.279863357543945, + "rewards/margins_min": 2.3429112434387207, + "rewards/margins_std": 4.19805908203125, + "rewards/rejected": -9.494558334350586, + "step": 2130 + }, + { + "epoch": 0.54, + "grad_norm": 1.703125, + "learning_rate": 1.0404009872724686e-06, + "logits/chosen": 0.3594892621040344, + "logits/rejected": 0.8964468240737915, + "logps/chosen": -567.0457763671875, + "logps/rejected": -1121.90234375, + "loss": 0.1394, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.354614734649658, + "rewards/margins": 5.570419788360596, + "rewards/margins_max": 8.419346809387207, + "rewards/margins_min": 2.7214925289154053, + "rewards/margins_std": 4.028990745544434, + "rewards/rejected": -8.925034523010254, + "step": 2140 + }, + { + "epoch": 0.54, + "grad_norm": 0.91796875, + "learning_rate": 1.0316214994835588e-06, + "logits/chosen": 0.355679452419281, + "logits/rejected": 0.9933696985244751, + "logps/chosen": -608.57177734375, + "logps/rejected": -1124.74609375, + "loss": 0.1533, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.8642611503601074, + "rewards/margins": 5.441197872161865, + "rewards/margins_max": 8.085689544677734, + "rewards/margins_min": 2.7967066764831543, + "rewards/margins_std": 3.739875316619873, + "rewards/rejected": -9.305459976196289, + "step": 2150 + }, + { + "epoch": 0.54, + "grad_norm": 0.6484375, + "learning_rate": 1.0228395711632915e-06, + "logits/chosen": 0.3872026205062866, + "logits/rejected": 0.8455197215080261, + "logps/chosen": -627.2762451171875, + "logps/rejected": -1316.978759765625, + "loss": 0.2178, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -4.086087703704834, + "rewards/margins": 6.8035407066345215, + "rewards/margins_max": 9.993437767028809, + "rewards/margins_min": 3.613642930984497, + "rewards/margins_std": 4.511196613311768, + "rewards/rejected": -10.889628410339355, + "step": 2160 + }, + { + "epoch": 0.55, + "grad_norm": 4.75, + "learning_rate": 1.0140558800964588e-06, + "logits/chosen": 0.3922487199306488, + "logits/rejected": 0.8369787335395813, + "logps/chosen": -600.4080810546875, + "logps/rejected": -1210.494384765625, + "loss": 0.2214, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.5966057777404785, + "rewards/margins": 6.296887397766113, + "rewards/margins_max": 9.430109024047852, + "rewards/margins_min": 3.1636674404144287, + "rewards/margins_std": 4.431042671203613, + "rewards/rejected": -9.89349365234375, + "step": 2170 + }, + { + "epoch": 0.55, + "grad_norm": 4.71875, + "learning_rate": 1.0052711042039e-06, + "logits/chosen": 0.510870635509491, + "logits/rejected": 0.8489816784858704, + "logps/chosen": -562.1312866210938, + "logps/rejected": -1294.7515869140625, + "loss": 0.1907, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -3.6715149879455566, + "rewards/margins": 7.00562047958374, + "rewards/margins_max": 11.010260581970215, + "rewards/margins_min": 3.0009806156158447, + "rewards/margins_std": 5.663416385650635, + "rewards/rejected": -10.677135467529297, + "step": 2180 + }, + { + "epoch": 0.55, + "grad_norm": 3.546875, + "learning_rate": 9.964859214901813e-07, + "logits/chosen": 0.3070334494113922, + "logits/rejected": 0.711986243724823, + "logps/chosen": -667.531005859375, + "logps/rejected": -1298.389892578125, + "loss": 0.2149, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -4.415340900421143, + "rewards/margins": 6.254446983337402, + "rewards/margins_max": 9.225793838500977, + "rewards/margins_min": 3.283099412918091, + "rewards/margins_std": 4.202120304107666, + "rewards/rejected": -10.66978931427002, + "step": 2190 + }, + { + "epoch": 0.55, + "grad_norm": 1.25, + "learning_rate": 9.87701009991267e-07, + "logits/chosen": 0.6090846657752991, + "logits/rejected": 1.093515396118164, + "logps/chosen": -604.7011108398438, + "logps/rejected": -1219.076416015625, + "loss": 0.1831, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.6504428386688232, + "rewards/margins": 6.507830619812012, + "rewards/margins_max": 9.95383358001709, + "rewards/margins_min": 3.0618269443511963, + "rewards/margins_std": 4.873384952545166, + "rewards/rejected": -10.158273696899414, + "step": 2200 + }, + { + "epoch": 0.56, + "grad_norm": 1.8203125, + "learning_rate": 9.789170477221891e-07, + "logits/chosen": 0.49116769433021545, + "logits/rejected": 0.964964747428894, + "logps/chosen": -525.8590087890625, + "logps/rejected": -1271.779296875, + "loss": 0.1133, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.2402710914611816, + "rewards/margins": 7.343132019042969, + "rewards/margins_max": 11.660395622253418, + "rewards/margins_min": 3.0258681774139404, + "rewards/margins_std": 6.105532646179199, + "rewards/rejected": -10.583402633666992, + "step": 2210 + }, + { + "epoch": 0.56, + "grad_norm": 1.5703125, + "learning_rate": 9.701347126247183e-07, + "logits/chosen": 0.3676094114780426, + "logits/rejected": 0.7860090732574463, + "logps/chosen": -562.3323364257812, + "logps/rejected": -1244.939453125, + "loss": 0.1245, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.605191469192505, + "rewards/margins": 6.7962799072265625, + "rewards/margins_max": 10.232267379760742, + "rewards/margins_min": 3.3602943420410156, + "rewards/margins_std": 4.859219074249268, + "rewards/rejected": -10.401471138000488, + "step": 2220 + }, + { + "epoch": 0.56, + "grad_norm": 1.4609375, + "learning_rate": 9.61354682515042e-07, + "logits/chosen": 0.5297726392745972, + "logits/rejected": 0.9975612759590149, + "logps/chosen": -554.0660400390625, + "logps/rejected": -1282.62109375, + "loss": 0.2119, + "rewards/accuracies": 0.887499988079071, + "rewards/chosen": -3.6336886882781982, + "rewards/margins": 7.203893184661865, + "rewards/margins_max": 11.056930541992188, + "rewards/margins_min": 3.3508553504943848, + "rewards/margins_std": 5.4490180015563965, + "rewards/rejected": -10.837581634521484, + "step": 2230 + }, + { + "epoch": 0.56, + "grad_norm": 13.9375, + "learning_rate": 9.525776350314484e-07, + "logits/chosen": 0.3922134041786194, + "logits/rejected": 0.9736678004264832, + "logps/chosen": -554.39990234375, + "logps/rejected": -1209.232177734375, + "loss": 0.1955, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.258437395095825, + "rewards/margins": 6.5767412185668945, + "rewards/margins_max": 10.349874496459961, + "rewards/margins_min": 2.8036084175109863, + "rewards/margins_std": 5.336016654968262, + "rewards/rejected": -9.835180282592773, + "step": 2240 + }, + { + "epoch": 0.57, + "grad_norm": 2.765625, + "learning_rate": 9.438042475820292e-07, + "logits/chosen": 0.403189480304718, + "logits/rejected": 0.7983392477035522, + "logps/chosen": -584.6541748046875, + "logps/rejected": -1261.7821044921875, + "loss": 0.1611, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.42396879196167, + "rewards/margins": 6.889365196228027, + "rewards/margins_max": 10.340558052062988, + "rewards/margins_min": 3.43817138671875, + "rewards/margins_std": 4.880724906921387, + "rewards/rejected": -10.313333511352539, + "step": 2250 + }, + { + "epoch": 0.57, + "grad_norm": 0.5390625, + "learning_rate": 9.350351972923963e-07, + "logits/chosen": 0.3648914396762848, + "logits/rejected": 0.8595021963119507, + "logps/chosen": -579.9949340820312, + "logps/rejected": -1241.930419921875, + "loss": 0.0986, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -3.5924155712127686, + "rewards/margins": 6.509829521179199, + "rewards/margins_max": 9.569954872131348, + "rewards/margins_min": 3.4497056007385254, + "rewards/margins_std": 4.327669620513916, + "rewards/rejected": -10.102245330810547, + "step": 2260 + }, + { + "epoch": 0.57, + "grad_norm": 6.28125, + "learning_rate": 9.262711609534209e-07, + "logits/chosen": 0.5114152431488037, + "logits/rejected": 0.7859119772911072, + "logps/chosen": -552.0835571289062, + "logps/rejected": -1168.512451171875, + "loss": 0.2194, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.4986891746520996, + "rewards/margins": 5.916913032531738, + "rewards/margins_max": 9.174135208129883, + "rewards/margins_min": 2.659688711166382, + "rewards/margins_std": 4.606410026550293, + "rewards/rejected": -9.41560173034668, + "step": 2270 + }, + { + "epoch": 0.57, + "grad_norm": 1.71875, + "learning_rate": 9.175128149690018e-07, + "logits/chosen": 0.42313352227211, + "logits/rejected": 0.761069655418396, + "logps/chosen": -566.6101684570312, + "logps/rejected": -1010.4449462890625, + "loss": 0.2312, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.5931007862091064, + "rewards/margins": 4.439265727996826, + "rewards/margins_max": 6.598448753356934, + "rewards/margins_min": 2.2800817489624023, + "rewards/margins_std": 3.05354642868042, + "rewards/rejected": -8.032365798950195, + "step": 2280 + }, + { + "epoch": 0.58, + "grad_norm": 0.490234375, + "learning_rate": 9.087608353038571e-07, + "logits/chosen": 0.5663483738899231, + "logits/rejected": 0.9419177770614624, + "logps/chosen": -617.2533569335938, + "logps/rejected": -1253.386474609375, + "loss": 0.1776, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.911703586578369, + "rewards/margins": 6.384354591369629, + "rewards/margins_max": 10.122902870178223, + "rewards/margins_min": 2.6458072662353516, + "rewards/margins_std": 5.287104606628418, + "rewards/rejected": -10.29605770111084, + "step": 2290 + }, + { + "epoch": 0.58, + "grad_norm": 3.265625, + "learning_rate": 9.00015897431357e-07, + "logits/chosen": 0.44118762016296387, + "logits/rejected": 0.962271511554718, + "logps/chosen": -645.8919677734375, + "logps/rejected": -1220.1890869140625, + "loss": 0.2006, + "rewards/accuracies": 0.9375, + "rewards/chosen": -4.0967254638671875, + "rewards/margins": 6.049219131469727, + "rewards/margins_max": 8.595663070678711, + "rewards/margins_min": 3.5027756690979004, + "rewards/margins_std": 3.60121488571167, + "rewards/rejected": -10.145944595336914, + "step": 2300 + }, + { + "epoch": 0.58, + "grad_norm": 1.625, + "learning_rate": 8.912786762813893e-07, + "logits/chosen": 0.5058658123016357, + "logits/rejected": 0.9005535244941711, + "logps/chosen": -589.7290649414062, + "logps/rejected": -1193.9273681640625, + "loss": 0.1621, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.5936293601989746, + "rewards/margins": 6.243061065673828, + "rewards/margins_max": 9.953969955444336, + "rewards/margins_min": 2.5321524143218994, + "rewards/margins_std": 5.24801778793335, + "rewards/rejected": -9.836690902709961, + "step": 2310 + }, + { + "epoch": 0.58, + "grad_norm": 0.703125, + "learning_rate": 8.82549846188269e-07, + "logits/chosen": 0.5765672922134399, + "logits/rejected": 0.8515122532844543, + "logps/chosen": -576.6160278320312, + "logps/rejected": -1192.1400146484375, + "loss": 0.1866, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -3.822211742401123, + "rewards/margins": 5.900345802307129, + "rewards/margins_max": 8.774417877197266, + "rewards/margins_min": 3.0262744426727295, + "rewards/margins_std": 4.064550876617432, + "rewards/rejected": -9.72255802154541, + "step": 2320 + }, + { + "epoch": 0.59, + "grad_norm": 2.28125, + "learning_rate": 8.738300808386933e-07, + "logits/chosen": 0.4344192445278168, + "logits/rejected": 0.8709548115730286, + "logps/chosen": -618.6167602539062, + "logps/rejected": -1314.2210693359375, + "loss": 0.1313, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -4.075563907623291, + "rewards/margins": 7.01934289932251, + "rewards/margins_max": 10.089629173278809, + "rewards/margins_min": 3.9490573406219482, + "rewards/margins_std": 4.342040061950684, + "rewards/rejected": -11.0949068069458, + "step": 2330 + }, + { + "epoch": 0.59, + "grad_norm": 1.015625, + "learning_rate": 8.65120053219748e-07, + "logits/chosen": 0.4593490958213806, + "logits/rejected": 0.8710781335830688, + "logps/chosen": -554.8482666015625, + "logps/rejected": -1096.6068115234375, + "loss": 0.1465, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.5408222675323486, + "rewards/margins": 5.38980770111084, + "rewards/margins_max": 8.383840560913086, + "rewards/margins_min": 2.3957760334014893, + "rewards/margins_std": 4.234200477600098, + "rewards/rejected": -8.930630683898926, + "step": 2340 + }, + { + "epoch": 0.59, + "grad_norm": 1.75, + "learning_rate": 8.564204355669643e-07, + "logits/chosen": 0.4738622307777405, + "logits/rejected": 0.8612324595451355, + "logps/chosen": -653.3856201171875, + "logps/rejected": -1402.7314453125, + "loss": 0.2242, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -4.03350305557251, + "rewards/margins": 7.781327724456787, + "rewards/margins_max": 12.15905475616455, + "rewards/margins_min": 3.403602123260498, + "rewards/margins_std": 6.191039085388184, + "rewards/rejected": -11.814830780029297, + "step": 2350 + }, + { + "epoch": 0.59, + "grad_norm": 1.078125, + "learning_rate": 8.477318993124392e-07, + "logits/chosen": 0.44268113374710083, + "logits/rejected": 0.979813277721405, + "logps/chosen": -556.304443359375, + "logps/rejected": -1236.2825927734375, + "loss": 0.2425, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.656306505203247, + "rewards/margins": 6.749668121337891, + "rewards/margins_max": 10.653985977172852, + "rewards/margins_min": 2.845351457595825, + "rewards/margins_std": 5.521537780761719, + "rewards/rejected": -10.405974388122559, + "step": 2360 + }, + { + "epoch": 0.6, + "grad_norm": 2.859375, + "learning_rate": 8.390551150330113e-07, + "logits/chosen": 0.3767511248588562, + "logits/rejected": 0.7756798267364502, + "logps/chosen": -626.3853759765625, + "logps/rejected": -1276.080810546875, + "loss": 0.3063, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -4.063645362854004, + "rewards/margins": 6.4792351722717285, + "rewards/margins_max": 9.45138931274414, + "rewards/margins_min": 3.507080078125, + "rewards/margins_std": 4.203261375427246, + "rewards/rejected": -10.542880058288574, + "step": 2370 + }, + { + "epoch": 0.6, + "grad_norm": 7.1875, + "learning_rate": 8.303907523985085e-07, + "logits/chosen": 0.41792359948158264, + "logits/rejected": 0.9234131574630737, + "logps/chosen": -583.3489379882812, + "logps/rejected": -1221.631591796875, + "loss": 0.1307, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.7345657348632812, + "rewards/margins": 6.419626712799072, + "rewards/margins_max": 9.607501983642578, + "rewards/margins_min": 3.2317516803741455, + "rewards/margins_std": 4.508336067199707, + "rewards/rejected": -10.154191970825195, + "step": 2380 + }, + { + "epoch": 0.6, + "grad_norm": 1.6875, + "learning_rate": 8.217394801200631e-07, + "logits/chosen": 0.5521947741508484, + "logits/rejected": 0.8723956942558289, + "logps/chosen": -594.2333984375, + "logps/rejected": -1298.763427734375, + "loss": 0.148, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.983570098876953, + "rewards/margins": 6.878546237945557, + "rewards/margins_max": 10.753538131713867, + "rewards/margins_min": 3.0035533905029297, + "rewards/margins_std": 5.480066776275635, + "rewards/rejected": -10.862115859985352, + "step": 2390 + }, + { + "epoch": 0.6, + "grad_norm": 1.3515625, + "learning_rate": 8.131019658984988e-07, + "logits/chosen": 0.421779066324234, + "logits/rejected": 0.9385878443717957, + "logps/chosen": -586.00244140625, + "logps/rejected": -1196.017822265625, + "loss": 0.1597, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.60229754447937, + "rewards/margins": 6.160573482513428, + "rewards/margins_max": 8.99445629119873, + "rewards/margins_min": 3.3266918659210205, + "rewards/margins_std": 4.007714748382568, + "rewards/rejected": -9.762872695922852, + "step": 2400 + }, + { + "epoch": 0.61, + "grad_norm": 1.6328125, + "learning_rate": 8.04478876372801e-07, + "logits/chosen": 0.3881237208843231, + "logits/rejected": 1.046502709388733, + "logps/chosen": -631.3989868164062, + "logps/rejected": -1197.179443359375, + "loss": 0.2175, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.762930393218994, + "rewards/margins": 6.060236930847168, + "rewards/margins_max": 9.582808494567871, + "rewards/margins_min": 2.5376646518707275, + "rewards/margins_std": 4.9816694259643555, + "rewards/rejected": -9.82316780090332, + "step": 2410 + }, + { + "epoch": 0.61, + "grad_norm": 0.96484375, + "learning_rate": 7.958708770686628e-07, + "logits/chosen": 0.3488084673881531, + "logits/rejected": 0.9390872716903687, + "logps/chosen": -602.5117797851562, + "logps/rejected": -1286.147705078125, + "loss": 0.1196, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.809040069580078, + "rewards/margins": 6.788311958312988, + "rewards/margins_max": 9.610162734985352, + "rewards/margins_min": 3.9664599895477295, + "rewards/margins_std": 3.990701198577881, + "rewards/rejected": -10.59735107421875, + "step": 2420 + }, + { + "epoch": 0.61, + "grad_norm": 5.21875, + "learning_rate": 7.872786323471231e-07, + "logits/chosen": 0.4111207127571106, + "logits/rejected": 0.8087761998176575, + "logps/chosen": -582.8560180664062, + "logps/rejected": -1202.2833251953125, + "loss": 0.1622, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.470329761505127, + "rewards/margins": 6.254929065704346, + "rewards/margins_max": 9.86207389831543, + "rewards/margins_min": 2.6477839946746826, + "rewards/margins_std": 5.101273536682129, + "rewards/rejected": -9.725258827209473, + "step": 2430 + }, + { + "epoch": 0.61, + "grad_norm": 2.359375, + "learning_rate": 7.787028053532894e-07, + "logits/chosen": 0.42040500044822693, + "logits/rejected": 0.9697147607803345, + "logps/chosen": -607.8787231445312, + "logps/rejected": -1137.3770751953125, + "loss": 0.2404, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.6307804584503174, + "rewards/margins": 5.513142108917236, + "rewards/margins_max": 8.46685791015625, + "rewards/margins_min": 2.5594258308410645, + "rewards/margins_std": 4.177186012268066, + "rewards/rejected": -9.143922805786133, + "step": 2440 + }, + { + "epoch": 0.62, + "grad_norm": 0.5078125, + "learning_rate": 7.701440579651564e-07, + "logits/chosen": 0.39534759521484375, + "logits/rejected": 0.827987015247345, + "logps/chosen": -666.9830322265625, + "logps/rejected": -1299.258544921875, + "loss": 0.1684, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -4.383899688720703, + "rewards/margins": 6.384281635284424, + "rewards/margins_max": 9.895980834960938, + "rewards/margins_min": 2.8725833892822266, + "rewards/margins_std": 4.9662909507751465, + "rewards/rejected": -10.768181800842285, + "step": 2450 + }, + { + "epoch": 0.62, + "grad_norm": 3.4375, + "learning_rate": 7.616030507425251e-07, + "logits/chosen": 0.5693954229354858, + "logits/rejected": 0.8889672160148621, + "logps/chosen": -617.1419677734375, + "logps/rejected": -1388.5489501953125, + "loss": 0.137, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.9340217113494873, + "rewards/margins": 7.426980018615723, + "rewards/margins_max": 11.167816162109375, + "rewards/margins_min": 3.686145067214966, + "rewards/margins_std": 5.290339946746826, + "rewards/rejected": -11.361001968383789, + "step": 2460 + }, + { + "epoch": 0.62, + "grad_norm": 0.50390625, + "learning_rate": 7.530804428760189e-07, + "logits/chosen": 0.4677404463291168, + "logits/rejected": 0.9375128746032715, + "logps/chosen": -596.3168334960938, + "logps/rejected": -1248.475830078125, + "loss": 0.1194, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.896852970123291, + "rewards/margins": 6.540476322174072, + "rewards/margins_max": 9.212288856506348, + "rewards/margins_min": 3.8686630725860596, + "rewards/margins_std": 3.7785136699676514, + "rewards/rejected": -10.437329292297363, + "step": 2470 + }, + { + "epoch": 0.62, + "grad_norm": 2.671875, + "learning_rate": 7.445768921362075e-07, + "logits/chosen": 0.40075913071632385, + "logits/rejected": 0.7473156452178955, + "logps/chosen": -569.2059326171875, + "logps/rejected": -1103.2969970703125, + "loss": 0.2328, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.6370136737823486, + "rewards/margins": 5.360899448394775, + "rewards/margins_max": 8.596506118774414, + "rewards/margins_min": 2.1252918243408203, + "rewards/margins_std": 4.575839996337891, + "rewards/rejected": -8.997913360595703, + "step": 2480 + }, + { + "epoch": 0.63, + "grad_norm": 0.76171875, + "learning_rate": 7.360930548228421e-07, + "logits/chosen": 0.5869132280349731, + "logits/rejected": 0.8721901774406433, + "logps/chosen": -587.7561645507812, + "logps/rejected": -1406.796630859375, + "loss": 0.1456, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.811744213104248, + "rewards/margins": 8.148642539978027, + "rewards/margins_max": 12.553075790405273, + "rewards/margins_min": 3.744208812713623, + "rewards/margins_std": 6.2288103103637695, + "rewards/rejected": -11.960387229919434, + "step": 2490 + }, + { + "epoch": 0.63, + "grad_norm": 1.078125, + "learning_rate": 7.276295857142004e-07, + "logits/chosen": 0.32453638315200806, + "logits/rejected": 0.8772487640380859, + "logps/chosen": -585.3636474609375, + "logps/rejected": -1191.1376953125, + "loss": 0.1195, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.542001247406006, + "rewards/margins": 6.292872428894043, + "rewards/margins_max": 9.072725296020508, + "rewards/margins_min": 3.5130207538604736, + "rewards/margins_std": 3.931304931640625, + "rewards/rejected": -9.834874153137207, + "step": 2500 + }, + { + "epoch": 0.63, + "grad_norm": 2.0, + "learning_rate": 7.191871380165537e-07, + "logits/chosen": 0.5947480797767639, + "logits/rejected": 0.9666692018508911, + "logps/chosen": -587.4915161132812, + "logps/rejected": -1285.529541015625, + "loss": 0.2129, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.7693405151367188, + "rewards/margins": 6.991917610168457, + "rewards/margins_max": 10.897686004638672, + "rewards/margins_min": 3.0861494541168213, + "rewards/margins_std": 5.523590564727783, + "rewards/rejected": -10.76125717163086, + "step": 2510 + }, + { + "epoch": 0.63, + "grad_norm": 5.15625, + "learning_rate": 7.107663633137513e-07, + "logits/chosen": 0.5616310834884644, + "logits/rejected": 0.9230579137802124, + "logps/chosen": -596.8382568359375, + "logps/rejected": -1349.6082763671875, + "loss": 0.2132, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.841231107711792, + "rewards/margins": 7.509341239929199, + "rewards/margins_max": 12.250666618347168, + "rewards/margins_min": 2.7680153846740723, + "rewards/margins_std": 6.705247402191162, + "rewards/rejected": -11.35057258605957, + "step": 2520 + }, + { + "epoch": 0.64, + "grad_norm": 1.0625, + "learning_rate": 7.023679115169304e-07, + "logits/chosen": 0.3933202028274536, + "logits/rejected": 0.8223272562026978, + "logps/chosen": -606.7198486328125, + "logps/rejected": -1352.9849853515625, + "loss": 0.1704, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.759533643722534, + "rewards/margins": 7.4397478103637695, + "rewards/margins_max": 11.29192066192627, + "rewards/margins_min": 3.5875747203826904, + "rewards/margins_std": 5.4477949142456055, + "rewards/rejected": -11.199281692504883, + "step": 2530 + }, + { + "epoch": 0.64, + "grad_norm": 1.25, + "learning_rate": 6.93992430814359e-07, + "logits/chosen": 0.42247194051742554, + "logits/rejected": 0.9332104921340942, + "logps/chosen": -637.812255859375, + "logps/rejected": -1319.972900390625, + "loss": 0.1834, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -4.083704471588135, + "rewards/margins": 6.8782501220703125, + "rewards/margins_max": 11.13465690612793, + "rewards/margins_min": 2.6218440532684326, + "rewards/margins_std": 6.019468307495117, + "rewards/rejected": -10.961955070495605, + "step": 2540 + }, + { + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 6.856405676214072e-07, + "logits/chosen": 0.4980488717556, + "logits/rejected": 0.9414850473403931, + "logps/chosen": -626.1489868164062, + "logps/rejected": -1227.9940185546875, + "loss": 0.1541, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -3.9557862281799316, + "rewards/margins": 5.900100231170654, + "rewards/margins_max": 8.798306465148926, + "rewards/margins_min": 3.00189471244812, + "rewards/margins_std": 4.098681926727295, + "rewards/rejected": -9.855887413024902, + "step": 2550 + }, + { + "epoch": 0.64, + "grad_norm": 9.3125, + "learning_rate": 6.773129665306569e-07, + "logits/chosen": 0.35069847106933594, + "logits/rejected": 0.8379716873168945, + "logps/chosen": -567.0838623046875, + "logps/rejected": -1157.524658203125, + "loss": 0.1994, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.548142910003662, + "rewards/margins": 5.7133989334106445, + "rewards/margins_max": 8.906744956970215, + "rewards/margins_min": 2.520052433013916, + "rewards/margins_std": 4.516073703765869, + "rewards/rejected": -9.261542320251465, + "step": 2560 + }, + { + "epoch": 0.65, + "grad_norm": 0.890625, + "learning_rate": 6.690102702621547e-07, + "logits/chosen": 0.3375098407268524, + "logits/rejected": 0.8294457197189331, + "logps/chosen": -558.5492553710938, + "logps/rejected": -1136.806396484375, + "loss": 0.1465, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.5482871532440186, + "rewards/margins": 5.674252033233643, + "rewards/margins_max": 8.784135818481445, + "rewards/margins_min": 2.5643677711486816, + "rewards/margins_std": 4.398039817810059, + "rewards/rejected": -9.222538948059082, + "step": 2570 + }, + { + "epoch": 0.65, + "grad_norm": 0.7734375, + "learning_rate": 6.60733119613804e-07, + "logits/chosen": 0.5296992063522339, + "logits/rejected": 0.8904238939285278, + "logps/chosen": -576.5020751953125, + "logps/rejected": -1276.6494140625, + "loss": 0.1902, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.6155083179473877, + "rewards/margins": 6.869490623474121, + "rewards/margins_max": 10.674718856811523, + "rewards/margins_min": 3.064263105392456, + "rewards/margins_std": 5.381404399871826, + "rewards/rejected": -10.48499870300293, + "step": 2580 + }, + { + "epoch": 0.65, + "grad_norm": 0.5, + "learning_rate": 6.524821534119112e-07, + "logits/chosen": 0.5071766972541809, + "logits/rejected": 0.9823764562606812, + "logps/chosen": -582.9547729492188, + "logps/rejected": -1479.55126953125, + "loss": 0.1345, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.5877151489257812, + "rewards/margins": 8.957578659057617, + "rewards/margins_max": 15.099912643432617, + "rewards/margins_min": 2.815244197845459, + "rewards/margins_std": 8.686573028564453, + "rewards/rejected": -12.545293807983398, + "step": 2590 + }, + { + "epoch": 0.65, + "grad_norm": 1.1171875, + "learning_rate": 6.442580084618804e-07, + "logits/chosen": 0.48143234848976135, + "logits/rejected": 1.0688936710357666, + "logps/chosen": -597.7962646484375, + "logps/rejected": -1267.8271484375, + "loss": 0.1344, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -3.772063732147217, + "rewards/margins": 6.709539890289307, + "rewards/margins_max": 9.977819442749023, + "rewards/margins_min": 3.441260576248169, + "rewards/margins_std": 4.622044086456299, + "rewards/rejected": -10.481603622436523, + "step": 2600 + }, + { + "epoch": 0.66, + "grad_norm": 1.9765625, + "learning_rate": 6.360613194990638e-07, + "logits/chosen": 0.41432422399520874, + "logits/rejected": 0.8854449987411499, + "logps/chosen": -622.7572021484375, + "logps/rejected": -1250.2237548828125, + "loss": 0.2274, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -4.131538391113281, + "rewards/margins": 6.169598579406738, + "rewards/margins_max": 9.522704124450684, + "rewards/margins_min": 2.816493511199951, + "rewards/margins_std": 4.742007255554199, + "rewards/rejected": -10.301137924194336, + "step": 2610 + }, + { + "epoch": 0.66, + "grad_norm": 1.6484375, + "learning_rate": 6.278927191397762e-07, + "logits/chosen": 0.3944636583328247, + "logits/rejected": 0.9111081957817078, + "logps/chosen": -612.4472045898438, + "logps/rejected": -1219.752685546875, + "loss": 0.1986, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -3.7693982124328613, + "rewards/margins": 6.301706314086914, + "rewards/margins_max": 9.534825325012207, + "rewards/margins_min": 3.0685877799987793, + "rewards/margins_std": 4.572320461273193, + "rewards/rejected": -10.071104049682617, + "step": 2620 + }, + { + "epoch": 0.66, + "grad_norm": 0.9140625, + "learning_rate": 6.197528378324663e-07, + "logits/chosen": 0.5100525617599487, + "logits/rejected": 0.9623018503189087, + "logps/chosen": -584.0420532226562, + "logps/rejected": -1203.010498046875, + "loss": 0.1742, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.7736029624938965, + "rewards/margins": 6.160708427429199, + "rewards/margins_max": 9.585714340209961, + "rewards/margins_min": 2.7357051372528076, + "rewards/margins_std": 4.843687534332275, + "rewards/rejected": -9.93431282043457, + "step": 2630 + }, + { + "epoch": 0.66, + "grad_norm": 3.734375, + "learning_rate": 6.116423038090623e-07, + "logits/chosen": 0.5766229629516602, + "logits/rejected": 0.9825431108474731, + "logps/chosen": -547.1226196289062, + "logps/rejected": -1281.45703125, + "loss": 0.3216, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.65391206741333, + "rewards/margins": 7.179081916809082, + "rewards/margins_max": 9.718598365783691, + "rewards/margins_min": 4.639565467834473, + "rewards/margins_std": 3.591418743133545, + "rewards/rejected": -10.83299446105957, + "step": 2640 + }, + { + "epoch": 0.67, + "grad_norm": 0.97265625, + "learning_rate": 6.035617430364839e-07, + "logits/chosen": 0.4997124671936035, + "logits/rejected": 0.9522945284843445, + "logps/chosen": -583.899169921875, + "logps/rejected": -1155.713623046875, + "loss": 0.1477, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.7351505756378174, + "rewards/margins": 5.709227561950684, + "rewards/margins_max": 8.30264949798584, + "rewards/margins_min": 3.1158056259155273, + "rewards/margins_std": 3.6676526069641113, + "rewards/rejected": -9.444378852844238, + "step": 2650 + }, + { + "epoch": 0.67, + "grad_norm": 1.1640625, + "learning_rate": 5.955117791683289e-07, + "logits/chosen": 0.5455132722854614, + "logits/rejected": 0.7467927932739258, + "logps/chosen": -618.4952392578125, + "logps/rejected": -1362.67529296875, + "loss": 0.1119, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -4.014021873474121, + "rewards/margins": 7.122605323791504, + "rewards/margins_max": 10.956873893737793, + "rewards/margins_min": 3.2883358001708984, + "rewards/margins_std": 5.422475337982178, + "rewards/rejected": -11.136625289916992, + "step": 2660 + }, + { + "epoch": 0.67, + "grad_norm": 1.484375, + "learning_rate": 5.874930334967425e-07, + "logits/chosen": 0.3480473756790161, + "logits/rejected": 0.8517535924911499, + "logps/chosen": -577.0358276367188, + "logps/rejected": -1364.579833984375, + "loss": 0.17, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.2995376586914062, + "rewards/margins": 8.075822830200195, + "rewards/margins_max": 12.583778381347656, + "rewards/margins_min": 3.5678658485412598, + "rewards/margins_std": 6.375213623046875, + "rewards/rejected": -11.375359535217285, + "step": 2670 + }, + { + "epoch": 0.67, + "grad_norm": 0.8515625, + "learning_rate": 5.795061249044657e-07, + "logits/chosen": 0.36974793672561646, + "logits/rejected": 0.9354592561721802, + "logps/chosen": -607.3040771484375, + "logps/rejected": -1114.209228515625, + "loss": 0.2163, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.8398520946502686, + "rewards/margins": 5.254799842834473, + "rewards/margins_max": 8.094499588012695, + "rewards/margins_min": 2.4150993824005127, + "rewards/margins_std": 4.015942573547363, + "rewards/rejected": -9.09465217590332, + "step": 2680 + }, + { + "epoch": 0.68, + "grad_norm": 0.94921875, + "learning_rate": 5.715516698170694e-07, + "logits/chosen": 0.4757654070854187, + "logits/rejected": 0.9228278994560242, + "logps/chosen": -593.3265380859375, + "logps/rejected": -1246.779296875, + "loss": 0.1711, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.8630759716033936, + "rewards/margins": 6.61892032623291, + "rewards/margins_max": 10.43076229095459, + "rewards/margins_min": 2.8070778846740723, + "rewards/margins_std": 5.390759468078613, + "rewards/rejected": -10.481996536254883, + "step": 2690 + }, + { + "epoch": 0.68, + "grad_norm": 2.265625, + "learning_rate": 5.636302821553791e-07, + "logits/chosen": 0.5951135754585266, + "logits/rejected": 0.9289643168449402, + "logps/chosen": -600.3472900390625, + "logps/rejected": -1256.505126953125, + "loss": 0.1846, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -4.007782459259033, + "rewards/margins": 6.590015411376953, + "rewards/margins_max": 9.676294326782227, + "rewards/margins_min": 3.503735065460205, + "rewards/margins_std": 4.364659309387207, + "rewards/rejected": -10.597796440124512, + "step": 2700 + }, + { + "epoch": 0.68, + "grad_norm": 1.078125, + "learning_rate": 5.557425732880927e-07, + "logits/chosen": 0.45299792289733887, + "logits/rejected": 0.9710724949836731, + "logps/chosen": -576.3006591796875, + "logps/rejected": -1305.9984130859375, + "loss": 0.2169, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.6723008155822754, + "rewards/margins": 7.247259616851807, + "rewards/margins_max": 10.579780578613281, + "rewards/margins_min": 3.914738893508911, + "rewards/margins_std": 4.712896347045898, + "rewards/rejected": -10.919560432434082, + "step": 2710 + }, + { + "epoch": 0.68, + "grad_norm": 4.25, + "learning_rate": 5.478891519845969e-07, + "logits/chosen": 0.4582904279232025, + "logits/rejected": 1.0101871490478516, + "logps/chosen": -571.7638549804688, + "logps/rejected": -1202.6436767578125, + "loss": 0.223, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -3.6778640747070312, + "rewards/margins": 6.366325378417969, + "rewards/margins_max": 9.646313667297363, + "rewards/margins_min": 3.086336851119995, + "rewards/margins_std": 4.638604164123535, + "rewards/rejected": -10.044189453125, + "step": 2720 + }, + { + "epoch": 0.69, + "grad_norm": 6.5, + "learning_rate": 5.400706243679814e-07, + "logits/chosen": 0.39346835017204285, + "logits/rejected": 0.9161213040351868, + "logps/chosen": -564.0874633789062, + "logps/rejected": -1229.169677734375, + "loss": 0.1437, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.5225460529327393, + "rewards/margins": 6.72415828704834, + "rewards/margins_max": 10.41191291809082, + "rewards/margins_min": 3.036404848098755, + "rewards/margins_std": 5.215271472930908, + "rewards/rejected": -10.246706008911133, + "step": 2730 + }, + { + "epoch": 0.69, + "grad_norm": 9.625, + "learning_rate": 5.322875938682574e-07, + "logits/chosen": 0.4170478284358978, + "logits/rejected": 0.855624794960022, + "logps/chosen": -595.4231567382812, + "logps/rejected": -1336.313720703125, + "loss": 0.1761, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.6344857215881348, + "rewards/margins": 7.587996482849121, + "rewards/margins_max": 11.985794067382812, + "rewards/margins_min": 3.190199375152588, + "rewards/margins_std": 6.219425201416016, + "rewards/rejected": -11.222482681274414, + "step": 2740 + }, + { + "epoch": 0.69, + "grad_norm": 0.921875, + "learning_rate": 5.245406611757881e-07, + "logits/chosen": 0.45017296075820923, + "logits/rejected": 0.7095610499382019, + "logps/chosen": -599.9002685546875, + "logps/rejected": -1215.00634765625, + "loss": 0.2039, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -4.224530220031738, + "rewards/margins": 5.976971626281738, + "rewards/margins_max": 9.308272361755371, + "rewards/margins_min": 2.645669460296631, + "rewards/margins_std": 4.711172580718994, + "rewards/rejected": -10.201501846313477, + "step": 2750 + }, + { + "epoch": 0.69, + "grad_norm": 1.140625, + "learning_rate": 5.168304241949258e-07, + "logits/chosen": 0.5480870008468628, + "logits/rejected": 0.9951409101486206, + "logps/chosen": -630.0256958007812, + "logps/rejected": -1283.5689697265625, + "loss": 0.3047, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -4.216586589813232, + "rewards/margins": 6.3858795166015625, + "rewards/margins_max": 9.220312118530273, + "rewards/margins_min": 3.5514473915100098, + "rewards/margins_std": 4.0084919929504395, + "rewards/rejected": -10.602466583251953, + "step": 2760 + }, + { + "epoch": 0.7, + "grad_norm": 1.71875, + "learning_rate": 5.091574779978654e-07, + "logits/chosen": 0.5319818258285522, + "logits/rejected": 0.923554539680481, + "logps/chosen": -576.5374755859375, + "logps/rejected": -1277.248046875, + "loss": 0.2105, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.666407823562622, + "rewards/margins": 6.9405670166015625, + "rewards/margins_max": 10.391286849975586, + "rewards/margins_min": 3.4898483753204346, + "rewards/margins_std": 4.880053997039795, + "rewards/rejected": -10.606975555419922, + "step": 2770 + }, + { + "epoch": 0.7, + "grad_norm": 2.09375, + "learning_rate": 5.015224147787195e-07, + "logits/chosen": 0.4306615889072418, + "logits/rejected": 0.8923788070678711, + "logps/chosen": -581.0130004882812, + "logps/rejected": -1277.997314453125, + "loss": 0.1599, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.6794135570526123, + "rewards/margins": 7.105888366699219, + "rewards/margins_max": 11.283435821533203, + "rewards/margins_min": 2.928340435028076, + "rewards/margins_std": 5.907945156097412, + "rewards/rejected": -10.785301208496094, + "step": 2780 + }, + { + "epoch": 0.7, + "grad_norm": 1.453125, + "learning_rate": 4.939258238078098e-07, + "logits/chosen": 0.3736962378025055, + "logits/rejected": 0.9539716839790344, + "logps/chosen": -574.2659301757812, + "logps/rejected": -1150.515869140625, + "loss": 0.0895, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.42749285697937, + "rewards/margins": 5.820822715759277, + "rewards/margins_max": 8.33712100982666, + "rewards/margins_min": 3.3045241832733154, + "rewards/margins_std": 3.5585830211639404, + "rewards/rejected": -9.248315811157227, + "step": 2790 + }, + { + "epoch": 0.7, + "grad_norm": 2.296875, + "learning_rate": 4.863682913861911e-07, + "logits/chosen": 0.39504092931747437, + "logits/rejected": 0.6548932790756226, + "logps/chosen": -580.8141479492188, + "logps/rejected": -1209.3515625, + "loss": 0.2155, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.753568172454834, + "rewards/margins": 5.986493110656738, + "rewards/margins_max": 9.503013610839844, + "rewards/margins_min": 2.469972610473633, + "rewards/margins_std": 4.973111152648926, + "rewards/rejected": -9.74006175994873, + "step": 2800 + }, + { + "epoch": 0.71, + "grad_norm": 0.51953125, + "learning_rate": 4.788504008003977e-07, + "logits/chosen": 0.36534491181373596, + "logits/rejected": 0.7744854092597961, + "logps/chosen": -587.0809326171875, + "logps/rejected": -1210.5406494140625, + "loss": 0.2413, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.5366668701171875, + "rewards/margins": 6.2145538330078125, + "rewards/margins_max": 10.104393005371094, + "rewards/margins_min": 2.324714183807373, + "rewards/margins_std": 5.501064300537109, + "rewards/rejected": -9.751221656799316, + "step": 2810 + }, + { + "epoch": 0.71, + "grad_norm": 0.703125, + "learning_rate": 4.7137273227742746e-07, + "logits/chosen": 0.3758518695831299, + "logits/rejected": 0.9578613042831421, + "logps/chosen": -546.7774658203125, + "logps/rejected": -1061.381591796875, + "loss": 0.2159, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -3.3317322731018066, + "rewards/margins": 5.217543601989746, + "rewards/margins_max": 9.113043785095215, + "rewards/margins_min": 1.3220431804656982, + "rewards/margins_std": 5.509068965911865, + "rewards/rejected": -8.549276351928711, + "step": 2820 + }, + { + "epoch": 0.71, + "grad_norm": 1.9921875, + "learning_rate": 4.639358629399601e-07, + "logits/chosen": 0.384821355342865, + "logits/rejected": 0.8197442293167114, + "logps/chosen": -592.8553466796875, + "logps/rejected": -1149.2913818359375, + "loss": 0.1985, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.7938003540039062, + "rewards/margins": 5.607391357421875, + "rewards/margins_max": 8.316540718078613, + "rewards/margins_min": 2.8982410430908203, + "rewards/margins_std": 3.8313167095184326, + "rewards/rejected": -9.401190757751465, + "step": 2830 + }, + { + "epoch": 0.71, + "grad_norm": 9.625, + "learning_rate": 4.5654036676181496e-07, + "logits/chosen": 0.44163426756858826, + "logits/rejected": 0.8041768074035645, + "logps/chosen": -654.1993408203125, + "logps/rejected": -1390.2359619140625, + "loss": 0.2291, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -4.165137767791748, + "rewards/margins": 7.4157538414001465, + "rewards/margins_max": 11.67861557006836, + "rewards/margins_min": 3.152892589569092, + "rewards/margins_std": 6.0285964012146, + "rewards/rejected": -11.580891609191895, + "step": 2840 + }, + { + "epoch": 0.72, + "grad_norm": 4.0625, + "learning_rate": 4.491868145236508e-07, + "logits/chosen": 0.3212242126464844, + "logits/rejected": 0.8466861844062805, + "logps/chosen": -621.2687377929688, + "logps/rejected": -1346.79296875, + "loss": 0.169, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.9158835411071777, + "rewards/margins": 7.350478172302246, + "rewards/margins_max": 11.066621780395508, + "rewards/margins_min": 3.634335994720459, + "rewards/margins_std": 5.255418300628662, + "rewards/rejected": -11.266361236572266, + "step": 2850 + }, + { + "epoch": 0.72, + "grad_norm": 1.3359375, + "learning_rate": 4.418757737689156e-07, + "logits/chosen": 0.31801286339759827, + "logits/rejected": 0.8061238527297974, + "logps/chosen": -559.0628662109375, + "logps/rejected": -1199.774169921875, + "loss": 0.1446, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.384986162185669, + "rewards/margins": 6.031187057495117, + "rewards/margins_max": 8.800252914428711, + "rewards/margins_min": 3.2621231079101562, + "rewards/margins_std": 3.916048765182495, + "rewards/rejected": -9.416173934936523, + "step": 2860 + }, + { + "epoch": 0.72, + "grad_norm": 1.953125, + "learning_rate": 4.346078087600411e-07, + "logits/chosen": 0.4582739472389221, + "logits/rejected": 0.9584504961967468, + "logps/chosen": -622.5162963867188, + "logps/rejected": -1202.921142578125, + "loss": 0.2192, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.9695796966552734, + "rewards/margins": 5.917517185211182, + "rewards/margins_max": 9.363494873046875, + "rewards/margins_min": 2.4715399742126465, + "rewards/margins_std": 4.873347282409668, + "rewards/rejected": -9.887097358703613, + "step": 2870 + }, + { + "epoch": 0.72, + "grad_norm": 1.7421875, + "learning_rate": 4.273834804348959e-07, + "logits/chosen": 0.47292360663414, + "logits/rejected": 0.8965142369270325, + "logps/chosen": -550.5333862304688, + "logps/rejected": -1067.5582275390625, + "loss": 0.2409, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -3.4259536266326904, + "rewards/margins": 5.33632755279541, + "rewards/margins_max": 8.64592170715332, + "rewards/margins_min": 2.0267326831817627, + "rewards/margins_std": 4.680473327636719, + "rewards/rejected": -8.76228141784668, + "step": 2880 + }, + { + "epoch": 0.73, + "grad_norm": 0.92578125, + "learning_rate": 4.202033463634913e-07, + "logits/chosen": 0.24783340096473694, + "logits/rejected": 0.7742137312889099, + "logps/chosen": -621.947998046875, + "logps/rejected": -1266.604248046875, + "loss": 0.1774, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.8389954566955566, + "rewards/margins": 6.534533500671387, + "rewards/margins_max": 9.25249195098877, + "rewards/margins_min": 3.8165740966796875, + "rewards/margins_std": 3.8437747955322266, + "rewards/rejected": -10.373528480529785, + "step": 2890 + }, + { + "epoch": 0.73, + "grad_norm": 1.2578125, + "learning_rate": 4.1306796070494755e-07, + "logits/chosen": 0.5090914368629456, + "logits/rejected": 0.9704787135124207, + "logps/chosen": -566.8453369140625, + "logps/rejected": -1316.5975341796875, + "loss": 0.2067, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.5487873554229736, + "rewards/margins": 7.52276086807251, + "rewards/margins_max": 11.503535270690918, + "rewards/margins_min": 3.541985034942627, + "rewards/margins_std": 5.629666328430176, + "rewards/rejected": -11.071548461914062, + "step": 2900 + }, + { + "epoch": 0.73, + "grad_norm": 0.73828125, + "learning_rate": 4.0597787416472605e-07, + "logits/chosen": 0.42445096373558044, + "logits/rejected": 1.0089080333709717, + "logps/chosen": -590.0271606445312, + "logps/rejected": -1290.34912109375, + "loss": 0.1194, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -3.6672592163085938, + "rewards/margins": 7.150638580322266, + "rewards/margins_max": 10.635174751281738, + "rewards/margins_min": 3.6661014556884766, + "rewards/margins_std": 4.9278788566589355, + "rewards/rejected": -10.817896842956543, + "step": 2910 + }, + { + "epoch": 0.73, + "grad_norm": 2.46875, + "learning_rate": 3.989336339521244e-07, + "logits/chosen": 0.4603755474090576, + "logits/rejected": 1.016980767250061, + "logps/chosen": -556.4505615234375, + "logps/rejected": -1168.83642578125, + "loss": 0.2162, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.3615431785583496, + "rewards/margins": 6.154811859130859, + "rewards/margins_max": 9.212437629699707, + "rewards/margins_min": 3.097187042236328, + "rewards/margins_std": 4.324134349822998, + "rewards/rejected": -9.516355514526367, + "step": 2920 + }, + { + "epoch": 0.74, + "grad_norm": 1.3359375, + "learning_rate": 3.919357837380436e-07, + "logits/chosen": 0.5008795857429504, + "logits/rejected": 0.9082363843917847, + "logps/chosen": -596.6324462890625, + "logps/rejected": -1242.9361572265625, + "loss": 0.1661, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.753378391265869, + "rewards/margins": 6.4934234619140625, + "rewards/margins_max": 9.598888397216797, + "rewards/margins_min": 3.3879590034484863, + "rewards/margins_std": 4.39178991317749, + "rewards/rejected": -10.246801376342773, + "step": 2930 + }, + { + "epoch": 0.74, + "grad_norm": 0.62890625, + "learning_rate": 3.849848636130293e-07, + "logits/chosen": 0.37968841195106506, + "logits/rejected": 0.7749906778335571, + "logps/chosen": -589.7718505859375, + "logps/rejected": -1237.0853271484375, + "loss": 0.1493, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.7084765434265137, + "rewards/margins": 6.354104042053223, + "rewards/margins_max": 9.316937446594238, + "rewards/margins_min": 3.3912723064422607, + "rewards/margins_std": 4.190077304840088, + "rewards/rejected": -10.062582015991211, + "step": 2940 + }, + { + "epoch": 0.74, + "grad_norm": 0.90625, + "learning_rate": 3.780814100455848e-07, + "logits/chosen": 0.4370139539241791, + "logits/rejected": 0.7921696901321411, + "logps/chosen": -594.7927856445312, + "logps/rejected": -1272.9208984375, + "loss": 0.1801, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.9955291748046875, + "rewards/margins": 6.536546230316162, + "rewards/margins_max": 10.13880443572998, + "rewards/margins_min": 2.9342868328094482, + "rewards/margins_std": 5.094363689422607, + "rewards/rejected": -10.532075881958008, + "step": 2950 + }, + { + "epoch": 0.74, + "grad_norm": 1.8671875, + "learning_rate": 3.712259558407698e-07, + "logits/chosen": 0.5578526258468628, + "logits/rejected": 1.0129783153533936, + "logps/chosen": -612.83154296875, + "logps/rejected": -1290.3111572265625, + "loss": 0.1758, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.8081657886505127, + "rewards/margins": 6.890603542327881, + "rewards/margins_max": 10.6620512008667, + "rewards/margins_min": 3.119157314300537, + "rewards/margins_std": 5.3336310386657715, + "rewards/rejected": -10.698770523071289, + "step": 2960 + }, + { + "epoch": 0.75, + "grad_norm": 3.5, + "learning_rate": 3.644190300990774e-07, + "logits/chosen": 0.5283955931663513, + "logits/rejected": 0.9858170747756958, + "logps/chosen": -581.1080932617188, + "logps/rejected": -1192.7562255859375, + "loss": 0.1819, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.572531223297119, + "rewards/margins": 6.240419387817383, + "rewards/margins_max": 10.008376121520996, + "rewards/margins_min": 2.4724607467651367, + "rewards/margins_std": 5.328697204589844, + "rewards/rejected": -9.812950134277344, + "step": 2970 + }, + { + "epoch": 0.75, + "grad_norm": 0.96875, + "learning_rate": 3.576611581755972e-07, + "logits/chosen": 0.4499734044075012, + "logits/rejected": 0.7068200707435608, + "logps/chosen": -523.8099975585938, + "logps/rejected": -1294.130615234375, + "loss": 0.1334, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.308804988861084, + "rewards/margins": 7.3770246505737305, + "rewards/margins_max": 10.823869705200195, + "rewards/margins_min": 3.930180311203003, + "rewards/margins_std": 4.874573707580566, + "rewards/rejected": -10.685829162597656, + "step": 2980 + }, + { + "epoch": 0.75, + "grad_norm": 1.0546875, + "learning_rate": 3.5095286163947155e-07, + "logits/chosen": 0.48973578214645386, + "logits/rejected": 0.9675741195678711, + "logps/chosen": -530.2913208007812, + "logps/rejected": -1184.1497802734375, + "loss": 0.1522, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.161573886871338, + "rewards/margins": 6.543496608734131, + "rewards/margins_max": 9.359611511230469, + "rewards/margins_min": 3.7273802757263184, + "rewards/margins_std": 3.9825892448425293, + "rewards/rejected": -9.705069541931152, + "step": 2990 + }, + { + "epoch": 0.75, + "grad_norm": 1.0078125, + "learning_rate": 3.442946582336379e-07, + "logits/chosen": 0.4471007287502289, + "logits/rejected": 0.9367235898971558, + "logps/chosen": -580.6430053710938, + "logps/rejected": -1280.119140625, + "loss": 0.1246, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.6090996265411377, + "rewards/margins": 7.072485446929932, + "rewards/margins_max": 11.0000638961792, + "rewards/margins_min": 3.1449074745178223, + "rewards/margins_std": 5.554434776306152, + "rewards/rejected": -10.681586265563965, + "step": 3000 + }, + { + "epoch": 0.76, + "grad_norm": 1.5078125, + "learning_rate": 3.376870618348722e-07, + "logits/chosen": 0.46739286184310913, + "logits/rejected": 0.8226820230484009, + "logps/chosen": -575.8204956054688, + "logps/rejected": -1147.048583984375, + "loss": 0.0994, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.88541841506958, + "rewards/margins": 5.527801513671875, + "rewards/margins_max": 7.994576930999756, + "rewards/margins_min": 3.0610268115997314, + "rewards/margins_std": 3.488546371459961, + "rewards/rejected": -9.41322135925293, + "step": 3010 + }, + { + "epoch": 0.76, + "grad_norm": 7.4375, + "learning_rate": 3.311305824141273e-07, + "logits/chosen": 0.4271882176399231, + "logits/rejected": 0.8448736071586609, + "logps/chosen": -613.0482177734375, + "logps/rejected": -1061.115966796875, + "loss": 0.3039, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -4.161238670349121, + "rewards/margins": 4.51308536529541, + "rewards/margins_max": 7.211556434631348, + "rewards/margins_min": 1.81461501121521, + "rewards/margins_std": 3.816213607788086, + "rewards/rejected": -8.674324989318848, + "step": 3020 + }, + { + "epoch": 0.76, + "grad_norm": 1.34375, + "learning_rate": 3.2462572599717263e-07, + "logits/chosen": 0.6139329671859741, + "logits/rejected": 0.8676943778991699, + "logps/chosen": -577.7686767578125, + "logps/rejected": -1452.658203125, + "loss": 0.158, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -3.870023250579834, + "rewards/margins": 8.4967679977417, + "rewards/margins_max": 13.280682563781738, + "rewards/margins_min": 3.7128536701202393, + "rewards/margins_std": 6.765477180480957, + "rewards/rejected": -12.366792678833008, + "step": 3030 + }, + { + "epoch": 0.76, + "grad_norm": 0.765625, + "learning_rate": 3.181729946255406e-07, + "logits/chosen": 0.4582037031650543, + "logits/rejected": 0.9174816012382507, + "logps/chosen": -629.7871704101562, + "logps/rejected": -1234.114990234375, + "loss": 0.1879, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.7807984352111816, + "rewards/margins": 6.365638732910156, + "rewards/margins_max": 9.294533729553223, + "rewards/margins_min": 3.4367434978485107, + "rewards/margins_std": 4.142083168029785, + "rewards/rejected": -10.14643669128418, + "step": 3040 + }, + { + "epoch": 0.77, + "grad_norm": 10.9375, + "learning_rate": 3.1177288631777953e-07, + "logits/chosen": 0.5115953683853149, + "logits/rejected": 0.9640370607376099, + "logps/chosen": -588.091552734375, + "logps/rejected": -1170.9622802734375, + "loss": 0.2467, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.9625473022460938, + "rewards/margins": 5.80161190032959, + "rewards/margins_max": 8.664453506469727, + "rewards/margins_min": 2.9387693405151367, + "rewards/margins_std": 4.048670291900635, + "rewards/rejected": -9.764158248901367, + "step": 3050 + }, + { + "epoch": 0.77, + "grad_norm": 2.828125, + "learning_rate": 3.054258950310152e-07, + "logits/chosen": 0.43586626648902893, + "logits/rejected": 0.8257268667221069, + "logps/chosen": -565.931640625, + "logps/rejected": -1140.6973876953125, + "loss": 0.2435, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.604163408279419, + "rewards/margins": 5.731095314025879, + "rewards/margins_max": 9.021711349487305, + "rewards/margins_min": 2.4404799938201904, + "rewards/margins_std": 4.653633117675781, + "rewards/rejected": -9.335259437561035, + "step": 3060 + }, + { + "epoch": 0.77, + "grad_norm": 1.1484375, + "learning_rate": 2.9913251062282984e-07, + "logits/chosen": 0.5903941988945007, + "logits/rejected": 0.9113849401473999, + "logps/chosen": -563.6361083984375, + "logps/rejected": -1265.0657958984375, + "loss": 0.1899, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.6609840393066406, + "rewards/margins": 6.926054954528809, + "rewards/margins_max": 10.484045028686523, + "rewards/margins_min": 3.3680667877197266, + "rewards/margins_std": 5.031756401062012, + "rewards/rejected": -10.587040901184082, + "step": 3070 + }, + { + "epoch": 0.77, + "grad_norm": 6.78125, + "learning_rate": 2.9289321881345254e-07, + "logits/chosen": 0.5719391703605652, + "logits/rejected": 0.9248722791671753, + "logps/chosen": -599.0647583007812, + "logps/rejected": -1356.6328125, + "loss": 0.1565, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.8286640644073486, + "rewards/margins": 7.712100028991699, + "rewards/margins_max": 11.743194580078125, + "rewards/margins_min": 3.681006908416748, + "rewards/margins_std": 5.700827598571777, + "rewards/rejected": -11.540764808654785, + "step": 3080 + }, + { + "epoch": 0.78, + "grad_norm": 0.60546875, + "learning_rate": 2.867085011482737e-07, + "logits/chosen": 0.48627376556396484, + "logits/rejected": 0.8899961709976196, + "logps/chosen": -660.3402709960938, + "logps/rejected": -1349.8409423828125, + "loss": 0.1774, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -4.254647254943848, + "rewards/margins": 7.1945695877075195, + "rewards/margins_max": 10.741331100463867, + "rewards/margins_min": 3.6478075981140137, + "rewards/margins_std": 5.015878200531006, + "rewards/rejected": -11.449216842651367, + "step": 3090 + }, + { + "epoch": 0.78, + "grad_norm": 0.890625, + "learning_rate": 2.8057883496067925e-07, + "logits/chosen": 0.5544101595878601, + "logits/rejected": 0.8789188265800476, + "logps/chosen": -529.1682739257812, + "logps/rejected": -1224.767822265625, + "loss": 0.1593, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.319157838821411, + "rewards/margins": 6.78830099105835, + "rewards/margins_max": 10.131688117980957, + "rewards/margins_min": 3.4449145793914795, + "rewards/margins_std": 4.728262901306152, + "rewards/rejected": -10.107458114624023, + "step": 3100 + }, + { + "epoch": 0.78, + "grad_norm": 1.7890625, + "learning_rate": 2.7450469333520853e-07, + "logits/chosen": 0.39449039101600647, + "logits/rejected": 0.6853546500205994, + "logps/chosen": -568.887451171875, + "logps/rejected": -1200.629638671875, + "loss": 0.1714, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.762941837310791, + "rewards/margins": 6.143300533294678, + "rewards/margins_max": 9.352704048156738, + "rewards/margins_min": 2.9338972568511963, + "rewards/margins_std": 4.538782119750977, + "rewards/rejected": -9.906242370605469, + "step": 3110 + }, + { + "epoch": 0.79, + "grad_norm": 1.140625, + "learning_rate": 2.6848654507104463e-07, + "logits/chosen": 0.3403048515319824, + "logits/rejected": 0.807928740978241, + "logps/chosen": -615.323974609375, + "logps/rejected": -1190.897705078125, + "loss": 0.161, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -3.839613676071167, + "rewards/margins": 5.719782829284668, + "rewards/margins_max": 8.89350700378418, + "rewards/margins_min": 2.546060085296631, + "rewards/margins_std": 4.4883222579956055, + "rewards/rejected": -9.559396743774414, + "step": 3120 + }, + { + "epoch": 0.79, + "grad_norm": 1.921875, + "learning_rate": 2.625248546458303e-07, + "logits/chosen": 0.4214434027671814, + "logits/rejected": 0.8966633677482605, + "logps/chosen": -620.8934936523438, + "logps/rejected": -1347.8140869140625, + "loss": 0.1593, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -4.028387546539307, + "rewards/margins": 7.290696144104004, + "rewards/margins_max": 11.02523422241211, + "rewards/margins_min": 3.556157350540161, + "rewards/margins_std": 5.281435489654541, + "rewards/rejected": -11.319084167480469, + "step": 3130 + }, + { + "epoch": 0.79, + "grad_norm": 2.046875, + "learning_rate": 2.5662008217982156e-07, + "logits/chosen": 0.47852668166160583, + "logits/rejected": 0.9050714373588562, + "logps/chosen": -579.2857666015625, + "logps/rejected": -1326.245849609375, + "loss": 0.1588, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.5103771686553955, + "rewards/margins": 7.595736026763916, + "rewards/margins_max": 11.297245025634766, + "rewards/margins_min": 3.894225597381592, + "rewards/margins_std": 5.2347259521484375, + "rewards/rejected": -11.10611343383789, + "step": 3140 + }, + { + "epoch": 0.79, + "grad_norm": 2.3125, + "learning_rate": 2.507726834003745e-07, + "logits/chosen": 0.5341039299964905, + "logits/rejected": 0.9968475103378296, + "logps/chosen": -572.641845703125, + "logps/rejected": -1260.7801513671875, + "loss": 0.1122, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.6314964294433594, + "rewards/margins": 6.958949089050293, + "rewards/margins_max": 11.029642105102539, + "rewards/margins_min": 2.888258457183838, + "rewards/margins_std": 5.756827354431152, + "rewards/rejected": -10.590445518493652, + "step": 3150 + }, + { + "epoch": 0.8, + "grad_norm": 2.71875, + "learning_rate": 2.44983109606773e-07, + "logits/chosen": 0.44414272904396057, + "logits/rejected": 0.72679603099823, + "logps/chosen": -596.27880859375, + "logps/rejected": -1328.358154296875, + "loss": 0.177, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.8258304595947266, + "rewards/margins": 7.107968330383301, + "rewards/margins_max": 10.948715209960938, + "rewards/margins_min": 3.2672207355499268, + "rewards/margins_std": 5.431636810302734, + "rewards/rejected": -10.933798789978027, + "step": 3160 + }, + { + "epoch": 0.8, + "grad_norm": 0.8203125, + "learning_rate": 2.3925180763539845e-07, + "logits/chosen": 0.4964269697666168, + "logits/rejected": 0.9164209365844727, + "logps/chosen": -562.5709228515625, + "logps/rejected": -1107.593017578125, + "loss": 0.1703, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.371009349822998, + "rewards/margins": 5.362520217895508, + "rewards/margins_max": 8.385972023010254, + "rewards/margins_min": 2.3390681743621826, + "rewards/margins_std": 4.275806427001953, + "rewards/rejected": -8.733530044555664, + "step": 3170 + }, + { + "epoch": 0.8, + "grad_norm": 4.125, + "learning_rate": 2.3357921982524197e-07, + "logits/chosen": 0.5338067412376404, + "logits/rejected": 0.9268990755081177, + "logps/chosen": -571.2572021484375, + "logps/rejected": -1339.582763671875, + "loss": 0.1291, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.572880506515503, + "rewards/margins": 7.33321475982666, + "rewards/margins_max": 10.481898307800293, + "rewards/margins_min": 4.184528827667236, + "rewards/margins_std": 4.452913284301758, + "rewards/rejected": -10.906094551086426, + "step": 3180 + }, + { + "epoch": 0.8, + "grad_norm": 1.640625, + "learning_rate": 2.279657839837652e-07, + "logits/chosen": 0.4593687951564789, + "logits/rejected": 0.8522500991821289, + "logps/chosen": -554.2239379882812, + "logps/rejected": -1212.277587890625, + "loss": 0.2414, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -3.5025417804718018, + "rewards/margins": 6.630636692047119, + "rewards/margins_max": 10.59939956665039, + "rewards/margins_min": 2.661872386932373, + "rewards/margins_std": 5.612679958343506, + "rewards/rejected": -10.1331787109375, + "step": 3190 + }, + { + "epoch": 0.81, + "grad_norm": 0.859375, + "learning_rate": 2.2241193335311127e-07, + "logits/chosen": 0.4334026277065277, + "logits/rejected": 0.8511263728141785, + "logps/chosen": -522.2242431640625, + "logps/rejected": -1108.195556640625, + "loss": 0.1553, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.257991313934326, + "rewards/margins": 5.921751976013184, + "rewards/margins_max": 8.671719551086426, + "rewards/margins_min": 3.1717848777770996, + "rewards/margins_std": 3.889040470123291, + "rewards/rejected": -9.179742813110352, + "step": 3200 + }, + { + "epoch": 0.81, + "grad_norm": 1.21875, + "learning_rate": 2.1691809657666592e-07, + "logits/chosen": 0.4394384026527405, + "logits/rejected": 0.9547786712646484, + "logps/chosen": -563.9006958007812, + "logps/rejected": -984.5545654296875, + "loss": 0.2146, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.6484360694885254, + "rewards/margins": 4.430947780609131, + "rewards/margins_max": 6.850518226623535, + "rewards/margins_min": 2.011378765106201, + "rewards/margins_std": 3.421788454055786, + "rewards/rejected": -8.079385757446289, + "step": 3210 + }, + { + "epoch": 0.81, + "grad_norm": 0.94140625, + "learning_rate": 2.1148469766597698e-07, + "logits/chosen": 0.5856447219848633, + "logits/rejected": 0.9771261215209961, + "logps/chosen": -587.283935546875, + "logps/rejected": -1270.7833251953125, + "loss": 0.1871, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.7281603813171387, + "rewards/margins": 6.7978515625, + "rewards/margins_max": 10.585257530212402, + "rewards/margins_min": 3.0104446411132812, + "rewards/margins_std": 5.356202125549316, + "rewards/rejected": -10.526012420654297, + "step": 3220 + }, + { + "epoch": 0.81, + "grad_norm": 1.625, + "learning_rate": 2.06112155968028e-07, + "logits/chosen": 0.34765639901161194, + "logits/rejected": 0.7540073990821838, + "logps/chosen": -610.2107543945312, + "logps/rejected": -1326.593017578125, + "loss": 0.1437, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -4.039941787719727, + "rewards/margins": 7.0522661209106445, + "rewards/margins_max": 10.800148010253906, + "rewards/margins_min": 3.3043816089630127, + "rewards/margins_std": 5.300307750701904, + "rewards/rejected": -11.092206954956055, + "step": 3230 + }, + { + "epoch": 0.82, + "grad_norm": 4.375, + "learning_rate": 2.0080088613287293e-07, + "logits/chosen": 0.4891189932823181, + "logits/rejected": 0.9726032018661499, + "logps/chosen": -548.3533935546875, + "logps/rejected": -1111.445068359375, + "loss": 0.1692, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.530595302581787, + "rewards/margins": 5.695003509521484, + "rewards/margins_max": 8.761409759521484, + "rewards/margins_min": 2.628596305847168, + "rewards/margins_std": 4.336554527282715, + "rewards/rejected": -9.22559928894043, + "step": 3240 + }, + { + "epoch": 0.82, + "grad_norm": 1.453125, + "learning_rate": 1.955512980816354e-07, + "logits/chosen": 0.5204964876174927, + "logits/rejected": 0.8765512704849243, + "logps/chosen": -591.1302490234375, + "logps/rejected": -1324.301513671875, + "loss": 0.2054, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.970345973968506, + "rewards/margins": 7.200788974761963, + "rewards/margins_max": 10.806253433227539, + "rewards/margins_min": 3.595324754714966, + "rewards/margins_std": 5.098896026611328, + "rewards/rejected": -11.171134948730469, + "step": 3250 + }, + { + "epoch": 0.82, + "grad_norm": 1.2890625, + "learning_rate": 1.9036379697486927e-07, + "logits/chosen": 0.47821909189224243, + "logits/rejected": 0.921379566192627, + "logps/chosen": -550.5033569335938, + "logps/rejected": -1350.917724609375, + "loss": 0.1216, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -3.5862479209899902, + "rewards/margins": 7.927116394042969, + "rewards/margins_max": 12.039579391479492, + "rewards/margins_min": 3.81465220451355, + "rewards/margins_std": 5.815901756286621, + "rewards/rejected": -11.513364791870117, + "step": 3260 + }, + { + "epoch": 0.82, + "grad_norm": 0.9375, + "learning_rate": 1.8523878318128926e-07, + "logits/chosen": 0.5904892683029175, + "logits/rejected": 1.0056906938552856, + "logps/chosen": -576.0701904296875, + "logps/rejected": -1295.9527587890625, + "loss": 0.1443, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.6342978477478027, + "rewards/margins": 7.424314022064209, + "rewards/margins_max": 11.07739543914795, + "rewards/margins_min": 3.771233320236206, + "rewards/margins_std": 5.166236400604248, + "rewards/rejected": -11.058611869812012, + "step": 3270 + }, + { + "epoch": 0.83, + "grad_norm": 2.5, + "learning_rate": 1.8017665224687185e-07, + "logits/chosen": 0.4087589383125305, + "logits/rejected": 0.9955110549926758, + "logps/chosen": -640.23828125, + "logps/rejected": -1283.7027587890625, + "loss": 0.188, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.8483219146728516, + "rewards/margins": 6.573834419250488, + "rewards/margins_max": 9.99498176574707, + "rewards/margins_min": 3.15268611907959, + "rewards/margins_std": 4.8382344245910645, + "rewards/rejected": -10.422155380249023, + "step": 3280 + }, + { + "epoch": 0.83, + "grad_norm": 1.015625, + "learning_rate": 1.7517779486432494e-07, + "logits/chosen": 0.5131040811538696, + "logits/rejected": 0.9352075457572937, + "logps/chosen": -604.0338745117188, + "logps/rejected": -1292.89111328125, + "loss": 0.1687, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.8847403526306152, + "rewards/margins": 6.89129638671875, + "rewards/margins_max": 10.85944938659668, + "rewards/margins_min": 2.9231438636779785, + "rewards/margins_std": 5.611815452575684, + "rewards/rejected": -10.77603816986084, + "step": 3290 + }, + { + "epoch": 0.83, + "grad_norm": 1.703125, + "learning_rate": 1.7024259684293674e-07, + "logits/chosen": 0.4551068842411041, + "logits/rejected": 0.9105457067489624, + "logps/chosen": -587.896240234375, + "logps/rejected": -1109.6060791015625, + "loss": 0.2436, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.670138120651245, + "rewards/margins": 5.2051897048950195, + "rewards/margins_max": 8.09550666809082, + "rewards/margins_min": 2.314873218536377, + "rewards/margins_std": 4.0875244140625, + "rewards/rejected": -8.875328063964844, + "step": 3300 + }, + { + "epoch": 0.83, + "grad_norm": 0.95703125, + "learning_rate": 1.6537143907879792e-07, + "logits/chosen": 0.4002392292022705, + "logits/rejected": 0.8851076364517212, + "logps/chosen": -600.163818359375, + "logps/rejected": -1274.1259765625, + "loss": 0.1875, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -3.855924606323242, + "rewards/margins": 6.683934688568115, + "rewards/margins_max": 10.393302917480469, + "rewards/margins_min": 2.97456693649292, + "rewards/margins_std": 5.2458391189575195, + "rewards/rejected": -10.539859771728516, + "step": 3310 + }, + { + "epoch": 0.84, + "grad_norm": 8.1875, + "learning_rate": 1.6056469752540347e-07, + "logits/chosen": 0.5070708394050598, + "logits/rejected": 1.0886653661727905, + "logps/chosen": -579.6941528320312, + "logps/rejected": -1257.633544921875, + "loss": 0.1963, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -3.607515335083008, + "rewards/margins": 6.9815239906311035, + "rewards/margins_max": 10.53345012664795, + "rewards/margins_min": 3.4295973777770996, + "rewards/margins_std": 5.023181915283203, + "rewards/rejected": -10.58903980255127, + "step": 3320 + }, + { + "epoch": 0.84, + "grad_norm": 0.87890625, + "learning_rate": 1.5582274316463928e-07, + "logits/chosen": 0.42002058029174805, + "logits/rejected": 0.8670506477355957, + "logps/chosen": -607.319580078125, + "logps/rejected": -1416.8555908203125, + "loss": 0.1503, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.934675693511963, + "rewards/margins": 8.056138038635254, + "rewards/margins_max": 11.901208877563477, + "rewards/margins_min": 4.211067199707031, + "rewards/margins_std": 5.437750816345215, + "rewards/rejected": -11.990813255310059, + "step": 3330 + }, + { + "epoch": 0.84, + "grad_norm": 0.58203125, + "learning_rate": 1.511459419781469e-07, + "logits/chosen": 0.473996639251709, + "logits/rejected": 0.9316139221191406, + "logps/chosen": -638.8199462890625, + "logps/rejected": -1332.9974365234375, + "loss": 0.1523, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -4.206120491027832, + "rewards/margins": 7.044719696044922, + "rewards/margins_max": 10.616361618041992, + "rewards/margins_min": 3.4730796813964844, + "rewards/margins_std": 5.05106258392334, + "rewards/rejected": -11.250840187072754, + "step": 3340 + }, + { + "epoch": 0.84, + "grad_norm": 0.875, + "learning_rate": 1.4653465491908e-07, + "logits/chosen": 0.4140965938568115, + "logits/rejected": 0.8925831913948059, + "logps/chosen": -568.9302978515625, + "logps/rejected": -1233.1417236328125, + "loss": 0.1886, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.6400482654571533, + "rewards/margins": 6.646452903747559, + "rewards/margins_max": 10.187222480773926, + "rewards/margins_min": 3.105684518814087, + "rewards/margins_std": 5.007403373718262, + "rewards/rejected": -10.28650188446045, + "step": 3350 + }, + { + "epoch": 0.85, + "grad_norm": 2.21875, + "learning_rate": 1.4198923788424477e-07, + "logits/chosen": 0.47543078660964966, + "logits/rejected": 0.9080629348754883, + "logps/chosen": -633.4432373046875, + "logps/rejected": -1273.630615234375, + "loss": 0.14, + "rewards/accuracies": 0.9375, + "rewards/chosen": -4.122483730316162, + "rewards/margins": 6.322574615478516, + "rewards/margins_max": 9.358713150024414, + "rewards/margins_min": 3.2864346504211426, + "rewards/margins_std": 4.293749809265137, + "rewards/rejected": -10.445058822631836, + "step": 3360 + }, + { + "epoch": 0.85, + "grad_norm": 1.015625, + "learning_rate": 1.375100416866316e-07, + "logits/chosen": 0.5130153298377991, + "logits/rejected": 0.9054125547409058, + "logps/chosen": -539.0538940429688, + "logps/rejected": -1185.374267578125, + "loss": 0.1416, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.414109468460083, + "rewards/margins": 6.318561553955078, + "rewards/margins_max": 9.108491897583008, + "rewards/margins_min": 3.5286312103271484, + "rewards/margins_std": 3.9455573558807373, + "rewards/rejected": -9.732671737670898, + "step": 3370 + }, + { + "epoch": 0.85, + "grad_norm": 1.03125, + "learning_rate": 1.3309741202834045e-07, + "logits/chosen": 0.4064570367336273, + "logits/rejected": 0.9202351570129395, + "logps/chosen": -582.8175659179688, + "logps/rejected": -1286.4290771484375, + "loss": 0.1051, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.6537833213806152, + "rewards/margins": 6.9002556800842285, + "rewards/margins_max": 9.804253578186035, + "rewards/margins_min": 3.9962570667266846, + "rewards/margins_std": 4.106873512268066, + "rewards/rejected": -10.554038047790527, + "step": 3380 + }, + { + "epoch": 0.85, + "grad_norm": 2.890625, + "learning_rate": 1.2875168947389982e-07, + "logits/chosen": 0.4888080656528473, + "logits/rejected": 0.8091050386428833, + "logps/chosen": -651.0318603515625, + "logps/rejected": -1255.9091796875, + "loss": 0.1691, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -4.149880409240723, + "rewards/margins": 6.239043712615967, + "rewards/margins_max": 9.588689804077148, + "rewards/margins_min": 2.889397144317627, + "rewards/margins_std": 4.73711633682251, + "rewards/rejected": -10.388925552368164, + "step": 3390 + }, + { + "epoch": 0.86, + "grad_norm": 0.95703125, + "learning_rate": 1.2447320942398075e-07, + "logits/chosen": 0.4371975362300873, + "logits/rejected": 1.0079147815704346, + "logps/chosen": -620.552978515625, + "logps/rejected": -1189.5225830078125, + "loss": 0.294, + "rewards/accuracies": 0.887499988079071, + "rewards/chosen": -3.896024227142334, + "rewards/margins": 6.076613903045654, + "rewards/margins_max": 9.558730125427246, + "rewards/margins_min": 2.5944974422454834, + "rewards/margins_std": 4.924456596374512, + "rewards/rejected": -9.972637176513672, + "step": 3400 + }, + { + "epoch": 0.86, + "grad_norm": 1.34375, + "learning_rate": 1.2026230208951304e-07, + "logits/chosen": 0.472128301858902, + "logits/rejected": 0.9814669489860535, + "logps/chosen": -617.8382568359375, + "logps/rejected": -1211.0347900390625, + "loss": 0.1947, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -3.850552797317505, + "rewards/margins": 6.239529132843018, + "rewards/margins_max": 9.678964614868164, + "rewards/margins_min": 2.8000922203063965, + "rewards/margins_std": 4.864098072052002, + "rewards/rejected": -10.090081214904785, + "step": 3410 + }, + { + "epoch": 0.86, + "grad_norm": 8.4375, + "learning_rate": 1.1611929246619723e-07, + "logits/chosen": 0.45898929238319397, + "logits/rejected": 0.8415622711181641, + "logps/chosen": -584.2407836914062, + "logps/rejected": -1248.3134765625, + "loss": 0.267, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.8289635181427, + "rewards/margins": 6.5659613609313965, + "rewards/margins_max": 9.749704360961914, + "rewards/margins_min": 3.3822174072265625, + "rewards/margins_std": 4.502493858337402, + "rewards/rejected": -10.394925117492676, + "step": 3420 + }, + { + "epoch": 0.86, + "grad_norm": 10.8125, + "learning_rate": 1.1204450030942347e-07, + "logits/chosen": 0.5145548582077026, + "logits/rejected": 0.8463503122329712, + "logps/chosen": -588.1199951171875, + "logps/rejected": -1244.9676513671875, + "loss": 0.2621, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -3.942927122116089, + "rewards/margins": 6.304174423217773, + "rewards/margins_max": 9.600616455078125, + "rewards/margins_min": 3.007732629776001, + "rewards/margins_std": 4.661872386932373, + "rewards/rejected": -10.247102737426758, + "step": 3430 + }, + { + "epoch": 0.87, + "grad_norm": 0.71875, + "learning_rate": 1.080382401095925e-07, + "logits/chosen": 0.5430434942245483, + "logits/rejected": 1.0276672840118408, + "logps/chosen": -612.2022094726562, + "logps/rejected": -1238.375, + "loss": 0.177, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.7015655040740967, + "rewards/margins": 6.311164379119873, + "rewards/margins_max": 9.718725204467773, + "rewards/margins_min": 2.903604745864868, + "rewards/margins_std": 4.81901741027832, + "rewards/rejected": -10.012730598449707, + "step": 3440 + }, + { + "epoch": 0.87, + "grad_norm": 0.5234375, + "learning_rate": 1.0410082106784235e-07, + "logits/chosen": 0.4352169632911682, + "logits/rejected": 1.0200514793395996, + "logps/chosen": -686.00732421875, + "logps/rejected": -1239.3880615234375, + "loss": 0.3034, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -4.280230522155762, + "rewards/margins": 6.025425910949707, + "rewards/margins_max": 9.804727554321289, + "rewards/margins_min": 2.246123790740967, + "rewards/margins_std": 5.34473991394043, + "rewards/rejected": -10.305655479431152, + "step": 3450 + }, + { + "epoch": 0.87, + "grad_norm": 3.125, + "learning_rate": 1.0023254707218609e-07, + "logits/chosen": 0.4326336979866028, + "logits/rejected": 0.8475500345230103, + "logps/chosen": -625.3204345703125, + "logps/rejected": -1264.6829833984375, + "loss": 0.2183, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.916844606399536, + "rewards/margins": 6.507603645324707, + "rewards/margins_max": 10.618246078491211, + "rewards/margins_min": 2.3969624042510986, + "rewards/margins_std": 5.813324928283691, + "rewards/rejected": -10.42444896697998, + "step": 3460 + }, + { + "epoch": 0.87, + "grad_norm": 2.09375, + "learning_rate": 9.643371667405698e-08, + "logits/chosen": 0.4223089814186096, + "logits/rejected": 0.9621411561965942, + "logps/chosen": -599.6414794921875, + "logps/rejected": -1107.6328125, + "loss": 0.1659, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.7435672283172607, + "rewards/margins": 5.297955513000488, + "rewards/margins_max": 7.878331184387207, + "rewards/margins_min": 2.717580795288086, + "rewards/margins_std": 3.6492016315460205, + "rewards/rejected": -9.041522979736328, + "step": 3470 + }, + { + "epoch": 0.88, + "grad_norm": 9.875, + "learning_rate": 9.270462306526594e-08, + "logits/chosen": 0.540179431438446, + "logits/rejected": 0.956885039806366, + "logps/chosen": -563.6200561523438, + "logps/rejected": -1207.8319091796875, + "loss": 0.2462, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.532160520553589, + "rewards/margins": 6.614119052886963, + "rewards/margins_max": 10.017059326171875, + "rewards/margins_min": 3.21117901802063, + "rewards/margins_std": 4.812485218048096, + "rewards/rejected": -10.146280288696289, + "step": 3480 + }, + { + "epoch": 0.88, + "grad_norm": 1.2890625, + "learning_rate": 8.904555405537406e-08, + "logits/chosen": 0.4101219177246094, + "logits/rejected": 0.9202925562858582, + "logps/chosen": -566.47998046875, + "logps/rejected": -1209.119873046875, + "loss": 0.1687, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.4704158306121826, + "rewards/margins": 6.5966796875, + "rewards/margins_max": 9.444000244140625, + "rewards/margins_min": 3.749358654022217, + "rewards/margins_std": 4.026719570159912, + "rewards/rejected": -10.067094802856445, + "step": 3490 + }, + { + "epoch": 0.88, + "grad_norm": 1.5, + "learning_rate": 8.545679204947953e-08, + "logits/chosen": 0.5104061365127563, + "logits/rejected": 0.9191001653671265, + "logps/chosen": -560.3649291992188, + "logps/rejected": -1123.328857421875, + "loss": 0.1221, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.688387632369995, + "rewards/margins": 5.621038913726807, + "rewards/margins_max": 7.867220401763916, + "rewards/margins_min": 3.3748581409454346, + "rewards/margins_std": 3.176579713821411, + "rewards/rejected": -9.309426307678223, + "step": 3500 + }, + { + "epoch": 0.88, + "grad_norm": 13.1875, + "learning_rate": 8.193861402642088e-08, + "logits/chosen": 0.3396713137626648, + "logits/rejected": 0.8896854519844055, + "logps/chosen": -639.7154541015625, + "logps/rejected": -1173.7528076171875, + "loss": 0.2153, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.856093168258667, + "rewards/margins": 5.626918315887451, + "rewards/margins_max": 8.778867721557617, + "rewards/margins_min": 2.4749696254730225, + "rewards/margins_std": 4.457529067993164, + "rewards/rejected": -9.483012199401855, + "step": 3510 + }, + { + "epoch": 0.89, + "grad_norm": 0.83203125, + "learning_rate": 7.849129151740119e-08, + "logits/chosen": 0.49893778562545776, + "logits/rejected": 0.9670238494873047, + "logps/chosen": -577.2950439453125, + "logps/rejected": -1137.2681884765625, + "loss": 0.1803, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.5992584228515625, + "rewards/margins": 5.753302097320557, + "rewards/margins_max": 8.932449340820312, + "rewards/margins_min": 2.574155807495117, + "rewards/margins_std": 4.495992660522461, + "rewards/rejected": -9.352560043334961, + "step": 3520 + }, + { + "epoch": 0.89, + "grad_norm": 1.703125, + "learning_rate": 7.511509058502996e-08, + "logits/chosen": 0.4532325863838196, + "logits/rejected": 0.9574426412582397, + "logps/chosen": -568.4635620117188, + "logps/rejected": -1101.7496337890625, + "loss": 0.2359, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -3.4725348949432373, + "rewards/margins": 5.306643486022949, + "rewards/margins_max": 8.2151460647583, + "rewards/margins_min": 2.398141384124756, + "rewards/margins_std": 4.113243579864502, + "rewards/rejected": -8.779179573059082, + "step": 3530 + }, + { + "epoch": 0.89, + "grad_norm": 11.8125, + "learning_rate": 7.18102718027901e-08, + "logits/chosen": 0.5021312236785889, + "logits/rejected": 0.9430710673332214, + "logps/chosen": -587.4429321289062, + "logps/rejected": -1174.86181640625, + "loss": 0.2749, + "rewards/accuracies": 0.887499988079071, + "rewards/chosen": -3.7951760292053223, + "rewards/margins": 5.84264612197876, + "rewards/margins_max": 9.605822563171387, + "rewards/margins_min": 2.0794689655303955, + "rewards/margins_std": 5.321936130523682, + "rewards/rejected": -9.637822151184082, + "step": 3540 + }, + { + "epoch": 0.89, + "grad_norm": 1.25, + "learning_rate": 6.857709023492586e-08, + "logits/chosen": 0.39984625577926636, + "logits/rejected": 0.8395845293998718, + "logps/chosen": -564.1094970703125, + "logps/rejected": -1282.72802734375, + "loss": 0.2096, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.5986900329589844, + "rewards/margins": 6.7760329246521, + "rewards/margins_max": 10.53276252746582, + "rewards/margins_min": 3.0193045139312744, + "rewards/margins_std": 5.312817573547363, + "rewards/rejected": -10.374723434448242, + "step": 3550 + }, + { + "epoch": 0.9, + "grad_norm": 0.74609375, + "learning_rate": 6.541579541675734e-08, + "logits/chosen": 0.4497915208339691, + "logits/rejected": 0.8971832394599915, + "logps/chosen": -563.6310424804688, + "logps/rejected": -1266.3800048828125, + "loss": 0.1246, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.5002026557922363, + "rewards/margins": 7.074613094329834, + "rewards/margins_max": 10.105807304382324, + "rewards/margins_min": 4.043417930603027, + "rewards/margins_std": 4.28675651550293, + "rewards/rejected": -10.57481575012207, + "step": 3560 + }, + { + "epoch": 0.9, + "grad_norm": 2.71875, + "learning_rate": 6.232663133542204e-08, + "logits/chosen": 0.32878604531288147, + "logits/rejected": 0.9640012979507446, + "logps/chosen": -651.9713134765625, + "logps/rejected": -1202.863525390625, + "loss": 0.1796, + "rewards/accuracies": 0.9375, + "rewards/chosen": -4.158196449279785, + "rewards/margins": 5.708923816680908, + "rewards/margins_max": 8.873598098754883, + "rewards/margins_min": 2.544250011444092, + "rewards/margins_std": 4.475523948669434, + "rewards/rejected": -9.867119789123535, + "step": 3570 + }, + { + "epoch": 0.9, + "grad_norm": 0.458984375, + "learning_rate": 5.9309836411043034e-08, + "logits/chosen": 0.4480930268764496, + "logits/rejected": 0.9676550030708313, + "logps/chosen": -615.3204956054688, + "logps/rejected": -1192.294677734375, + "loss": 0.1424, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.8424465656280518, + "rewards/margins": 6.077481269836426, + "rewards/margins_max": 9.163492202758789, + "rewards/margins_min": 2.9914684295654297, + "rewards/margins_std": 4.364280700683594, + "rewards/rejected": -9.919927597045898, + "step": 3580 + }, + { + "epoch": 0.9, + "grad_norm": 1.0390625, + "learning_rate": 5.636564347832906e-08, + "logits/chosen": 0.5807913541793823, + "logits/rejected": 1.0163103342056274, + "logps/chosen": -546.7759399414062, + "logps/rejected": -1098.3787841796875, + "loss": 0.1258, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -3.498042583465576, + "rewards/margins": 5.564633369445801, + "rewards/margins_max": 8.194429397583008, + "rewards/margins_min": 2.9348368644714355, + "rewards/margins_std": 3.7190933227539062, + "rewards/rejected": -9.062675476074219, + "step": 3590 + }, + { + "epoch": 0.91, + "grad_norm": 2.734375, + "learning_rate": 5.349427976860321e-08, + "logits/chosen": 0.38955169916152954, + "logits/rejected": 0.9389937520027161, + "logps/chosen": -605.4104614257812, + "logps/rejected": -1252.658935546875, + "loss": 0.1961, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.7875728607177734, + "rewards/margins": 6.668432712554932, + "rewards/margins_max": 9.599607467651367, + "rewards/margins_min": 3.737257480621338, + "rewards/margins_std": 4.1453070640563965, + "rewards/rejected": -10.456005096435547, + "step": 3600 + }, + { + "epoch": 0.91, + "grad_norm": 2.015625, + "learning_rate": 5.069596689226652e-08, + "logits/chosen": 0.44946521520614624, + "logits/rejected": 0.9365663528442383, + "logps/chosen": -622.5480346679688, + "logps/rejected": -1250.881103515625, + "loss": 0.1408, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.8964874744415283, + "rewards/margins": 6.451874732971191, + "rewards/margins_max": 10.348373413085938, + "rewards/margins_min": 2.5553746223449707, + "rewards/margins_std": 5.510483264923096, + "rewards/rejected": -10.348361015319824, + "step": 3610 + }, + { + "epoch": 0.91, + "grad_norm": 4.90625, + "learning_rate": 4.797092082169307e-08, + "logits/chosen": 0.5568719506263733, + "logits/rejected": 1.070988655090332, + "logps/chosen": -644.2371826171875, + "logps/rejected": -1172.1806640625, + "loss": 0.2374, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -4.062577724456787, + "rewards/margins": 5.627659797668457, + "rewards/margins_max": 8.762027740478516, + "rewards/margins_min": 2.4932923316955566, + "rewards/margins_std": 4.432665824890137, + "rewards/rejected": -9.690237998962402, + "step": 3620 + }, + { + "epoch": 0.91, + "grad_norm": 4.0, + "learning_rate": 4.531935187456215e-08, + "logits/chosen": 0.562368631362915, + "logits/rejected": 1.0721943378448486, + "logps/chosen": -617.67333984375, + "logps/rejected": -1335.0179443359375, + "loss": 0.1633, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.883314609527588, + "rewards/margins": 7.272046089172363, + "rewards/margins_max": 10.699112892150879, + "rewards/margins_min": 3.844979763031006, + "rewards/margins_std": 4.846603870391846, + "rewards/rejected": -11.15536117553711, + "step": 3630 + }, + { + "epoch": 0.92, + "grad_norm": 2.40625, + "learning_rate": 4.274146469762563e-08, + "logits/chosen": 0.5142907500267029, + "logits/rejected": 0.8736904859542847, + "logps/chosen": -538.7899169921875, + "logps/rejected": -1283.476318359375, + "loss": 0.1636, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -3.5881550312042236, + "rewards/margins": 7.207592010498047, + "rewards/margins_max": 10.598726272583008, + "rewards/margins_min": 3.8164570331573486, + "rewards/margins_std": 4.795788288116455, + "rewards/rejected": -10.795746803283691, + "step": 3640 + }, + { + "epoch": 0.92, + "grad_norm": 0.9609375, + "learning_rate": 4.023745825091407e-08, + "logits/chosen": 0.4232380986213684, + "logits/rejected": 0.8965535163879395, + "logps/chosen": -613.1757202148438, + "logps/rejected": -1340.668212890625, + "loss": 0.1607, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.898705005645752, + "rewards/margins": 7.219210624694824, + "rewards/margins_max": 10.523519515991211, + "rewards/margins_min": 3.914902925491333, + "rewards/margins_std": 4.672996997833252, + "rewards/rejected": -11.117916107177734, + "step": 3650 + }, + { + "epoch": 0.92, + "grad_norm": 6.1875, + "learning_rate": 3.780752579237978e-08, + "logits/chosen": 0.4038727283477783, + "logits/rejected": 0.8197474479675293, + "logps/chosen": -599.9093017578125, + "logps/rejected": -1350.9764404296875, + "loss": 0.2599, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.743206739425659, + "rewards/margins": 7.354147434234619, + "rewards/margins_max": 11.198575019836426, + "rewards/margins_min": 3.5097198486328125, + "rewards/margins_std": 5.4368414878845215, + "rewards/rejected": -11.0973539352417, + "step": 3660 + }, + { + "epoch": 0.92, + "grad_norm": 1.1640625, + "learning_rate": 3.545185486298274e-08, + "logits/chosen": 0.5607768893241882, + "logits/rejected": 0.8514927625656128, + "logps/chosen": -599.4677734375, + "logps/rejected": -1308.130859375, + "loss": 0.1914, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.958008289337158, + "rewards/margins": 6.930544376373291, + "rewards/margins_max": 10.306341171264648, + "rewards/margins_min": 3.5547471046447754, + "rewards/margins_std": 4.774097442626953, + "rewards/rejected": -10.888551712036133, + "step": 3670 + }, + { + "epoch": 0.93, + "grad_norm": 1.4375, + "learning_rate": 3.317062727221542e-08, + "logits/chosen": 0.6026689410209656, + "logits/rejected": 0.9835416674613953, + "logps/chosen": -614.91943359375, + "logps/rejected": -1405.431640625, + "loss": 0.1626, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.7954108715057373, + "rewards/margins": 7.760351657867432, + "rewards/margins_max": 12.802927017211914, + "rewards/margins_min": 2.7177751064300537, + "rewards/margins_std": 7.131278991699219, + "rewards/rejected": -11.555761337280273, + "step": 3680 + }, + { + "epoch": 0.93, + "grad_norm": 0.6484375, + "learning_rate": 3.096401908407076e-08, + "logits/chosen": 0.39605578780174255, + "logits/rejected": 0.9517404437065125, + "logps/chosen": -632.5601806640625, + "logps/rejected": -1478.791015625, + "loss": 0.1858, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.7204372882843018, + "rewards/margins": 8.738731384277344, + "rewards/margins_max": 13.405255317687988, + "rewards/margins_min": 4.072208404541016, + "rewards/margins_std": 6.599459648132324, + "rewards/rejected": -12.459168434143066, + "step": 3690 + }, + { + "epoch": 0.93, + "grad_norm": 4.09375, + "learning_rate": 2.883220060345437e-08, + "logits/chosen": 0.4364239275455475, + "logits/rejected": 0.8532499074935913, + "logps/chosen": -554.8377075195312, + "logps/rejected": -1244.270263671875, + "loss": 0.1934, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.2251949310302734, + "rewards/margins": 7.302639007568359, + "rewards/margins_max": 11.826835632324219, + "rewards/margins_min": 2.778442859649658, + "rewards/margins_std": 6.398179531097412, + "rewards/rejected": -10.52783489227295, + "step": 3700 + }, + { + "epoch": 0.93, + "grad_norm": 4.0625, + "learning_rate": 2.6775336363039636e-08, + "logits/chosen": 0.294972687959671, + "logits/rejected": 0.7404045462608337, + "logps/chosen": -603.755126953125, + "logps/rejected": -1179.319580078125, + "loss": 0.207, + "rewards/accuracies": 0.9375, + "rewards/chosen": -4.001626014709473, + "rewards/margins": 5.692513465881348, + "rewards/margins_max": 8.635394096374512, + "rewards/margins_min": 2.7496330738067627, + "rewards/margins_std": 4.161861896514893, + "rewards/rejected": -9.69413948059082, + "step": 3710 + }, + { + "epoch": 0.94, + "grad_norm": 2.25, + "learning_rate": 2.4793585110569726e-08, + "logits/chosen": 0.4034551680088043, + "logits/rejected": 0.7582255601882935, + "logps/chosen": -613.5555419921875, + "logps/rejected": -1203.7205810546875, + "loss": 0.1721, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -4.042942523956299, + "rewards/margins": 5.850724220275879, + "rewards/margins_max": 8.855157852172852, + "rewards/margins_min": 2.8462884426116943, + "rewards/margins_std": 4.248912811279297, + "rewards/rejected": -9.89366626739502, + "step": 3720 + }, + { + "epoch": 0.94, + "grad_norm": 0.5546875, + "learning_rate": 2.2887099796605192e-08, + "logits/chosen": 0.48683229088783264, + "logits/rejected": 0.9286754727363586, + "logps/chosen": -577.3275146484375, + "logps/rejected": -1279.540771484375, + "loss": 0.1469, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.7833991050720215, + "rewards/margins": 6.769036293029785, + "rewards/margins_max": 10.353792190551758, + "rewards/margins_min": 3.184278964996338, + "rewards/margins_std": 5.069611549377441, + "rewards/rejected": -10.552433967590332, + "step": 3730 + }, + { + "epoch": 0.94, + "grad_norm": 0.921875, + "learning_rate": 2.1056027562719515e-08, + "logits/chosen": 0.4771907925605774, + "logits/rejected": 0.9152711629867554, + "logps/chosen": -630.6005859375, + "logps/rejected": -1180.2467041015625, + "loss": 0.191, + "rewards/accuracies": 0.9375, + "rewards/chosen": -4.1157917976379395, + "rewards/margins": 5.6571149826049805, + "rewards/margins_max": 8.83124828338623, + "rewards/margins_min": 2.482980728149414, + "rewards/margins_std": 4.488903045654297, + "rewards/rejected": -9.772905349731445, + "step": 3740 + }, + { + "epoch": 0.94, + "grad_norm": 1.7734375, + "learning_rate": 1.9300509730142855e-08, + "logits/chosen": 0.43386468291282654, + "logits/rejected": 0.904864490032196, + "logps/chosen": -554.5198974609375, + "logps/rejected": -1208.7537841796875, + "loss": 0.1863, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.451647996902466, + "rewards/margins": 6.490715026855469, + "rewards/margins_max": 9.633821487426758, + "rewards/margins_min": 3.3476085662841797, + "rewards/margins_std": 4.445023536682129, + "rewards/rejected": -9.942361831665039, + "step": 3750 + }, + { + "epoch": 0.95, + "grad_norm": 0.94140625, + "learning_rate": 1.762068178885501e-08, + "logits/chosen": 0.41329479217529297, + "logits/rejected": 0.8515909910202026, + "logps/chosen": -620.33935546875, + "logps/rejected": -1288.8076171875, + "loss": 0.1975, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -3.9792284965515137, + "rewards/margins": 6.714905738830566, + "rewards/margins_max": 10.155709266662598, + "rewards/margins_min": 3.2741000652313232, + "rewards/margins_std": 4.86603307723999, + "rewards/rejected": -10.694132804870605, + "step": 3760 + }, + { + "epoch": 0.95, + "grad_norm": 7.59375, + "learning_rate": 1.6016673387127642e-08, + "logits/chosen": 0.41189831495285034, + "logits/rejected": 1.1138523817062378, + "logps/chosen": -601.9688720703125, + "logps/rejected": -1127.280029296875, + "loss": 0.265, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.6903653144836426, + "rewards/margins": 5.70098876953125, + "rewards/margins_max": 8.490083694458008, + "rewards/margins_min": 2.9118943214416504, + "rewards/margins_std": 3.9443747997283936, + "rewards/rejected": -9.39135456085205, + "step": 3770 + }, + { + "epoch": 0.95, + "grad_norm": 1.671875, + "learning_rate": 1.4488608321519214e-08, + "logits/chosen": 0.310377836227417, + "logits/rejected": 0.877922534942627, + "logps/chosen": -592.072021484375, + "logps/rejected": -1218.7236328125, + "loss": 0.1276, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8421072959899902, + "rewards/margins": 6.450199127197266, + "rewards/margins_max": 9.784720420837402, + "rewards/margins_min": 3.115678310394287, + "rewards/margins_std": 4.715724945068359, + "rewards/rejected": -10.292306900024414, + "step": 3780 + }, + { + "epoch": 0.95, + "grad_norm": 0.58203125, + "learning_rate": 1.3036604527319472e-08, + "logits/chosen": 0.5283810496330261, + "logits/rejected": 0.9584972262382507, + "logps/chosen": -610.931884765625, + "logps/rejected": -1124.7642822265625, + "loss": 0.1821, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.874329090118408, + "rewards/margins": 5.139523506164551, + "rewards/margins_max": 7.766670227050781, + "rewards/margins_min": 2.512375831604004, + "rewards/margins_std": 3.7153477668762207, + "rewards/rejected": -9.013853073120117, + "step": 3790 + }, + { + "epoch": 0.96, + "grad_norm": 1.6875, + "learning_rate": 1.1660774069447876e-08, + "logits/chosen": 0.5633661150932312, + "logits/rejected": 0.9613991975784302, + "logps/chosen": -556.5394287109375, + "logps/rejected": -1291.819091796875, + "loss": 0.1353, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -3.2998318672180176, + "rewards/margins": 7.387022495269775, + "rewards/margins_max": 10.791549682617188, + "rewards/margins_min": 3.982494831085205, + "rewards/margins_std": 4.814728736877441, + "rewards/rejected": -10.686854362487793, + "step": 3800 + }, + { + "epoch": 0.96, + "grad_norm": 1.484375, + "learning_rate": 1.0361223133804386e-08, + "logits/chosen": 0.5381686091423035, + "logits/rejected": 0.9398612976074219, + "logps/chosen": -635.9791259765625, + "logps/rejected": -1432.7926025390625, + "loss": 0.1523, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -4.109006881713867, + "rewards/margins": 7.819764137268066, + "rewards/margins_max": 11.635190963745117, + "rewards/margins_min": 4.004334926605225, + "rewards/margins_std": 5.3958306312561035, + "rewards/rejected": -11.928770065307617, + "step": 3810 + }, + { + "epoch": 0.96, + "grad_norm": 37.75, + "learning_rate": 9.138052019073472e-09, + "logits/chosen": 0.45118942856788635, + "logits/rejected": 0.796768844127655, + "logps/chosen": -662.9164428710938, + "logps/rejected": -1193.200439453125, + "loss": 0.4235, + "rewards/accuracies": 0.887499988079071, + "rewards/chosen": -4.19881534576416, + "rewards/margins": 5.3286051750183105, + "rewards/margins_max": 8.492276191711426, + "rewards/margins_min": 2.1649346351623535, + "rewards/margins_std": 4.474106311798096, + "rewards/rejected": -9.527420043945312, + "step": 3820 + }, + { + "epoch": 0.96, + "grad_norm": 1.5234375, + "learning_rate": 7.991355128984079e-09, + "logits/chosen": 0.49201154708862305, + "logits/rejected": 0.9295538067817688, + "logps/chosen": -532.0567626953125, + "logps/rejected": -1102.3900146484375, + "loss": 0.1717, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.469942092895508, + "rewards/margins": 5.525341033935547, + "rewards/margins_max": 8.777814865112305, + "rewards/margins_min": 2.2728657722473145, + "rewards/margins_std": 4.59969425201416, + "rewards/rejected": -8.995283126831055, + "step": 3830 + }, + { + "epoch": 0.97, + "grad_norm": 1.3984375, + "learning_rate": 6.921220965023012e-09, + "logits/chosen": 0.388469398021698, + "logits/rejected": 0.9527280926704407, + "logps/chosen": -636.8690185546875, + "logps/rejected": -1200.610595703125, + "loss": 0.1942, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -4.05787467956543, + "rewards/margins": 5.918933391571045, + "rewards/margins_max": 8.800240516662598, + "rewards/margins_min": 3.037627696990967, + "rewards/margins_std": 4.074782848358154, + "rewards/rejected": -9.976808547973633, + "step": 3840 + }, + { + "epoch": 0.97, + "grad_norm": 0.9375, + "learning_rate": 5.9277321196044006e-09, + "logits/chosen": 0.4024096429347992, + "logits/rejected": 0.9591943025588989, + "logps/chosen": -618.9902954101562, + "logps/rejected": -1130.0238037109375, + "loss": 0.1986, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.7532222270965576, + "rewards/margins": 5.360965251922607, + "rewards/margins_max": 8.214117050170898, + "rewards/margins_min": 2.5078141689300537, + "rewards/margins_std": 4.0349650382995605, + "rewards/rejected": -9.114187240600586, + "step": 3850 + }, + { + "epoch": 0.97, + "grad_norm": 1.5, + "learning_rate": 5.010965269695577e-09, + "logits/chosen": 0.3706130385398865, + "logits/rejected": 0.9580795168876648, + "logps/chosen": -593.1583251953125, + "logps/rejected": -1240.548095703125, + "loss": 0.1502, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.345301389694214, + "rewards/margins": 6.722414970397949, + "rewards/margins_max": 9.771614074707031, + "rewards/margins_min": 3.67321515083313, + "rewards/margins_std": 4.312219619750977, + "rewards/rejected": -10.067716598510742, + "step": 3860 + }, + { + "epoch": 0.97, + "grad_norm": 3.421875, + "learning_rate": 4.170991170898808e-09, + "logits/chosen": 0.5548506379127502, + "logits/rejected": 0.9052824974060059, + "logps/chosen": -576.05126953125, + "logps/rejected": -1191.8560791015625, + "loss": 0.1446, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.7464206218719482, + "rewards/margins": 6.168720722198486, + "rewards/margins_max": 8.987236976623535, + "rewards/margins_min": 3.3502049446105957, + "rewards/margins_std": 3.9859836101531982, + "rewards/rejected": -9.915140151977539, + "step": 3870 + }, + { + "epoch": 0.98, + "grad_norm": 1.6640625, + "learning_rate": 3.407874651990883e-09, + "logits/chosen": 0.4875260293483734, + "logits/rejected": 0.9021614193916321, + "logps/chosen": -564.7864379882812, + "logps/rejected": -1175.726318359375, + "loss": 0.2231, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -3.7371113300323486, + "rewards/margins": 6.037837982177734, + "rewards/margins_max": 9.710600852966309, + "rewards/margins_min": 2.3650765419006348, + "rewards/margins_std": 5.194069862365723, + "rewards/rejected": -9.77495002746582, + "step": 3880 + }, + { + "epoch": 0.98, + "grad_norm": 0.9296875, + "learning_rate": 2.7216746099193443e-09, + "logits/chosen": 0.537278950214386, + "logits/rejected": 0.9852391481399536, + "logps/chosen": -634.4955444335938, + "logps/rejected": -1307.7852783203125, + "loss": 0.1988, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -4.1310648918151855, + "rewards/margins": 6.686505317687988, + "rewards/margins_max": 10.347613334655762, + "rewards/margins_min": 3.025397539138794, + "rewards/margins_std": 5.177587985992432, + "rewards/rejected": -10.817570686340332, + "step": 3890 + }, + { + "epoch": 0.98, + "grad_norm": 1.2265625, + "learning_rate": 2.112444005256564e-09, + "logits/chosen": 0.4740668833255768, + "logits/rejected": 0.7506653070449829, + "logps/chosen": -627.5335693359375, + "logps/rejected": -1310.2884521484375, + "loss": 0.185, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -4.233953475952148, + "rewards/margins": 6.597451210021973, + "rewards/margins_max": 10.050703048706055, + "rewards/margins_min": 3.1441988945007324, + "rewards/margins_std": 4.883635520935059, + "rewards/rejected": -10.831403732299805, + "step": 3900 + }, + { + "epoch": 0.98, + "grad_norm": 2.03125, + "learning_rate": 1.5802298581132356e-09, + "logits/chosen": 0.4343351721763611, + "logits/rejected": 0.8734685182571411, + "logps/chosen": -593.9387817382812, + "logps/rejected": -1331.359130859375, + "loss": 0.1502, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -3.890392780303955, + "rewards/margins": 7.371206760406494, + "rewards/margins_max": 11.046722412109375, + "rewards/margins_min": 3.695690631866455, + "rewards/margins_std": 5.197963714599609, + "rewards/rejected": -11.261598587036133, + "step": 3910 + }, + { + "epoch": 0.99, + "grad_norm": 3.359375, + "learning_rate": 1.1250732445080569e-09, + "logits/chosen": 0.48562726378440857, + "logits/rejected": 0.9255334734916687, + "logps/chosen": -666.9027709960938, + "logps/rejected": -1247.671630859375, + "loss": 0.1468, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -4.539218425750732, + "rewards/margins": 5.876649379730225, + "rewards/margins_max": 8.69355583190918, + "rewards/margins_min": 3.0597426891326904, + "rewards/margins_std": 3.9837074279785156, + "rewards/rejected": -10.415867805480957, + "step": 3920 + }, + { + "epoch": 0.99, + "grad_norm": 1.140625, + "learning_rate": 7.470092931987082e-10, + "logits/chosen": 0.41472572088241577, + "logits/rejected": 0.7566056847572327, + "logps/chosen": -558.6325073242188, + "logps/rejected": -1361.521240234375, + "loss": 0.1731, + "rewards/accuracies": 0.9125000238418579, + "rewards/chosen": -3.543172836303711, + "rewards/margins": 7.946673393249512, + "rewards/margins_max": 12.362479209899902, + "rewards/margins_min": 3.5308678150177, + "rewards/margins_std": 6.244892597198486, + "rewards/rejected": -11.489847183227539, + "step": 3930 + }, + { + "epoch": 0.99, + "grad_norm": 0.9296875, + "learning_rate": 4.4606718296991143e-10, + "logits/chosen": 0.45776480436325073, + "logits/rejected": 0.9777078628540039, + "logps/chosen": -553.6112060546875, + "logps/rejected": -1242.527587890625, + "loss": 0.2555, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.5952773094177246, + "rewards/margins": 6.822895050048828, + "rewards/margins_max": 10.043266296386719, + "rewards/margins_min": 3.6025233268737793, + "rewards/margins_std": 4.554293632507324, + "rewards/rejected": -10.418172836303711, + "step": 3940 + }, + { + "epoch": 0.99, + "grad_norm": 1.9453125, + "learning_rate": 2.2227014038189717e-10, + "logits/chosen": 0.47588786482810974, + "logits/rejected": 0.8221324682235718, + "logps/chosen": -567.9865112304688, + "logps/rejected": -1433.75048828125, + "loss": 0.1907, + "rewards/accuracies": 0.9375, + "rewards/chosen": -3.7355358600616455, + "rewards/margins": 8.41942024230957, + "rewards/margins_max": 12.90966796875, + "rewards/margins_min": 3.9291725158691406, + "rewards/margins_std": 6.3501691818237305, + "rewards/rejected": -12.15495491027832, + "step": 3950 + }, + { + "epoch": 1.0, + "grad_norm": 1.21875, + "learning_rate": 7.563543797717287e-11, + "logits/chosen": 0.5077834129333496, + "logits/rejected": 1.067440390586853, + "logps/chosen": -619.56005859375, + "logps/rejected": -1299.0670166015625, + "loss": 0.1767, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -4.131485939025879, + "rewards/margins": 6.871898651123047, + "rewards/margins_max": 9.76762580871582, + "rewards/margins_min": 3.9761710166931152, + "rewards/margins_std": 4.09517765045166, + "rewards/rejected": -11.003384590148926, + "step": 3960 + }, + { + "epoch": 1.0, + "grad_norm": 0.490234375, + "learning_rate": 6.174392948143925e-12, + "logits/chosen": 0.5018728971481323, + "logits/rejected": 1.0147392749786377, + "logps/chosen": -592.7308349609375, + "logps/rejected": -1142.571044921875, + "loss": 0.198, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -3.673344373703003, + "rewards/margins": 5.651946067810059, + "rewards/margins_max": 9.167214393615723, + "rewards/margins_min": 2.1366782188415527, + "rewards/margins_std": 4.971339702606201, + "rewards/rejected": -9.325291633605957, + "step": 3970 + }, + { + "epoch": 1.0, + "eval_logits/chosen": 0.8492512106895447, + "eval_logits/rejected": 1.0155344009399414, + "eval_logps/chosen": -616.4664916992188, + "eval_logps/rejected": -637.1886596679688, + "eval_loss": 0.7862498164176941, + "eval_rewards/accuracies": 0.5755000114440918, + "eval_rewards/chosen": -2.7944495677948, + "eval_rewards/margins": 0.39339083433151245, + "eval_rewards/margins_max": 3.4648597240448, + "eval_rewards/margins_min": -1.876849889755249, + "eval_rewards/margins_std": 1.728664517402649, + "eval_rewards/rejected": -3.187840461730957, + "eval_runtime": 2500.5294, + "eval_samples_per_second": 4.799, + "eval_steps_per_second": 0.3, + "step": 3974 + }, + { + "epoch": 1.0, + "step": 3974, + "total_flos": 0.0, + "train_loss": 0.27614202823210554, + "train_runtime": 32543.5484, + "train_samples_per_second": 1.954, + "train_steps_per_second": 0.122 + } + ], + "logging_steps": 10, + "max_steps": 3974, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 100, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}