diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,5040 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100.0, + "global_step": 239, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "grad_norm": 0.0, + "learning_rate": 0.0, + "logits/generated": -3.130502223968506, + "logits/oppo_generated": -3.1088104248046875, + "logits/oppo_real": -3.130502223968506, + "logits/real": -3.1088104248046875, + "logps/generated": -99.40917205810547, + "logps/oppo_gen": -99.40917205810547, + "logps/oppo_real": -459.3097229003906, + "logps/real": -459.3097229003906, + "loss": 2.0, + "loss/gen": 2.0, + "loss/real": 0.0, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 1 + }, + { + "epoch": 0.01, + "grad_norm": 0.0, + "learning_rate": 0.0, + "logits/generated": -3.0933988094329834, + "logits/oppo_generated": -2.919645309448242, + "logits/oppo_real": -3.0933988094329834, + "logits/real": -2.919645309448242, + "logps/generated": -103.65153503417969, + "logps/oppo_gen": -103.65153503417969, + "logps/oppo_real": -392.1358642578125, + "logps/real": -392.1358642578125, + "loss": 2.0, + "loss/gen": 2.0, + "loss/real": 0.0, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 2 + }, + { + "epoch": 0.01, + "grad_norm": 0.0, + "learning_rate": 0.0, + "logits/generated": -2.6572537422180176, + "logits/oppo_generated": -2.8074941635131836, + "logits/oppo_real": -2.6572537422180176, + "logits/real": -2.8074941635131836, + "logps/generated": -72.88986206054688, + "logps/oppo_gen": -72.88986206054688, + "logps/oppo_real": -291.916748046875, + "logps/real": -291.916748046875, + "loss": 2.0, + "loss/gen": 2.0, + "loss/real": 0.0, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 3 + }, + { + "epoch": 0.02, + "grad_norm": 0.0, + "learning_rate": 0.0, + "logits/generated": -2.8966193199157715, + "logits/oppo_generated": -2.768460273742676, + "logits/oppo_real": -2.8966193199157715, + "logits/real": -2.768460273742676, + "logps/generated": -64.05287170410156, + "logps/oppo_gen": -64.05287170410156, + "logps/oppo_real": -376.8367919921875, + "logps/real": -376.8367919921875, + "loss": 2.0, + "loss/gen": 2.0, + "loss/real": 0.0, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 4 + }, + { + "epoch": 0.02, + "grad_norm": 0.0, + "learning_rate": 0.0, + "logits/generated": -2.889317512512207, + "logits/oppo_generated": -2.708950996398926, + "logits/oppo_real": -2.889317512512207, + "logits/real": -2.708950996398926, + "logps/generated": -48.29164123535156, + "logps/oppo_gen": -48.29164123535156, + "logps/oppo_real": -173.0751953125, + "logps/real": -173.0751953125, + "loss": 2.0, + "loss/gen": 2.0, + "loss/real": 0.0, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 5 + }, + { + "epoch": 0.03, + "grad_norm": 21.44563623958737, + "learning_rate": 1.6666666666666667e-08, + "logits/generated": -2.957958698272705, + "logits/oppo_generated": -2.749436378479004, + "logits/oppo_real": -2.957958698272705, + "logits/real": -2.749436378479004, + "logps/generated": -48.84138488769531, + "logps/oppo_gen": -48.84138488769531, + "logps/oppo_real": -139.2998046875, + "logps/real": -139.2998046875, + "loss": 2.0, + "loss/gen": 2.0, + "loss/real": 0.0, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 6 + }, + { + "epoch": 0.03, + "grad_norm": 24.181090932615223, + "learning_rate": 3.3333333333333334e-08, + "logits/generated": -3.1195316314697266, + "logits/oppo_generated": -2.9545342922210693, + "logits/oppo_real": -3.1195316314697266, + "logits/real": -2.9545342922210693, + "logps/generated": -163.2059783935547, + "logps/oppo_gen": -163.2059783935547, + "logps/oppo_real": -432.88226318359375, + "logps/real": -432.88226318359375, + "loss": 2.0, + "loss/gen": 2.0, + "loss/real": 0.0, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 7 + }, + { + "epoch": 0.03, + "grad_norm": 23.92123585650089, + "learning_rate": 5e-08, + "logits/generated": -2.910332441329956, + "logits/oppo_generated": -2.9416637420654297, + "logits/oppo_real": -2.910332441329956, + "logits/real": -2.9416637420654297, + "logps/generated": -69.29386901855469, + "logps/oppo_gen": -69.29386901855469, + "logps/oppo_real": -311.59619140625, + "logps/real": -311.59619140625, + "loss": 2.0, + "loss/gen": 2.0, + "loss/real": 0.0, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 8 + }, + { + "epoch": 0.04, + "grad_norm": 18.72729908334438, + "learning_rate": 6.666666666666667e-08, + "logits/generated": -2.409820318222046, + "logits/oppo_generated": -2.294548273086548, + "logits/oppo_real": -2.409976005554199, + "logits/real": -2.29451322555542, + "logps/generated": -82.21556091308594, + "logps/oppo_gen": -82.20011138916016, + "logps/oppo_real": -381.1852111816406, + "logps/real": -381.1634521484375, + "loss": 2.0, + "loss/gen": 1.9998455047607422, + "loss/real": 7.733702659606934e-06, + "rewards/accuracies": 0.875, + "rewards/generated": -0.015454292297363281, + "rewards/margins": 0.03723621368408203, + "rewards/real": 0.02178192138671875, + "step": 9 + }, + { + "epoch": 0.04, + "grad_norm": 17.202277028374397, + "learning_rate": 8.333333333333333e-08, + "logits/generated": -2.962700366973877, + "logits/oppo_generated": -2.9239017963409424, + "logits/oppo_real": -2.963313579559326, + "logits/real": -2.923351526260376, + "logps/generated": -93.16413879394531, + "logps/oppo_gen": -93.09856414794922, + "logps/oppo_real": -233.10401916503906, + "logps/real": -233.077880859375, + "loss": 1.9995, + "loss/gen": 1.9993443489074707, + "loss/real": 5.08427619934082e-05, + "rewards/accuracies": 0.875, + "rewards/generated": -0.06557583808898926, + "rewards/margins": 0.09174036979675293, + "rewards/real": 0.026164531707763672, + "step": 10 + }, + { + "epoch": 0.05, + "grad_norm": 17.401866312895148, + "learning_rate": 1e-07, + "logits/generated": -2.8563976287841797, + "logits/oppo_generated": -2.837850570678711, + "logits/oppo_real": -2.857771396636963, + "logits/real": -2.836169958114624, + "logps/generated": -59.6667594909668, + "logps/oppo_gen": -59.46293640136719, + "logps/oppo_real": -142.69805908203125, + "logps/real": -142.69808959960938, + "loss": 1.998, + "loss/gen": 1.9979617595672607, + "loss/real": 0.0006099119782447815, + "rewards/accuracies": 0.875, + "rewards/generated": -0.20382428169250488, + "rewards/margins": 0.20379090309143066, + "rewards/real": -3.337860107421875e-05, + "step": 11 + }, + { + "epoch": 0.05, + "grad_norm": 17.22730830301668, + "learning_rate": 1.1666666666666667e-07, + "logits/generated": -2.875734329223633, + "logits/oppo_generated": -2.7672762870788574, + "logits/oppo_real": -2.8780808448791504, + "logits/real": -2.7644264698028564, + "logps/generated": -70.91142272949219, + "logps/oppo_gen": -70.58644104003906, + "logps/oppo_real": -343.4704284667969, + "logps/real": -343.3141174316406, + "loss": 1.9968, + "loss/gen": 1.9967502355575562, + "loss/real": 0.00020247697830200195, + "rewards/accuracies": 1.0, + "rewards/generated": -0.32497692108154297, + "rewards/margins": 0.48128724098205566, + "rewards/real": 0.1563103199005127, + "step": 12 + }, + { + "epoch": 0.05, + "grad_norm": 15.819281409300507, + "learning_rate": 1.3333333333333334e-07, + "logits/generated": -2.815218925476074, + "logits/oppo_generated": -2.8374581336975098, + "logits/oppo_real": -2.822021961212158, + "logits/real": -2.830238103866577, + "logps/generated": -107.48092651367188, + "logps/oppo_gen": -106.73956298828125, + "logps/oppo_real": -280.41741943359375, + "logps/real": -280.51971435546875, + "loss": 1.9918, + "loss/gen": 1.9925864934921265, + "loss/real": 0.0017677471041679382, + "rewards/accuracies": 1.0, + "rewards/generated": -0.7413442134857178, + "rewards/margins": 0.6390435695648193, + "rewards/real": -0.10230064392089844, + "step": 13 + }, + { + "epoch": 0.06, + "grad_norm": 16.905273409293585, + "learning_rate": 1.5e-07, + "logits/generated": -2.7615389823913574, + "logits/oppo_generated": -2.8255615234375, + "logits/oppo_real": -2.771684169769287, + "logits/real": -2.815335273742676, + "logps/generated": -87.6278076171875, + "logps/oppo_gen": -85.86231994628906, + "logps/oppo_real": -289.01318359375, + "logps/real": -288.6315612792969, + "loss": 1.9878, + "loss/gen": 1.9823451042175293, + "loss/real": 0.001967109739780426, + "rewards/accuracies": 1.0, + "rewards/generated": -1.7654941082000732, + "rewards/margins": 2.147136688232422, + "rewards/real": 0.38164258003234863, + "step": 14 + }, + { + "epoch": 0.06, + "grad_norm": 16.47449070515203, + "learning_rate": 1.6666666666666665e-07, + "logits/generated": -3.1344149112701416, + "logits/oppo_generated": -2.7394165992736816, + "logits/oppo_real": -3.1553921699523926, + "logits/real": -2.717817544937134, + "logps/generated": -77.48875427246094, + "logps/oppo_gen": -74.47514343261719, + "logps/oppo_real": -366.370361328125, + "logps/real": -366.13739013671875, + "loss": 1.9771, + "loss/gen": 1.9698638916015625, + "loss/real": 0.002904340624809265, + "rewards/accuracies": 1.0, + "rewards/generated": -3.013608455657959, + "rewards/margins": 3.2465546131134033, + "rewards/real": 0.23294615745544434, + "step": 15 + }, + { + "epoch": 0.07, + "grad_norm": 16.775265896504816, + "learning_rate": 1.833333333333333e-07, + "logits/generated": -2.108006000518799, + "logits/oppo_generated": -2.1468427181243896, + "logits/oppo_real": -2.142064094543457, + "logits/real": -2.117852210998535, + "logps/generated": -81.89228820800781, + "logps/oppo_gen": -78.08332824707031, + "logps/oppo_real": -437.152587890625, + "logps/real": -437.1942443847656, + "loss": 1.9669, + "loss/gen": 1.9619104862213135, + "loss/real": 0.004711121320724487, + "rewards/accuracies": 1.0, + "rewards/generated": -3.8089609146118164, + "rewards/margins": 3.76729679107666, + "rewards/real": -0.04166412353515625, + "step": 16 + }, + { + "epoch": 0.07, + "grad_norm": 15.954718888453357, + "learning_rate": 2e-07, + "logits/generated": -2.9154043197631836, + "logits/oppo_generated": -2.902646064758301, + "logits/oppo_real": -2.953411817550659, + "logits/real": -2.8594038486480713, + "logps/generated": -77.6299819946289, + "logps/oppo_gen": -72.53976440429688, + "logps/oppo_real": -310.7004089355469, + "logps/real": -310.427734375, + "loss": 1.961, + "loss/gen": 1.9490978717803955, + "loss/real": 0.0060840025544166565, + "rewards/accuracies": 1.0, + "rewards/generated": -5.090216636657715, + "rewards/margins": 5.362889289855957, + "rewards/real": 0.2726726531982422, + "step": 17 + }, + { + "epoch": 0.08, + "grad_norm": 17.536179990025712, + "learning_rate": 2.1666666666666667e-07, + "logits/generated": -2.9157583713531494, + "logits/oppo_generated": -2.947140693664551, + "logits/oppo_real": -2.9634807109832764, + "logits/real": -2.8917417526245117, + "logps/generated": -83.36225891113281, + "logps/oppo_gen": -74.80116271972656, + "logps/oppo_real": -309.46124267578125, + "logps/real": -309.94171142578125, + "loss": 1.9385, + "loss/gen": 1.9143891334533691, + "loss/real": 0.010849758982658386, + "rewards/accuracies": 1.0, + "rewards/generated": -8.56109619140625, + "rewards/margins": 8.080650329589844, + "rewards/real": -0.48044562339782715, + "step": 18 + }, + { + "epoch": 0.08, + "grad_norm": 18.055822491099047, + "learning_rate": 2.3333333333333333e-07, + "logits/generated": -2.398920774459839, + "logits/oppo_generated": -2.6668543815612793, + "logits/oppo_real": -2.47564697265625, + "logits/real": -2.5944724082946777, + "logps/generated": -77.91998291015625, + "logps/oppo_gen": -67.190673828125, + "logps/oppo_real": -285.60797119140625, + "logps/real": -287.39215087890625, + "loss": 1.9138, + "loss/gen": 1.892707109451294, + "loss/real": 0.023035116493701935, + "rewards/accuracies": 1.0, + "rewards/generated": -10.729303359985352, + "rewards/margins": 8.945160865783691, + "rewards/real": -1.7841424942016602, + "step": 19 + }, + { + "epoch": 0.08, + "grad_norm": 16.8409722750193, + "learning_rate": 2.5e-07, + "logits/generated": -3.0429744720458984, + "logits/oppo_generated": -2.7376956939697266, + "logits/oppo_real": -3.1153059005737305, + "logits/real": -2.66239595413208, + "logps/generated": -108.49114990234375, + "logps/oppo_gen": -93.65745544433594, + "logps/oppo_real": -173.968994140625, + "logps/real": -176.7364044189453, + "loss": 1.9, + "loss/gen": 1.8516631126403809, + "loss/real": 0.030972033739089966, + "rewards/accuracies": 1.0, + "rewards/generated": -14.833693504333496, + "rewards/margins": 12.066278457641602, + "rewards/real": -2.7674155235290527, + "step": 20 + }, + { + "epoch": 0.09, + "grad_norm": 17.0265708165071, + "learning_rate": 2.6666666666666667e-07, + "logits/generated": -2.807745933532715, + "logits/oppo_generated": -2.6699156761169434, + "logits/oppo_real": -2.8930060863494873, + "logits/real": -2.5801193714141846, + "logps/generated": -62.11570739746094, + "logps/oppo_gen": -50.189754486083984, + "logps/oppo_real": -197.0562286376953, + "logps/real": -198.63250732421875, + "loss": 1.8804, + "loss/gen": 1.8807404041290283, + "loss/real": 0.03226040303707123, + "rewards/accuracies": 0.875, + "rewards/generated": -11.925955772399902, + "rewards/margins": 10.349677085876465, + "rewards/real": -1.5762791633605957, + "step": 21 + }, + { + "epoch": 0.09, + "grad_norm": 15.942840997675052, + "learning_rate": 2.833333333333333e-07, + "logits/generated": -2.883525848388672, + "logits/oppo_generated": -2.8113152980804443, + "logits/oppo_real": -2.997610330581665, + "logits/real": -2.701076030731201, + "logps/generated": -77.23695373535156, + "logps/oppo_gen": -59.91856384277344, + "logps/oppo_real": -175.6089324951172, + "logps/real": -180.22738647460938, + "loss": 1.8703, + "loss/gen": 1.8268163204193115, + "loss/real": 0.05111686885356903, + "rewards/accuracies": 1.0, + "rewards/generated": -17.318382263183594, + "rewards/margins": 12.699928283691406, + "rewards/real": -4.618453025817871, + "step": 22 + }, + { + "epoch": 0.1, + "grad_norm": 18.193819711198465, + "learning_rate": 3e-07, + "logits/generated": -2.71798038482666, + "logits/oppo_generated": -2.712057113647461, + "logits/oppo_real": -2.83805513381958, + "logits/real": -2.592156410217285, + "logps/generated": -108.12179565429688, + "logps/oppo_gen": -84.5518798828125, + "logps/oppo_real": -331.96221923828125, + "logps/real": -337.7485656738281, + "loss": 1.8307, + "loss/gen": 1.76430082321167, + "loss/real": 0.058562956750392914, + "rewards/accuracies": 1.0, + "rewards/generated": -23.569915771484375, + "rewards/margins": 17.783584594726562, + "rewards/real": -5.786332130432129, + "step": 23 + }, + { + "epoch": 0.1, + "grad_norm": 16.996427272784786, + "learning_rate": 3.166666666666666e-07, + "logits/generated": -2.2170791625976562, + "logits/oppo_generated": -2.4313888549804688, + "logits/oppo_real": -2.3368191719055176, + "logits/real": -2.3218801021575928, + "logps/generated": -98.60990142822266, + "logps/oppo_gen": -70.7446060180664, + "logps/oppo_real": -186.56976318359375, + "logps/real": -192.2266082763672, + "loss": 1.8198, + "loss/gen": 1.7213470935821533, + "loss/real": 0.05736871063709259, + "rewards/accuracies": 1.0, + "rewards/generated": -27.865299224853516, + "rewards/margins": 22.208459854125977, + "rewards/real": -5.6568403244018555, + "step": 24 + }, + { + "epoch": 0.1, + "grad_norm": 16.67077026711208, + "learning_rate": 3.333333333333333e-07, + "logits/generated": -2.77504825592041, + "logits/oppo_generated": -2.8222999572753906, + "logits/oppo_real": -2.956730842590332, + "logits/real": -2.662292957305908, + "logps/generated": -79.92071533203125, + "logps/oppo_gen": -55.461936950683594, + "logps/oppo_real": -125.98847198486328, + "logps/real": -132.84556579589844, + "loss": 1.7945, + "loss/gen": 1.7554123401641846, + "loss/real": 0.07477347552776337, + "rewards/accuracies": 0.875, + "rewards/generated": -24.458776473999023, + "rewards/margins": 17.601680755615234, + "rewards/real": -6.857094764709473, + "step": 25 + }, + { + "epoch": 0.11, + "grad_norm": 16.22325115917978, + "learning_rate": 3.5e-07, + "logits/generated": -2.4703292846679688, + "logits/oppo_generated": -2.9076757431030273, + "logits/oppo_real": -2.661245822906494, + "logits/real": -2.7052745819091797, + "logps/generated": -103.34319305419922, + "logps/oppo_gen": -71.46342468261719, + "logps/oppo_real": -293.69677734375, + "logps/real": -296.63507080078125, + "loss": 1.7719, + "loss/gen": 1.6812022924423218, + "loss/real": 0.05883955955505371, + "rewards/accuracies": 1.0, + "rewards/generated": -31.879772186279297, + "rewards/margins": 28.941482543945312, + "rewards/real": -2.938288688659668, + "step": 26 + }, + { + "epoch": 0.11, + "grad_norm": 15.655657911240558, + "learning_rate": 3.666666666666666e-07, + "logits/generated": -2.6560888290405273, + "logits/oppo_generated": -3.018123149871826, + "logits/oppo_real": -2.837935447692871, + "logits/real": -2.8067123889923096, + "logps/generated": -81.89646911621094, + "logps/oppo_gen": -51.06623458862305, + "logps/oppo_real": -151.72972106933594, + "logps/real": -171.45449829101562, + "loss": 1.7534, + "loss/gen": 1.6916977167129517, + "loss/real": 0.19724780321121216, + "rewards/accuracies": 0.875, + "rewards/generated": -30.83022689819336, + "rewards/margins": 11.105447769165039, + "rewards/real": -19.724781036376953, + "step": 27 + }, + { + "epoch": 0.12, + "grad_norm": 15.655657911240558, + "learning_rate": 3.666666666666666e-07, + "logits/generated": -2.387822151184082, + "logits/oppo_generated": -2.7700376510620117, + "logits/oppo_real": -2.6328747272491455, + "logits/real": -2.5694613456726074, + "logps/generated": -121.05097961425781, + "logps/oppo_gen": -72.09120178222656, + "logps/oppo_real": -411.427978515625, + "logps/real": -412.50714111328125, + "loss": 1.6877, + "loss/gen": 1.5104023218154907, + "loss/real": 0.0984681025147438, + "rewards/accuracies": 1.0, + "rewards/generated": -48.95977020263672, + "rewards/margins": 47.880558013916016, + "rewards/real": -1.0792131423950195, + "step": 28 + }, + { + "epoch": 0.12, + "grad_norm": 16.840291816557006, + "learning_rate": 3.8333333333333335e-07, + "logits/generated": -2.6876704692840576, + "logits/oppo_generated": -2.91198468208313, + "logits/oppo_real": -2.9211230278015137, + "logits/real": -2.680572509765625, + "logps/generated": -127.4861831665039, + "logps/oppo_gen": -82.21741485595703, + "logps/oppo_real": -301.3589172363281, + "logps/real": -309.1620178222656, + "loss": 1.7013, + "loss/gen": 1.5473122596740723, + "loss/real": 0.10803677141666412, + "rewards/accuracies": 1.0, + "rewards/generated": -45.268768310546875, + "rewards/margins": 37.46567153930664, + "rewards/real": -7.8031005859375, + "step": 29 + }, + { + "epoch": 0.13, + "grad_norm": 17.95870813154182, + "learning_rate": 4e-07, + "logits/generated": -2.7156505584716797, + "logits/oppo_generated": -2.4022648334503174, + "logits/oppo_real": -2.97650146484375, + "logits/real": -2.1997687816619873, + "logps/generated": -136.54647827148438, + "logps/oppo_gen": -99.30915832519531, + "logps/oppo_real": -226.3162841796875, + "logps/real": -240.60678100585938, + "loss": 1.6691, + "loss/gen": 1.627626895904541, + "loss/real": 0.14702296257019043, + "rewards/accuracies": 1.0, + "rewards/generated": -37.23731231689453, + "rewards/margins": 22.946819305419922, + "rewards/real": -14.29049301147461, + "step": 30 + }, + { + "epoch": 0.13, + "grad_norm": 18.000802763945956, + "learning_rate": 4.1666666666666667e-07, + "logits/generated": -2.7124738693237305, + "logits/oppo_generated": -2.854034900665283, + "logits/oppo_real": -2.9424033164978027, + "logits/real": -2.666820526123047, + "logps/generated": -94.56930541992188, + "logps/oppo_gen": -54.3837890625, + "logps/oppo_real": -252.91123962402344, + "logps/real": -263.7140197753906, + "loss": 1.635, + "loss/gen": 1.5981448888778687, + "loss/real": 0.11872847378253937, + "rewards/accuracies": 1.0, + "rewards/generated": -40.185516357421875, + "rewards/margins": 29.382728576660156, + "rewards/real": -10.802785873413086, + "step": 31 + }, + { + "epoch": 0.13, + "grad_norm": 17.376117039182688, + "learning_rate": 4.3333333333333335e-07, + "logits/generated": -2.646808385848999, + "logits/oppo_generated": -2.9263906478881836, + "logits/oppo_real": -2.9535346031188965, + "logits/real": -2.6604020595550537, + "logps/generated": -133.86170959472656, + "logps/oppo_gen": -78.93435668945312, + "logps/oppo_real": -298.2490234375, + "logps/real": -312.76690673828125, + "loss": 1.629, + "loss/gen": 1.4507265090942383, + "loss/real": 0.1622442901134491, + "rewards/accuracies": 1.0, + "rewards/generated": -54.927345275878906, + "rewards/margins": 40.409461975097656, + "rewards/real": -14.517885208129883, + "step": 32 + }, + { + "epoch": 0.14, + "grad_norm": 19.314468826625482, + "learning_rate": 4.5e-07, + "logits/generated": -2.7972543239593506, + "logits/oppo_generated": -2.9521539211273193, + "logits/oppo_real": -3.0699048042297363, + "logits/real": -2.6584794521331787, + "logps/generated": -189.65911865234375, + "logps/oppo_gen": -136.80690002441406, + "logps/oppo_real": -344.64990234375, + "logps/real": -362.10601806640625, + "loss": 1.5829, + "loss/gen": 1.4714778661727905, + "loss/real": 0.20715071260929108, + "rewards/accuracies": 1.0, + "rewards/generated": -52.85221862792969, + "rewards/margins": 35.396095275878906, + "rewards/real": -17.45612144470215, + "step": 33 + }, + { + "epoch": 0.14, + "grad_norm": 19.659759256745826, + "learning_rate": 4.6666666666666666e-07, + "logits/generated": -2.6684279441833496, + "logits/oppo_generated": -2.8447458744049072, + "logits/oppo_real": -2.998192548751831, + "logits/real": -2.5552549362182617, + "logps/generated": -146.35748291015625, + "logps/oppo_gen": -79.24800109863281, + "logps/oppo_real": -401.9757385253906, + "logps/real": -426.55157470703125, + "loss": 1.5299, + "loss/gen": 1.3289053440093994, + "loss/real": 0.2510119676589966, + "rewards/accuracies": 1.0, + "rewards/generated": -67.10946655273438, + "rewards/margins": 42.533626556396484, + "rewards/real": -24.575847625732422, + "step": 34 + }, + { + "epoch": 0.15, + "grad_norm": 21.872934472794725, + "learning_rate": 4.833333333333333e-07, + "logits/generated": -2.586422920227051, + "logits/oppo_generated": -2.942030906677246, + "logits/oppo_real": -2.9536867141723633, + "logits/real": -2.602694511413574, + "logps/generated": -140.24705505371094, + "logps/oppo_gen": -62.21235656738281, + "logps/oppo_real": -296.8402404785156, + "logps/real": -320.66766357421875, + "loss": 1.4528, + "loss/gen": 1.2196528911590576, + "loss/real": 0.259676456451416, + "rewards/accuracies": 1.0, + "rewards/generated": -78.03470611572266, + "rewards/margins": 54.207305908203125, + "rewards/real": -23.8273983001709, + "step": 35 + }, + { + "epoch": 0.15, + "grad_norm": 22.019553078421858, + "learning_rate": 5e-07, + "logits/generated": -2.2861862182617188, + "logits/oppo_generated": -2.792217493057251, + "logits/oppo_real": -2.680948257446289, + "logits/real": -2.4454588890075684, + "logps/generated": -128.45945739746094, + "logps/oppo_gen": -49.044715881347656, + "logps/oppo_real": -183.3726348876953, + "logps/real": -205.66845703125, + "loss": 1.4374, + "loss/gen": 1.2058525085449219, + "loss/real": 0.26081162691116333, + "rewards/accuracies": 0.75, + "rewards/generated": -79.41474914550781, + "rewards/margins": 57.11891555786133, + "rewards/real": -22.29583168029785, + "step": 36 + }, + { + "epoch": 0.15, + "grad_norm": 21.563572776755652, + "learning_rate": 4.996438746438746e-07, + "logits/generated": -2.352074146270752, + "logits/oppo_generated": -2.5968940258026123, + "logits/oppo_real": -2.84472393989563, + "logits/real": -2.1995410919189453, + "logps/generated": -192.30780029296875, + "logps/oppo_gen": -96.46727752685547, + "logps/oppo_real": -441.2087097167969, + "logps/real": -454.900390625, + "loss": 1.3751, + "loss/gen": 1.1064567565917969, + "loss/real": 0.17831739783287048, + "rewards/accuracies": 1.0, + "rewards/generated": -95.84051513671875, + "rewards/margins": 82.14884948730469, + "rewards/real": -13.691666603088379, + "step": 37 + }, + { + "epoch": 0.16, + "grad_norm": 27.510149937966077, + "learning_rate": 4.992877492877492e-07, + "logits/generated": -2.568110466003418, + "logits/oppo_generated": -3.097993850708008, + "logits/oppo_real": -3.161780834197998, + "logits/real": -2.603790283203125, + "logps/generated": -209.0296630859375, + "logps/oppo_gen": -86.33152770996094, + "logps/oppo_real": -374.5130615234375, + "logps/real": -400.4665832519531, + "loss": 1.2577, + "loss/gen": 0.7730186581611633, + "loss/real": 0.2766711115837097, + "rewards/accuracies": 0.875, + "rewards/generated": -122.69813537597656, + "rewards/margins": 96.74461364746094, + "rewards/real": -25.95351791381836, + "step": 38 + }, + { + "epoch": 0.16, + "grad_norm": 27.09461729076114, + "learning_rate": 4.98931623931624e-07, + "logits/generated": -2.2019739151000977, + "logits/oppo_generated": -2.648486614227295, + "logits/oppo_real": -2.7488012313842773, + "logits/real": -2.1398563385009766, + "logps/generated": -196.87429809570312, + "logps/oppo_gen": -78.30477142333984, + "logps/oppo_real": -363.86407470703125, + "logps/real": -402.26739501953125, + "loss": 1.2019, + "loss/gen": 0.8143048882484436, + "loss/real": 0.3951404094696045, + "rewards/accuracies": 1.0, + "rewards/generated": -118.56951904296875, + "rewards/margins": 80.16621398925781, + "rewards/real": -38.403289794921875, + "step": 39 + }, + { + "epoch": 0.17, + "grad_norm": 27.1369816207399, + "learning_rate": 4.985754985754986e-07, + "logits/generated": -2.157773017883301, + "logits/oppo_generated": -2.864193916320801, + "logits/oppo_real": -2.7761850357055664, + "logits/real": -2.3227579593658447, + "logps/generated": -172.39085388183594, + "logps/oppo_gen": -60.6450309753418, + "logps/oppo_real": -320.1565856933594, + "logps/real": -337.9253845214844, + "loss": 1.1863, + "loss/gen": 0.9242483377456665, + "loss/real": 0.19020405411720276, + "rewards/accuracies": 0.875, + "rewards/generated": -111.7458267211914, + "rewards/margins": 93.97701263427734, + "rewards/real": -17.768808364868164, + "step": 40 + }, + { + "epoch": 0.17, + "grad_norm": 24.71832653121781, + "learning_rate": 4.982193732193732e-07, + "logits/generated": -2.3361663818359375, + "logits/oppo_generated": -2.812058210372925, + "logits/oppo_real": -2.982236862182617, + "logits/real": -2.289778470993042, + "logps/generated": -202.0223388671875, + "logps/oppo_gen": -90.06674194335938, + "logps/oppo_real": -176.9713592529297, + "logps/real": -221.30557250976562, + "loss": 1.1883, + "loss/gen": 0.8804440498352051, + "loss/real": 0.46414560079574585, + "rewards/accuracies": 1.0, + "rewards/generated": -111.95559692382812, + "rewards/margins": 67.62137603759766, + "rewards/real": -44.3342170715332, + "step": 41 + }, + { + "epoch": 0.18, + "grad_norm": 25.356203831684, + "learning_rate": 4.978632478632478e-07, + "logits/generated": -2.2286205291748047, + "logits/oppo_generated": -2.9253015518188477, + "logits/oppo_real": -2.9079301357269287, + "logits/real": -2.3039026260375977, + "logps/generated": -171.630859375, + "logps/oppo_gen": -54.79414367675781, + "logps/oppo_real": -186.92176818847656, + "logps/real": -248.6990966796875, + "loss": 1.1143, + "loss/gen": 0.8316328525543213, + "loss/real": 0.6177734136581421, + "rewards/accuracies": 1.0, + "rewards/generated": -116.83671569824219, + "rewards/margins": 55.05937194824219, + "rewards/real": -61.77734375, + "step": 42 + }, + { + "epoch": 0.18, + "grad_norm": 27.712655447875367, + "learning_rate": 4.975071225071225e-07, + "logits/generated": -2.067673683166504, + "logits/oppo_generated": -2.9949498176574707, + "logits/oppo_real": -2.9107003211975098, + "logits/real": -2.3845181465148926, + "logps/generated": -286.51165771484375, + "logps/oppo_gen": -79.9820785522461, + "logps/oppo_real": -404.1100158691406, + "logps/real": -439.1029968261719, + "loss": 1.0441, + "loss/gen": 0.4001755118370056, + "loss/real": 0.3547167181968689, + "rewards/accuracies": 1.0, + "rewards/generated": -206.529541015625, + "rewards/margins": 171.53656005859375, + "rewards/real": -34.99298858642578, + "step": 43 + }, + { + "epoch": 0.18, + "grad_norm": 26.747401031017247, + "learning_rate": 4.971509971509972e-07, + "logits/generated": -1.7710440158843994, + "logits/oppo_generated": -2.4440221786499023, + "logits/oppo_real": -2.3998050689697266, + "logits/real": -1.8367252349853516, + "logps/generated": -312.6555480957031, + "logps/oppo_gen": -93.22187805175781, + "logps/oppo_real": -290.8685302734375, + "logps/real": -330.1634521484375, + "loss": 0.9874, + "loss/gen": 0.40501296520233154, + "loss/real": 0.39968231320381165, + "rewards/accuracies": 1.0, + "rewards/generated": -219.43365478515625, + "rewards/margins": 180.13876342773438, + "rewards/real": -39.29491424560547, + "step": 44 + }, + { + "epoch": 0.19, + "grad_norm": 29.940363291114327, + "learning_rate": 4.967948717948718e-07, + "logits/generated": -2.0964088439941406, + "logits/oppo_generated": -2.9232547283172607, + "logits/oppo_real": -2.7114880084991455, + "logits/real": -2.3123269081115723, + "logps/generated": -256.84454345703125, + "logps/oppo_gen": -64.50846862792969, + "logps/oppo_real": -239.8323974609375, + "logps/real": -305.34417724609375, + "loss": 0.9685, + "loss/gen": 0.41360723972320557, + "loss/real": 0.6562252640724182, + "rewards/accuracies": 1.0, + "rewards/generated": -192.33609008789062, + "rewards/margins": 126.8243179321289, + "rewards/real": -65.51176452636719, + "step": 45 + }, + { + "epoch": 0.19, + "grad_norm": 26.65060984072996, + "learning_rate": 4.964387464387464e-07, + "logits/generated": -2.263786792755127, + "logits/oppo_generated": -2.741456985473633, + "logits/oppo_real": -2.9938759803771973, + "logits/real": -2.225804328918457, + "logps/generated": -184.13873291015625, + "logps/oppo_gen": -58.174400329589844, + "logps/oppo_real": -258.21685791015625, + "logps/real": -307.77520751953125, + "loss": 0.9712, + "loss/gen": 0.7734701633453369, + "loss/real": 0.5007840394973755, + "rewards/accuracies": 0.875, + "rewards/generated": -125.96434783935547, + "rewards/margins": 76.40599060058594, + "rewards/real": -49.558353424072266, + "step": 46 + }, + { + "epoch": 0.2, + "grad_norm": 28.4577902373904, + "learning_rate": 4.96082621082621e-07, + "logits/generated": -2.171962022781372, + "logits/oppo_generated": -2.814079761505127, + "logits/oppo_real": -2.964923620223999, + "logits/real": -2.236274480819702, + "logps/generated": -279.45623779296875, + "logps/oppo_gen": -78.5189208984375, + "logps/oppo_real": -288.56396484375, + "logps/real": -338.4586181640625, + "loss": 0.8781, + "loss/gen": 0.38681352138519287, + "loss/real": 0.5231560468673706, + "rewards/accuracies": 1.0, + "rewards/generated": -200.93731689453125, + "rewards/margins": 151.0426788330078, + "rewards/real": -49.89463806152344, + "step": 47 + }, + { + "epoch": 0.2, + "grad_norm": 28.363362753377597, + "learning_rate": 4.957264957264958e-07, + "logits/generated": -2.1406655311584473, + "logits/oppo_generated": -2.7121076583862305, + "logits/oppo_real": -2.932806968688965, + "logits/real": -2.1540122032165527, + "logps/generated": -254.42098999023438, + "logps/oppo_gen": -72.10917663574219, + "logps/oppo_real": -299.3392333984375, + "logps/real": -366.06121826171875, + "loss": 0.8697, + "loss/gen": 0.38246485590934753, + "loss/real": 0.6711124181747437, + "rewards/accuracies": 1.0, + "rewards/generated": -182.31182861328125, + "rewards/margins": 115.58984375, + "rewards/real": -66.72196960449219, + "step": 48 + }, + { + "epoch": 0.21, + "grad_norm": 29.28652212909496, + "learning_rate": 4.953703703703703e-07, + "logits/generated": -2.362544059753418, + "logits/oppo_generated": -2.814209461212158, + "logits/oppo_real": -3.157527208328247, + "logits/real": -2.4037039279937744, + "logps/generated": -309.79132080078125, + "logps/oppo_gen": -80.24543762207031, + "logps/oppo_real": -294.9969482421875, + "logps/real": -339.64544677734375, + "loss": 0.7412, + "loss/gen": 0.41209107637405396, + "loss/real": 0.4473879337310791, + "rewards/accuracies": 1.0, + "rewards/generated": -229.5458984375, + "rewards/margins": 184.89743041992188, + "rewards/real": -44.648468017578125, + "step": 49 + }, + { + "epoch": 0.21, + "grad_norm": 37.235111000384265, + "learning_rate": 4.95014245014245e-07, + "logits/generated": -2.274956464767456, + "logits/oppo_generated": -2.9343652725219727, + "logits/oppo_real": -2.7617945671081543, + "logits/real": -2.470684051513672, + "logps/generated": -271.6064758300781, + "logps/oppo_gen": -82.74765014648438, + "logps/oppo_real": -315.32562255859375, + "logps/real": -348.8078308105469, + "loss": 0.8359, + "loss/gen": 0.3331334590911865, + "loss/real": 0.33817416429519653, + "rewards/accuracies": 1.0, + "rewards/generated": -188.85882568359375, + "rewards/margins": 155.37660217285156, + "rewards/real": -33.482208251953125, + "step": 50 + }, + { + "epoch": 0.21, + "grad_norm": 24.169072033670428, + "learning_rate": 4.946581196581196e-07, + "logits/generated": -2.1854918003082275, + "logits/oppo_generated": -2.805569648742676, + "logits/oppo_real": -2.7846250534057617, + "logits/real": -2.2574825286865234, + "logps/generated": -207.7048797607422, + "logps/oppo_gen": -45.456573486328125, + "logps/oppo_real": -161.39598083496094, + "logps/real": -193.92062377929688, + "loss": 0.7238, + "loss/gen": 0.49297964572906494, + "loss/real": 0.37008020281791687, + "rewards/accuracies": 1.0, + "rewards/generated": -162.24830627441406, + "rewards/margins": 129.72366333007812, + "rewards/real": -32.52463150024414, + "step": 51 + }, + { + "epoch": 0.22, + "grad_norm": 28.44118708927029, + "learning_rate": 4.943019943019943e-07, + "logits/generated": -1.998687982559204, + "logits/oppo_generated": -2.7444612979888916, + "logits/oppo_real": -2.7595162391662598, + "logits/real": -2.086247444152832, + "logps/generated": -203.22409057617188, + "logps/oppo_gen": -50.193504333496094, + "logps/oppo_real": -148.25294494628906, + "logps/real": -181.40602111816406, + "loss": 0.7164, + "loss/gen": 0.5076989531517029, + "loss/real": 0.35455718636512756, + "rewards/accuracies": 1.0, + "rewards/generated": -153.0305938720703, + "rewards/margins": 119.87752532958984, + "rewards/real": -33.15306854248047, + "step": 52 + }, + { + "epoch": 0.22, + "grad_norm": 28.44118708927029, + "learning_rate": 4.943019943019943e-07, + "logits/generated": -1.962064504623413, + "logits/oppo_generated": -2.660369396209717, + "logits/oppo_real": -2.6082496643066406, + "logits/real": -2.004966974258423, + "logps/generated": -199.8478240966797, + "logps/oppo_gen": -55.80210876464844, + "logps/oppo_real": -201.49038696289062, + "logps/real": -236.99850463867188, + "loss": 0.61, + "loss/gen": 0.625639796257019, + "loss/real": 0.3782804012298584, + "rewards/accuracies": 1.0, + "rewards/generated": -144.04571533203125, + "rewards/margins": 108.53761291503906, + "rewards/real": -35.50811767578125, + "step": 53 + }, + { + "epoch": 0.23, + "grad_norm": 34.77088794433499, + "learning_rate": 4.93945868945869e-07, + "logits/generated": -2.1952624320983887, + "logits/oppo_generated": -2.746832847595215, + "logits/oppo_real": -2.973560333251953, + "logits/real": -2.191551685333252, + "logps/generated": -218.537109375, + "logps/oppo_gen": -77.28608703613281, + "logps/oppo_real": -547.3628540039062, + "logps/real": -573.14208984375, + "loss": 0.6871, + "loss/gen": 0.6249206066131592, + "loss/real": 0.30037403106689453, + "rewards/accuracies": 0.875, + "rewards/generated": -141.2510223388672, + "rewards/margins": 115.47174835205078, + "rewards/real": -25.77927017211914, + "step": 54 + }, + { + "epoch": 0.23, + "grad_norm": 29.88689616704953, + "learning_rate": 4.935897435897436e-07, + "logits/generated": -1.6832900047302246, + "logits/oppo_generated": -2.664555072784424, + "logits/oppo_real": -2.6400251388549805, + "logits/real": -2.0845460891723633, + "logps/generated": -307.5957336425781, + "logps/oppo_gen": -78.57785034179688, + "logps/oppo_real": -398.628662109375, + "logps/real": -400.03839111328125, + "loss": 0.6084, + "loss/gen": 0.14119790494441986, + "loss/real": 0.09441090375185013, + "rewards/accuracies": 1.0, + "rewards/generated": -229.01788330078125, + "rewards/margins": 227.60816955566406, + "rewards/real": -1.4097027778625488, + "step": 55 + }, + { + "epoch": 0.23, + "grad_norm": 26.655489804517757, + "learning_rate": 4.932336182336182e-07, + "logits/generated": -2.235076665878296, + "logits/oppo_generated": -2.638930320739746, + "logits/oppo_real": -3.1015210151672363, + "logits/real": -2.072552442550659, + "logps/generated": -260.4788818359375, + "logps/oppo_gen": -84.6130599975586, + "logps/oppo_real": -310.54534912109375, + "logps/real": -339.3001708984375, + "loss": 0.6508, + "loss/gen": 0.5073614716529846, + "loss/real": 0.32746076583862305, + "rewards/accuracies": 0.75, + "rewards/generated": -175.86582946777344, + "rewards/margins": 147.1110076904297, + "rewards/real": -28.754831314086914, + "step": 56 + }, + { + "epoch": 0.24, + "grad_norm": 51.166719683061565, + "learning_rate": 4.928774928774928e-07, + "logits/generated": -2.112302780151367, + "logits/oppo_generated": -2.9305167198181152, + "logits/oppo_real": -2.7986156940460205, + "logits/real": -2.261821746826172, + "logps/generated": -329.22186279296875, + "logps/oppo_gen": -55.247596740722656, + "logps/oppo_real": -159.6094970703125, + "logps/real": -214.50485229492188, + "loss": 0.5733, + "loss/gen": 0.1604616641998291, + "loss/real": 0.5549860000610352, + "rewards/accuracies": 1.0, + "rewards/generated": -273.9742431640625, + "rewards/margins": 219.07891845703125, + "rewards/real": -54.895347595214844, + "step": 57 + }, + { + "epoch": 0.24, + "grad_norm": 54.18406446361213, + "learning_rate": 4.925213675213676e-07, + "logits/generated": -1.972760558128357, + "logits/oppo_generated": -2.733177900314331, + "logits/oppo_real": -3.0261659622192383, + "logits/real": -2.0272536277770996, + "logps/generated": -270.2849426269531, + "logps/oppo_gen": -77.4105453491211, + "logps/oppo_real": -291.50042724609375, + "logps/real": -314.6445617675781, + "loss": 0.6405, + "loss/gen": 0.24113653600215912, + "loss/real": 0.25997018814086914, + "rewards/accuracies": 1.0, + "rewards/generated": -192.8743896484375, + "rewards/margins": 169.73028564453125, + "rewards/real": -23.144100189208984, + "step": 58 + }, + { + "epoch": 0.25, + "grad_norm": 31.552650652260482, + "learning_rate": 4.921652421652421e-07, + "logits/generated": -1.6506314277648926, + "logits/oppo_generated": -2.70068359375, + "logits/oppo_real": -2.622352361679077, + "logits/real": -1.8686270713806152, + "logps/generated": -331.11773681640625, + "logps/oppo_gen": -66.53448486328125, + "logps/oppo_real": -142.07913208007812, + "logps/real": -227.437744140625, + "loss": 0.6586, + "loss/gen": 0.03464512526988983, + "loss/real": 0.853585958480835, + "rewards/accuracies": 1.0, + "rewards/generated": -264.5832824707031, + "rewards/margins": 179.22467041015625, + "rewards/real": -85.35860443115234, + "step": 59 + }, + { + "epoch": 0.25, + "grad_norm": 27.14020221317216, + "learning_rate": 4.918091168091168e-07, + "logits/generated": -2.1259684562683105, + "logits/oppo_generated": -3.0608558654785156, + "logits/oppo_real": -3.0881457328796387, + "logits/real": -2.3352560997009277, + "logps/generated": -275.365478515625, + "logps/oppo_gen": -78.30126953125, + "logps/oppo_real": -296.7585144042969, + "logps/real": -317.5572509765625, + "loss": 0.567, + "loss/gen": 0.22492240369319916, + "loss/real": 0.27364999055862427, + "rewards/accuracies": 1.0, + "rewards/generated": -197.064208984375, + "rewards/margins": 176.26547241210938, + "rewards/real": -20.798734664916992, + "step": 60 + }, + { + "epoch": 0.26, + "grad_norm": 37.116987705997744, + "learning_rate": 4.914529914529914e-07, + "logits/generated": -2.0405826568603516, + "logits/oppo_generated": -2.904336929321289, + "logits/oppo_real": -3.0007967948913574, + "logits/real": -2.274019956588745, + "logps/generated": -340.4947509765625, + "logps/oppo_gen": -78.76142883300781, + "logps/oppo_real": -321.17315673828125, + "logps/real": -368.8492431640625, + "loss": 0.5723, + "loss/gen": 0.07833881676197052, + "loss/real": 0.5737317800521851, + "rewards/accuracies": 0.875, + "rewards/generated": -261.7333068847656, + "rewards/margins": 214.0572509765625, + "rewards/real": -47.676055908203125, + "step": 61 + }, + { + "epoch": 0.26, + "grad_norm": 28.903528165773942, + "learning_rate": 4.910968660968661e-07, + "logits/generated": -2.1163697242736816, + "logits/oppo_generated": -3.0246148109436035, + "logits/oppo_real": -3.155604839324951, + "logits/real": -2.3015458583831787, + "logps/generated": -325.52911376953125, + "logps/oppo_gen": -99.78816986083984, + "logps/oppo_real": -357.6624755859375, + "logps/real": -369.4306640625, + "loss": 0.4923, + "loss/gen": 0.08446104824542999, + "loss/real": 0.1771288365125656, + "rewards/accuracies": 1.0, + "rewards/generated": -225.74095153808594, + "rewards/margins": 213.97276306152344, + "rewards/real": -11.768176078796387, + "step": 62 + }, + { + "epoch": 0.26, + "grad_norm": 42.85362560553147, + "learning_rate": 4.907407407407407e-07, + "logits/generated": -1.962327480316162, + "logits/oppo_generated": -2.718918800354004, + "logits/oppo_real": -2.8950438499450684, + "logits/real": -2.042083740234375, + "logps/generated": -230.06312561035156, + "logps/oppo_gen": -73.73533630371094, + "logps/oppo_real": -276.2977294921875, + "logps/real": -287.68817138671875, + "loss": 0.4966, + "loss/gen": 0.5271965861320496, + "loss/real": 0.24724090099334717, + "rewards/accuracies": 0.875, + "rewards/generated": -156.3278045654297, + "rewards/margins": 144.93739318847656, + "rewards/real": -11.390399932861328, + "step": 63 + }, + { + "epoch": 0.27, + "grad_norm": 60.4540336582008, + "learning_rate": 4.903846153846153e-07, + "logits/generated": -2.031083822250366, + "logits/oppo_generated": -2.7741386890411377, + "logits/oppo_real": -2.8905487060546875, + "logits/real": -2.146413803100586, + "logps/generated": -242.85926818847656, + "logps/oppo_gen": -70.42605590820312, + "logps/oppo_real": -291.8798522949219, + "logps/real": -347.0901794433594, + "loss": 0.5204, + "loss/gen": 0.3330785036087036, + "loss/real": 0.5649424195289612, + "rewards/accuracies": 1.0, + "rewards/generated": -172.43319702148438, + "rewards/margins": 117.2228775024414, + "rewards/real": -55.21034240722656, + "step": 64 + }, + { + "epoch": 0.27, + "grad_norm": 60.4540336582008, + "learning_rate": 4.903846153846153e-07, + "logits/generated": -1.8672581911087036, + "logits/oppo_generated": -2.731257438659668, + "logits/oppo_real": -2.804780960083008, + "logits/real": -2.0895280838012695, + "logps/generated": -339.7601013183594, + "logps/oppo_gen": -143.67832946777344, + "logps/oppo_real": -309.55450439453125, + "logps/real": -320.901123046875, + "loss": 0.3716, + "loss/gen": 0.2359689623117447, + "loss/real": 0.16374921798706055, + "rewards/accuracies": 1.0, + "rewards/generated": -196.081787109375, + "rewards/margins": 184.73516845703125, + "rewards/real": -11.34660816192627, + "step": 65 + }, + { + "epoch": 0.28, + "grad_norm": 36.390479081394865, + "learning_rate": 4.9002849002849e-07, + "logits/generated": -1.936166524887085, + "logits/oppo_generated": -2.710496664047241, + "logits/oppo_real": -2.980191707611084, + "logits/real": -1.961893081665039, + "logps/generated": -314.8875732421875, + "logps/oppo_gen": -71.51214599609375, + "logps/oppo_real": -284.34765625, + "logps/real": -309.43060302734375, + "loss": 0.4212, + "loss/gen": 0.0, + "loss/real": 0.35151734948158264, + "rewards/accuracies": 1.0, + "rewards/generated": -243.37542724609375, + "rewards/margins": 218.29248046875, + "rewards/real": -25.082937240600586, + "step": 66 + }, + { + "epoch": 0.28, + "grad_norm": 68.84343437853207, + "learning_rate": 4.896723646723647e-07, + "logits/generated": -2.055253267288208, + "logits/oppo_generated": -3.0934062004089355, + "logits/oppo_real": -3.077010154724121, + "logits/real": -2.3065128326416016, + "logps/generated": -340.2142333984375, + "logps/oppo_gen": -109.1805419921875, + "logps/oppo_real": -348.23834228515625, + "logps/real": -346.0427551269531, + "loss": 0.5169, + "loss/gen": 0.03457939624786377, + "loss/real": 0.09117183089256287, + "rewards/accuracies": 1.0, + "rewards/generated": -231.03366088867188, + "rewards/margins": 233.22927856445312, + "rewards/real": 2.195611000061035, + "step": 67 + }, + { + "epoch": 0.28, + "grad_norm": 34.09506041248864, + "learning_rate": 4.893162393162393e-07, + "logits/generated": -1.9534183740615845, + "logits/oppo_generated": -2.838265895843506, + "logits/oppo_real": -3.01387357711792, + "logits/real": -2.075807809829712, + "logps/generated": -265.85076904296875, + "logps/oppo_gen": -75.5096206665039, + "logps/oppo_real": -242.11915588378906, + "logps/real": -281.80438232421875, + "loss": 0.5159, + "loss/gen": 0.18575912714004517, + "loss/real": 0.5187460780143738, + "rewards/accuracies": 0.875, + "rewards/generated": -190.34115600585938, + "rewards/margins": 150.65594482421875, + "rewards/real": -39.68519973754883, + "step": 68 + }, + { + "epoch": 0.29, + "grad_norm": 34.22783840494459, + "learning_rate": 4.889601139601139e-07, + "logits/generated": -1.8958648443222046, + "logits/oppo_generated": -2.786154270172119, + "logits/oppo_real": -2.980445146560669, + "logits/real": -1.9751079082489014, + "logps/generated": -315.6554870605469, + "logps/oppo_gen": -78.40753173828125, + "logps/oppo_real": -188.29739379882812, + "logps/real": -222.51780700683594, + "loss": 0.4878, + "loss/gen": 0.023022428154945374, + "loss/real": 0.41188403964042664, + "rewards/accuracies": 1.0, + "rewards/generated": -237.24795532226562, + "rewards/margins": 203.02752685546875, + "rewards/real": -34.220428466796875, + "step": 69 + }, + { + "epoch": 0.29, + "grad_norm": 35.71409671406464, + "learning_rate": 4.886039886039886e-07, + "logits/generated": -1.8381710052490234, + "logits/oppo_generated": -2.8353500366210938, + "logits/oppo_real": -2.788581371307373, + "logits/real": -1.96791410446167, + "logps/generated": -293.2041015625, + "logps/oppo_gen": -74.27359008789062, + "logps/oppo_real": -262.4258728027344, + "logps/real": -312.2054443359375, + "loss": 0.458, + "loss/gen": 0.1743556559085846, + "loss/real": 0.5692518949508667, + "rewards/accuracies": 0.875, + "rewards/generated": -218.93051147460938, + "rewards/margins": 169.15093994140625, + "rewards/real": -49.77956771850586, + "step": 70 + }, + { + "epoch": 0.3, + "grad_norm": 47.1891271449592, + "learning_rate": 4.882478632478633e-07, + "logits/generated": -1.840759515762329, + "logits/oppo_generated": -2.8188014030456543, + "logits/oppo_real": -2.757133960723877, + "logits/real": -1.9096312522888184, + "logps/generated": -285.27850341796875, + "logps/oppo_gen": -55.317054748535156, + "logps/oppo_real": -178.10824584960938, + "logps/real": -215.8996124267578, + "loss": 0.4616, + "loss/gen": 0.0, + "loss/real": 0.41009002923965454, + "rewards/accuracies": 1.0, + "rewards/generated": -229.96144104003906, + "rewards/margins": 192.17007446289062, + "rewards/real": -37.79136276245117, + "step": 71 + }, + { + "epoch": 0.3, + "grad_norm": 38.070165804919505, + "learning_rate": 4.878917378917379e-07, + "logits/generated": -1.9046831130981445, + "logits/oppo_generated": -2.865746259689331, + "logits/oppo_real": -2.85042142868042, + "logits/real": -2.0951907634735107, + "logps/generated": -316.7037658691406, + "logps/oppo_gen": -101.81581115722656, + "logps/oppo_real": -463.47314453125, + "logps/real": -466.7184143066406, + "loss": 0.4761, + "loss/gen": 0.05637046694755554, + "loss/real": 0.1776004135608673, + "rewards/accuracies": 1.0, + "rewards/generated": -214.88796997070312, + "rewards/margins": 211.6427001953125, + "rewards/real": -3.2452640533447266, + "step": 72 + }, + { + "epoch": 0.31, + "grad_norm": 36.46849560751033, + "learning_rate": 4.875356125356125e-07, + "logits/generated": -1.9553462266921997, + "logits/oppo_generated": -2.9923882484436035, + "logits/oppo_real": -2.813816547393799, + "logits/real": -2.1159887313842773, + "logps/generated": -345.8742980957031, + "logps/oppo_gen": -78.51251220703125, + "logps/oppo_real": -286.4658508300781, + "logps/real": -274.63427734375, + "loss": 0.4137, + "loss/gen": 0.007427394390106201, + "loss/real": 0.03321278840303421, + "rewards/accuracies": 1.0, + "rewards/generated": -267.3617858886719, + "rewards/margins": 279.1933898925781, + "rewards/real": 11.83156967163086, + "step": 73 + }, + { + "epoch": 0.31, + "grad_norm": 31.978947389991983, + "learning_rate": 4.871794871794871e-07, + "logits/generated": -2.1326825618743896, + "logits/oppo_generated": -2.7725887298583984, + "logits/oppo_real": -3.063380002975464, + "logits/real": -2.051950216293335, + "logps/generated": -261.63037109375, + "logps/oppo_gen": -79.40229034423828, + "logps/oppo_real": -383.419677734375, + "logps/real": -401.0813293457031, + "loss": 0.4048, + "loss/gen": 0.24421586096286774, + "loss/real": 0.2781957685947418, + "rewards/accuracies": 1.0, + "rewards/generated": -182.2280731201172, + "rewards/margins": 164.56642150878906, + "rewards/real": -17.661649703979492, + "step": 74 + }, + { + "epoch": 0.31, + "grad_norm": 43.62784005350683, + "learning_rate": 4.868233618233618e-07, + "logits/generated": -2.1139259338378906, + "logits/oppo_generated": -2.8321666717529297, + "logits/oppo_real": -3.1668171882629395, + "logits/real": -2.0823147296905518, + "logps/generated": -363.62158203125, + "logps/oppo_gen": -99.83964538574219, + "logps/oppo_real": -322.6613464355469, + "logps/real": -310.8186950683594, + "loss": 0.3929, + "loss/gen": 0.28169000148773193, + "loss/real": 0.1750582456588745, + "rewards/accuracies": 1.0, + "rewards/generated": -263.7819519042969, + "rewards/margins": 275.6246337890625, + "rewards/real": 11.842658042907715, + "step": 75 + }, + { + "epoch": 0.32, + "grad_norm": 32.57134409346178, + "learning_rate": 4.864672364672365e-07, + "logits/generated": -2.2618470191955566, + "logits/oppo_generated": -3.000812530517578, + "logits/oppo_real": -3.1619484424591064, + "logits/real": -2.332085609436035, + "logps/generated": -293.44439697265625, + "logps/oppo_gen": -83.82888793945312, + "logps/oppo_real": -441.3746337890625, + "logps/real": -436.41595458984375, + "loss": 0.4213, + "loss/gen": 0.061429619789123535, + "loss/real": 0.08119938522577286, + "rewards/accuracies": 1.0, + "rewards/generated": -209.61550903320312, + "rewards/margins": 214.57421875, + "rewards/real": 4.958704948425293, + "step": 76 + }, + { + "epoch": 0.32, + "grad_norm": 56.40902867117107, + "learning_rate": 4.861111111111111e-07, + "logits/generated": -1.467146635055542, + "logits/oppo_generated": -2.4111037254333496, + "logits/oppo_real": -2.622360944747925, + "logits/real": -1.5239063501358032, + "logps/generated": -283.9698791503906, + "logps/oppo_gen": -94.29784393310547, + "logps/oppo_real": -307.8828125, + "logps/real": -318.51324462890625, + "loss": 0.4075, + "loss/gen": 0.1725415140390396, + "loss/real": 0.24915428459644318, + "rewards/accuracies": 1.0, + "rewards/generated": -189.67201232910156, + "rewards/margins": 179.04159545898438, + "rewards/real": -10.630415916442871, + "step": 77 + }, + { + "epoch": 0.33, + "grad_norm": 57.75006764065303, + "learning_rate": 4.857549857549857e-07, + "logits/generated": -1.9342730045318604, + "logits/oppo_generated": -2.7816574573516846, + "logits/oppo_real": -2.923349380493164, + "logits/real": -2.000138759613037, + "logps/generated": -264.1120300292969, + "logps/oppo_gen": -70.22672271728516, + "logps/oppo_real": -286.0644836425781, + "logps/real": -369.57379150390625, + "loss": 0.414, + "loss/gen": 0.20479409396648407, + "loss/real": 0.8575762510299683, + "rewards/accuracies": 0.875, + "rewards/generated": -193.8852996826172, + "rewards/margins": 110.37598419189453, + "rewards/real": -83.50932312011719, + "step": 78 + }, + { + "epoch": 0.33, + "grad_norm": 115.49377688862721, + "learning_rate": 4.853988603988603e-07, + "logits/generated": -1.7300872802734375, + "logits/oppo_generated": -2.624129056930542, + "logits/oppo_real": -2.6314826011657715, + "logits/real": -1.706296682357788, + "logps/generated": -254.1842498779297, + "logps/oppo_gen": -48.185340881347656, + "logps/oppo_real": -148.66656494140625, + "logps/real": -187.1585693359375, + "loss": 0.3277, + "loss/gen": 0.08585792779922485, + "loss/real": 0.4419286251068115, + "rewards/accuracies": 1.0, + "rewards/generated": -205.9989013671875, + "rewards/margins": 167.50689697265625, + "rewards/real": -38.492000579833984, + "step": 79 + }, + { + "epoch": 0.33, + "grad_norm": 63.99712937592885, + "learning_rate": 4.850427350427351e-07, + "logits/generated": -1.8802506923675537, + "logits/oppo_generated": -2.668670177459717, + "logits/oppo_real": -2.9500231742858887, + "logits/real": -1.7524856328964233, + "logps/generated": -292.76727294921875, + "logps/oppo_gen": -76.79248809814453, + "logps/oppo_real": -287.1414794921875, + "logps/real": -329.4079284667969, + "loss": 0.34, + "loss/gen": 0.22407092154026031, + "loss/real": 0.4708248972892761, + "rewards/accuracies": 1.0, + "rewards/generated": -215.97479248046875, + "rewards/margins": 173.7083740234375, + "rewards/real": -42.266422271728516, + "step": 80 + }, + { + "epoch": 0.34, + "grad_norm": 158.62792427367572, + "learning_rate": 4.846866096866097e-07, + "logits/generated": -2.008450984954834, + "logits/oppo_generated": -2.8624868392944336, + "logits/oppo_real": -3.0077338218688965, + "logits/real": -1.9637665748596191, + "logps/generated": -250.85487365722656, + "logps/oppo_gen": -103.01863861083984, + "logps/oppo_real": -484.10565185546875, + "logps/real": -474.10980224609375, + "loss": 0.554, + "loss/gen": 0.6981667280197144, + "loss/real": 0.05877792090177536, + "rewards/accuracies": 1.0, + "rewards/generated": -147.83624267578125, + "rewards/margins": 157.83209228515625, + "rewards/real": 9.995855331420898, + "step": 81 + }, + { + "epoch": 0.34, + "grad_norm": 94.57272850331907, + "learning_rate": 4.843304843304843e-07, + "logits/generated": -1.8665781021118164, + "logits/oppo_generated": -2.976921796798706, + "logits/oppo_real": -3.0094780921936035, + "logits/real": -1.9989147186279297, + "logps/generated": -280.32012939453125, + "logps/oppo_gen": -66.51390075683594, + "logps/oppo_real": -174.39071655273438, + "logps/real": -190.4137420654297, + "loss": 0.5, + "loss/gen": 0.09350240230560303, + "loss/real": 0.278587281703949, + "rewards/accuracies": 1.0, + "rewards/generated": -213.8062286376953, + "rewards/margins": 197.783203125, + "rewards/real": -16.02302360534668, + "step": 82 + }, + { + "epoch": 0.35, + "grad_norm": 46.788289592255346, + "learning_rate": 4.839743589743589e-07, + "logits/generated": -1.8179612159729004, + "logits/oppo_generated": -3.01529598236084, + "logits/oppo_real": -2.9185380935668945, + "logits/real": -2.0011558532714844, + "logps/generated": -466.2247619628906, + "logps/oppo_gen": -86.220458984375, + "logps/oppo_real": -329.8023376464844, + "logps/real": -332.1396179199219, + "loss": 0.4253, + "loss/gen": 0.05652913451194763, + "loss/real": 0.2249125838279724, + "rewards/accuracies": 1.0, + "rewards/generated": -380.00433349609375, + "rewards/margins": 377.66705322265625, + "rewards/real": -2.3372955322265625, + "step": 83 + }, + { + "epoch": 0.35, + "grad_norm": 45.30856125122767, + "learning_rate": 4.836182336182337e-07, + "logits/generated": -1.641934871673584, + "logits/oppo_generated": -2.864108085632324, + "logits/oppo_real": -2.8596436977386475, + "logits/real": -1.7771339416503906, + "logps/generated": -308.81744384765625, + "logps/oppo_gen": -79.35113525390625, + "logps/oppo_real": -357.43438720703125, + "logps/real": -351.2286376953125, + "loss": 0.4106, + "loss/gen": 0.02170167863368988, + "loss/real": 0.11313143372535706, + "rewards/accuracies": 1.0, + "rewards/generated": -229.46632385253906, + "rewards/margins": 235.67205810546875, + "rewards/real": 6.2057342529296875, + "step": 84 + }, + { + "epoch": 0.36, + "grad_norm": 60.23657786179424, + "learning_rate": 4.832621082621082e-07, + "logits/generated": -1.7438234090805054, + "logits/oppo_generated": -2.635812282562256, + "logits/oppo_real": -2.784547805786133, + "logits/real": -1.6820318698883057, + "logps/generated": -362.5193786621094, + "logps/oppo_gen": -87.48421478271484, + "logps/oppo_real": -250.10626220703125, + "logps/real": -273.316162109375, + "loss": 0.3701, + "loss/gen": 0.0328507125377655, + "loss/real": 0.2887704372406006, + "rewards/accuracies": 1.0, + "rewards/generated": -275.03515625, + "rewards/margins": 251.82522583007812, + "rewards/real": -23.209918975830078, + "step": 85 + }, + { + "epoch": 0.36, + "grad_norm": 78.00425982458425, + "learning_rate": 4.829059829059829e-07, + "logits/generated": -1.6737346649169922, + "logits/oppo_generated": -2.9845218658447266, + "logits/oppo_real": -3.016307830810547, + "logits/real": -2.0023183822631836, + "logps/generated": -242.6670379638672, + "logps/oppo_gen": -55.523197174072266, + "logps/oppo_real": -291.81378173828125, + "logps/real": -357.2690734863281, + "loss": 0.4034, + "loss/gen": 0.28107643127441406, + "loss/real": 0.7271257638931274, + "rewards/accuracies": 0.75, + "rewards/generated": -187.14382934570312, + "rewards/margins": 121.68854522705078, + "rewards/real": -65.45529174804688, + "step": 86 + }, + { + "epoch": 0.36, + "grad_norm": 36.09174608885233, + "learning_rate": 4.825498575498575e-07, + "logits/generated": -1.8338725566864014, + "logits/oppo_generated": -2.8317785263061523, + "logits/oppo_real": -2.849785327911377, + "logits/real": -1.9712032079696655, + "logps/generated": -256.17181396484375, + "logps/oppo_gen": -65.48351287841797, + "logps/oppo_real": -259.8980712890625, + "logps/real": -279.2626953125, + "loss": 0.3141, + "loss/gen": 0.18348117172718048, + "loss/real": 0.2724674642086029, + "rewards/accuracies": 1.0, + "rewards/generated": -190.68829345703125, + "rewards/margins": 171.32366943359375, + "rewards/real": -19.364639282226562, + "step": 87 + }, + { + "epoch": 0.37, + "grad_norm": 43.09703010540576, + "learning_rate": 4.821937321937321e-07, + "logits/generated": -1.9401007890701294, + "logits/oppo_generated": -2.9616637229919434, + "logits/oppo_real": -2.8549320697784424, + "logits/real": -2.2171854972839355, + "logps/generated": -271.0581359863281, + "logps/oppo_gen": -66.1073226928711, + "logps/oppo_real": -297.0393981933594, + "logps/real": -283.0654602050781, + "loss": 0.3459, + "loss/gen": 0.11610506474971771, + "loss/real": 0.01628967374563217, + "rewards/accuracies": 1.0, + "rewards/generated": -204.95083618164062, + "rewards/margins": 218.9247589111328, + "rewards/real": 13.973949432373047, + "step": 88 + }, + { + "epoch": 0.37, + "grad_norm": 39.01662618859587, + "learning_rate": 4.818376068376069e-07, + "logits/generated": -2.057642936706543, + "logits/oppo_generated": -2.944060802459717, + "logits/oppo_real": -2.977362632751465, + "logits/real": -2.0833921432495117, + "logps/generated": -284.1174621582031, + "logps/oppo_gen": -49.032493591308594, + "logps/oppo_real": -197.13412475585938, + "logps/real": -235.72854614257812, + "loss": 0.3748, + "loss/gen": 0.1771981120109558, + "loss/real": 0.44538283348083496, + "rewards/accuracies": 1.0, + "rewards/generated": -235.0849609375, + "rewards/margins": 196.49057006835938, + "rewards/real": -38.594398498535156, + "step": 89 + }, + { + "epoch": 0.38, + "grad_norm": 76.44660552368398, + "learning_rate": 4.814814814814814e-07, + "logits/generated": -2.012396812438965, + "logits/oppo_generated": -2.9935152530670166, + "logits/oppo_real": -2.782620906829834, + "logits/real": -2.228747844696045, + "logps/generated": -225.5810546875, + "logps/oppo_gen": -79.41259002685547, + "logps/oppo_real": -304.58465576171875, + "logps/real": -294.3427734375, + "loss": 0.4461, + "loss/gen": 0.6170589327812195, + "loss/real": 0.013846360146999359, + "rewards/accuracies": 1.0, + "rewards/generated": -146.16847229003906, + "rewards/margins": 156.41033935546875, + "rewards/real": 10.241872787475586, + "step": 90 + }, + { + "epoch": 0.38, + "grad_norm": 44.887227127320834, + "learning_rate": 4.811253561253561e-07, + "logits/generated": -1.8049852848052979, + "logits/oppo_generated": -3.0348973274230957, + "logits/oppo_real": -2.8550362586975098, + "logits/real": -2.1674275398254395, + "logps/generated": -334.69403076171875, + "logps/oppo_gen": -147.11734008789062, + "logps/oppo_real": -324.0049743652344, + "logps/real": -330.11480712890625, + "loss": 0.3668, + "loss/gen": 0.32383447885513306, + "loss/real": 0.19668863713741302, + "rewards/accuracies": 1.0, + "rewards/generated": -187.57669067382812, + "rewards/margins": 181.46685791015625, + "rewards/real": -6.109820365905762, + "step": 91 + }, + { + "epoch": 0.38, + "grad_norm": 33.3576313215254, + "learning_rate": 4.807692307692307e-07, + "logits/generated": -1.867674469947815, + "logits/oppo_generated": -2.8708338737487793, + "logits/oppo_real": -2.8143606185913086, + "logits/real": -2.028439998626709, + "logps/generated": -307.528564453125, + "logps/oppo_gen": -81.77798461914062, + "logps/oppo_real": -330.5220031738281, + "logps/real": -330.1318054199219, + "loss": 0.355, + "loss/gen": 0.2485622763633728, + "loss/real": 0.1294267177581787, + "rewards/accuracies": 1.0, + "rewards/generated": -225.75057983398438, + "rewards/margins": 226.1407470703125, + "rewards/real": 0.3901691436767578, + "step": 92 + }, + { + "epoch": 0.39, + "grad_norm": 48.87256477708762, + "learning_rate": 4.804131054131054e-07, + "logits/generated": -1.6193779706954956, + "logits/oppo_generated": -2.7298922538757324, + "logits/oppo_real": -2.698655605316162, + "logits/real": -1.7858012914657593, + "logps/generated": -299.572998046875, + "logps/oppo_gen": -74.60616302490234, + "logps/oppo_real": -251.41427612304688, + "logps/real": -266.9776306152344, + "loss": 0.3602, + "loss/gen": 0.04555131494998932, + "loss/real": 0.34169435501098633, + "rewards/accuracies": 1.0, + "rewards/generated": -224.9668426513672, + "rewards/margins": 209.40345764160156, + "rewards/real": -15.563373565673828, + "step": 93 + }, + { + "epoch": 0.39, + "grad_norm": 63.47711970909282, + "learning_rate": 4.8005698005698e-07, + "logits/generated": -1.8660156726837158, + "logits/oppo_generated": -2.9584808349609375, + "logits/oppo_real": -2.8358330726623535, + "logits/real": -2.125810384750366, + "logps/generated": -329.8253479003906, + "logps/oppo_gen": -83.23335266113281, + "logps/oppo_real": -311.66064453125, + "logps/real": -308.5706787109375, + "loss": 0.3475, + "loss/gen": 0.030205443501472473, + "loss/real": 0.12542490661144257, + "rewards/accuracies": 1.0, + "rewards/generated": -246.59197998046875, + "rewards/margins": 249.68197631835938, + "rewards/real": 3.089975357055664, + "step": 94 + }, + { + "epoch": 0.4, + "grad_norm": 41.84529030025362, + "learning_rate": 4.797008547008547e-07, + "logits/generated": -1.794067621231079, + "logits/oppo_generated": -2.83894681930542, + "logits/oppo_real": -2.731696605682373, + "logits/real": -1.9485492706298828, + "logps/generated": -330.9621887207031, + "logps/oppo_gen": -103.72628021240234, + "logps/oppo_real": -218.9561767578125, + "logps/real": -219.8974609375, + "loss": 0.307, + "loss/gen": 0.06028883159160614, + "loss/real": 0.14670495688915253, + "rewards/accuracies": 1.0, + "rewards/generated": -227.23593139648438, + "rewards/margins": 226.29464721679688, + "rewards/real": -0.9412956237792969, + "step": 95 + }, + { + "epoch": 0.4, + "grad_norm": 32.90370323418684, + "learning_rate": 4.793447293447293e-07, + "logits/generated": -1.8011112213134766, + "logits/oppo_generated": -2.7633142471313477, + "logits/oppo_real": -2.9560418128967285, + "logits/real": -1.7214103937149048, + "logps/generated": -348.088134765625, + "logps/oppo_gen": -74.91079711914062, + "logps/oppo_real": -299.2713623046875, + "logps/real": -282.55438232421875, + "loss": 0.3138, + "loss/gen": 0.033689409494400024, + "loss/real": 0.08471229672431946, + "rewards/accuracies": 1.0, + "rewards/generated": -273.1773376464844, + "rewards/margins": 289.8943176269531, + "rewards/real": 16.716989517211914, + "step": 96 + }, + { + "epoch": 0.41, + "grad_norm": 63.98752090862131, + "learning_rate": 4.78988603988604e-07, + "logits/generated": -1.8813642263412476, + "logits/oppo_generated": -2.8308515548706055, + "logits/oppo_real": -3.085522174835205, + "logits/real": -1.916734218597412, + "logps/generated": -354.05621337890625, + "logps/oppo_gen": -134.01483154296875, + "logps/oppo_real": -442.37945556640625, + "logps/real": -426.1463928222656, + "loss": 0.3186, + "loss/gen": 0.08905501663684845, + "loss/real": 0.009031563997268677, + "rewards/accuracies": 1.0, + "rewards/generated": -220.04135131835938, + "rewards/margins": 236.27442932128906, + "rewards/real": 16.233068466186523, + "step": 97 + }, + { + "epoch": 0.41, + "grad_norm": 28.016905994138163, + "learning_rate": 4.786324786324786e-07, + "logits/generated": -1.7203209400177002, + "logits/oppo_generated": -2.8044867515563965, + "logits/oppo_real": -2.8060150146484375, + "logits/real": -1.783670425415039, + "logps/generated": -276.6085205078125, + "logps/oppo_gen": -51.423309326171875, + "logps/oppo_real": -222.54879760742188, + "logps/real": -233.36328125, + "loss": 0.2702, + "loss/gen": 0.014314472675323486, + "loss/real": 0.26025110483169556, + "rewards/accuracies": 1.0, + "rewards/generated": -225.1852264404297, + "rewards/margins": 214.37075805664062, + "rewards/real": -10.814473152160645, + "step": 98 + }, + { + "epoch": 0.41, + "grad_norm": 42.268631478097916, + "learning_rate": 4.782763532763532e-07, + "logits/generated": -1.8753924369812012, + "logits/oppo_generated": -2.932793140411377, + "logits/oppo_real": -2.9959638118743896, + "logits/real": -2.070061683654785, + "logps/generated": -297.700927734375, + "logps/oppo_gen": -68.20332336425781, + "logps/oppo_real": -376.541015625, + "logps/real": -386.68438720703125, + "loss": 0.2846, + "loss/gen": 0.0, + "loss/real": 0.2723137140274048, + "rewards/accuracies": 1.0, + "rewards/generated": -229.4976043701172, + "rewards/margins": 219.35421752929688, + "rewards/real": -10.143372535705566, + "step": 99 + }, + { + "epoch": 0.42, + "grad_norm": 62.84870885768557, + "learning_rate": 4.779202279202279e-07, + "logits/generated": -1.745549201965332, + "logits/oppo_generated": -2.780601739883423, + "logits/oppo_real": -2.8726038932800293, + "logits/real": -1.851616621017456, + "logps/generated": -308.7665100097656, + "logps/oppo_gen": -75.83106994628906, + "logps/oppo_real": -327.609619140625, + "logps/real": -323.2357177734375, + "loss": 0.3766, + "loss/gen": 0.17890962958335876, + "loss/real": 0.07637953758239746, + "rewards/accuracies": 1.0, + "rewards/generated": -232.93545532226562, + "rewards/margins": 237.3093719482422, + "rewards/real": 4.373929977416992, + "step": 100 + }, + { + "epoch": 0.42, + "grad_norm": 80.61028208687152, + "learning_rate": 4.775641025641026e-07, + "logits/generated": -1.832048773765564, + "logits/oppo_generated": -2.91953706741333, + "logits/oppo_real": -2.820370674133301, + "logits/real": -2.110717535018921, + "logps/generated": -299.78704833984375, + "logps/oppo_gen": -75.91517639160156, + "logps/oppo_real": -531.0400390625, + "logps/real": -533.7149047851562, + "loss": 0.3108, + "loss/gen": 0.07047438621520996, + "loss/real": 0.11644826829433441, + "rewards/accuracies": 1.0, + "rewards/generated": -223.87188720703125, + "rewards/margins": 221.1970672607422, + "rewards/real": -2.6748085021972656, + "step": 101 + }, + { + "epoch": 0.43, + "grad_norm": 21.048619255395202, + "learning_rate": 4.772079772079772e-07, + "logits/generated": -1.83795964717865, + "logits/oppo_generated": -2.927794933319092, + "logits/oppo_real": -2.8259315490722656, + "logits/real": -2.103085517883301, + "logps/generated": -293.03070068359375, + "logps/oppo_gen": -75.32722473144531, + "logps/oppo_real": -334.3116149902344, + "logps/real": -329.84771728515625, + "loss": 0.2749, + "loss/gen": 0.1955680400133133, + "loss/real": 0.08636181056499481, + "rewards/accuracies": 1.0, + "rewards/generated": -217.70347595214844, + "rewards/margins": 222.16734313964844, + "rewards/real": 4.463872909545898, + "step": 102 + }, + { + "epoch": 0.43, + "grad_norm": 27.770455184464364, + "learning_rate": 4.768518518518518e-07, + "logits/generated": -1.8320189714431763, + "logits/oppo_generated": -2.798323154449463, + "logits/oppo_real": -3.0827927589416504, + "logits/real": -2.0010604858398438, + "logps/generated": -309.42236328125, + "logps/oppo_gen": -85.98326110839844, + "logps/oppo_real": -484.7052001953125, + "logps/real": -477.567626953125, + "loss": 0.2701, + "loss/gen": 0.009465828537940979, + "loss/real": 0.04307375103235245, + "rewards/accuracies": 1.0, + "rewards/generated": -223.43910217285156, + "rewards/margins": 230.57669067382812, + "rewards/real": 7.137579441070557, + "step": 103 + }, + { + "epoch": 0.44, + "grad_norm": 35.29567146022477, + "learning_rate": 4.764957264957264e-07, + "logits/generated": -1.6535446643829346, + "logits/oppo_generated": -2.820817232131958, + "logits/oppo_real": -2.7580766677856445, + "logits/real": -1.9198226928710938, + "logps/generated": -388.9769287109375, + "logps/oppo_gen": -98.39456176757812, + "logps/oppo_real": -435.86871337890625, + "logps/real": -446.45001220703125, + "loss": 0.3203, + "loss/gen": 0.04320457577705383, + "loss/real": 0.25816553831100464, + "rewards/accuracies": 1.0, + "rewards/generated": -290.58233642578125, + "rewards/margins": 280.00103759765625, + "rewards/real": -10.581292152404785, + "step": 104 + }, + { + "epoch": 0.44, + "grad_norm": 23.55976183940932, + "learning_rate": 4.761396011396011e-07, + "logits/generated": -1.8841466903686523, + "logits/oppo_generated": -2.991581439971924, + "logits/oppo_real": -3.002182960510254, + "logits/real": -2.131711483001709, + "logps/generated": -322.656494140625, + "logps/oppo_gen": -81.12940216064453, + "logps/oppo_real": -296.61138916015625, + "logps/real": -295.33599853515625, + "loss": 0.2212, + "loss/gen": 0.0, + "loss/real": 0.21376575529575348, + "rewards/accuracies": 1.0, + "rewards/generated": -241.52708435058594, + "rewards/margins": 242.80247497558594, + "rewards/real": 1.2753915786743164, + "step": 105 + }, + { + "epoch": 0.44, + "grad_norm": 23.497506516649803, + "learning_rate": 4.7578347578347577e-07, + "logits/generated": -1.910527229309082, + "logits/oppo_generated": -2.8433456420898438, + "logits/oppo_real": -3.012195110321045, + "logits/real": -1.9440618753433228, + "logps/generated": -244.58482360839844, + "logps/oppo_gen": -63.396881103515625, + "logps/oppo_real": -288.55780029296875, + "logps/real": -273.79388427734375, + "loss": 0.1928, + "loss/gen": 0.48318159580230713, + "loss/real": 0.007603831589221954, + "rewards/accuracies": 1.0, + "rewards/generated": -181.18792724609375, + "rewards/margins": 195.951904296875, + "rewards/real": 14.763958930969238, + "step": 106 + }, + { + "epoch": 0.45, + "grad_norm": 60.16537065598345, + "learning_rate": 4.754273504273504e-07, + "logits/generated": -1.7519464492797852, + "logits/oppo_generated": -2.75607967376709, + "logits/oppo_real": -3.044626235961914, + "logits/real": -1.8694126605987549, + "logps/generated": -350.42431640625, + "logps/oppo_gen": -89.79308319091797, + "logps/oppo_real": -237.51071166992188, + "logps/real": -241.78152465820312, + "loss": 0.2889, + "loss/gen": 0.0006802082061767578, + "loss/real": 0.14765188097953796, + "rewards/accuracies": 1.0, + "rewards/generated": -260.6312561035156, + "rewards/margins": 256.36041259765625, + "rewards/real": -4.270831108093262, + "step": 107 + }, + { + "epoch": 0.45, + "grad_norm": 60.16537065598345, + "learning_rate": 4.754273504273504e-07, + "logits/generated": -2.0505292415618896, + "logits/oppo_generated": -2.9334537982940674, + "logits/oppo_real": -3.0197911262512207, + "logits/real": -2.080873966217041, + "logps/generated": -322.20416259765625, + "logps/oppo_gen": -86.25882720947266, + "logps/oppo_real": -171.73361206054688, + "logps/real": -155.16622924804688, + "loss": 0.2352, + "loss/gen": 0.0, + "loss/real": 0.036911122500896454, + "rewards/accuracies": 1.0, + "rewards/generated": -235.94532775878906, + "rewards/margins": 252.5127410888672, + "rewards/real": 16.567398071289062, + "step": 108 + }, + { + "epoch": 0.46, + "grad_norm": 61.99013711558779, + "learning_rate": 4.7507122507122507e-07, + "logits/generated": -1.7211092710494995, + "logits/oppo_generated": -2.8885016441345215, + "logits/oppo_real": -2.9670629501342773, + "logits/real": -2.023648977279663, + "logps/generated": -242.21588134765625, + "logps/oppo_gen": -52.36747741699219, + "logps/oppo_real": -234.88699340820312, + "logps/real": -226.70179748535156, + "loss": 0.2528, + "loss/gen": 0.2596120834350586, + "loss/real": 0.1372058391571045, + "rewards/accuracies": 1.0, + "rewards/generated": -189.84840393066406, + "rewards/margins": 198.03363037109375, + "rewards/real": 8.185225486755371, + "step": 109 + }, + { + "epoch": 0.46, + "grad_norm": 52.08842110566656, + "learning_rate": 4.747150997150997e-07, + "logits/generated": -1.8464903831481934, + "logits/oppo_generated": -2.902094841003418, + "logits/oppo_real": -2.738150119781494, + "logits/real": -2.1050400733947754, + "logps/generated": -330.8840026855469, + "logps/oppo_gen": -71.77503967285156, + "logps/oppo_real": -226.59805297851562, + "logps/real": -215.78268432617188, + "loss": 0.2926, + "loss/gen": 0.0, + "loss/real": 0.001055300235748291, + "rewards/accuracies": 1.0, + "rewards/generated": -259.1089782714844, + "rewards/margins": 269.92437744140625, + "rewards/real": 10.815394401550293, + "step": 110 + }, + { + "epoch": 0.46, + "grad_norm": 48.25866937579066, + "learning_rate": 4.743589743589743e-07, + "logits/generated": -1.7407563924789429, + "logits/oppo_generated": -2.78233003616333, + "logits/oppo_real": -2.810633420944214, + "logits/real": -1.92085862159729, + "logps/generated": -262.89617919921875, + "logps/oppo_gen": -51.96064758300781, + "logps/oppo_real": -160.8415069580078, + "logps/real": -168.8170928955078, + "loss": 0.2552, + "loss/gen": 0.11834511160850525, + "loss/real": 0.24989712238311768, + "rewards/accuracies": 1.0, + "rewards/generated": -210.93551635742188, + "rewards/margins": 202.95993041992188, + "rewards/real": -7.9755964279174805, + "step": 111 + }, + { + "epoch": 0.47, + "grad_norm": 48.57264161718894, + "learning_rate": 4.74002849002849e-07, + "logits/generated": -1.7602338790893555, + "logits/oppo_generated": -2.7906460762023926, + "logits/oppo_real": -2.7454147338867188, + "logits/real": -1.9760286808013916, + "logps/generated": -233.21107482910156, + "logps/oppo_gen": -67.77021789550781, + "logps/oppo_real": -355.9058837890625, + "logps/real": -337.13189697265625, + "loss": 0.3077, + "loss/gen": 0.4533146023750305, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -165.44085693359375, + "rewards/margins": 184.21485900878906, + "rewards/real": 18.774002075195312, + "step": 112 + }, + { + "epoch": 0.47, + "grad_norm": 83.06293794560996, + "learning_rate": 4.7364672364672366e-07, + "logits/generated": -1.7146689891815186, + "logits/oppo_generated": -2.784420967102051, + "logits/oppo_real": -2.58797550201416, + "logits/real": -1.975735068321228, + "logps/generated": -264.473388671875, + "logps/oppo_gen": -53.4489631652832, + "logps/oppo_real": -213.77337646484375, + "logps/real": -224.57752990722656, + "loss": 0.2609, + "loss/gen": 0.06298929452896118, + "loss/real": 0.2708776891231537, + "rewards/accuracies": 0.875, + "rewards/generated": -211.02444458007812, + "rewards/margins": 200.22027587890625, + "rewards/real": -10.804159164428711, + "step": 113 + }, + { + "epoch": 0.48, + "grad_norm": 54.351966206097366, + "learning_rate": 4.7329059829059823e-07, + "logits/generated": -1.9484028816223145, + "logits/oppo_generated": -2.9693868160247803, + "logits/oppo_real": -2.897064208984375, + "logits/real": -2.2127110958099365, + "logps/generated": -297.3500061035156, + "logps/oppo_gen": -65.07535552978516, + "logps/oppo_real": -380.3414306640625, + "logps/real": -407.437255859375, + "loss": 0.3122, + "loss/gen": 0.0, + "loss/real": 0.37634211778640747, + "rewards/accuracies": 1.0, + "rewards/generated": -232.27462768554688, + "rewards/margins": 205.17877197265625, + "rewards/real": -27.095857620239258, + "step": 114 + }, + { + "epoch": 0.48, + "grad_norm": 44.581477978090156, + "learning_rate": 4.729344729344729e-07, + "logits/generated": -2.0135183334350586, + "logits/oppo_generated": -2.8074076175689697, + "logits/oppo_real": -2.9744620323181152, + "logits/real": -2.006603479385376, + "logps/generated": -310.1141357421875, + "logps/oppo_gen": -81.67523193359375, + "logps/oppo_real": -332.10321044921875, + "logps/real": -344.72015380859375, + "loss": 0.2123, + "loss/gen": 0.24567674100399017, + "loss/real": 0.2426839917898178, + "rewards/accuracies": 0.875, + "rewards/generated": -228.43890380859375, + "rewards/margins": 215.82196044921875, + "rewards/real": -12.616944313049316, + "step": 115 + }, + { + "epoch": 0.49, + "grad_norm": 75.96934131814294, + "learning_rate": 4.725783475783476e-07, + "logits/generated": -1.9424694776535034, + "logits/oppo_generated": -2.8780970573425293, + "logits/oppo_real": -2.880333185195923, + "logits/real": -2.094572067260742, + "logps/generated": -384.4183654785156, + "logps/oppo_gen": -83.72149658203125, + "logps/oppo_real": -272.17291259765625, + "logps/real": -253.05941772460938, + "loss": 0.3365, + "loss/gen": 0.0, + "loss/real": 0.005366437137126923, + "rewards/accuracies": 1.0, + "rewards/generated": -300.6968688964844, + "rewards/margins": 319.81036376953125, + "rewards/real": 19.113483428955078, + "step": 116 + }, + { + "epoch": 0.49, + "grad_norm": 68.25540843955152, + "learning_rate": 4.722222222222222e-07, + "logits/generated": -1.8966500759124756, + "logits/oppo_generated": -2.8689210414886475, + "logits/oppo_real": -3.036574602127075, + "logits/real": -2.050349473953247, + "logps/generated": -328.08782958984375, + "logps/oppo_gen": -61.806739807128906, + "logps/oppo_real": -213.864013671875, + "logps/real": -201.69223022460938, + "loss": 0.3216, + "loss/gen": 0.027267932891845703, + "loss/real": 0.0526232048869133, + "rewards/accuracies": 1.0, + "rewards/generated": -266.2810974121094, + "rewards/margins": 278.45294189453125, + "rewards/real": 12.171795845031738, + "step": 117 + }, + { + "epoch": 0.49, + "grad_norm": 50.70511841227792, + "learning_rate": 4.7186609686609683e-07, + "logits/generated": -1.9891793727874756, + "logits/oppo_generated": -2.847443103790283, + "logits/oppo_real": -2.9110074043273926, + "logits/real": -2.0313127040863037, + "logps/generated": -321.2769775390625, + "logps/oppo_gen": -68.70259857177734, + "logps/oppo_real": -252.70947265625, + "logps/real": -237.62449645996094, + "loss": 0.2511, + "loss/gen": 0.01610538363456726, + "loss/real": 0.042245976626873016, + "rewards/accuracies": 1.0, + "rewards/generated": -252.5743865966797, + "rewards/margins": 267.6593322753906, + "rewards/real": 15.084959983825684, + "step": 118 + }, + { + "epoch": 0.5, + "grad_norm": 30.791028943166324, + "learning_rate": 4.715099715099715e-07, + "logits/generated": -2.0675950050354004, + "logits/oppo_generated": -2.850525140762329, + "logits/oppo_real": -2.9623799324035645, + "logits/real": -2.093207359313965, + "logps/generated": -307.7158203125, + "logps/oppo_gen": -70.65492248535156, + "logps/oppo_real": -241.07968139648438, + "logps/real": -263.4491271972656, + "loss": 0.2583, + "loss/gen": 0.007858753204345703, + "loss/real": 0.329021155834198, + "rewards/accuracies": 0.875, + "rewards/generated": -237.0609130859375, + "rewards/margins": 214.69146728515625, + "rewards/real": -22.36945152282715, + "step": 119 + }, + { + "epoch": 0.5, + "grad_norm": 50.498172538097855, + "learning_rate": 4.711538461538461e-07, + "logits/generated": -1.8775372505187988, + "logits/oppo_generated": -2.760641574859619, + "logits/oppo_real": -2.835960865020752, + "logits/real": -2.0484185218811035, + "logps/generated": -296.631591796875, + "logps/oppo_gen": -77.80702209472656, + "logps/oppo_real": -309.97265625, + "logps/real": -322.2628173828125, + "loss": 0.2541, + "loss/gen": 0.1177206039428711, + "loss/real": 0.30542153120040894, + "rewards/accuracies": 1.0, + "rewards/generated": -218.82455444335938, + "rewards/margins": 206.5343475341797, + "rewards/real": -12.290206909179688, + "step": 120 + }, + { + "epoch": 0.51, + "grad_norm": 43.8276807575525, + "learning_rate": 4.707977207977208e-07, + "logits/generated": -1.9596431255340576, + "logits/oppo_generated": -2.762300491333008, + "logits/oppo_real": -2.91391658782959, + "logits/real": -2.064877986907959, + "logps/generated": -285.3265075683594, + "logps/oppo_gen": -79.30331420898438, + "logps/oppo_real": -206.95407104492188, + "logps/real": -199.25717163085938, + "loss": 0.2071, + "loss/gen": 0.15041278302669525, + "loss/real": 0.06987226009368896, + "rewards/accuracies": 1.0, + "rewards/generated": -206.023193359375, + "rewards/margins": 213.7200927734375, + "rewards/real": 7.696885108947754, + "step": 121 + }, + { + "epoch": 0.51, + "grad_norm": 92.10083850391916, + "learning_rate": 4.7044159544159537e-07, + "logits/generated": -1.6970422267913818, + "logits/oppo_generated": -2.8723740577697754, + "logits/oppo_real": -2.730229139328003, + "logits/real": -2.0857439041137695, + "logps/generated": -367.9686584472656, + "logps/oppo_gen": -68.4917984008789, + "logps/oppo_real": -205.74790954589844, + "logps/real": -213.32887268066406, + "loss": 0.2593, + "loss/gen": 0.01937010884284973, + "loss/real": 0.20608964562416077, + "rewards/accuracies": 1.0, + "rewards/generated": -299.4768371582031, + "rewards/margins": 291.8958740234375, + "rewards/real": -7.580964088439941, + "step": 122 + }, + { + "epoch": 0.51, + "grad_norm": 78.7925604237164, + "learning_rate": 4.7008547008547005e-07, + "logits/generated": -2.007908344268799, + "logits/oppo_generated": -2.833265781402588, + "logits/oppo_real": -2.8581643104553223, + "logits/real": -2.1557552814483643, + "logps/generated": -311.3591613769531, + "logps/oppo_gen": -72.44357299804688, + "logps/oppo_real": -294.85699462890625, + "logps/real": -279.35943603515625, + "loss": 0.2969, + "loss/gen": 0.0, + "loss/real": 0.0311884805560112, + "rewards/accuracies": 1.0, + "rewards/generated": -238.91558837890625, + "rewards/margins": 254.41310119628906, + "rewards/real": 15.497525215148926, + "step": 123 + }, + { + "epoch": 0.52, + "grad_norm": 68.66656869430713, + "learning_rate": 4.697293447293447e-07, + "logits/generated": -1.8297350406646729, + "logits/oppo_generated": -2.8131227493286133, + "logits/oppo_real": -2.815453052520752, + "logits/real": -2.1473500728607178, + "logps/generated": -365.8243408203125, + "logps/oppo_gen": -118.46414184570312, + "logps/oppo_real": -350.6376953125, + "logps/real": -340.7218933105469, + "loss": 0.2489, + "loss/gen": 0.0096682608127594, + "loss/real": 0.037821196019649506, + "rewards/accuracies": 1.0, + "rewards/generated": -247.36019897460938, + "rewards/margins": 257.2760009765625, + "rewards/real": 9.915809631347656, + "step": 124 + }, + { + "epoch": 0.52, + "grad_norm": 41.412493633036036, + "learning_rate": 4.6937321937321934e-07, + "logits/generated": -1.9806835651397705, + "logits/oppo_generated": -2.868478775024414, + "logits/oppo_real": -2.87443208694458, + "logits/real": -2.1723623275756836, + "logps/generated": -303.59033203125, + "logps/oppo_gen": -72.4801025390625, + "logps/oppo_real": -315.2503356933594, + "logps/real": -303.8700256347656, + "loss": 0.2741, + "loss/gen": 0.0, + "loss/real": 0.03125518560409546, + "rewards/accuracies": 1.0, + "rewards/generated": -231.11021423339844, + "rewards/margins": 242.4905242919922, + "rewards/real": 11.380315780639648, + "step": 125 + }, + { + "epoch": 0.53, + "grad_norm": 70.06525975831389, + "learning_rate": 4.69017094017094e-07, + "logits/generated": -1.5762176513671875, + "logits/oppo_generated": -2.5010550022125244, + "logits/oppo_real": -2.635188102722168, + "logits/real": -1.7362252473831177, + "logps/generated": -365.1429443359375, + "logps/oppo_gen": -80.23007202148438, + "logps/oppo_real": -347.019287109375, + "logps/real": -397.1420593261719, + "loss": 0.1983, + "loss/gen": 0.0, + "loss/real": 0.6604471206665039, + "rewards/accuracies": 1.0, + "rewards/generated": -284.9128723144531, + "rewards/margins": 234.7901153564453, + "rewards/real": -50.12276840209961, + "step": 126 + }, + { + "epoch": 0.53, + "grad_norm": 55.3230546956635, + "learning_rate": 4.6866096866096864e-07, + "logits/generated": -1.6325474977493286, + "logits/oppo_generated": -2.6126623153686523, + "logits/oppo_real": -2.6145567893981934, + "logits/real": -1.8411592245101929, + "logps/generated": -336.42913818359375, + "logps/oppo_gen": -73.5291748046875, + "logps/oppo_real": -317.5265808105469, + "logps/real": -295.2335205078125, + "loss": 0.2608, + "loss/gen": 0.0, + "loss/real": 0.005345538258552551, + "rewards/accuracies": 1.0, + "rewards/generated": -262.89996337890625, + "rewards/margins": 285.1929931640625, + "rewards/real": 22.293060302734375, + "step": 127 + }, + { + "epoch": 0.54, + "grad_norm": 45.401364908412994, + "learning_rate": 4.6830484330484326e-07, + "logits/generated": -2.1946914196014404, + "logits/oppo_generated": -3.0297465324401855, + "logits/oppo_real": -3.101362705230713, + "logits/real": -2.43679141998291, + "logps/generated": -350.4021911621094, + "logps/oppo_gen": -120.2161865234375, + "logps/oppo_real": -532.0965576171875, + "logps/real": -521.9387817382812, + "loss": 0.3068, + "loss/gen": 0.011398926377296448, + "loss/real": 0.09254828840494156, + "rewards/accuracies": 1.0, + "rewards/generated": -230.1859893798828, + "rewards/margins": 240.34373474121094, + "rewards/real": 10.15774154663086, + "step": 128 + }, + { + "epoch": 0.54, + "grad_norm": 78.93660616576285, + "learning_rate": 4.6794871794871794e-07, + "logits/generated": -1.9267959594726562, + "logits/oppo_generated": -2.4462087154388428, + "logits/oppo_real": -2.882254123687744, + "logits/real": -1.6094073057174683, + "logps/generated": -312.116943359375, + "logps/oppo_gen": -74.71348571777344, + "logps/oppo_real": -324.086669921875, + "logps/real": -350.8844909667969, + "loss": 0.2236, + "loss/gen": 0.0, + "loss/real": 0.3186296820640564, + "rewards/accuracies": 1.0, + "rewards/generated": -237.4034423828125, + "rewards/margins": 210.60562133789062, + "rewards/real": -26.79781723022461, + "step": 129 + }, + { + "epoch": 0.54, + "grad_norm": 45.77649026025122, + "learning_rate": 4.675925925925926e-07, + "logits/generated": -2.0611488819122314, + "logits/oppo_generated": -2.9427778720855713, + "logits/oppo_real": -2.9869794845581055, + "logits/real": -2.2055954933166504, + "logps/generated": -256.5621337890625, + "logps/oppo_gen": -57.98387908935547, + "logps/oppo_real": -299.8202209472656, + "logps/real": -324.31402587890625, + "loss": 0.2659, + "loss/gen": 0.2458263337612152, + "loss/real": 0.35364583134651184, + "rewards/accuracies": 1.0, + "rewards/generated": -198.57826232910156, + "rewards/margins": 174.08447265625, + "rewards/real": -24.493789672851562, + "step": 130 + }, + { + "epoch": 0.55, + "grad_norm": 129.35351572821668, + "learning_rate": 4.672364672364672e-07, + "logits/generated": -1.9105026721954346, + "logits/oppo_generated": -2.462200880050659, + "logits/oppo_real": -2.7382378578186035, + "logits/real": -1.603495478630066, + "logps/generated": -283.0691223144531, + "logps/oppo_gen": -109.31198120117188, + "logps/oppo_real": -333.22021484375, + "logps/real": -322.46685791015625, + "loss": 0.291, + "loss/gen": 0.48477140069007874, + "loss/real": 0.011933863162994385, + "rewards/accuracies": 0.875, + "rewards/generated": -173.7571258544922, + "rewards/margins": 184.51043701171875, + "rewards/real": 10.753315925598145, + "step": 131 + }, + { + "epoch": 0.55, + "grad_norm": 35.79489027671233, + "learning_rate": 4.6688034188034186e-07, + "logits/generated": -2.0710315704345703, + "logits/oppo_generated": -2.9814329147338867, + "logits/oppo_real": -2.8366198539733887, + "logits/real": -2.283444404602051, + "logps/generated": -375.65386962890625, + "logps/oppo_gen": -117.97686767578125, + "logps/oppo_real": -333.4208679199219, + "logps/real": -320.87835693359375, + "loss": 0.3409, + "loss/gen": 0.10222794115543365, + "loss/real": 0.14502938091754913, + "rewards/accuracies": 1.0, + "rewards/generated": -257.677001953125, + "rewards/margins": 270.2195129394531, + "rewards/real": 12.542512893676758, + "step": 132 + }, + { + "epoch": 0.56, + "grad_norm": 38.12187487191573, + "learning_rate": 4.6652421652421653e-07, + "logits/generated": -1.6588772535324097, + "logits/oppo_generated": -2.6781723499298096, + "logits/oppo_real": -2.516916513442993, + "logits/real": -1.9674652814865112, + "logps/generated": -311.58111572265625, + "logps/oppo_gen": -60.19814682006836, + "logps/oppo_real": -262.58551025390625, + "logps/real": -258.6761474609375, + "loss": 0.2715, + "loss/gen": 0.24061748385429382, + "loss/real": 0.12360702455043793, + "rewards/accuracies": 1.0, + "rewards/generated": -251.3829803466797, + "rewards/margins": 255.29237365722656, + "rewards/real": 3.9093809127807617, + "step": 133 + }, + { + "epoch": 0.56, + "grad_norm": 42.19599162224269, + "learning_rate": 4.6616809116809116e-07, + "logits/generated": -1.791245698928833, + "logits/oppo_generated": -2.8787498474121094, + "logits/oppo_real": -2.805894374847412, + "logits/real": -2.2519092559814453, + "logps/generated": -353.5828552246094, + "logps/oppo_gen": -124.28936767578125, + "logps/oppo_real": -606.1627807617188, + "logps/real": -580.4054565429688, + "loss": 0.2072, + "loss/gen": 0.14101070165634155, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -229.29348754882812, + "rewards/margins": 255.05081176757812, + "rewards/real": 25.75733184814453, + "step": 134 + }, + { + "epoch": 0.56, + "grad_norm": 26.621898201803482, + "learning_rate": 4.658119658119658e-07, + "logits/generated": -2.115086078643799, + "logits/oppo_generated": -2.765538454055786, + "logits/oppo_real": -2.839543342590332, + "logits/real": -2.0567030906677246, + "logps/generated": -295.31121826171875, + "logps/oppo_gen": -83.72669982910156, + "logps/oppo_real": -361.6756591796875, + "logps/real": -362.77685546875, + "loss": 0.2186, + "loss/gen": 0.22105728089809418, + "loss/real": 0.2065998762845993, + "rewards/accuracies": 1.0, + "rewards/generated": -211.5845184326172, + "rewards/margins": 210.4833526611328, + "rewards/real": -1.1011724472045898, + "step": 135 + }, + { + "epoch": 0.57, + "grad_norm": 18.935126127513485, + "learning_rate": 4.654558404558404e-07, + "logits/generated": -2.0748369693756104, + "logits/oppo_generated": -2.7416014671325684, + "logits/oppo_real": -2.8941569328308105, + "logits/real": -1.9765853881835938, + "logps/generated": -263.2115478515625, + "logps/oppo_gen": -51.659912109375, + "logps/oppo_real": -267.5926513671875, + "logps/real": -248.9636688232422, + "loss": 0.2529, + "loss/gen": 0.011083722114562988, + "loss/real": 0.0029998421669006348, + "rewards/accuracies": 1.0, + "rewards/generated": -211.55162048339844, + "rewards/margins": 230.18063354492188, + "rewards/real": 18.62899398803711, + "step": 136 + }, + { + "epoch": 0.57, + "grad_norm": 99.21669289206076, + "learning_rate": 4.650997150997151e-07, + "logits/generated": -1.808034896850586, + "logits/oppo_generated": -2.609920024871826, + "logits/oppo_real": -2.5399818420410156, + "logits/real": -1.9245736598968506, + "logps/generated": -339.20257568359375, + "logps/oppo_gen": -81.96345520019531, + "logps/oppo_real": -258.99554443359375, + "logps/real": -247.95123291015625, + "loss": 0.3165, + "loss/gen": 0.0, + "loss/real": 0.023126445710659027, + "rewards/accuracies": 1.0, + "rewards/generated": -257.2391357421875, + "rewards/margins": 268.28338623046875, + "rewards/real": 11.044289588928223, + "step": 137 + }, + { + "epoch": 0.58, + "grad_norm": 39.08625683834169, + "learning_rate": 4.6474358974358975e-07, + "logits/generated": -2.0943126678466797, + "logits/oppo_generated": -2.89731502532959, + "logits/oppo_real": -2.861166000366211, + "logits/real": -2.182774543762207, + "logps/generated": -280.51513671875, + "logps/oppo_gen": -61.10588073730469, + "logps/oppo_real": -297.8720703125, + "logps/real": -302.93011474609375, + "loss": 0.2691, + "loss/gen": 0.0, + "loss/real": 0.24881529808044434, + "rewards/accuracies": 1.0, + "rewards/generated": -219.40927124023438, + "rewards/margins": 214.35122680664062, + "rewards/real": -5.058034896850586, + "step": 138 + }, + { + "epoch": 0.58, + "grad_norm": 36.347380649479064, + "learning_rate": 4.643874643874643e-07, + "logits/generated": -1.9154211282730103, + "logits/oppo_generated": -2.8648695945739746, + "logits/oppo_real": -2.711393356323242, + "logits/real": -2.319422960281372, + "logps/generated": -334.3785400390625, + "logps/oppo_gen": -111.59371948242188, + "logps/oppo_real": -521.255859375, + "logps/real": -498.578857421875, + "loss": 0.2889, + "loss/gen": 0.0, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -222.78482055664062, + "rewards/margins": 245.46185302734375, + "rewards/real": 22.677024841308594, + "step": 139 + }, + { + "epoch": 0.59, + "grad_norm": 32.38887627937445, + "learning_rate": 4.64031339031339e-07, + "logits/generated": -2.1019668579101562, + "logits/oppo_generated": -2.8064088821411133, + "logits/oppo_real": -2.845989227294922, + "logits/real": -2.034879446029663, + "logps/generated": -286.20269775390625, + "logps/oppo_gen": -52.78784942626953, + "logps/oppo_real": -172.55088806152344, + "logps/real": -191.98782348632812, + "loss": 0.2025, + "loss/gen": 0.0, + "loss/real": 0.3528357446193695, + "rewards/accuracies": 0.875, + "rewards/generated": -233.41485595703125, + "rewards/margins": 213.9779052734375, + "rewards/real": -19.43694305419922, + "step": 140 + }, + { + "epoch": 0.59, + "grad_norm": 40.25526611755675, + "learning_rate": 4.6367521367521367e-07, + "logits/generated": -2.0735549926757812, + "logits/oppo_generated": -3.0264251232147217, + "logits/oppo_real": -2.836057186126709, + "logits/real": -2.4234282970428467, + "logps/generated": -314.87933349609375, + "logps/oppo_gen": -74.337158203125, + "logps/oppo_real": -371.032470703125, + "logps/real": -335.6429138183594, + "loss": 0.1941, + "loss/gen": 0.0, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -240.5421905517578, + "rewards/margins": 275.9317321777344, + "rewards/real": 35.38955307006836, + "step": 141 + }, + { + "epoch": 0.59, + "grad_norm": 63.756242035094104, + "learning_rate": 4.633190883190883e-07, + "logits/generated": -2.2163453102111816, + "logits/oppo_generated": -2.876476764678955, + "logits/oppo_real": -2.912707805633545, + "logits/real": -2.1965935230255127, + "logps/generated": -285.357666015625, + "logps/oppo_gen": -90.53692626953125, + "logps/oppo_real": -383.74615478515625, + "logps/real": -361.18463134765625, + "loss": 0.2526, + "loss/gen": 0.1636572778224945, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -194.82073974609375, + "rewards/margins": 217.3822784423828, + "rewards/real": 22.561546325683594, + "step": 142 + }, + { + "epoch": 0.6, + "grad_norm": 40.72590533351438, + "learning_rate": 4.6296296296296297e-07, + "logits/generated": -2.3514795303344727, + "logits/oppo_generated": -2.9819746017456055, + "logits/oppo_real": -3.1959123611450195, + "logits/real": -2.3554928302764893, + "logps/generated": -384.0709228515625, + "logps/oppo_gen": -152.70217895507812, + "logps/oppo_real": -483.54266357421875, + "logps/real": -457.64434814453125, + "loss": 0.2946, + "loss/gen": 0.06552544236183167, + "loss/real": 0.027411267161369324, + "rewards/accuracies": 1.0, + "rewards/generated": -231.36874389648438, + "rewards/margins": 257.2670593261719, + "rewards/real": 25.898303985595703, + "step": 143 + }, + { + "epoch": 0.6, + "grad_norm": 43.21577996166808, + "learning_rate": 4.626068376068376e-07, + "logits/generated": -2.227564811706543, + "logits/oppo_generated": -2.7378830909729004, + "logits/oppo_real": -3.110536813735962, + "logits/real": -2.0705103874206543, + "logps/generated": -332.7789306640625, + "logps/oppo_gen": -86.0918960571289, + "logps/oppo_real": -447.7939147949219, + "logps/real": -454.8052978515625, + "loss": 0.3252, + "loss/gen": 0.0, + "loss/real": 0.17898190021514893, + "rewards/accuracies": 1.0, + "rewards/generated": -246.68701171875, + "rewards/margins": 239.67562866210938, + "rewards/real": -7.011386871337891, + "step": 144 + }, + { + "epoch": 0.61, + "grad_norm": 49.99822018316627, + "learning_rate": 4.622507122507122e-07, + "logits/generated": -2.2606253623962402, + "logits/oppo_generated": -2.7491419315338135, + "logits/oppo_real": -3.191051483154297, + "logits/real": -1.9832546710968018, + "logps/generated": -342.39007568359375, + "logps/oppo_gen": -96.26548767089844, + "logps/oppo_real": -305.7531433105469, + "logps/real": -301.7978820800781, + "loss": 0.2736, + "loss/gen": 0.2276860624551773, + "loss/real": 0.10041482746601105, + "rewards/accuracies": 1.0, + "rewards/generated": -246.1245880126953, + "rewards/margins": 250.07986450195312, + "rewards/real": 3.9552855491638184, + "step": 145 + }, + { + "epoch": 0.61, + "grad_norm": 55.35757344302584, + "learning_rate": 4.618945868945869e-07, + "logits/generated": -1.8759946823120117, + "logits/oppo_generated": -2.8662476539611816, + "logits/oppo_real": -2.7619881629943848, + "logits/real": -2.2025985717773438, + "logps/generated": -321.98046875, + "logps/oppo_gen": -76.39656066894531, + "logps/oppo_real": -342.36138916015625, + "logps/real": -322.8648681640625, + "loss": 0.302, + "loss/gen": 0.0, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -245.58392333984375, + "rewards/margins": 265.0804748535156, + "rewards/real": 19.496538162231445, + "step": 146 + }, + { + "epoch": 0.62, + "grad_norm": 28.60285672945864, + "learning_rate": 4.6153846153846156e-07, + "logits/generated": -2.0779922008514404, + "logits/oppo_generated": -2.973456859588623, + "logits/oppo_real": -2.9541869163513184, + "logits/real": -2.259559154510498, + "logps/generated": -264.32720947265625, + "logps/oppo_gen": -58.52758026123047, + "logps/oppo_real": -196.6337127685547, + "logps/real": -206.65615844726562, + "loss": 0.2171, + "loss/gen": 0.16810482740402222, + "loss/real": 0.20324894785881042, + "rewards/accuracies": 1.0, + "rewards/generated": -205.79962158203125, + "rewards/margins": 195.77719116210938, + "rewards/real": -10.022433280944824, + "step": 147 + }, + { + "epoch": 0.62, + "grad_norm": 34.86539756356759, + "learning_rate": 4.6118233618233613e-07, + "logits/generated": -2.0752716064453125, + "logits/oppo_generated": -2.9579458236694336, + "logits/oppo_real": -2.8345115184783936, + "logits/real": -2.342416763305664, + "logps/generated": -335.4797668457031, + "logps/oppo_gen": -86.37559509277344, + "logps/oppo_real": -329.4002685546875, + "logps/real": -338.41485595703125, + "loss": 0.2448, + "loss/gen": 0.02429106831550598, + "loss/real": 0.24715474247932434, + "rewards/accuracies": 1.0, + "rewards/generated": -249.10418701171875, + "rewards/margins": 240.08961486816406, + "rewards/real": -9.01455020904541, + "step": 148 + }, + { + "epoch": 0.62, + "grad_norm": 38.988763217452316, + "learning_rate": 4.608262108262108e-07, + "logits/generated": -1.6155552864074707, + "logits/oppo_generated": -2.4297678470611572, + "logits/oppo_real": -2.5349526405334473, + "logits/real": -1.6601029634475708, + "logps/generated": -383.1607666015625, + "logps/oppo_gen": -139.25880432128906, + "logps/oppo_real": -366.9024658203125, + "logps/real": -382.0296630859375, + "loss": 0.2172, + "loss/gen": 0.0033193975687026978, + "loss/real": 0.2846258878707886, + "rewards/accuracies": 0.875, + "rewards/generated": -243.90196228027344, + "rewards/margins": 228.77481079101562, + "rewards/real": -15.12716293334961, + "step": 149 + }, + { + "epoch": 0.63, + "grad_norm": 48.59085099873484, + "learning_rate": 4.6047008547008543e-07, + "logits/generated": -1.6524815559387207, + "logits/oppo_generated": -2.59027099609375, + "logits/oppo_real": -2.5751681327819824, + "logits/real": -1.8144464492797852, + "logps/generated": -302.7337646484375, + "logps/oppo_gen": -44.13750076293945, + "logps/oppo_real": -126.39328002929688, + "logps/real": -168.95840454101562, + "loss": 0.2802, + "loss/gen": 0.0, + "loss/real": 0.5524503588676453, + "rewards/accuracies": 1.0, + "rewards/generated": -258.5962829589844, + "rewards/margins": 216.03115844726562, + "rewards/real": -42.56513214111328, + "step": 150 + }, + { + "epoch": 0.63, + "grad_norm": 30.33221914180376, + "learning_rate": 4.601139601139601e-07, + "logits/generated": -1.917588710784912, + "logits/oppo_generated": -2.8061888217926025, + "logits/oppo_real": -2.885352611541748, + "logits/real": -2.1351234912872314, + "logps/generated": -359.21435546875, + "logps/oppo_gen": -82.9956283569336, + "logps/oppo_real": -287.7582702636719, + "logps/real": -278.5579833984375, + "loss": 0.1493, + "loss/gen": 0.0, + "loss/real": 0.013260193169116974, + "rewards/accuracies": 1.0, + "rewards/generated": -276.21868896484375, + "rewards/margins": 285.4189453125, + "rewards/real": 9.200270652770996, + "step": 151 + }, + { + "epoch": 0.64, + "grad_norm": 41.78845855681551, + "learning_rate": 4.5975783475783473e-07, + "logits/generated": -1.7056605815887451, + "logits/oppo_generated": -2.6804826259613037, + "logits/oppo_real": -2.560675621032715, + "logits/real": -2.0222690105438232, + "logps/generated": -343.30615234375, + "logps/oppo_gen": -125.20469665527344, + "logps/oppo_real": -214.75454711914062, + "logps/real": -228.00494384765625, + "loss": 0.2917, + "loss/gen": 0.05971069633960724, + "loss/real": 0.2809779942035675, + "rewards/accuracies": 0.875, + "rewards/generated": -218.10145568847656, + "rewards/margins": 204.85104370117188, + "rewards/real": -13.250406265258789, + "step": 152 + }, + { + "epoch": 0.64, + "grad_norm": 40.09102040347923, + "learning_rate": 4.5940170940170935e-07, + "logits/generated": -1.9970121383666992, + "logits/oppo_generated": -2.8161306381225586, + "logits/oppo_real": -2.873737096786499, + "logits/real": -2.104854106903076, + "logps/generated": -301.53515625, + "logps/oppo_gen": -39.4675178527832, + "logps/oppo_real": -94.7720718383789, + "logps/real": -111.1379165649414, + "loss": 0.2653, + "loss/gen": 0.4245451092720032, + "loss/real": 0.26902616024017334, + "rewards/accuracies": 0.875, + "rewards/generated": -262.0676574707031, + "rewards/margins": 245.70181274414062, + "rewards/real": -16.365846633911133, + "step": 153 + }, + { + "epoch": 0.64, + "grad_norm": 46.05510297204453, + "learning_rate": 4.59045584045584e-07, + "logits/generated": -1.9282357692718506, + "logits/oppo_generated": -2.754338026046753, + "logits/oppo_real": -2.6611428260803223, + "logits/real": -2.1131625175476074, + "logps/generated": -287.21087646484375, + "logps/oppo_gen": -53.64311981201172, + "logps/oppo_real": -189.60964965820312, + "logps/real": -202.54542541503906, + "loss": 0.2588, + "loss/gen": 0.0, + "loss/real": 0.2690971791744232, + "rewards/accuracies": 1.0, + "rewards/generated": -233.5677490234375, + "rewards/margins": 220.6319580078125, + "rewards/real": -12.93579387664795, + "step": 154 + }, + { + "epoch": 0.65, + "grad_norm": 24.115036245971748, + "learning_rate": 4.586894586894587e-07, + "logits/generated": -2.1603193283081055, + "logits/oppo_generated": -2.8700437545776367, + "logits/oppo_real": -3.012883186340332, + "logits/real": -2.2183475494384766, + "logps/generated": -321.8924560546875, + "logps/oppo_gen": -64.43563842773438, + "logps/oppo_real": -366.68572998046875, + "logps/real": -341.61822509765625, + "loss": 0.185, + "loss/gen": 0.0, + "loss/real": 0.0027311518788337708, + "rewards/accuracies": 1.0, + "rewards/generated": -257.45684814453125, + "rewards/margins": 282.52435302734375, + "rewards/real": 25.067520141601562, + "step": 155 + }, + { + "epoch": 0.65, + "grad_norm": 44.438830907021085, + "learning_rate": 4.5833333333333327e-07, + "logits/generated": -1.8886809349060059, + "logits/oppo_generated": -2.896176338195801, + "logits/oppo_real": -2.7520911693573, + "logits/real": -2.2607998847961426, + "logps/generated": -359.7982177734375, + "logps/oppo_gen": -94.6259765625, + "logps/oppo_real": -329.9571533203125, + "logps/real": -318.4111633300781, + "loss": 0.2217, + "loss/gen": 0.0, + "loss/real": 0.06740894168615341, + "rewards/accuracies": 1.0, + "rewards/generated": -265.1722717285156, + "rewards/margins": 276.71826171875, + "rewards/real": 11.545991897583008, + "step": 156 + }, + { + "epoch": 0.66, + "grad_norm": 36.220556469186114, + "learning_rate": 4.5797720797720794e-07, + "logits/generated": -2.0319433212280273, + "logits/oppo_generated": -2.72526478767395, + "logits/oppo_real": -2.760162591934204, + "logits/real": -2.1123125553131104, + "logps/generated": -314.482421875, + "logps/oppo_gen": -70.71673583984375, + "logps/oppo_real": -391.76458740234375, + "logps/real": -421.2603454589844, + "loss": 0.1594, + "loss/gen": 0.0111636221408844, + "loss/real": 0.3985538184642792, + "rewards/accuracies": 1.0, + "rewards/generated": -243.76568603515625, + "rewards/margins": 214.2699737548828, + "rewards/real": -29.495723724365234, + "step": 157 + }, + { + "epoch": 0.66, + "grad_norm": 63.09100647204996, + "learning_rate": 4.576210826210826e-07, + "logits/generated": -2.4284703731536865, + "logits/oppo_generated": -2.979785919189453, + "logits/oppo_real": -3.2641677856445312, + "logits/real": -2.3469204902648926, + "logps/generated": -347.482421875, + "logps/oppo_gen": -92.89317321777344, + "logps/oppo_real": -330.3245849609375, + "logps/real": -342.6152038574219, + "loss": 0.2098, + "loss/gen": 0.051213398575782776, + "loss/real": 0.2405387908220291, + "rewards/accuracies": 0.875, + "rewards/generated": -254.58921813964844, + "rewards/margins": 242.298583984375, + "rewards/real": -12.290639877319336, + "step": 158 + }, + { + "epoch": 0.67, + "grad_norm": 61.13583908242775, + "learning_rate": 4.5726495726495724e-07, + "logits/generated": -1.760241985321045, + "logits/oppo_generated": -2.775574207305908, + "logits/oppo_real": -2.598371744155884, + "logits/real": -2.1788487434387207, + "logps/generated": -302.0619201660156, + "logps/oppo_gen": -65.71693420410156, + "logps/oppo_real": -220.19737243652344, + "logps/real": -206.774658203125, + "loss": 0.1849, + "loss/gen": 0.0, + "loss/real": 0.006878167390823364, + "rewards/accuracies": 1.0, + "rewards/generated": -236.34498596191406, + "rewards/margins": 249.7677001953125, + "rewards/real": 13.422710418701172, + "step": 159 + }, + { + "epoch": 0.67, + "grad_norm": 30.892653140615913, + "learning_rate": 4.569088319088319e-07, + "logits/generated": -1.6592109203338623, + "logits/oppo_generated": -2.6892812252044678, + "logits/oppo_real": -2.527797222137451, + "logits/real": -2.02337646484375, + "logps/generated": -255.10365295410156, + "logps/oppo_gen": -56.507102966308594, + "logps/oppo_real": -203.99942016601562, + "logps/real": -214.63226318359375, + "loss": 0.2654, + "loss/gen": 0.4655250906944275, + "loss/real": 0.16440606117248535, + "rewards/accuracies": 0.875, + "rewards/generated": -198.5965576171875, + "rewards/margins": 187.96371459960938, + "rewards/real": -10.632861137390137, + "step": 160 + }, + { + "epoch": 0.67, + "grad_norm": 29.88231515108999, + "learning_rate": 4.5655270655270654e-07, + "logits/generated": -2.1158018112182617, + "logits/oppo_generated": -2.892515182495117, + "logits/oppo_real": -2.87583589553833, + "logits/real": -2.2688913345336914, + "logps/generated": -284.7113342285156, + "logps/oppo_gen": -70.63409423828125, + "logps/oppo_real": -236.45480346679688, + "logps/real": -220.61276245117188, + "loss": 0.2059, + "loss/gen": 0.16342338919639587, + "loss/real": 0.02121652662754059, + "rewards/accuracies": 1.0, + "rewards/generated": -214.07723999023438, + "rewards/margins": 229.91929626464844, + "rewards/real": 15.842063903808594, + "step": 161 + }, + { + "epoch": 0.68, + "grad_norm": 38.31896420283467, + "learning_rate": 4.5619658119658116e-07, + "logits/generated": -1.7529704570770264, + "logits/oppo_generated": -2.2372124195098877, + "logits/oppo_real": -2.6531500816345215, + "logits/real": -1.4958126544952393, + "logps/generated": -241.34754943847656, + "logps/oppo_gen": -49.9699821472168, + "logps/oppo_real": -257.7629699707031, + "logps/real": -258.93359375, + "loss": 0.2438, + "loss/gen": 0.5050817728042603, + "loss/real": 0.21669313311576843, + "rewards/accuracies": 1.0, + "rewards/generated": -191.3775634765625, + "rewards/margins": 190.20692443847656, + "rewards/real": -1.170628547668457, + "step": 162 + }, + { + "epoch": 0.68, + "grad_norm": 59.104889302435254, + "learning_rate": 4.5584045584045584e-07, + "logits/generated": -1.8979967832565308, + "logits/oppo_generated": -2.6594979763031006, + "logits/oppo_real": -2.72336483001709, + "logits/real": -1.9622243642807007, + "logps/generated": -281.2121887207031, + "logps/oppo_gen": -69.47285461425781, + "logps/oppo_real": -203.925048828125, + "logps/real": -180.85427856445312, + "loss": 0.2324, + "loss/gen": 0.22617992758750916, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -211.7393341064453, + "rewards/margins": 234.8101043701172, + "rewards/real": 23.070770263671875, + "step": 163 + }, + { + "epoch": 0.69, + "grad_norm": 53.67125863911009, + "learning_rate": 4.5548433048433046e-07, + "logits/generated": -2.1446683406829834, + "logits/oppo_generated": -2.84741473197937, + "logits/oppo_real": -2.9322423934936523, + "logits/real": -2.304154396057129, + "logps/generated": -333.1722106933594, + "logps/oppo_gen": -72.28129577636719, + "logps/oppo_real": -342.0706787109375, + "logps/real": -361.2486572265625, + "loss": 0.2267, + "loss/gen": 0.0, + "loss/real": 0.2817186713218689, + "rewards/accuracies": 1.0, + "rewards/generated": -260.8908996582031, + "rewards/margins": 241.7129364013672, + "rewards/real": -19.177961349487305, + "step": 164 + }, + { + "epoch": 0.69, + "grad_norm": 41.20307611629637, + "learning_rate": 4.551282051282051e-07, + "logits/generated": -2.2117128372192383, + "logits/oppo_generated": -2.8123486042022705, + "logits/oppo_real": -2.9484448432922363, + "logits/real": -2.303635597229004, + "logps/generated": -301.5762634277344, + "logps/oppo_gen": -78.67784118652344, + "logps/oppo_real": -224.94638061523438, + "logps/real": -219.7537841796875, + "loss": 0.2616, + "loss/gen": 0.048871323466300964, + "loss/real": 0.12874145805835724, + "rewards/accuracies": 1.0, + "rewards/generated": -222.8984375, + "rewards/margins": 228.09103393554688, + "rewards/real": 5.192612648010254, + "step": 165 + }, + { + "epoch": 0.69, + "grad_norm": 64.1642332466071, + "learning_rate": 4.5477207977207976e-07, + "logits/generated": -2.0593137741088867, + "logits/oppo_generated": -2.6430654525756836, + "logits/oppo_real": -2.7417783737182617, + "logits/real": -2.0025062561035156, + "logps/generated": -283.02716064453125, + "logps/oppo_gen": -63.871150970458984, + "logps/oppo_real": -224.14703369140625, + "logps/real": -193.3045654296875, + "loss": 0.1827, + "loss/gen": 0.320221483707428, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -219.15602111816406, + "rewards/margins": 249.99851989746094, + "rewards/real": 30.842487335205078, + "step": 166 + }, + { + "epoch": 0.7, + "grad_norm": 44.01251874910847, + "learning_rate": 4.544159544159544e-07, + "logits/generated": -2.1278610229492188, + "logits/oppo_generated": -2.757966995239258, + "logits/oppo_real": -2.906935691833496, + "logits/real": -2.026312828063965, + "logps/generated": -245.8863525390625, + "logps/oppo_gen": -53.980133056640625, + "logps/oppo_real": -168.99293518066406, + "logps/real": -175.02117919921875, + "loss": 0.1984, + "loss/gen": 0.3063175082206726, + "loss/real": 0.224711611866951, + "rewards/accuracies": 1.0, + "rewards/generated": -191.90621948242188, + "rewards/margins": 185.87799072265625, + "rewards/real": -6.028232574462891, + "step": 167 + }, + { + "epoch": 0.7, + "grad_norm": 89.25379401512157, + "learning_rate": 4.5405982905982905e-07, + "logits/generated": -1.7046711444854736, + "logits/oppo_generated": -2.34848690032959, + "logits/oppo_real": -2.549453020095825, + "logits/real": -1.6289647817611694, + "logps/generated": -291.48760986328125, + "logps/oppo_gen": -41.99907684326172, + "logps/oppo_real": -137.05735778808594, + "logps/real": -138.64942932128906, + "loss": 0.2167, + "loss/gen": 0.0, + "loss/real": 0.2769484519958496, + "rewards/accuracies": 1.0, + "rewards/generated": -249.48851013183594, + "rewards/margins": 247.8964385986328, + "rewards/real": -1.5920724868774414, + "step": 168 + }, + { + "epoch": 0.71, + "grad_norm": 85.32059828779624, + "learning_rate": 4.537037037037037e-07, + "logits/generated": -1.9084529876708984, + "logits/oppo_generated": -2.5094847679138184, + "logits/oppo_real": -2.6891722679138184, + "logits/real": -1.900597095489502, + "logps/generated": -450.51837158203125, + "logps/oppo_gen": -68.40258026123047, + "logps/oppo_real": -223.42794799804688, + "logps/real": -218.4866943359375, + "loss": 0.2427, + "loss/gen": 0.2575632333755493, + "loss/real": 0.20119953155517578, + "rewards/accuracies": 1.0, + "rewards/generated": -382.1158142089844, + "rewards/margins": 387.05706787109375, + "rewards/real": 4.941247940063477, + "step": 169 + }, + { + "epoch": 0.71, + "grad_norm": 63.92922684439313, + "learning_rate": 4.533475783475783e-07, + "logits/generated": -1.9882678985595703, + "logits/oppo_generated": -2.8935999870300293, + "logits/oppo_real": -2.775484561920166, + "logits/real": -2.3655059337615967, + "logps/generated": -269.10772705078125, + "logps/oppo_gen": -50.93283462524414, + "logps/oppo_real": -316.0002136230469, + "logps/real": -292.96356201171875, + "loss": 0.2371, + "loss/gen": 0.10934163630008698, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -218.1748809814453, + "rewards/margins": 241.2115478515625, + "rewards/real": 23.036657333374023, + "step": 170 + }, + { + "epoch": 0.72, + "grad_norm": 109.75823929778575, + "learning_rate": 4.5299145299145297e-07, + "logits/generated": -2.2796077728271484, + "logits/oppo_generated": -2.8526816368103027, + "logits/oppo_real": -3.2386014461517334, + "logits/real": -2.2865777015686035, + "logps/generated": -342.9628601074219, + "logps/oppo_gen": -113.54923248291016, + "logps/oppo_real": -351.7125549316406, + "logps/real": -339.4488220214844, + "loss": 0.2334, + "loss/gen": 0.26814204454421997, + "loss/real": 0.026034392416477203, + "rewards/accuracies": 0.875, + "rewards/generated": -229.4136199951172, + "rewards/margins": 241.67733764648438, + "rewards/real": 12.263713836669922, + "step": 171 + }, + { + "epoch": 0.72, + "grad_norm": 98.73111963497122, + "learning_rate": 4.5263532763532765e-07, + "logits/generated": -2.3369998931884766, + "logits/oppo_generated": -2.9850940704345703, + "logits/oppo_real": -3.0315611362457275, + "logits/real": -2.358889102935791, + "logps/generated": -314.4837341308594, + "logps/oppo_gen": -61.65489196777344, + "logps/oppo_real": -151.10653686523438, + "logps/real": -144.9267578125, + "loss": 0.2096, + "loss/gen": 0.0, + "loss/real": 0.1058526411652565, + "rewards/accuracies": 1.0, + "rewards/generated": -252.82882690429688, + "rewards/margins": 259.00860595703125, + "rewards/real": 6.179767608642578, + "step": 172 + }, + { + "epoch": 0.72, + "grad_norm": 43.88860577412861, + "learning_rate": 4.522792022792022e-07, + "logits/generated": -2.205157995223999, + "logits/oppo_generated": -2.891350746154785, + "logits/oppo_real": -3.0990657806396484, + "logits/real": -2.274484634399414, + "logps/generated": -485.29437255859375, + "logps/oppo_gen": -212.02532958984375, + "logps/oppo_real": -549.8078002929688, + "logps/real": -530.3729248046875, + "loss": 0.2064, + "loss/gen": 0.0, + "loss/real": 0.04228302091360092, + "rewards/accuracies": 1.0, + "rewards/generated": -273.2690734863281, + "rewards/margins": 292.70391845703125, + "rewards/real": 19.434844970703125, + "step": 173 + }, + { + "epoch": 0.73, + "grad_norm": 31.48113836496238, + "learning_rate": 4.519230769230769e-07, + "logits/generated": -1.9153656959533691, + "logits/oppo_generated": -2.861656904220581, + "logits/oppo_real": -2.749734878540039, + "logits/real": -2.23591947555542, + "logps/generated": -283.47076416015625, + "logps/oppo_gen": -52.08341598510742, + "logps/oppo_real": -268.2560119628906, + "logps/real": -241.11734008789062, + "loss": 0.146, + "loss/gen": 0.0, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -231.38734436035156, + "rewards/margins": 258.5260314941406, + "rewards/real": 27.138673782348633, + "step": 174 + }, + { + "epoch": 0.73, + "grad_norm": 55.337732592124695, + "learning_rate": 4.5156695156695157e-07, + "logits/generated": -2.0015411376953125, + "logits/oppo_generated": -2.8331031799316406, + "logits/oppo_real": -2.8462958335876465, + "logits/real": -2.170417308807373, + "logps/generated": -387.2781677246094, + "logps/oppo_gen": -78.92254638671875, + "logps/oppo_real": -224.86373901367188, + "logps/real": -244.69886779785156, + "loss": 0.2824, + "loss/gen": 0.0006727427244186401, + "loss/real": 0.33926331996917725, + "rewards/accuracies": 1.0, + "rewards/generated": -308.3556213378906, + "rewards/margins": 288.5204772949219, + "rewards/real": -19.835121154785156, + "step": 175 + }, + { + "epoch": 0.74, + "grad_norm": 143.149881843734, + "learning_rate": 4.512108262108262e-07, + "logits/generated": -2.24777889251709, + "logits/oppo_generated": -2.879185199737549, + "logits/oppo_real": -2.873112678527832, + "logits/real": -2.3650031089782715, + "logps/generated": -310.72698974609375, + "logps/oppo_gen": -49.27460479736328, + "logps/oppo_real": -375.43463134765625, + "logps/real": -367.69134521484375, + "loss": 0.2742, + "loss/gen": 0.003482311964035034, + "loss/real": 0.10677148401737213, + "rewards/accuracies": 1.0, + "rewards/generated": -261.4523620605469, + "rewards/margins": 269.1956481933594, + "rewards/real": 7.7432966232299805, + "step": 176 + }, + { + "epoch": 0.74, + "grad_norm": 105.91241867546742, + "learning_rate": 4.5085470085470087e-07, + "logits/generated": -2.244475841522217, + "logits/oppo_generated": -3.0462043285369873, + "logits/oppo_real": -3.1089582443237305, + "logits/real": -2.443125009536743, + "logps/generated": -331.522705078125, + "logps/oppo_gen": -77.79332733154297, + "logps/oppo_real": -319.2231750488281, + "logps/real": -294.63909912109375, + "loss": 0.2243, + "loss/gen": 0.0, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -253.72940063476562, + "rewards/margins": 278.3134765625, + "rewards/real": 24.58407974243164, + "step": 177 + }, + { + "epoch": 0.74, + "grad_norm": 39.86911416934715, + "learning_rate": 4.5049857549857543e-07, + "logits/generated": -2.201946973800659, + "logits/oppo_generated": -2.815687656402588, + "logits/oppo_real": -2.9501237869262695, + "logits/real": -2.219613552093506, + "logps/generated": -352.42193603515625, + "logps/oppo_gen": -103.51431274414062, + "logps/oppo_real": -308.8333435058594, + "logps/real": -304.56707763671875, + "loss": 0.2701, + "loss/gen": 0.01318824291229248, + "loss/real": 0.14666341245174408, + "rewards/accuracies": 1.0, + "rewards/generated": -248.90762329101562, + "rewards/margins": 253.17388916015625, + "rewards/real": 4.266262054443359, + "step": 178 + }, + { + "epoch": 0.75, + "grad_norm": 52.815620285042094, + "learning_rate": 4.501424501424501e-07, + "logits/generated": -2.0335657596588135, + "logits/oppo_generated": -2.779146194458008, + "logits/oppo_real": -2.8336267471313477, + "logits/real": -2.1648244857788086, + "logps/generated": -302.6240234375, + "logps/oppo_gen": -72.71639251708984, + "logps/oppo_real": -196.57557678222656, + "logps/real": -177.44334411621094, + "loss": 0.1641, + "loss/gen": 0.03493678569793701, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -229.90762329101562, + "rewards/margins": 249.03985595703125, + "rewards/real": 19.132234573364258, + "step": 179 + }, + { + "epoch": 0.75, + "grad_norm": 53.27667685652888, + "learning_rate": 4.497863247863248e-07, + "logits/generated": -2.264275074005127, + "logits/oppo_generated": -2.8425636291503906, + "logits/oppo_real": -2.9093685150146484, + "logits/real": -2.2906460762023926, + "logps/generated": -324.7326354980469, + "logps/oppo_gen": -95.93893432617188, + "logps/oppo_real": -207.11392211914062, + "logps/real": -191.6349334716797, + "loss": 0.2242, + "loss/gen": 0.2739104628562927, + "loss/real": 0.014250755310058594, + "rewards/accuracies": 1.0, + "rewards/generated": -228.793701171875, + "rewards/margins": 244.272705078125, + "rewards/real": 15.478999137878418, + "step": 180 + }, + { + "epoch": 0.76, + "grad_norm": 71.11992993242998, + "learning_rate": 4.494301994301994e-07, + "logits/generated": -2.115266799926758, + "logits/oppo_generated": -2.8224010467529297, + "logits/oppo_real": -2.778409957885742, + "logits/real": -2.3558645248413086, + "logps/generated": -344.7413024902344, + "logps/oppo_gen": -88.16463470458984, + "logps/oppo_real": -239.9169921875, + "logps/real": -255.3130645751953, + "loss": 0.2213, + "loss/gen": 0.029619291424751282, + "loss/real": 0.297105997800827, + "rewards/accuracies": 1.0, + "rewards/generated": -256.57666015625, + "rewards/margins": 241.18060302734375, + "rewards/real": -15.396068572998047, + "step": 181 + }, + { + "epoch": 0.76, + "grad_norm": 86.04380073271548, + "learning_rate": 4.4907407407407403e-07, + "logits/generated": -2.3114571571350098, + "logits/oppo_generated": -2.9657952785491943, + "logits/oppo_real": -2.9425137042999268, + "logits/real": -2.491687059402466, + "logps/generated": -314.3035583496094, + "logps/oppo_gen": -76.42547607421875, + "logps/oppo_real": -261.8043518066406, + "logps/real": -245.9702911376953, + "loss": 0.3062, + "loss/gen": 0.0, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -237.87808227539062, + "rewards/margins": 253.712158203125, + "rewards/real": 15.834070205688477, + "step": 182 + }, + { + "epoch": 0.77, + "grad_norm": 106.78727210850083, + "learning_rate": 4.487179487179487e-07, + "logits/generated": -1.8033708333969116, + "logits/oppo_generated": -2.6656646728515625, + "logits/oppo_real": -2.512063980102539, + "logits/real": -2.0389981269836426, + "logps/generated": -216.2008056640625, + "logps/oppo_gen": -61.16596603393555, + "logps/oppo_real": -89.70797729492188, + "logps/real": -71.5185317993164, + "loss": 0.1824, + "loss/gen": 0.8215519785881042, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -155.03482055664062, + "rewards/margins": 173.22427368164062, + "rewards/real": 18.189449310302734, + "step": 183 + }, + { + "epoch": 0.77, + "grad_norm": 101.12620711578676, + "learning_rate": 4.4836182336182333e-07, + "logits/generated": -1.9014736413955688, + "logits/oppo_generated": -2.679591655731201, + "logits/oppo_real": -2.5152084827423096, + "logits/real": -2.1934709548950195, + "logps/generated": -450.9914855957031, + "logps/oppo_gen": -134.39280700683594, + "logps/oppo_real": -353.8466491699219, + "logps/real": -347.7996520996094, + "loss": 0.2277, + "loss/gen": 0.11133264005184174, + "loss/real": 0.03321786969900131, + "rewards/accuracies": 1.0, + "rewards/generated": -316.59869384765625, + "rewards/margins": 322.64569091796875, + "rewards/real": 6.047005653381348, + "step": 184 + }, + { + "epoch": 0.77, + "grad_norm": 35.871571659467506, + "learning_rate": 4.48005698005698e-07, + "logits/generated": -2.210495710372925, + "logits/oppo_generated": -2.8852622509002686, + "logits/oppo_real": -2.9888343811035156, + "logits/real": -2.316572427749634, + "logps/generated": -349.71429443359375, + "logps/oppo_gen": -86.57408142089844, + "logps/oppo_real": -353.78594970703125, + "logps/real": -361.350830078125, + "loss": 0.1252, + "loss/gen": 0.0, + "loss/real": 0.25374865531921387, + "rewards/accuracies": 1.0, + "rewards/generated": -263.14019775390625, + "rewards/margins": 255.5753631591797, + "rewards/real": -7.564859867095947, + "step": 185 + }, + { + "epoch": 0.78, + "grad_norm": 30.56805032519401, + "learning_rate": 4.476495726495726e-07, + "logits/generated": -2.2187647819519043, + "logits/oppo_generated": -2.894904136657715, + "logits/oppo_real": -2.8833250999450684, + "logits/real": -2.351996421813965, + "logps/generated": -333.04010009765625, + "logps/oppo_gen": -97.552490234375, + "logps/oppo_real": -446.60357666015625, + "logps/real": -427.14239501953125, + "loss": 0.1785, + "loss/gen": 0.018305152654647827, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -235.4876251220703, + "rewards/margins": 254.9488067626953, + "rewards/real": 19.46118927001953, + "step": 186 + }, + { + "epoch": 0.78, + "grad_norm": 65.2351178849916, + "learning_rate": 4.4729344729344725e-07, + "logits/generated": -2.244483709335327, + "logits/oppo_generated": -2.9238195419311523, + "logits/oppo_real": -2.928109645843506, + "logits/real": -2.411482810974121, + "logps/generated": -433.5346984863281, + "logps/oppo_gen": -99.34373474121094, + "logps/oppo_real": -381.1275634765625, + "logps/real": -367.78045654296875, + "loss": 0.2426, + "loss/gen": 0.0, + "loss/real": 0.021128714084625244, + "rewards/accuracies": 1.0, + "rewards/generated": -334.19097900390625, + "rewards/margins": 347.5380859375, + "rewards/real": 13.347097396850586, + "step": 187 + }, + { + "epoch": 0.79, + "grad_norm": 73.61953834262512, + "learning_rate": 4.469373219373219e-07, + "logits/generated": -1.8134526014328003, + "logits/oppo_generated": -2.7080626487731934, + "logits/oppo_real": -2.5767087936401367, + "logits/real": -2.030604124069214, + "logps/generated": -408.92938232421875, + "logps/oppo_gen": -46.502037048339844, + "logps/oppo_real": -149.05059814453125, + "logps/real": -196.5388641357422, + "loss": 0.2718, + "loss/gen": 0.0, + "loss/real": 0.5971862077713013, + "rewards/accuracies": 1.0, + "rewards/generated": -362.4273376464844, + "rewards/margins": 314.9390869140625, + "rewards/real": -47.488250732421875, + "step": 188 + }, + { + "epoch": 0.79, + "grad_norm": 37.51273384059488, + "learning_rate": 4.465811965811966e-07, + "logits/generated": -2.0432450771331787, + "logits/oppo_generated": -2.9217922687530518, + "logits/oppo_real": -3.0358145236968994, + "logits/real": -2.355529546737671, + "logps/generated": -332.09490966796875, + "logps/oppo_gen": -72.13301849365234, + "logps/oppo_real": -295.51861572265625, + "logps/real": -308.53802490234375, + "loss": 0.2465, + "loss/gen": 0.0, + "loss/real": 0.3299695551395416, + "rewards/accuracies": 1.0, + "rewards/generated": -259.96185302734375, + "rewards/margins": 246.94244384765625, + "rewards/real": -13.019420623779297, + "step": 189 + }, + { + "epoch": 0.79, + "grad_norm": 54.30798222723571, + "learning_rate": 4.4622507122507117e-07, + "logits/generated": -2.0533862113952637, + "logits/oppo_generated": -2.7406344413757324, + "logits/oppo_real": -2.799593925476074, + "logits/real": -2.181865930557251, + "logps/generated": -351.9316101074219, + "logps/oppo_gen": -102.60955810546875, + "logps/oppo_real": -305.8299255371094, + "logps/real": -278.4617004394531, + "loss": 0.1727, + "loss/gen": 0.01339229941368103, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -249.32205200195312, + "rewards/margins": 276.6903076171875, + "rewards/real": 27.368215560913086, + "step": 190 + }, + { + "epoch": 0.8, + "grad_norm": 31.494153645830643, + "learning_rate": 4.4586894586894584e-07, + "logits/generated": -2.32715106010437, + "logits/oppo_generated": -2.8220396041870117, + "logits/oppo_real": -3.0663821697235107, + "logits/real": -2.326672077178955, + "logps/generated": -296.7817077636719, + "logps/oppo_gen": -80.95722961425781, + "logps/oppo_real": -339.0364074707031, + "logps/real": -326.7790832519531, + "loss": 0.1931, + "loss/gen": 0.2630905210971832, + "loss/real": 0.00023437291383743286, + "rewards/accuracies": 0.875, + "rewards/generated": -215.82449340820312, + "rewards/margins": 228.08184814453125, + "rewards/real": 12.25734806060791, + "step": 191 + }, + { + "epoch": 0.8, + "grad_norm": 54.0069326966166, + "learning_rate": 4.455128205128205e-07, + "logits/generated": -2.081082344055176, + "logits/oppo_generated": -2.8528313636779785, + "logits/oppo_real": -2.9469070434570312, + "logits/real": -2.2097737789154053, + "logps/generated": -291.0604248046875, + "logps/oppo_gen": -55.95906066894531, + "logps/oppo_real": -228.37322998046875, + "logps/real": -207.25762939453125, + "loss": 0.185, + "loss/gen": 0.0, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -235.10134887695312, + "rewards/margins": 256.2169494628906, + "rewards/real": 21.115596771240234, + "step": 192 + }, + { + "epoch": 0.81, + "grad_norm": 49.09352870335229, + "learning_rate": 4.4515669515669514e-07, + "logits/generated": -2.103522539138794, + "logits/oppo_generated": -2.759657859802246, + "logits/oppo_real": -2.7739434242248535, + "logits/real": -2.251528263092041, + "logps/generated": -327.42047119140625, + "logps/oppo_gen": -55.900001525878906, + "logps/oppo_real": -240.51673889160156, + "logps/real": -249.0882568359375, + "loss": 0.197, + "loss/gen": 0.0, + "loss/real": 0.29675740003585815, + "rewards/accuracies": 0.875, + "rewards/generated": -271.5204772949219, + "rewards/margins": 262.9489440917969, + "rewards/real": -8.571529388427734, + "step": 193 + }, + { + "epoch": 0.81, + "grad_norm": 51.20193296598406, + "learning_rate": 4.448005698005698e-07, + "logits/generated": -2.0290396213531494, + "logits/oppo_generated": -2.714049816131592, + "logits/oppo_real": -2.821863889694214, + "logits/real": -2.0799052715301514, + "logps/generated": -250.7839813232422, + "logps/oppo_gen": -61.66150665283203, + "logps/oppo_real": -281.81561279296875, + "logps/real": -291.30224609375, + "loss": 0.2763, + "loss/gen": 0.3449553847312927, + "loss/real": 0.29574069380760193, + "rewards/accuracies": 1.0, + "rewards/generated": -189.12246704101562, + "rewards/margins": 179.63583374023438, + "rewards/real": -9.486623764038086, + "step": 194 + }, + { + "epoch": 0.82, + "grad_norm": 61.67454112871453, + "learning_rate": 4.444444444444444e-07, + "logits/generated": -1.8745107650756836, + "logits/oppo_generated": -2.7336645126342773, + "logits/oppo_real": -2.6636435985565186, + "logits/real": -2.19765043258667, + "logps/generated": -274.8359375, + "logps/oppo_gen": -66.04891204833984, + "logps/oppo_real": -343.6158447265625, + "logps/real": -310.6270751953125, + "loss": 0.2015, + "loss/gen": 0.22997678816318512, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -208.78701782226562, + "rewards/margins": 241.7758026123047, + "rewards/real": 32.98878479003906, + "step": 195 + }, + { + "epoch": 0.82, + "grad_norm": 53.72602799374224, + "learning_rate": 4.4408831908831906e-07, + "logits/generated": -2.1320905685424805, + "logits/oppo_generated": -3.0542874336242676, + "logits/oppo_real": -2.803119659423828, + "logits/real": -2.5108633041381836, + "logps/generated": -288.25958251953125, + "logps/oppo_gen": -81.553955078125, + "logps/oppo_real": -376.17071533203125, + "logps/real": -342.6852722167969, + "loss": 0.2462, + "loss/gen": 0.2779223918914795, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -206.70559692382812, + "rewards/margins": 240.19102478027344, + "rewards/real": 33.485435485839844, + "step": 196 + }, + { + "epoch": 0.82, + "grad_norm": 25.261345155831098, + "learning_rate": 4.4373219373219373e-07, + "logits/generated": -2.1422460079193115, + "logits/oppo_generated": -2.791293144226074, + "logits/oppo_real": -2.8689441680908203, + "logits/real": -2.3548340797424316, + "logps/generated": -355.26690673828125, + "logps/oppo_gen": -90.10079956054688, + "logps/oppo_real": -387.6597900390625, + "logps/real": -367.0665588378906, + "loss": 0.2392, + "loss/gen": 0.17274703085422516, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -265.1661071777344, + "rewards/margins": 285.75933837890625, + "rewards/real": 20.593231201171875, + "step": 197 + }, + { + "epoch": 0.83, + "grad_norm": 29.314516563430757, + "learning_rate": 4.4337606837606836e-07, + "logits/generated": -2.09847354888916, + "logits/oppo_generated": -2.8356850147247314, + "logits/oppo_real": -2.917833089828491, + "logits/real": -2.1983418464660645, + "logps/generated": -326.11285400390625, + "logps/oppo_gen": -76.40264892578125, + "logps/oppo_real": -278.172607421875, + "logps/real": -253.1783447265625, + "loss": 0.1414, + "loss/gen": 0.12514609098434448, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -249.710205078125, + "rewards/margins": 274.7044677734375, + "rewards/real": 24.9942684173584, + "step": 198 + }, + { + "epoch": 0.83, + "grad_norm": 48.53973837148151, + "learning_rate": 4.43019943019943e-07, + "logits/generated": -2.2969937324523926, + "logits/oppo_generated": -3.0011539459228516, + "logits/oppo_real": -3.069876194000244, + "logits/real": -2.477539539337158, + "logps/generated": -303.3049011230469, + "logps/oppo_gen": -69.13575744628906, + "logps/oppo_real": -340.70343017578125, + "logps/real": -357.84661865234375, + "loss": 0.2047, + "loss/gen": 0.19307354092597961, + "loss/real": 0.3465298116207123, + "rewards/accuracies": 1.0, + "rewards/generated": -234.16915893554688, + "rewards/margins": 217.02597045898438, + "rewards/real": -17.143173217773438, + "step": 199 + }, + { + "epoch": 0.84, + "grad_norm": 40.12700023003137, + "learning_rate": 4.4266381766381765e-07, + "logits/generated": -2.04579758644104, + "logits/oppo_generated": -2.821411609649658, + "logits/oppo_real": -2.9697532653808594, + "logits/real": -2.3006458282470703, + "logps/generated": -352.8709411621094, + "logps/oppo_gen": -94.25292205810547, + "logps/oppo_real": -449.1705322265625, + "logps/real": -422.84283447265625, + "loss": 0.134, + "loss/gen": 0.19948835670948029, + "loss/real": 0.013253934681415558, + "rewards/accuracies": 1.0, + "rewards/generated": -258.6180419921875, + "rewards/margins": 284.94573974609375, + "rewards/real": 26.32770347595215, + "step": 200 + }, + { + "epoch": 0.84, + "grad_norm": 55.41589471188524, + "learning_rate": 4.423076923076923e-07, + "logits/generated": -2.029297351837158, + "logits/oppo_generated": -2.9498441219329834, + "logits/oppo_real": -2.889374017715454, + "logits/real": -2.3880996704101562, + "logps/generated": -370.95904541015625, + "logps/oppo_gen": -93.28401184082031, + "logps/oppo_real": -446.9027099609375, + "logps/real": -425.0364074707031, + "loss": 0.1887, + "loss/gen": 0.0018385052680969238, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -277.6750183105469, + "rewards/margins": 299.541259765625, + "rewards/real": 21.866281509399414, + "step": 201 + }, + { + "epoch": 0.85, + "grad_norm": 56.23495458509325, + "learning_rate": 4.4195156695156695e-07, + "logits/generated": -1.6252273321151733, + "logits/oppo_generated": -2.5877699851989746, + "logits/oppo_real": -2.4145617485046387, + "logits/real": -1.9977871179580688, + "logps/generated": -345.75006103515625, + "logps/oppo_gen": -58.147544860839844, + "logps/oppo_real": -256.63494873046875, + "logps/real": -243.7563018798828, + "loss": 0.2266, + "loss/gen": 0.0, + "loss/real": 0.011003687977790833, + "rewards/accuracies": 1.0, + "rewards/generated": -287.6025085449219, + "rewards/margins": 300.481201171875, + "rewards/real": 12.878662109375, + "step": 202 + }, + { + "epoch": 0.85, + "grad_norm": 66.24101870135868, + "learning_rate": 4.4159544159544157e-07, + "logits/generated": -2.048314094543457, + "logits/oppo_generated": -2.825096607208252, + "logits/oppo_real": -2.919394016265869, + "logits/real": -2.1845545768737793, + "logps/generated": -257.5550842285156, + "logps/oppo_gen": -62.71122360229492, + "logps/oppo_real": -234.44354248046875, + "logps/real": -217.26864624023438, + "loss": 0.1972, + "loss/gen": 0.24772684276103973, + "loss/real": 0.03300228714942932, + "rewards/accuracies": 1.0, + "rewards/generated": -194.84384155273438, + "rewards/margins": 212.01873779296875, + "rewards/real": 17.17490005493164, + "step": 203 + }, + { + "epoch": 0.85, + "grad_norm": 45.537051363422734, + "learning_rate": 4.412393162393162e-07, + "logits/generated": -2.0483438968658447, + "logits/oppo_generated": -2.681910276412964, + "logits/oppo_real": -2.8930723667144775, + "logits/real": -2.0234827995300293, + "logps/generated": -298.2679138183594, + "logps/oppo_gen": -69.35714721679688, + "logps/oppo_real": -321.68878173828125, + "logps/real": -300.1239013671875, + "loss": 0.1831, + "loss/gen": 0.39896392822265625, + "loss/real": 0.027880370616912842, + "rewards/accuracies": 1.0, + "rewards/generated": -228.91075134277344, + "rewards/margins": 250.47564697265625, + "rewards/real": 21.564884185791016, + "step": 204 + }, + { + "epoch": 0.86, + "grad_norm": 51.88816192212102, + "learning_rate": 4.4088319088319087e-07, + "logits/generated": -1.9750864505767822, + "logits/oppo_generated": -2.910146951675415, + "logits/oppo_real": -2.842686653137207, + "logits/real": -2.2580361366271973, + "logps/generated": -364.1310119628906, + "logps/oppo_gen": -55.29602813720703, + "logps/oppo_real": -188.457763671875, + "logps/real": -190.8536834716797, + "loss": 0.1574, + "loss/gen": 0.0, + "loss/real": 0.2326948642730713, + "rewards/accuracies": 1.0, + "rewards/generated": -308.8349609375, + "rewards/margins": 306.4390869140625, + "rewards/real": -2.3959202766418457, + "step": 205 + }, + { + "epoch": 0.86, + "grad_norm": 49.47076354082783, + "learning_rate": 4.4052706552706555e-07, + "logits/generated": -2.08099365234375, + "logits/oppo_generated": -2.9482345581054688, + "logits/oppo_real": -3.0109448432922363, + "logits/real": -2.36570405960083, + "logps/generated": -295.0722961425781, + "logps/oppo_gen": -70.6409912109375, + "logps/oppo_real": -375.189697265625, + "logps/real": -361.89434814453125, + "loss": 0.1832, + "loss/gen": 0.013277322053909302, + "loss/real": 0.03908447176218033, + "rewards/accuracies": 1.0, + "rewards/generated": -224.4313201904297, + "rewards/margins": 237.72665405273438, + "rewards/real": 13.295326232910156, + "step": 206 + }, + { + "epoch": 0.87, + "grad_norm": 23.347891198939145, + "learning_rate": 4.4017094017094017e-07, + "logits/generated": -2.0084404945373535, + "logits/oppo_generated": -2.7811834812164307, + "logits/oppo_real": -2.923962116241455, + "logits/real": -2.1404595375061035, + "logps/generated": -311.7547607421875, + "logps/oppo_gen": -71.71026611328125, + "logps/oppo_real": -353.846923828125, + "logps/real": -363.43988037109375, + "loss": 0.1818, + "loss/gen": 0.030606284737586975, + "loss/real": 0.2629862129688263, + "rewards/accuracies": 1.0, + "rewards/generated": -240.0445098876953, + "rewards/margins": 230.45156860351562, + "rewards/real": -9.592939376831055, + "step": 207 + }, + { + "epoch": 0.87, + "grad_norm": 19.88992382650619, + "learning_rate": 4.398148148148148e-07, + "logits/generated": -2.0685057640075684, + "logits/oppo_generated": -2.8043360710144043, + "logits/oppo_real": -3.0211949348449707, + "logits/real": -2.244368314743042, + "logps/generated": -308.2628173828125, + "logps/oppo_gen": -77.71004486083984, + "logps/oppo_real": -389.77301025390625, + "logps/real": -367.9434509277344, + "loss": 0.1625, + "loss/gen": 0.025741413235664368, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -230.55276489257812, + "rewards/margins": 252.38232421875, + "rewards/real": 21.829570770263672, + "step": 208 + }, + { + "epoch": 0.87, + "grad_norm": 25.132100938015384, + "learning_rate": 4.394586894586894e-07, + "logits/generated": -1.9696589708328247, + "logits/oppo_generated": -2.7760987281799316, + "logits/oppo_real": -2.740163803100586, + "logits/real": -2.227613925933838, + "logps/generated": -362.2425231933594, + "logps/oppo_gen": -88.69313049316406, + "logps/oppo_real": -338.8006591796875, + "logps/real": -327.68731689453125, + "loss": 0.1775, + "loss/gen": 0.0, + "loss/real": 0.09456821531057358, + "rewards/accuracies": 1.0, + "rewards/generated": -273.5494384765625, + "rewards/margins": 284.6627502441406, + "rewards/real": 11.113346099853516, + "step": 209 + }, + { + "epoch": 0.88, + "grad_norm": 79.51874594730006, + "learning_rate": 4.391025641025641e-07, + "logits/generated": -1.8997169733047485, + "logits/oppo_generated": -2.7127938270568848, + "logits/oppo_real": -2.803234577178955, + "logits/real": -2.1146082878112793, + "logps/generated": -387.34759521484375, + "logps/oppo_gen": -85.75541687011719, + "logps/oppo_real": -242.4071807861328, + "logps/real": -236.056884765625, + "loss": 0.1955, + "loss/gen": 0.0035225003957748413, + "loss/real": 0.07803569734096527, + "rewards/accuracies": 1.0, + "rewards/generated": -301.59222412109375, + "rewards/margins": 307.9425048828125, + "rewards/real": 6.350289821624756, + "step": 210 + }, + { + "epoch": 0.88, + "grad_norm": 82.73605256797819, + "learning_rate": 4.3874643874643876e-07, + "logits/generated": -2.0518431663513184, + "logits/oppo_generated": -2.995426654815674, + "logits/oppo_real": -2.8803281784057617, + "logits/real": -2.3870060443878174, + "logps/generated": -346.0346984863281, + "logps/oppo_gen": -68.82854461669922, + "logps/oppo_real": -337.844482421875, + "logps/real": -309.0092468261719, + "loss": 0.1528, + "loss/gen": 0.0, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -277.2061767578125, + "rewards/margins": 306.04144287109375, + "rewards/real": 28.835275650024414, + "step": 211 + }, + { + "epoch": 0.89, + "grad_norm": 35.13018825672765, + "learning_rate": 4.3839031339031333e-07, + "logits/generated": -2.083667278289795, + "logits/oppo_generated": -2.6126418113708496, + "logits/oppo_real": -3.0222294330596924, + "logits/real": -2.0695085525512695, + "logps/generated": -289.85260009765625, + "logps/oppo_gen": -56.36054992675781, + "logps/oppo_real": -325.3075256347656, + "logps/real": -330.01385498046875, + "loss": 0.2521, + "loss/gen": 0.009135901927947998, + "loss/real": 0.25433236360549927, + "rewards/accuracies": 1.0, + "rewards/generated": -233.49203491210938, + "rewards/margins": 228.78570556640625, + "rewards/real": -4.706315994262695, + "step": 212 + }, + { + "epoch": 0.89, + "grad_norm": 27.606224618649186, + "learning_rate": 4.38034188034188e-07, + "logits/generated": -2.159607410430908, + "logits/oppo_generated": -3.026592254638672, + "logits/oppo_real": -2.9974026679992676, + "logits/real": -2.492272138595581, + "logps/generated": -330.615478515625, + "logps/oppo_gen": -81.62860107421875, + "logps/oppo_real": -354.01513671875, + "logps/real": -344.7056579589844, + "loss": 0.2107, + "loss/gen": 0.0, + "loss/real": 0.024997137486934662, + "rewards/accuracies": 1.0, + "rewards/generated": -248.98684692382812, + "rewards/margins": 258.29632568359375, + "rewards/real": 9.309473991394043, + "step": 213 + }, + { + "epoch": 0.9, + "grad_norm": 15.104872244654826, + "learning_rate": 4.376780626780627e-07, + "logits/generated": -2.0585901737213135, + "logits/oppo_generated": -2.86299991607666, + "logits/oppo_real": -2.897392749786377, + "logits/real": -2.3068199157714844, + "logps/generated": -285.48638916015625, + "logps/oppo_gen": -55.654396057128906, + "logps/oppo_real": -286.4037170410156, + "logps/real": -298.23529052734375, + "loss": 0.14, + "loss/gen": 0.011369600892066956, + "loss/real": 0.27243572473526, + "rewards/accuracies": 1.0, + "rewards/generated": -229.83200073242188, + "rewards/margins": 218.0004425048828, + "rewards/real": -11.831571578979492, + "step": 214 + }, + { + "epoch": 0.9, + "grad_norm": 46.20299554006212, + "learning_rate": 4.373219373219373e-07, + "logits/generated": -2.1441431045532227, + "logits/oppo_generated": -2.8678367137908936, + "logits/oppo_real": -2.797013759613037, + "logits/real": -2.3208460807800293, + "logps/generated": -526.330810546875, + "logps/oppo_gen": -154.916748046875, + "logps/oppo_real": -268.4582824707031, + "logps/real": -262.050537109375, + "loss": 0.1611, + "loss/gen": 0.0, + "loss/real": 0.10568805783987045, + "rewards/accuracies": 1.0, + "rewards/generated": -371.41400146484375, + "rewards/margins": 377.82177734375, + "rewards/real": 6.407746315002441, + "step": 215 + }, + { + "epoch": 0.9, + "grad_norm": 42.97747780678634, + "learning_rate": 4.3696581196581193e-07, + "logits/generated": -2.335385799407959, + "logits/oppo_generated": -2.879833221435547, + "logits/oppo_real": -3.0112786293029785, + "logits/real": -2.5094590187072754, + "logps/generated": -324.74005126953125, + "logps/oppo_gen": -96.10844421386719, + "logps/oppo_real": -492.59039306640625, + "logps/real": -506.662109375, + "loss": 0.2189, + "loss/gen": 0.263028621673584, + "loss/real": 0.28753662109375, + "rewards/accuracies": 0.875, + "rewards/generated": -228.631591796875, + "rewards/margins": 214.55987548828125, + "rewards/real": -14.071721076965332, + "step": 216 + }, + { + "epoch": 0.91, + "grad_norm": 96.03388887522988, + "learning_rate": 4.366096866096866e-07, + "logits/generated": -2.3405416011810303, + "logits/oppo_generated": -2.855457305908203, + "logits/oppo_real": -3.161579132080078, + "logits/real": -2.4299869537353516, + "logps/generated": -266.0459289550781, + "logps/oppo_gen": -79.04156494140625, + "logps/oppo_real": -508.73779296875, + "logps/real": -511.5892333984375, + "loss": 0.218, + "loss/gen": 0.43865615129470825, + "loss/real": 0.2312847524881363, + "rewards/accuracies": 0.875, + "rewards/generated": -187.00439453125, + "rewards/margins": 184.15298461914062, + "rewards/real": -2.851390838623047, + "step": 217 + }, + { + "epoch": 0.91, + "grad_norm": 49.46367136754257, + "learning_rate": 4.362535612535612e-07, + "logits/generated": -2.3023407459259033, + "logits/oppo_generated": -2.8270015716552734, + "logits/oppo_real": -2.9884450435638428, + "logits/real": -2.3669018745422363, + "logps/generated": -310.2086181640625, + "logps/oppo_gen": -79.96229553222656, + "logps/oppo_real": -295.296630859375, + "logps/real": -278.3116455078125, + "loss": 0.119, + "loss/gen": 0.24770958721637726, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -230.246337890625, + "rewards/margins": 247.2313232421875, + "rewards/real": 16.985002517700195, + "step": 218 + }, + { + "epoch": 0.92, + "grad_norm": 54.32108273310534, + "learning_rate": 4.358974358974359e-07, + "logits/generated": -2.0078024864196777, + "logits/oppo_generated": -2.7040886878967285, + "logits/oppo_real": -2.816561698913574, + "logits/real": -2.148149013519287, + "logps/generated": -327.4352111816406, + "logps/oppo_gen": -55.71031188964844, + "logps/oppo_real": -202.95962524414062, + "logps/real": -176.31039428710938, + "loss": 0.2428, + "loss/gen": 0.0, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -271.72491455078125, + "rewards/margins": 298.3741455078125, + "rewards/real": 26.649229049682617, + "step": 219 + }, + { + "epoch": 0.92, + "grad_norm": 38.766303588881314, + "learning_rate": 4.355413105413105e-07, + "logits/generated": -1.851919412612915, + "logits/oppo_generated": -2.385345458984375, + "logits/oppo_real": -2.4835422039031982, + "logits/real": -1.8688819408416748, + "logps/generated": -298.13861083984375, + "logps/oppo_gen": -75.58077239990234, + "logps/oppo_real": -339.3034973144531, + "logps/real": -292.58990478515625, + "loss": 0.1566, + "loss/gen": 0.2625175714492798, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -222.55783081054688, + "rewards/margins": 269.27142333984375, + "rewards/real": 46.713592529296875, + "step": 220 + }, + { + "epoch": 0.92, + "grad_norm": 38.3459545642733, + "learning_rate": 4.3518518518518514e-07, + "logits/generated": -2.3448195457458496, + "logits/oppo_generated": -3.011491060256958, + "logits/oppo_real": -3.0487937927246094, + "logits/real": -2.592437744140625, + "logps/generated": -380.2159118652344, + "logps/oppo_gen": -131.22396850585938, + "logps/oppo_real": -400.33868408203125, + "logps/real": -382.6166687011719, + "loss": 0.2109, + "loss/gen": 0.0, + "loss/real": 0.009188689291477203, + "rewards/accuracies": 1.0, + "rewards/generated": -248.991943359375, + "rewards/margins": 266.71392822265625, + "rewards/real": 17.722003936767578, + "step": 221 + }, + { + "epoch": 0.93, + "grad_norm": 39.05013241714905, + "learning_rate": 4.348290598290598e-07, + "logits/generated": -2.286303997039795, + "logits/oppo_generated": -2.755108118057251, + "logits/oppo_real": -2.8694067001342773, + "logits/real": -2.2778568267822266, + "logps/generated": -286.8591613769531, + "logps/oppo_gen": -61.73572540283203, + "logps/oppo_real": -230.838134765625, + "logps/real": -234.90147399902344, + "loss": 0.1586, + "loss/gen": 0.23340168595314026, + "loss/real": 0.2623848617076874, + "rewards/accuracies": 0.875, + "rewards/generated": -225.12344360351562, + "rewards/margins": 221.0601043701172, + "rewards/real": -4.063333511352539, + "step": 222 + }, + { + "epoch": 0.93, + "grad_norm": 49.061494814543714, + "learning_rate": 4.3447293447293444e-07, + "logits/generated": -2.1600050926208496, + "logits/oppo_generated": -2.8574419021606445, + "logits/oppo_real": -2.923137903213501, + "logits/real": -2.323585033416748, + "logps/generated": -341.39599609375, + "logps/oppo_gen": -82.77210998535156, + "logps/oppo_real": -252.58892822265625, + "logps/real": -277.39813232421875, + "loss": 0.1963, + "loss/gen": 0.011263325810432434, + "loss/real": 0.3864287734031677, + "rewards/accuracies": 0.875, + "rewards/generated": -258.62384033203125, + "rewards/margins": 233.81466674804688, + "rewards/real": -24.80919075012207, + "step": 223 + }, + { + "epoch": 0.94, + "grad_norm": 49.97145181094652, + "learning_rate": 4.341168091168091e-07, + "logits/generated": -2.0936217308044434, + "logits/oppo_generated": -2.994565010070801, + "logits/oppo_real": -2.8149280548095703, + "logits/real": -2.4390323162078857, + "logps/generated": -267.6043395996094, + "logps/oppo_gen": -48.2861213684082, + "logps/oppo_real": -137.37625122070312, + "logps/real": -160.69287109375, + "loss": 0.301, + "loss/gen": 0.5201160907745361, + "loss/real": 0.3812367916107178, + "rewards/accuracies": 0.875, + "rewards/generated": -219.31820678710938, + "rewards/margins": 196.00160217285156, + "rewards/real": -23.31661605834961, + "step": 224 + }, + { + "epoch": 0.94, + "grad_norm": 54.01997514664136, + "learning_rate": 4.3376068376068374e-07, + "logits/generated": -2.2345826625823975, + "logits/oppo_generated": -2.816603422164917, + "logits/oppo_real": -2.9343314170837402, + "logits/real": -2.339372158050537, + "logps/generated": -238.27630615234375, + "logps/oppo_gen": -30.44548988342285, + "logps/oppo_real": -174.9966278076172, + "logps/real": -159.52218627929688, + "loss": 0.2248, + "loss/gen": 0.07372879981994629, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -207.830810546875, + "rewards/margins": 223.30526733398438, + "rewards/real": 15.47445297241211, + "step": 225 + }, + { + "epoch": 0.95, + "grad_norm": 26.33121852189253, + "learning_rate": 4.3340455840455836e-07, + "logits/generated": -2.161853075027466, + "logits/oppo_generated": -2.6415185928344727, + "logits/oppo_real": -3.0115818977355957, + "logits/real": -2.075220823287964, + "logps/generated": -362.1558532714844, + "logps/oppo_gen": -93.466064453125, + "logps/oppo_real": -340.529296875, + "logps/real": -308.37493896484375, + "loss": 0.1905, + "loss/gen": 0.0, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -268.68975830078125, + "rewards/margins": 300.8441162109375, + "rewards/real": 32.154354095458984, + "step": 226 + }, + { + "epoch": 0.95, + "grad_norm": 37.262840566835145, + "learning_rate": 4.3304843304843304e-07, + "logits/generated": -2.1919541358947754, + "logits/oppo_generated": -2.7984108924865723, + "logits/oppo_real": -2.9754528999328613, + "logits/real": -2.3059372901916504, + "logps/generated": -304.6134338378906, + "logps/oppo_gen": -69.67858123779297, + "logps/oppo_real": -268.7974853515625, + "logps/real": -259.7208251953125, + "loss": 0.1259, + "loss/gen": 0.0012919306755065918, + "loss/real": 0.04866264760494232, + "rewards/accuracies": 1.0, + "rewards/generated": -234.93484497070312, + "rewards/margins": 244.01148986816406, + "rewards/real": 9.076637268066406, + "step": 227 + }, + { + "epoch": 0.95, + "grad_norm": 21.304152478843108, + "learning_rate": 4.326923076923077e-07, + "logits/generated": -2.002150058746338, + "logits/oppo_generated": -2.7994847297668457, + "logits/oppo_real": -2.687981605529785, + "logits/real": -2.2970423698425293, + "logps/generated": -286.0855712890625, + "logps/oppo_gen": -76.17577362060547, + "logps/oppo_real": -381.5020751953125, + "logps/real": -376.30322265625, + "loss": 0.1755, + "loss/gen": 0.2530289888381958, + "loss/real": 0.24476546049118042, + "rewards/accuracies": 1.0, + "rewards/generated": -209.9097900390625, + "rewards/margins": 215.108642578125, + "rewards/real": 5.198863983154297, + "step": 228 + }, + { + "epoch": 0.96, + "grad_norm": 26.23410279415064, + "learning_rate": 4.323361823361823e-07, + "logits/generated": -2.1952879428863525, + "logits/oppo_generated": -2.8429031372070312, + "logits/oppo_real": -3.0224597454071045, + "logits/real": -2.2873964309692383, + "logps/generated": -302.1619873046875, + "logps/oppo_gen": -78.5534439086914, + "logps/oppo_real": -246.5026397705078, + "logps/real": -227.45849609375, + "loss": 0.1693, + "loss/gen": 0.23556922376155853, + "loss/real": 0.0011737123131752014, + "rewards/accuracies": 1.0, + "rewards/generated": -223.60853576660156, + "rewards/margins": 242.65267944335938, + "rewards/real": 19.044147491455078, + "step": 229 + }, + { + "epoch": 0.96, + "grad_norm": 30.91998695442349, + "learning_rate": 4.3198005698005696e-07, + "logits/generated": -1.982604742050171, + "logits/oppo_generated": -2.5529236793518066, + "logits/oppo_real": -2.7146146297454834, + "logits/real": -1.9502203464508057, + "logps/generated": -342.57330322265625, + "logps/oppo_gen": -79.70944213867188, + "logps/oppo_real": -106.01055145263672, + "logps/real": -143.1019744873047, + "loss": 0.2063, + "loss/gen": 0.0, + "loss/real": 0.45912817120552063, + "rewards/accuracies": 1.0, + "rewards/generated": -262.8638610839844, + "rewards/margins": 225.77243041992188, + "rewards/real": -37.09141540527344, + "step": 230 + }, + { + "epoch": 0.97, + "grad_norm": 68.66924234145881, + "learning_rate": 4.3162393162393163e-07, + "logits/generated": -1.856884241104126, + "logits/oppo_generated": -2.5894346237182617, + "logits/oppo_real": -2.6849865913391113, + "logits/real": -1.9867148399353027, + "logps/generated": -390.6427001953125, + "logps/oppo_gen": -67.09019470214844, + "logps/oppo_real": -256.4427185058594, + "logps/real": -235.26731872558594, + "loss": 0.1923, + "loss/gen": 0.02218911051750183, + "loss/real": 0.002973802387714386, + "rewards/accuracies": 1.0, + "rewards/generated": -323.552490234375, + "rewards/margins": 344.7278747558594, + "rewards/real": 21.17538070678711, + "step": 231 + }, + { + "epoch": 0.97, + "grad_norm": 25.47653974048355, + "learning_rate": 4.3126780626780625e-07, + "logits/generated": -2.229971408843994, + "logits/oppo_generated": -2.959817886352539, + "logits/oppo_real": -2.9362192153930664, + "logits/real": -2.4872889518737793, + "logps/generated": -313.7774658203125, + "logps/oppo_gen": -82.48292541503906, + "logps/oppo_real": -458.88818359375, + "logps/real": -453.343017578125, + "loss": 0.1714, + "loss/gen": 0.06390117108821869, + "loss/real": 0.09897678345441818, + "rewards/accuracies": 1.0, + "rewards/generated": -231.29452514648438, + "rewards/margins": 236.83969116210938, + "rewards/real": 5.545146942138672, + "step": 232 + }, + { + "epoch": 0.97, + "grad_norm": 44.17547296984351, + "learning_rate": 4.309116809116809e-07, + "logits/generated": -2.093695878982544, + "logits/oppo_generated": -2.7284858226776123, + "logits/oppo_real": -2.8326492309570312, + "logits/real": -2.21309757232666, + "logps/generated": -323.655029296875, + "logps/oppo_gen": -60.89936828613281, + "logps/oppo_real": -245.58233642578125, + "logps/real": -238.11309814453125, + "loss": 0.173, + "loss/gen": 0.0006021559238433838, + "loss/real": 0.018122456967830658, + "rewards/accuracies": 1.0, + "rewards/generated": -262.75567626953125, + "rewards/margins": 270.2248840332031, + "rewards/real": 7.469233512878418, + "step": 233 + }, + { + "epoch": 0.98, + "grad_norm": 33.680569135700296, + "learning_rate": 4.3055555555555555e-07, + "logits/generated": -2.0983548164367676, + "logits/oppo_generated": -2.884782075881958, + "logits/oppo_real": -3.007986545562744, + "logits/real": -2.3658394813537598, + "logps/generated": -301.1651611328125, + "logps/oppo_gen": -64.29571533203125, + "logps/oppo_real": -445.2386169433594, + "logps/real": -415.5571594238281, + "loss": 0.1804, + "loss/gen": 0.0, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -236.8694610595703, + "rewards/margins": 266.5509338378906, + "rewards/real": 29.681480407714844, + "step": 234 + }, + { + "epoch": 0.98, + "grad_norm": 20.284545818732685, + "learning_rate": 4.3019943019943017e-07, + "logits/generated": -2.0810956954956055, + "logits/oppo_generated": -2.8430304527282715, + "logits/oppo_real": -2.873483657836914, + "logits/real": -2.3326306343078613, + "logps/generated": -320.660400390625, + "logps/oppo_gen": -68.79239654541016, + "logps/oppo_real": -391.89910888671875, + "logps/real": -372.5426330566406, + "loss": 0.1924, + "loss/gen": 0.0, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -251.86802673339844, + "rewards/margins": 271.22454833984375, + "rewards/real": 19.35650634765625, + "step": 235 + }, + { + "epoch": 0.99, + "grad_norm": 31.462047177474897, + "learning_rate": 4.2984330484330485e-07, + "logits/generated": -2.0807740688323975, + "logits/oppo_generated": -2.8508265018463135, + "logits/oppo_real": -2.9677348136901855, + "logits/real": -2.363823890686035, + "logps/generated": -337.72705078125, + "logps/oppo_gen": -88.43344116210938, + "logps/oppo_real": -438.55322265625, + "logps/real": -411.73760986328125, + "loss": 0.2283, + "loss/gen": 0.009022071957588196, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -249.2935791015625, + "rewards/margins": 276.10919189453125, + "rewards/real": 26.81560516357422, + "step": 236 + }, + { + "epoch": 0.99, + "grad_norm": 30.65675470245862, + "learning_rate": 4.294871794871794e-07, + "logits/generated": -2.2450156211853027, + "logits/oppo_generated": -2.816070079803467, + "logits/oppo_real": -3.012850761413574, + "logits/real": -2.3096275329589844, + "logps/generated": -328.40545654296875, + "logps/oppo_gen": -55.2912483215332, + "logps/oppo_real": -255.20977783203125, + "logps/real": -242.8808135986328, + "loss": 0.1844, + "loss/gen": 0.014506042003631592, + "loss/real": 0.023922577500343323, + "rewards/accuracies": 1.0, + "rewards/generated": -273.1142272949219, + "rewards/margins": 285.44317626953125, + "rewards/real": 12.328951835632324, + "step": 237 + }, + { + "epoch": 1.0, + "grad_norm": 60.6619853354945, + "learning_rate": 4.291310541310541e-07, + "logits/generated": -2.2790613174438477, + "logits/oppo_generated": -2.701869487762451, + "logits/oppo_real": -2.963564872741699, + "logits/real": -2.223146915435791, + "logps/generated": -349.9393310546875, + "logps/oppo_gen": -83.03327941894531, + "logps/oppo_real": -312.4057312011719, + "logps/real": -290.92095947265625, + "loss": 0.1719, + "loss/gen": 0.0, + "loss/real": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -266.9060363769531, + "rewards/margins": 288.39080810546875, + "rewards/real": 21.484760284423828, + "step": 238 + }, + { + "epoch": 1.0, + "grad_norm": 47.305535835704525, + "learning_rate": 4.2877492877492877e-07, + "logits/generated": -2.2459330558776855, + "logits/oppo_generated": -2.8546152114868164, + "logits/oppo_real": -3.036848545074463, + "logits/real": -2.419419050216675, + "logps/generated": -338.7462158203125, + "logps/oppo_gen": -75.19477844238281, + "logps/oppo_real": -314.191162109375, + "logps/real": -302.6313171386719, + "loss": 0.1123, + "loss/gen": 0.0, + "loss/real": 0.11184393614530563, + "rewards/accuracies": 1.0, + "rewards/generated": -263.5514221191406, + "rewards/margins": 275.1112365722656, + "rewards/real": 11.559805870056152, + "step": 239 + } + ], + "logging_steps": 1.0, + "max_steps": 1434, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}