diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,5040 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100.0, + "global_step": 239, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "grad_norm": 0.0, + "learning_rate": 0.0, + "logits/generated": -3.130502223968506, + "logits/oppo_generated": -3.1088104248046875, + "logits/oppo_real": -3.130502223968506, + "logits/real": -3.1088104248046875, + "logps/generated": -99.40917205810547, + "logps/oppo_gen": -99.40917205810547, + "logps/oppo_real": -459.3097229003906, + "logps/real": -459.3097229003906, + "loss": 0.9762, + "loss/gen": 3.6945278644561768, + "loss/real": -2.7182817459106445, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 1 + }, + { + "epoch": 0.01, + "grad_norm": 0.0, + "learning_rate": 0.0, + "logits/generated": -3.0933988094329834, + "logits/oppo_generated": -2.919645309448242, + "logits/oppo_real": -3.0933988094329834, + "logits/real": -2.919645309448242, + "logps/generated": -103.65153503417969, + "logps/oppo_gen": -103.65153503417969, + "logps/oppo_real": -392.1358642578125, + "logps/real": -392.1358642578125, + "loss": 0.9762, + "loss/gen": 3.6945278644561768, + "loss/real": -2.7182817459106445, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 2 + }, + { + "epoch": 0.01, + "grad_norm": 0.0, + "learning_rate": 0.0, + "logits/generated": -2.6572537422180176, + "logits/oppo_generated": -2.8074941635131836, + "logits/oppo_real": -2.6572537422180176, + "logits/real": -2.8074941635131836, + "logps/generated": -72.88986206054688, + "logps/oppo_gen": -72.88986206054688, + "logps/oppo_real": -291.916748046875, + "logps/real": -291.916748046875, + "loss": 0.9762, + "loss/gen": 3.6945278644561768, + "loss/real": -2.7182817459106445, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 3 + }, + { + "epoch": 0.02, + "grad_norm": 0.0, + "learning_rate": 0.0, + "logits/generated": -2.8966193199157715, + "logits/oppo_generated": -2.768460273742676, + "logits/oppo_real": -2.8966193199157715, + "logits/real": -2.768460273742676, + "logps/generated": -64.05287170410156, + "logps/oppo_gen": -64.05287170410156, + "logps/oppo_real": -376.8367919921875, + "logps/real": -376.8367919921875, + "loss": 0.9762, + "loss/gen": 3.6945278644561768, + "loss/real": -2.7182817459106445, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 4 + }, + { + "epoch": 0.02, + "grad_norm": 0.0, + "learning_rate": 0.0, + "logits/generated": -2.889317512512207, + "logits/oppo_generated": -2.708950996398926, + "logits/oppo_real": -2.889317512512207, + "logits/real": -2.708950996398926, + "logps/generated": -48.29164123535156, + "logps/oppo_gen": -48.29164123535156, + "logps/oppo_real": -173.0751953125, + "logps/real": -173.0751953125, + "loss": 0.9762, + "loss/gen": 3.6945278644561768, + "loss/real": -2.7182817459106445, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 5 + }, + { + "epoch": 0.03, + "grad_norm": 0.0, + "learning_rate": 0.0, + "logits/generated": -2.957958698272705, + "logits/oppo_generated": -2.749436378479004, + "logits/oppo_real": -2.957958698272705, + "logits/real": -2.749436378479004, + "logps/generated": -48.84138488769531, + "logps/oppo_gen": -48.84138488769531, + "logps/oppo_real": -139.2998046875, + "logps/real": -139.2998046875, + "loss": 0.9762, + "loss/gen": 3.6945278644561768, + "loss/real": -2.7182817459106445, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 6 + }, + { + "epoch": 0.03, + "grad_norm": 140.3248950538535, + "learning_rate": 1.6666666666666667e-08, + "logits/generated": -3.1195316314697266, + "logits/oppo_generated": -2.9545342922210693, + "logits/oppo_real": -3.1195316314697266, + "logits/real": -2.9545342922210693, + "logps/generated": -163.2059783935547, + "logps/oppo_gen": -163.2059783935547, + "logps/oppo_real": -432.88226318359375, + "logps/real": -432.88226318359375, + "loss": 0.9762, + "loss/gen": 3.6945278644561768, + "loss/real": -2.7182817459106445, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 7 + }, + { + "epoch": 0.03, + "grad_norm": 140.3248950538535, + "learning_rate": 1.6666666666666667e-08, + "logits/generated": -2.910332441329956, + "logits/oppo_generated": -2.9416637420654297, + "logits/oppo_real": -2.910332441329956, + "logits/real": -2.9416637420654297, + "logps/generated": -69.29386901855469, + "logps/oppo_gen": -69.29386901855469, + "logps/oppo_real": -311.59619140625, + "logps/real": -311.59619140625, + "loss": 0.9762, + "loss/gen": 3.6945278644561768, + "loss/real": -2.7182817459106445, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 8 + }, + { + "epoch": 0.04, + "grad_norm": 144.39084058121554, + "learning_rate": 3.3333333333333334e-08, + "logits/generated": -2.409976005554199, + "logits/oppo_generated": -2.294548273086548, + "logits/oppo_real": -2.409976005554199, + "logits/real": -2.294548273086548, + "logps/generated": -82.20011138916016, + "logps/oppo_gen": -82.20011138916016, + "logps/oppo_real": -381.1852111816406, + "logps/real": -381.1852111816406, + "loss": 0.9762, + "loss/gen": 3.6945278644561768, + "loss/real": -2.7182817459106445, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 9 + }, + { + "epoch": 0.04, + "grad_norm": 144.39084058121554, + "learning_rate": 3.3333333333333334e-08, + "logits/generated": -2.963313579559326, + "logits/oppo_generated": -2.9239017963409424, + "logits/oppo_real": -2.963313579559326, + "logits/real": -2.9239017963409424, + "logps/generated": -93.09856414794922, + "logps/oppo_gen": -93.09856414794922, + "logps/oppo_real": -233.10401916503906, + "logps/real": -233.10401916503906, + "loss": 0.9762, + "loss/gen": 3.6945278644561768, + "loss/real": -2.7182817459106445, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 10 + }, + { + "epoch": 0.05, + "grad_norm": 147.09211346550842, + "learning_rate": 5e-08, + "logits/generated": -2.857771396636963, + "logits/oppo_generated": -2.837850570678711, + "logits/oppo_real": -2.857771396636963, + "logits/real": -2.837850570678711, + "logps/generated": -59.46293640136719, + "logps/oppo_gen": -59.46293640136719, + "logps/oppo_real": -142.69805908203125, + "logps/real": -142.69805908203125, + "loss": 0.9762, + "loss/gen": 3.6945278644561768, + "loss/real": -2.7182817459106445, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 11 + }, + { + "epoch": 0.05, + "grad_norm": 147.36241597037218, + "learning_rate": 6.666666666666667e-08, + "logits/generated": -2.8778512477874756, + "logits/oppo_generated": -2.7672762870788574, + "logits/oppo_real": -2.8780808448791504, + "logits/real": -2.766920328140259, + "logps/generated": -70.60530090332031, + "logps/oppo_gen": -70.58644104003906, + "logps/oppo_real": -343.4704284667969, + "logps/real": -343.4797058105469, + "loss": 0.9737, + "loss/gen": 3.693136215209961, + "loss/real": -2.718029499053955, + "rewards/accuracies": 0.5, + "rewards/generated": -0.01885223388671875, + "rewards/margins": 0.009566187858581543, + "rewards/real": -0.009286046028137207, + "step": 12 + }, + { + "epoch": 0.05, + "grad_norm": 141.20291665498627, + "learning_rate": 8.333333333333333e-08, + "logits/generated": -2.8214950561523438, + "logits/oppo_generated": -2.8374581336975098, + "logits/oppo_real": -2.822021961212158, + "logits/real": -2.836732864379883, + "logps/generated": -106.83735656738281, + "logps/oppo_gen": -106.73956298828125, + "logps/oppo_real": -280.41741943359375, + "logps/real": -280.4453430175781, + "loss": 0.9675, + "loss/gen": 3.6873114109039307, + "loss/real": -2.7175238132476807, + "rewards/accuracies": 0.875, + "rewards/generated": -0.09777355194091797, + "rewards/margins": 0.06986618041992188, + "rewards/real": -0.027907371520996094, + "step": 13 + }, + { + "epoch": 0.06, + "grad_norm": 147.7225611683097, + "learning_rate": 1e-07, + "logits/generated": -2.7692794799804688, + "logits/oppo_generated": -2.8255615234375, + "logits/oppo_real": -2.771684169769287, + "logits/real": -2.8233795166015625, + "logps/generated": -86.35212707519531, + "logps/oppo_gen": -85.86231994628906, + "logps/oppo_real": -289.01318359375, + "logps/real": -289.048095703125, + "loss": 0.9484, + "loss/gen": 3.6585421562194824, + "loss/real": -2.717336654663086, + "rewards/accuracies": 0.875, + "rewards/generated": -0.48981738090515137, + "rewards/margins": 0.4548964500427246, + "rewards/real": -0.03492093086242676, + "step": 14 + }, + { + "epoch": 0.06, + "grad_norm": 135.88560072492965, + "learning_rate": 1.1666666666666667e-07, + "logits/generated": -3.1533312797546387, + "logits/oppo_generated": -2.7394165992736816, + "logits/oppo_real": -3.1553921699523926, + "logits/real": -2.7368688583374023, + "logps/generated": -75.06793212890625, + "logps/oppo_gen": -74.47514343261719, + "logps/oppo_real": -366.370361328125, + "logps/real": -366.428466796875, + "loss": 0.9381, + "loss/gen": 3.6510140895843506, + "loss/real": -2.716707944869995, + "rewards/accuracies": 1.0, + "rewards/generated": -0.5927925109863281, + "rewards/margins": 0.5347006320953369, + "rewards/real": -0.05809187889099121, + "step": 15 + }, + { + "epoch": 0.07, + "grad_norm": 138.72056275567078, + "learning_rate": 1.3333333333333334e-07, + "logits/generated": -2.1314597129821777, + "logits/oppo_generated": -2.1468427181243896, + "logits/oppo_real": -2.142064094543457, + "logits/real": -2.137998580932617, + "logps/generated": -79.51522064208984, + "logps/oppo_gen": -78.08332824707031, + "logps/oppo_real": -437.152587890625, + "logps/real": -437.42681884765625, + "loss": 0.8706, + "loss/gen": 3.590456008911133, + "loss/real": -2.710862874984741, + "rewards/accuracies": 1.0, + "rewards/generated": -1.4318904876708984, + "rewards/margins": 1.1576709747314453, + "rewards/real": -0.2742195129394531, + "step": 16 + }, + { + "epoch": 0.07, + "grad_norm": 128.9259113793655, + "learning_rate": 1.5e-07, + "logits/generated": -2.9416465759277344, + "logits/oppo_generated": -2.902646064758301, + "logits/oppo_real": -2.953411817550659, + "logits/real": -2.887700080871582, + "logps/generated": -74.43273162841797, + "logps/oppo_gen": -72.53976440429688, + "logps/oppo_real": -310.7004089355469, + "logps/real": -310.87109375, + "loss": 0.8455, + "loss/gen": 3.557424545288086, + "loss/real": -2.7137060165405273, + "rewards/accuracies": 1.0, + "rewards/generated": -1.8929705619812012, + "rewards/margins": 1.7222943305969238, + "rewards/real": -0.17067623138427734, + "step": 17 + }, + { + "epoch": 0.08, + "grad_norm": 131.5700784634371, + "learning_rate": 1.6666666666666665e-07, + "logits/generated": -2.94179630279541, + "logits/oppo_generated": -2.947140693664551, + "logits/oppo_real": -2.9634807109832764, + "logits/real": -2.920558214187622, + "logps/generated": -79.8861083984375, + "logps/oppo_gen": -74.80116271972656, + "logps/oppo_real": -309.46124267578125, + "logps/real": -310.43719482421875, + "loss": 0.6796, + "loss/gen": 3.3385138511657715, + "loss/real": -2.6920909881591797, + "rewards/accuracies": 1.0, + "rewards/generated": -5.084942817687988, + "rewards/margins": 4.109025955200195, + "rewards/real": -0.9759171009063721, + "step": 18 + }, + { + "epoch": 0.08, + "grad_norm": 132.43793505119672, + "learning_rate": 1.833333333333333e-07, + "logits/generated": -2.4398093223571777, + "logits/oppo_generated": -2.6668543815612793, + "logits/oppo_real": -2.47564697265625, + "logits/real": -2.6329777240753174, + "logps/generated": -73.18605041503906, + "logps/oppo_gen": -67.190673828125, + "logps/oppo_real": -285.60797119140625, + "logps/real": -287.226806640625, + "loss": 0.5909, + "loss/gen": 3.2783122062683105, + "loss/real": -2.6751227378845215, + "rewards/accuracies": 1.0, + "rewards/generated": -5.995372772216797, + "rewards/margins": 4.3765668869018555, + "rewards/real": -1.6188058853149414, + "step": 19 + }, + { + "epoch": 0.08, + "grad_norm": 122.69142409669472, + "learning_rate": 2e-07, + "logits/generated": -3.083611011505127, + "logits/oppo_generated": -2.7376956939697266, + "logits/oppo_real": -3.1153059005737305, + "logits/real": -2.70223331451416, + "logps/generated": -102.092529296875, + "logps/oppo_gen": -93.65745544433594, + "logps/oppo_real": -173.968994140625, + "logps/real": -176.39892578125, + "loss": 0.5272, + "loss/gen": 3.124610185623169, + "loss/real": -2.653409957885742, + "rewards/accuracies": 1.0, + "rewards/generated": -8.435081481933594, + "rewards/margins": 6.005127429962158, + "rewards/real": -2.4299545288085938, + "step": 20 + }, + { + "epoch": 0.09, + "grad_norm": 107.67073560019686, + "learning_rate": 2.1666666666666667e-07, + "logits/generated": -2.8323276042938232, + "logits/oppo_generated": -2.6699156761169434, + "logits/oppo_real": -2.8930060863494873, + "logits/real": -2.6024348735809326, + "logps/generated": -59.84043884277344, + "logps/oppo_gen": -50.189754486083984, + "logps/oppo_real": -197.0562286376953, + "logps/real": -199.4170379638672, + "loss": 0.291, + "loss/gen": 3.0484681129455566, + "loss/real": -2.6573870182037354, + "rewards/accuracies": 1.0, + "rewards/generated": -9.650688171386719, + "rewards/margins": 7.289878845214844, + "rewards/real": -2.3608102798461914, + "step": 21 + }, + { + "epoch": 0.09, + "grad_norm": 97.9560398453689, + "learning_rate": 2.3333333333333333e-07, + "logits/generated": -2.9114887714385986, + "logits/oppo_generated": -2.8113152980804443, + "logits/oppo_real": -2.997610330581665, + "logits/real": -2.724991798400879, + "logps/generated": -74.77532196044922, + "logps/oppo_gen": -59.91856384277344, + "logps/oppo_real": -175.6089324951172, + "logps/real": -181.092529296875, + "loss": 0.1564, + "loss/gen": 2.7578284740448, + "loss/real": -2.5756349563598633, + "rewards/accuracies": 1.0, + "rewards/generated": -14.856756210327148, + "rewards/margins": 9.373159408569336, + "rewards/real": -5.4835968017578125, + "step": 22 + }, + { + "epoch": 0.1, + "grad_norm": 101.1866700089493, + "learning_rate": 2.5e-07, + "logits/generated": -2.738328218460083, + "logits/oppo_generated": -2.712057113647461, + "logits/oppo_real": -2.83805513381958, + "logits/real": -2.6210412979125977, + "logps/generated": -103.88157653808594, + "logps/oppo_gen": -84.5518798828125, + "logps/oppo_real": -331.96221923828125, + "logps/real": -338.4169616699219, + "loss": -0.0064, + "loss/gen": 2.5180134773254395, + "loss/real": -2.5506632328033447, + "rewards/accuracies": 1.0, + "rewards/generated": -19.329689025878906, + "rewards/margins": 12.874977111816406, + "rewards/real": -6.454712867736816, + "step": 23 + }, + { + "epoch": 0.1, + "grad_norm": 91.5968525574842, + "learning_rate": 2.6666666666666667e-07, + "logits/generated": -2.2416625022888184, + "logits/oppo_generated": -2.4313888549804688, + "logits/oppo_real": -2.3368191719055176, + "logits/real": -2.3420183658599854, + "logps/generated": -93.81153106689453, + "logps/oppo_gen": -70.7446060180664, + "logps/oppo_real": -186.56976318359375, + "logps/real": -194.1738739013672, + "loss": -0.0762, + "loss/gen": 2.358870029449463, + "loss/real": -2.5205307006835938, + "rewards/accuracies": 1.0, + "rewards/generated": -23.066925048828125, + "rewards/margins": 15.46281623840332, + "rewards/real": -7.604110240936279, + "step": 24 + }, + { + "epoch": 0.1, + "grad_norm": 91.5968525574842, + "learning_rate": 2.6666666666666667e-07, + "logits/generated": -2.808882474899292, + "logits/oppo_generated": -2.8222999572753906, + "logits/oppo_real": -2.956730842590332, + "logits/real": -2.6913347244262695, + "logps/generated": -77.1277847290039, + "logps/oppo_gen": -55.461936950683594, + "logps/oppo_real": -125.98847198486328, + "logps/real": -135.24478149414062, + "loss": -0.2171, + "loss/gen": 2.427062511444092, + "loss/real": -2.4810240268707275, + "rewards/accuracies": 0.875, + "rewards/generated": -21.665851593017578, + "rewards/margins": 12.40954875946045, + "rewards/real": -9.256302833557129, + "step": 25 + }, + { + "epoch": 0.11, + "grad_norm": 90.71163423694614, + "learning_rate": 2.833333333333333e-07, + "logits/generated": -2.524838447570801, + "logits/oppo_generated": -2.9076757431030273, + "logits/oppo_real": -2.661245822906494, + "logits/real": -2.7569193840026855, + "logps/generated": -94.57086181640625, + "logps/oppo_gen": -71.46342468261719, + "logps/oppo_real": -293.69677734375, + "logps/real": -298.1125183105469, + "loss": -0.199, + "loss/gen": 2.345475196838379, + "loss/real": -2.6118550300598145, + "rewards/accuracies": 1.0, + "rewards/generated": -23.107433319091797, + "rewards/margins": 18.69169807434082, + "rewards/real": -4.415735244750977, + "step": 26 + }, + { + "epoch": 0.11, + "grad_norm": 71.87999982609905, + "learning_rate": 3e-07, + "logits/generated": -2.6987175941467285, + "logits/oppo_generated": -3.018123149871826, + "logits/oppo_real": -2.837935447692871, + "logits/real": -2.857689142227173, + "logps/generated": -76.849853515625, + "logps/oppo_gen": -51.06623458862305, + "logps/oppo_real": -151.72972106933594, + "logps/real": -170.76156616210938, + "loss": -0.2983, + "loss/gen": 2.2412900924682617, + "loss/real": -2.2491354942321777, + "rewards/accuracies": 0.75, + "rewards/generated": -25.783626556396484, + "rewards/margins": 6.751780033111572, + "rewards/real": -19.031845092773438, + "step": 27 + }, + { + "epoch": 0.12, + "grad_norm": 77.17411637512444, + "learning_rate": 3.166666666666666e-07, + "logits/generated": -2.4447317123413086, + "logits/oppo_generated": -2.7700376510620117, + "logits/oppo_real": -2.6328747272491455, + "logits/real": -2.6101927757263184, + "logps/generated": -109.12590026855469, + "logps/oppo_gen": -72.09120178222656, + "logps/oppo_real": -411.427978515625, + "logps/real": -411.268310546875, + "loss": -0.5509, + "loss/gen": 1.7849677801132202, + "loss/real": -2.807443141937256, + "rewards/accuracies": 1.0, + "rewards/generated": -37.03469467163086, + "rewards/margins": 37.19430160522461, + "rewards/real": 0.1596088409423828, + "step": 28 + }, + { + "epoch": 0.12, + "grad_norm": 77.17411637512444, + "learning_rate": 3.166666666666666e-07, + "logits/generated": -2.7265658378601074, + "logits/oppo_generated": -2.91198468208313, + "logits/oppo_real": -2.9211230278015137, + "logits/real": -2.7112436294555664, + "logps/generated": -121.98545837402344, + "logps/oppo_gen": -82.21741485595703, + "logps/oppo_real": -301.3589172363281, + "logps/real": -312.0211181640625, + "loss": -0.7401, + "loss/gen": 1.702211618423462, + "loss/real": -2.461573600769043, + "rewards/accuracies": 0.875, + "rewards/generated": -39.768035888671875, + "rewards/margins": 29.1058292388916, + "rewards/real": -10.66220760345459, + "step": 29 + }, + { + "epoch": 0.13, + "grad_norm": 85.01337498171243, + "learning_rate": 3.333333333333333e-07, + "logits/generated": -2.7819857597351074, + "logits/oppo_generated": -2.4022648334503174, + "logits/oppo_real": -2.97650146484375, + "logits/real": -2.2471132278442383, + "logps/generated": -130.29348754882812, + "logps/oppo_gen": -99.30915832519531, + "logps/oppo_real": -226.3162841796875, + "logps/real": -240.45065307617188, + "loss": -0.6496, + "loss/gen": 2.034857749938965, + "loss/real": -2.3712759017944336, + "rewards/accuracies": 1.0, + "rewards/generated": -30.984325408935547, + "rewards/margins": 16.849956512451172, + "rewards/real": -14.134370803833008, + "step": 30 + }, + { + "epoch": 0.13, + "grad_norm": 58.31929890696561, + "learning_rate": 3.5e-07, + "logits/generated": -2.751274585723877, + "logits/oppo_generated": -2.854034900665283, + "logits/oppo_real": -2.9424033164978027, + "logits/real": -2.689624309539795, + "logps/generated": -90.87772369384766, + "logps/oppo_gen": -54.3837890625, + "logps/oppo_real": -252.91123962402344, + "logps/real": -270.4813537597656, + "loss": -0.6919, + "loss/gen": 1.8102836608886719, + "loss/real": -2.287971258163452, + "rewards/accuracies": 0.875, + "rewards/generated": -36.493934631347656, + "rewards/margins": 18.923805236816406, + "rewards/real": -17.570131301879883, + "step": 31 + }, + { + "epoch": 0.13, + "grad_norm": 49.377771381874105, + "learning_rate": 3.666666666666666e-07, + "logits/generated": -2.703281879425049, + "logits/oppo_generated": -2.9263906478881836, + "logits/oppo_real": -2.9535346031188965, + "logits/real": -2.689378261566162, + "logps/generated": -125.86929321289062, + "logps/oppo_gen": -78.93435668945312, + "logps/oppo_real": -298.2490234375, + "logps/real": -317.0907287597656, + "loss": -0.7566, + "loss/gen": 1.4581267833709717, + "loss/real": -2.2758255004882812, + "rewards/accuracies": 1.0, + "rewards/generated": -46.9349365234375, + "rewards/margins": 28.093202590942383, + "rewards/real": -18.841733932495117, + "step": 32 + }, + { + "epoch": 0.14, + "grad_norm": 47.132596888492415, + "learning_rate": 3.8333333333333335e-07, + "logits/generated": -2.8561768531799316, + "logits/oppo_generated": -2.9521539211273193, + "logits/oppo_real": -3.0699048042297363, + "logits/real": -2.701744794845581, + "logps/generated": -182.20703125, + "logps/oppo_gen": -136.80690002441406, + "logps/oppo_real": -344.64990234375, + "logps/real": -365.87115478515625, + "loss": -0.8056, + "loss/gen": 1.5829627513885498, + "loss/real": -2.2397522926330566, + "rewards/accuracies": 1.0, + "rewards/generated": -45.400123596191406, + "rewards/margins": 24.178863525390625, + "rewards/real": -21.22126007080078, + "step": 33 + }, + { + "epoch": 0.14, + "grad_norm": 47.136771677116634, + "learning_rate": 4e-07, + "logits/generated": -2.74558162689209, + "logits/oppo_generated": -2.8447458744049072, + "logits/oppo_real": -2.998192548751831, + "logits/real": -2.603461742401123, + "logps/generated": -134.50888061523438, + "logps/oppo_gen": -79.24800109863281, + "logps/oppo_real": -401.9757385253906, + "logps/real": -427.4682922363281, + "loss": -0.995, + "loss/gen": 1.2431423664093018, + "loss/real": -2.120981454849243, + "rewards/accuracies": 1.0, + "rewards/generated": -55.260887145996094, + "rewards/margins": 29.768321990966797, + "rewards/real": -25.492568969726562, + "step": 34 + }, + { + "epoch": 0.15, + "grad_norm": 47.136771677116634, + "learning_rate": 4e-07, + "logits/generated": -2.696960926055908, + "logits/oppo_generated": -2.942030906677246, + "logits/oppo_real": -2.9536867141723633, + "logits/real": -2.6742172241210938, + "logps/generated": -125.61725616455078, + "logps/oppo_gen": -62.21235656738281, + "logps/oppo_real": -296.8402404785156, + "logps/real": -324.08892822265625, + "loss": -5.4743, + "loss/gen": 1.1838252544403076, + "loss/real": -2.137930154800415, + "rewards/accuracies": 0.875, + "rewards/generated": -63.40489959716797, + "rewards/margins": 36.15622329711914, + "rewards/real": -27.248676300048828, + "step": 35 + }, + { + "epoch": 0.15, + "grad_norm": 38.149193463480486, + "learning_rate": 4.1666666666666667e-07, + "logits/generated": -2.420623779296875, + "logits/oppo_generated": -2.792217493057251, + "logits/oppo_real": -2.680948257446289, + "logits/real": -2.556100845336914, + "logps/generated": -105.52731323242188, + "logps/oppo_gen": -49.044715881347656, + "logps/oppo_real": -183.3726348876953, + "logps/real": -208.43609619140625, + "loss": -0.9189, + "loss/gen": 1.2789992094039917, + "loss/real": -2.150240182876587, + "rewards/accuracies": 0.75, + "rewards/generated": -56.48259735107422, + "rewards/margins": 31.41913604736328, + "rewards/real": -25.063465118408203, + "step": 36 + }, + { + "epoch": 0.15, + "grad_norm": 40.49399399669891, + "learning_rate": 4.3333333333333335e-07, + "logits/generated": -2.5155656337738037, + "logits/oppo_generated": -2.5968940258026123, + "logits/oppo_real": -2.84472393989563, + "logits/real": -2.363577127456665, + "logps/generated": -156.7322235107422, + "logps/oppo_gen": -96.46727752685547, + "logps/oppo_real": -441.2087097167969, + "logps/real": -452.6773681640625, + "loss": -0.993, + "loss/gen": 1.218324899673462, + "loss/real": -2.478170394897461, + "rewards/accuracies": 1.0, + "rewards/generated": -60.26493835449219, + "rewards/margins": 48.7962532043457, + "rewards/real": -11.4686861038208, + "step": 37 + }, + { + "epoch": 0.16, + "grad_norm": 38.69477383912377, + "learning_rate": 4.5e-07, + "logits/generated": -2.845750331878662, + "logits/oppo_generated": -3.097993850708008, + "logits/oppo_real": -3.161780834197998, + "logits/real": -2.804795742034912, + "logps/generated": -169.83187866210938, + "logps/oppo_gen": -86.33152770996094, + "logps/oppo_real": -374.5130615234375, + "logps/real": -400.9438171386719, + "loss": -1.0833, + "loss/gen": 0.779202401638031, + "loss/real": -2.109189987182617, + "rewards/accuracies": 0.875, + "rewards/generated": -83.50035095214844, + "rewards/margins": 57.069610595703125, + "rewards/real": -26.430742263793945, + "step": 38 + }, + { + "epoch": 0.16, + "grad_norm": 39.95823930895698, + "learning_rate": 4.6666666666666666e-07, + "logits/generated": -2.4254915714263916, + "logits/oppo_generated": -2.648486614227295, + "logits/oppo_real": -2.7488012313842773, + "logits/real": -2.350640296936035, + "logps/generated": -155.58016967773438, + "logps/oppo_gen": -78.30477142333984, + "logps/oppo_real": -363.86407470703125, + "logps/real": -395.0137939453125, + "loss": -1.1548, + "loss/gen": 0.8230071067810059, + "loss/real": -2.0378403663635254, + "rewards/accuracies": 1.0, + "rewards/generated": -77.275390625, + "rewards/margins": 46.12569046020508, + "rewards/real": -31.149703979492188, + "step": 39 + }, + { + "epoch": 0.17, + "grad_norm": 40.89272509652924, + "learning_rate": 4.833333333333333e-07, + "logits/generated": -2.461397409439087, + "logits/oppo_generated": -2.864193916320801, + "logits/oppo_real": -2.7761850357055664, + "logits/real": -2.5565099716186523, + "logps/generated": -136.72689819335938, + "logps/oppo_gen": -60.6450309753418, + "logps/oppo_real": -320.1565856933594, + "logps/real": -337.87396240234375, + "loss": -1.1347, + "loss/gen": 0.9023051857948303, + "loss/real": -2.3106727600097656, + "rewards/accuracies": 1.0, + "rewards/generated": -76.08185577392578, + "rewards/margins": 58.36448669433594, + "rewards/real": -17.717369079589844, + "step": 40 + }, + { + "epoch": 0.17, + "grad_norm": 89.36429678043967, + "learning_rate": 5e-07, + "logits/generated": -2.6618571281433105, + "logits/oppo_generated": -2.812058210372925, + "logits/oppo_real": -2.982236862182617, + "logits/real": -2.515589714050293, + "logps/generated": -162.60000610351562, + "logps/oppo_gen": -90.06674194335938, + "logps/oppo_real": -176.9713592529297, + "logps/real": -211.77285766601562, + "loss": -1.1724, + "loss/gen": 0.8981304168701172, + "loss/real": -1.989371657371521, + "rewards/accuracies": 1.0, + "rewards/generated": -72.53326416015625, + "rewards/margins": 37.731773376464844, + "rewards/real": -34.80148696899414, + "step": 41 + }, + { + "epoch": 0.18, + "grad_norm": 89.36429678043967, + "learning_rate": 5e-07, + "logits/generated": -2.5393388271331787, + "logits/oppo_generated": -2.9253015518188477, + "logits/oppo_real": -2.9079301357269287, + "logits/real": -2.55344557762146, + "logps/generated": -130.3933868408203, + "logps/oppo_gen": -54.79414367675781, + "logps/oppo_real": -186.92176818847656, + "logps/real": -235.45858764648438, + "loss": -5.6809, + "loss/gen": 0.8535439372062683, + "loss/real": -1.694696307182312, + "rewards/accuracies": 1.0, + "rewards/generated": -75.5992431640625, + "rewards/margins": 27.062450408935547, + "rewards/real": -48.53679275512695, + "step": 42 + }, + { + "epoch": 0.18, + "grad_norm": 43.34401895870049, + "learning_rate": 4.996438746438746e-07, + "logits/generated": -2.4617252349853516, + "logits/oppo_generated": -2.9949498176574707, + "logits/oppo_real": -2.9107003211975098, + "logits/real": -2.6696996688842773, + "logps/generated": -172.76368713378906, + "logps/oppo_gen": -79.9820785522461, + "logps/oppo_real": -404.1100158691406, + "logps/real": -422.02642822265625, + "loss": -1.2059, + "loss/gen": 0.6165514588356018, + "loss/real": -2.3567748069763184, + "rewards/accuracies": 1.0, + "rewards/generated": -92.7816162109375, + "rewards/margins": 74.865234375, + "rewards/real": -17.916383743286133, + "step": 43 + }, + { + "epoch": 0.18, + "grad_norm": 49.41177428002358, + "learning_rate": 4.992877492877492e-07, + "logits/generated": -2.026392698287964, + "logits/oppo_generated": -2.4440221786499023, + "logits/oppo_real": -2.3998050689697266, + "logits/real": -2.089980125427246, + "logps/generated": -209.39190673828125, + "logps/oppo_gen": -93.22187805175781, + "logps/oppo_real": -290.8685302734375, + "logps/real": -320.3968811035156, + "loss": -1.3765, + "loss/gen": 0.5650486350059509, + "loss/real": -2.131740093231201, + "rewards/accuracies": 1.0, + "rewards/generated": -116.17002868652344, + "rewards/margins": 86.64169311523438, + "rewards/real": -29.528339385986328, + "step": 44 + }, + { + "epoch": 0.19, + "grad_norm": 59.38357051631053, + "learning_rate": 4.98931623931624e-07, + "logits/generated": -2.3456368446350098, + "logits/oppo_generated": -2.9232547283172607, + "logits/oppo_real": -2.7114880084991455, + "logits/real": -2.5829110145568848, + "logps/generated": -168.85809326171875, + "logps/oppo_gen": -64.50846862792969, + "logps/oppo_real": -239.8323974609375, + "logps/real": -297.2595520019531, + "loss": -1.4436, + "loss/gen": 0.5499280691146851, + "loss/real": -1.58909273147583, + "rewards/accuracies": 0.875, + "rewards/generated": -104.34961700439453, + "rewards/margins": 46.92247009277344, + "rewards/real": -57.42715072631836, + "step": 45 + }, + { + "epoch": 0.19, + "grad_norm": 44.12861838917575, + "learning_rate": 4.985754985754986e-07, + "logits/generated": -2.6069109439849854, + "logits/oppo_generated": -2.741456985473633, + "logits/oppo_real": -2.9938759803771973, + "logits/real": -2.428788185119629, + "logps/generated": -149.2159423828125, + "logps/oppo_gen": -58.174400329589844, + "logps/oppo_real": -258.21685791015625, + "logps/real": -301.842041015625, + "loss": -1.4547, + "loss/gen": 0.676410973072052, + "loss/real": -1.867649793624878, + "rewards/accuracies": 1.0, + "rewards/generated": -91.04153442382812, + "rewards/margins": 47.41633605957031, + "rewards/real": -43.62519836425781, + "step": 46 + }, + { + "epoch": 0.2, + "grad_norm": 44.34332055817426, + "learning_rate": 4.982193732193732e-07, + "logits/generated": -2.587238311767578, + "logits/oppo_generated": -2.814079761505127, + "logits/oppo_real": -2.964923620223999, + "logits/real": -2.480611801147461, + "logps/generated": -175.05799865722656, + "logps/oppo_gen": -78.5189208984375, + "logps/oppo_real": -288.56396484375, + "logps/real": -318.1793518066406, + "loss": -1.5609, + "loss/gen": 0.7453894019126892, + "loss/real": -2.1940207481384277, + "rewards/accuracies": 0.875, + "rewards/generated": -96.53907775878906, + "rewards/margins": 66.9237060546875, + "rewards/real": -29.615373611450195, + "step": 47 + }, + { + "epoch": 0.2, + "grad_norm": 41.77245636004139, + "learning_rate": 4.978632478632478e-07, + "logits/generated": -2.5828328132629395, + "logits/oppo_generated": -2.7121076583862305, + "logits/oppo_real": -2.932806968688965, + "logits/real": -2.3821425437927246, + "logps/generated": -170.45315551757812, + "logps/oppo_gen": -72.10917663574219, + "logps/oppo_real": -299.3392333984375, + "logps/real": -351.05755615234375, + "loss": -1.5561, + "loss/gen": 0.6000806093215942, + "loss/real": -1.664915680885315, + "rewards/accuracies": 0.875, + "rewards/generated": -98.34397888183594, + "rewards/margins": 46.625675201416016, + "rewards/real": -51.718299865722656, + "step": 48 + }, + { + "epoch": 0.21, + "grad_norm": 40.92458049952987, + "learning_rate": 4.975071225071225e-07, + "logits/generated": -2.7721643447875977, + "logits/oppo_generated": -2.814209461212158, + "logits/oppo_real": -3.157527208328247, + "logits/real": -2.545376777648926, + "logps/generated": -190.73538208007812, + "logps/oppo_gen": -80.24543762207031, + "logps/oppo_real": -294.9969482421875, + "logps/real": -325.6192626953125, + "loss": -1.6521, + "loss/gen": 0.5949017405509949, + "loss/real": -2.104870319366455, + "rewards/accuracies": 1.0, + "rewards/generated": -110.48993682861328, + "rewards/margins": 79.86763000488281, + "rewards/real": -30.622314453125, + "step": 49 + }, + { + "epoch": 0.21, + "grad_norm": 40.13348241970552, + "learning_rate": 4.971509971509972e-07, + "logits/generated": -2.4653735160827637, + "logits/oppo_generated": -2.9343652725219727, + "logits/oppo_real": -2.7617945671081543, + "logits/real": -2.6601219177246094, + "logps/generated": -198.1933135986328, + "logps/oppo_gen": -82.74765014648438, + "logps/oppo_real": -315.32562255859375, + "logps/real": -342.7396240234375, + "loss": -1.6584, + "loss/gen": 0.4857123792171478, + "loss/real": -2.1372337341308594, + "rewards/accuracies": 1.0, + "rewards/generated": -115.44567108154297, + "rewards/margins": 88.03167724609375, + "rewards/real": -27.413999557495117, + "step": 50 + }, + { + "epoch": 0.21, + "grad_norm": 601.4569550267084, + "learning_rate": 4.967948717948718e-07, + "logits/generated": -2.5435636043548584, + "logits/oppo_generated": -2.805569648742676, + "logits/oppo_real": -2.7846250534057617, + "logits/real": -2.5612943172454834, + "logps/generated": -126.34326934814453, + "logps/oppo_gen": -45.456573486328125, + "logps/oppo_real": -161.39598083496094, + "logps/real": -196.76950073242188, + "loss": -2.1364, + "loss/gen": 0.7522258758544922, + "loss/real": -2.0092098712921143, + "rewards/accuracies": 1.0, + "rewards/generated": -80.88670349121094, + "rewards/margins": 45.51318359375, + "rewards/real": -35.3735237121582, + "step": 51 + }, + { + "epoch": 0.22, + "grad_norm": 38.65978819953379, + "learning_rate": 4.964387464387464e-07, + "logits/generated": -2.464820384979248, + "logits/oppo_generated": -2.7444612979888916, + "logits/oppo_real": -2.7595162391662598, + "logits/real": -2.45442271232605, + "logps/generated": -139.13876342773438, + "logps/oppo_gen": -50.193504333496094, + "logps/oppo_real": -148.25294494628906, + "logps/real": -181.2758026123047, + "loss": -1.7596, + "loss/gen": 0.6654144525527954, + "loss/real": -2.03794002532959, + "rewards/accuracies": 1.0, + "rewards/generated": -88.94526672363281, + "rewards/margins": 55.92240524291992, + "rewards/real": -33.022857666015625, + "step": 52 + }, + { + "epoch": 0.22, + "grad_norm": 167.4533750406363, + "learning_rate": 4.96082621082621e-07, + "logits/generated": -2.297238349914551, + "logits/oppo_generated": -2.660369396209717, + "logits/oppo_real": -2.6082496643066406, + "logits/real": -2.3693835735321045, + "logps/generated": -138.73458862304688, + "logps/oppo_gen": -55.80210876464844, + "logps/oppo_real": -201.49038696289062, + "logps/real": -226.22634887695312, + "loss": -2.0472, + "loss/gen": 0.7817223072052002, + "loss/real": -2.228806257247925, + "rewards/accuracies": 0.875, + "rewards/generated": -82.93248748779297, + "rewards/margins": 58.19652557373047, + "rewards/real": -24.735958099365234, + "step": 53 + }, + { + "epoch": 0.23, + "grad_norm": 38.20762371262606, + "learning_rate": 4.957264957264958e-07, + "logits/generated": -2.6939735412597656, + "logits/oppo_generated": -2.746832847595215, + "logits/oppo_real": -2.973560333251953, + "logits/real": -2.453509569168091, + "logps/generated": -155.50794982910156, + "logps/oppo_gen": -77.28608703613281, + "logps/oppo_real": -547.3628540039062, + "logps/real": -561.0299072265625, + "loss": -1.7775, + "loss/gen": 0.9930198192596436, + "loss/real": -2.6198465824127197, + "rewards/accuracies": 0.875, + "rewards/generated": -78.22187042236328, + "rewards/margins": 64.55480194091797, + "rewards/real": -13.66706657409668, + "step": 54 + }, + { + "epoch": 0.23, + "grad_norm": 111.84459258553808, + "learning_rate": 4.953703703703703e-07, + "logits/generated": -2.3411145210266113, + "logits/oppo_generated": -2.664555072784424, + "logits/oppo_real": -2.6400251388549805, + "logits/real": -2.3643062114715576, + "logps/generated": -196.82240295410156, + "logps/oppo_gen": -78.57785034179688, + "logps/oppo_real": -398.628662109375, + "logps/real": -393.2767333984375, + "loss": -2.0424, + "loss/gen": 0.5210200548171997, + "loss/real": -2.969128131866455, + "rewards/accuracies": 1.0, + "rewards/generated": -118.24455261230469, + "rewards/margins": 123.59645080566406, + "rewards/real": 5.351901054382324, + "step": 55 + }, + { + "epoch": 0.23, + "grad_norm": 50.014668258578155, + "learning_rate": 4.95014245014245e-07, + "logits/generated": -2.841848373413086, + "logits/oppo_generated": -2.638930320739746, + "logits/oppo_real": -3.1015210151672363, + "logits/real": -2.4049315452575684, + "logps/generated": -181.6864471435547, + "logps/oppo_gen": -84.6130599975586, + "logps/oppo_real": -310.54534912109375, + "logps/real": -329.8880615234375, + "loss": -1.8582, + "loss/gen": 0.7178683876991272, + "loss/real": -2.3166608810424805, + "rewards/accuracies": 0.875, + "rewards/generated": -97.0733871459961, + "rewards/margins": 77.73066711425781, + "rewards/real": -19.342731475830078, + "step": 56 + }, + { + "epoch": 0.24, + "grad_norm": 578.0217340204432, + "learning_rate": 4.946581196581196e-07, + "logits/generated": -2.5697083473205566, + "logits/oppo_generated": -2.9305167198181152, + "logits/oppo_real": -2.7986156940460205, + "logits/real": -2.666802406311035, + "logps/generated": -182.54356384277344, + "logps/oppo_gen": -55.247596740722656, + "logps/oppo_real": -159.6094970703125, + "logps/real": -191.58706665039062, + "loss": -3.1972, + "loss/gen": 0.5683310031890869, + "loss/real": -2.0497186183929443, + "rewards/accuracies": 1.0, + "rewards/generated": -127.29595947265625, + "rewards/margins": 95.31836700439453, + "rewards/real": -31.977588653564453, + "step": 57 + }, + { + "epoch": 0.24, + "grad_norm": 51.64103394316142, + "learning_rate": 4.943019943019943e-07, + "logits/generated": -2.674006462097168, + "logits/oppo_generated": -2.733177900314331, + "logits/oppo_real": -3.0261659622192383, + "logits/real": -2.440023899078369, + "logps/generated": -159.27865600585938, + "logps/oppo_gen": -77.4105453491211, + "logps/oppo_real": -291.50042724609375, + "logps/real": -305.1040954589844, + "loss": -1.8105, + "loss/gen": 0.7389234900474548, + "loss/real": -2.533874988555908, + "rewards/accuracies": 0.875, + "rewards/generated": -81.86811065673828, + "rewards/margins": 68.26446533203125, + "rewards/real": -13.60364055633545, + "step": 58 + }, + { + "epoch": 0.25, + "grad_norm": 93.81772033276816, + "learning_rate": 4.93945868945869e-07, + "logits/generated": -2.253323554992676, + "logits/oppo_generated": -2.70068359375, + "logits/oppo_real": -2.622352361679077, + "logits/real": -2.379178047180176, + "logps/generated": -198.3895263671875, + "logps/oppo_gen": -66.53448486328125, + "logps/oppo_real": -142.07913208007812, + "logps/real": -186.49630737304688, + "loss": -2.138, + "loss/gen": 0.36330240964889526, + "loss/real": -1.9270637035369873, + "rewards/accuracies": 1.0, + "rewards/generated": -131.85504150390625, + "rewards/margins": 87.43788146972656, + "rewards/real": -44.41715621948242, + "step": 59 + }, + { + "epoch": 0.25, + "grad_norm": 39.40381974811817, + "learning_rate": 4.935897435897436e-07, + "logits/generated": -2.8230233192443848, + "logits/oppo_generated": -3.0608558654785156, + "logits/oppo_real": -3.0881457328796387, + "logits/real": -2.815178394317627, + "logps/generated": -176.8870849609375, + "logps/oppo_gen": -78.30126953125, + "logps/oppo_real": -296.7585144042969, + "logps/real": -305.8564453125, + "loss": -1.9511, + "loss/gen": 0.5859768390655518, + "loss/real": -2.5944645404815674, + "rewards/accuracies": 1.0, + "rewards/generated": -98.58580780029297, + "rewards/margins": 89.48786926269531, + "rewards/real": -9.097940444946289, + "step": 60 + }, + { + "epoch": 0.26, + "grad_norm": 37.537286150739504, + "learning_rate": 4.932336182336182e-07, + "logits/generated": -2.67462158203125, + "logits/oppo_generated": -2.904336929321289, + "logits/oppo_real": -3.0007967948913574, + "logits/real": -2.706181526184082, + "logps/generated": -194.5768585205078, + "logps/oppo_gen": -78.76142883300781, + "logps/oppo_real": -321.17315673828125, + "logps/real": -332.7289733886719, + "loss": -2.0148, + "loss/gen": 0.5784947276115417, + "loss/real": -2.6833224296569824, + "rewards/accuracies": 0.875, + "rewards/generated": -115.8154296875, + "rewards/margins": 104.25957489013672, + "rewards/real": -11.5558500289917, + "step": 61 + }, + { + "epoch": 0.26, + "grad_norm": 52.37389057595874, + "learning_rate": 4.928774928774928e-07, + "logits/generated": -2.8456006050109863, + "logits/oppo_generated": -3.0246148109436035, + "logits/oppo_real": -3.155604839324951, + "logits/real": -2.7388291358947754, + "logps/generated": -199.48080444335938, + "logps/oppo_gen": -99.78816986083984, + "logps/oppo_real": -357.6624755859375, + "logps/real": -361.3135070800781, + "loss": -2.1519, + "loss/gen": 0.5312547087669373, + "loss/real": -2.7395927906036377, + "rewards/accuracies": 1.0, + "rewards/generated": -99.692626953125, + "rewards/margins": 96.04158020019531, + "rewards/real": -3.65103816986084, + "step": 62 + }, + { + "epoch": 0.26, + "grad_norm": 42.730668543561166, + "learning_rate": 4.925213675213676e-07, + "logits/generated": -2.5994668006896973, + "logits/oppo_generated": -2.718918800354004, + "logits/oppo_real": -2.8950438499450684, + "logits/real": -2.5016493797302246, + "logps/generated": -158.23098754882812, + "logps/oppo_gen": -73.73533630371094, + "logps/oppo_real": -276.2977294921875, + "logps/real": -278.3821105957031, + "loss": -2.1712, + "loss/gen": 0.7339967489242554, + "loss/real": -2.8307507038116455, + "rewards/accuracies": 0.875, + "rewards/generated": -84.49565124511719, + "rewards/margins": 82.41130065917969, + "rewards/real": -2.084348678588867, + "step": 63 + }, + { + "epoch": 0.27, + "grad_norm": 42.60172940894316, + "learning_rate": 4.921652421652421e-07, + "logits/generated": -2.6288089752197266, + "logits/oppo_generated": -2.7741386890411377, + "logits/oppo_real": -2.8905487060546875, + "logits/real": -2.5671515464782715, + "logps/generated": -164.08560180664062, + "logps/oppo_gen": -70.42605590820312, + "logps/oppo_real": -291.8798522949219, + "logps/real": -327.316650390625, + "loss": -2.0118, + "loss/gen": 0.6031943559646606, + "loss/real": -2.0373241901397705, + "rewards/accuracies": 1.0, + "rewards/generated": -93.65955352783203, + "rewards/margins": 58.222755432128906, + "rewards/real": -35.436798095703125, + "step": 64 + }, + { + "epoch": 0.27, + "grad_norm": 525.7316627805482, + "learning_rate": 4.918091168091168e-07, + "logits/generated": -2.4973931312561035, + "logits/oppo_generated": -2.731257438659668, + "logits/oppo_real": -2.804780960083008, + "logits/real": -2.5444960594177246, + "logps/generated": -230.39053344726562, + "logps/oppo_gen": -143.67832946777344, + "logps/oppo_real": -309.55450439453125, + "logps/real": -315.0069274902344, + "loss": -2.927, + "loss/gen": 0.7850175499916077, + "loss/real": -2.6631596088409424, + "rewards/accuracies": 1.0, + "rewards/generated": -86.71220397949219, + "rewards/margins": 81.25978088378906, + "rewards/real": -5.452421188354492, + "step": 65 + }, + { + "epoch": 0.28, + "grad_norm": 80.44494186631624, + "learning_rate": 4.914529914529914e-07, + "logits/generated": -2.6201300621032715, + "logits/oppo_generated": -2.710496664047241, + "logits/oppo_real": -2.980191707611084, + "logits/real": -2.4632492065429688, + "logps/generated": -194.9330291748047, + "logps/oppo_gen": -71.51214599609375, + "logps/oppo_real": -284.34765625, + "logps/real": -298.09637451171875, + "loss": -2.3734, + "loss/gen": 0.33017057180404663, + "loss/real": -2.5317859649658203, + "rewards/accuracies": 1.0, + "rewards/generated": -123.42086791992188, + "rewards/margins": 109.67212677001953, + "rewards/real": -13.74874210357666, + "step": 66 + }, + { + "epoch": 0.28, + "grad_norm": 45.73295767790172, + "learning_rate": 4.910968660968661e-07, + "logits/generated": -2.7911667823791504, + "logits/oppo_generated": -3.0934062004089355, + "logits/oppo_real": -3.077010154724121, + "logits/real": -2.8539376258850098, + "logps/generated": -222.52537536621094, + "logps/oppo_gen": -109.1805419921875, + "logps/oppo_real": -348.23834228515625, + "logps/real": -337.4581298828125, + "loss": -2.0979, + "loss/gen": 0.41786307096481323, + "loss/real": -3.0975918769836426, + "rewards/accuracies": 1.0, + "rewards/generated": -113.3448257446289, + "rewards/margins": 124.12504577636719, + "rewards/real": 10.780221939086914, + "step": 67 + }, + { + "epoch": 0.28, + "grad_norm": 79.69396419859851, + "learning_rate": 4.907407407407407e-07, + "logits/generated": -2.657637596130371, + "logits/oppo_generated": -2.838265895843506, + "logits/oppo_real": -3.01387357711792, + "logits/real": -2.6080217361450195, + "logps/generated": -174.41976928710938, + "logps/oppo_gen": -75.5096206665039, + "logps/oppo_real": -242.11915588378906, + "logps/real": -260.3476867675781, + "loss": -2.2245, + "loss/gen": 0.5530567765235901, + "loss/real": -2.540099620819092, + "rewards/accuracies": 0.875, + "rewards/generated": -98.91015625, + "rewards/margins": 80.68161010742188, + "rewards/real": -18.228544235229492, + "step": 68 + }, + { + "epoch": 0.29, + "grad_norm": 57.15850101499557, + "learning_rate": 4.903846153846153e-07, + "logits/generated": -2.718892812728882, + "logits/oppo_generated": -2.786154270172119, + "logits/oppo_real": -2.980445146560669, + "logits/real": -2.5882253646850586, + "logps/generated": -203.54293823242188, + "logps/oppo_gen": -78.40753173828125, + "logps/oppo_real": -188.29739379882812, + "logps/real": -220.8904571533203, + "loss": -2.1241, + "loss/gen": 0.3356163501739502, + "loss/real": -2.0496373176574707, + "rewards/accuracies": 1.0, + "rewards/generated": -125.13542175292969, + "rewards/margins": 92.5423583984375, + "rewards/real": -32.59306335449219, + "step": 69 + }, + { + "epoch": 0.29, + "grad_norm": 55.560476534442856, + "learning_rate": 4.9002849002849e-07, + "logits/generated": -2.484227180480957, + "logits/oppo_generated": -2.8353500366210938, + "logits/oppo_real": -2.788581371307373, + "logits/real": -2.584005832672119, + "logps/generated": -167.95159912109375, + "logps/oppo_gen": -74.27359008789062, + "logps/oppo_real": -262.4258728027344, + "logps/real": -275.72314453125, + "loss": -2.2186, + "loss/gen": 0.6950039863586426, + "loss/real": -2.613152027130127, + "rewards/accuracies": 0.875, + "rewards/generated": -93.67799377441406, + "rewards/margins": 80.38072967529297, + "rewards/real": -13.297256469726562, + "step": 70 + }, + { + "epoch": 0.3, + "grad_norm": 40.88330591021765, + "learning_rate": 4.896723646723647e-07, + "logits/generated": -2.44921612739563, + "logits/oppo_generated": -2.8188014030456543, + "logits/oppo_real": -2.757133960723877, + "logits/real": -2.499187469482422, + "logps/generated": -161.24481201171875, + "logps/oppo_gen": -55.317054748535156, + "logps/oppo_real": -178.10824584960938, + "logps/real": -189.52215576171875, + "loss": -2.1209, + "loss/gen": 0.4848253130912781, + "loss/real": -2.4801671504974365, + "rewards/accuracies": 1.0, + "rewards/generated": -105.9277572631836, + "rewards/margins": 94.51385498046875, + "rewards/real": -11.413912773132324, + "step": 71 + }, + { + "epoch": 0.3, + "grad_norm": 35.69595968854091, + "learning_rate": 4.893162393162393e-07, + "logits/generated": -2.509648323059082, + "logits/oppo_generated": -2.865746259689331, + "logits/oppo_real": -2.85042142868042, + "logits/real": -2.612628936767578, + "logps/generated": -207.73446655273438, + "logps/oppo_gen": -101.81581115722656, + "logps/oppo_real": -463.47314453125, + "logps/real": -449.06451416015625, + "loss": -2.2045, + "loss/gen": 0.5114428997039795, + "loss/real": -3.246914863586426, + "rewards/accuracies": 1.0, + "rewards/generated": -105.91865539550781, + "rewards/margins": 120.32732391357422, + "rewards/real": 14.408672332763672, + "step": 72 + }, + { + "epoch": 0.31, + "grad_norm": 46.60751808654372, + "learning_rate": 4.889601139601139e-07, + "logits/generated": -2.513535499572754, + "logits/oppo_generated": -2.9923882484436035, + "logits/oppo_real": -2.813816547393799, + "logits/real": -2.6687417030334473, + "logps/generated": -200.91436767578125, + "logps/oppo_gen": -78.51251220703125, + "logps/oppo_real": -286.4658508300781, + "logps/real": -272.64630126953125, + "loss": -2.3923, + "loss/gen": 0.3351864218711853, + "loss/real": -3.2229790687561035, + "rewards/accuracies": 1.0, + "rewards/generated": -122.40186309814453, + "rewards/margins": 136.22140502929688, + "rewards/real": 13.81955337524414, + "step": 73 + }, + { + "epoch": 0.31, + "grad_norm": 46.60751808654372, + "learning_rate": 4.889601139601139e-07, + "logits/generated": -2.667757987976074, + "logits/oppo_generated": -2.7725887298583984, + "logits/oppo_real": -3.063380002975464, + "logits/real": -2.553708076477051, + "logps/generated": -177.4560546875, + "logps/oppo_gen": -79.40229034423828, + "logps/oppo_real": -383.419677734375, + "logps/real": -384.32568359375, + "loss": -22602.7559, + "loss/gen": 0.5979279279708862, + "loss/real": -2.8606982231140137, + "rewards/accuracies": 0.875, + "rewards/generated": -98.05377960205078, + "rewards/margins": 97.14777374267578, + "rewards/real": -0.9059967994689941, + "step": 74 + }, + { + "epoch": 0.31, + "grad_norm": 41.5718210882534, + "learning_rate": 4.886039886039886e-07, + "logits/generated": -2.7659826278686523, + "logits/oppo_generated": -2.8321666717529297, + "logits/oppo_real": -3.1668171882629395, + "logits/real": -2.5931761264801025, + "logps/generated": -241.8350067138672, + "logps/oppo_gen": -99.83964538574219, + "logps/oppo_real": -322.6613464355469, + "logps/real": -311.7099914550781, + "loss": -2.2896, + "loss/gen": 0.5622150897979736, + "loss/real": -3.6310153007507324, + "rewards/accuracies": 1.0, + "rewards/generated": -141.99537658691406, + "rewards/margins": 152.9467315673828, + "rewards/real": 10.951353073120117, + "step": 75 + }, + { + "epoch": 0.32, + "grad_norm": 44.716500642240554, + "learning_rate": 4.882478632478633e-07, + "logits/generated": -2.7758758068084717, + "logits/oppo_generated": -3.000812530517578, + "logits/oppo_real": -3.1619484424591064, + "logits/real": -2.7301864624023438, + "logps/generated": -200.3653564453125, + "logps/oppo_gen": -83.82888793945312, + "logps/oppo_real": -441.3746337890625, + "logps/real": -431.2779541015625, + "loss": -2.3134, + "loss/gen": 0.3644047975540161, + "loss/real": -3.1670141220092773, + "rewards/accuracies": 1.0, + "rewards/generated": -116.53646850585938, + "rewards/margins": 126.63313293457031, + "rewards/real": 10.096664428710938, + "step": 76 + }, + { + "epoch": 0.32, + "grad_norm": 44.716500642240554, + "learning_rate": 4.882478632478633e-07, + "logits/generated": -2.254303455352783, + "logits/oppo_generated": -2.4111037254333496, + "logits/oppo_real": -2.622360944747925, + "logits/real": -2.1454672813415527, + "logps/generated": -177.43157958984375, + "logps/oppo_gen": -94.29784393310547, + "logps/oppo_real": -307.8828125, + "logps/real": -284.0107727050781, + "loss": -17.4644, + "loss/gen": 1.3658581972122192, + "loss/real": -3.9946789741516113, + "rewards/accuracies": 1.0, + "rewards/generated": -83.13372802734375, + "rewards/margins": 107.00576782226562, + "rewards/real": 23.872041702270508, + "step": 77 + }, + { + "epoch": 0.33, + "grad_norm": 44.716500642240554, + "learning_rate": 4.882478632478633e-07, + "logits/generated": -2.561386823654175, + "logits/oppo_generated": -2.7816574573516846, + "logits/oppo_real": -2.923349380493164, + "logits/real": -2.5139307975769043, + "logps/generated": -170.64508056640625, + "logps/oppo_gen": -70.22672271728516, + "logps/oppo_real": -286.0644836425781, + "logps/real": -304.5027160644531, + "loss": -51.313, + "loss/gen": 0.5637646317481995, + "loss/real": -2.394735813140869, + "rewards/accuracies": 0.875, + "rewards/generated": -100.41835021972656, + "rewards/margins": 81.98014831542969, + "rewards/real": -18.438209533691406, + "step": 78 + }, + { + "epoch": 0.33, + "grad_norm": 57.8592273155015, + "learning_rate": 4.878917378917379e-07, + "logits/generated": -2.341658115386963, + "logits/oppo_generated": -2.624129056930542, + "logits/oppo_real": -2.6314826011657715, + "logits/real": -2.3068737983703613, + "logps/generated": -137.337646484375, + "logps/oppo_gen": -48.185340881347656, + "logps/oppo_real": -148.66656494140625, + "logps/real": -167.26583862304688, + "loss": -2.4266, + "loss/gen": 0.7307255268096924, + "loss/real": -2.38840389251709, + "rewards/accuracies": 1.0, + "rewards/generated": -89.15231323242188, + "rewards/margins": 70.55303955078125, + "rewards/real": -18.599275588989258, + "step": 79 + }, + { + "epoch": 0.33, + "grad_norm": 61.02295290503402, + "learning_rate": 4.875356125356125e-07, + "logits/generated": -2.563333034515381, + "logits/oppo_generated": -2.668670177459717, + "logits/oppo_real": -2.9500231742858887, + "logits/real": -2.375744581222534, + "logps/generated": -193.91883850097656, + "logps/oppo_gen": -76.79248809814453, + "logps/oppo_real": -287.1414794921875, + "logps/real": -309.30792236328125, + "loss": -2.2982, + "loss/gen": 0.386036217212677, + "loss/real": -2.4096016883850098, + "rewards/accuracies": 1.0, + "rewards/generated": -117.1263427734375, + "rewards/margins": 94.95994567871094, + "rewards/real": -22.166412353515625, + "step": 80 + }, + { + "epoch": 0.34, + "grad_norm": 169.99358903352797, + "learning_rate": 4.871794871794871e-07, + "logits/generated": -2.613680601119995, + "logits/oppo_generated": -2.8624868392944336, + "logits/oppo_real": -3.0077338218688965, + "logits/real": -2.5658488273620605, + "logps/generated": -205.80078125, + "logps/oppo_gen": -103.01863861083984, + "logps/oppo_real": -484.10565185546875, + "logps/real": -483.44097900390625, + "loss": -3.0697, + "loss/gen": 0.7189458608627319, + "loss/real": -2.95969820022583, + "rewards/accuracies": 0.875, + "rewards/generated": -102.78215026855469, + "rewards/margins": 103.44681549072266, + "rewards/real": 0.6646575927734375, + "step": 81 + }, + { + "epoch": 0.34, + "grad_norm": 43.23124085134354, + "learning_rate": 4.868233618233618e-07, + "logits/generated": -2.5590624809265137, + "logits/oppo_generated": -2.976921796798706, + "logits/oppo_real": -3.0094780921936035, + "logits/real": -2.6058220863342285, + "logps/generated": -179.38499450683594, + "logps/oppo_gen": -66.51390075683594, + "logps/oppo_real": -174.39071655273438, + "logps/real": -176.55557250976562, + "loss": -2.4127, + "loss/gen": 0.44477635622024536, + "loss/real": -2.9173386096954346, + "rewards/accuracies": 1.0, + "rewards/generated": -112.87110137939453, + "rewards/margins": 110.70625305175781, + "rewards/real": -2.1648406982421875, + "step": 82 + }, + { + "epoch": 0.35, + "grad_norm": 58.48061786622663, + "learning_rate": 4.864672364672365e-07, + "logits/generated": -2.5132930278778076, + "logits/oppo_generated": -3.01529598236084, + "logits/oppo_real": -2.9185380935668945, + "logits/real": -2.643099308013916, + "logps/generated": -246.02755737304688, + "logps/oppo_gen": -86.220458984375, + "logps/oppo_real": -329.8023376464844, + "logps/real": -310.6354064941406, + "loss": -2.4253, + "loss/gen": 0.5145424008369446, + "loss/real": -3.408470392227173, + "rewards/accuracies": 1.0, + "rewards/generated": -159.80709838867188, + "rewards/margins": 178.97406005859375, + "rewards/real": 19.166940689086914, + "step": 83 + }, + { + "epoch": 0.35, + "grad_norm": 54.441314178233476, + "learning_rate": 4.861111111111111e-07, + "logits/generated": -2.3987717628479004, + "logits/oppo_generated": -2.864108085632324, + "logits/oppo_real": -2.8596436977386475, + "logits/real": -2.5680923461914062, + "logps/generated": -177.2393798828125, + "logps/oppo_gen": -79.35113525390625, + "logps/oppo_real": -357.43438720703125, + "logps/real": -336.9925537109375, + "loss": -2.5489, + "loss/gen": 0.5846430659294128, + "loss/real": -3.432420492172241, + "rewards/accuracies": 1.0, + "rewards/generated": -97.88824462890625, + "rewards/margins": 118.33008575439453, + "rewards/real": 20.441844940185547, + "step": 84 + }, + { + "epoch": 0.36, + "grad_norm": 66.89113415188145, + "learning_rate": 4.857549857549857e-07, + "logits/generated": -2.436213493347168, + "logits/oppo_generated": -2.635812282562256, + "logits/oppo_real": -2.784547805786133, + "logits/real": -2.3119587898254395, + "logps/generated": -188.01727294921875, + "logps/oppo_gen": -87.48421478271484, + "logps/oppo_real": -250.10626220703125, + "logps/real": -244.0000457763672, + "loss": -2.5429, + "loss/gen": 0.7033488154411316, + "loss/real": -2.972754955291748, + "rewards/accuracies": 1.0, + "rewards/generated": -100.53305053710938, + "rewards/margins": 106.6392593383789, + "rewards/real": 6.106204986572266, + "step": 85 + }, + { + "epoch": 0.36, + "grad_norm": 60.396710964360466, + "learning_rate": 4.853988603988603e-07, + "logits/generated": -2.538017749786377, + "logits/oppo_generated": -2.9845218658447266, + "logits/oppo_real": -3.016307830810547, + "logits/real": -2.62971830368042, + "logps/generated": -155.26116943359375, + "logps/oppo_gen": -55.523197174072266, + "logps/oppo_real": -291.81378173828125, + "logps/real": -305.18359375, + "loss": -2.5841, + "loss/gen": 0.6104675531387329, + "loss/real": -2.7806365489959717, + "rewards/accuracies": 0.875, + "rewards/generated": -99.73796081542969, + "rewards/margins": 86.36811828613281, + "rewards/real": -13.369840621948242, + "step": 86 + }, + { + "epoch": 0.36, + "grad_norm": 53.259132139474445, + "learning_rate": 4.850427350427351e-07, + "logits/generated": -2.45882511138916, + "logits/oppo_generated": -2.8317785263061523, + "logits/oppo_real": -2.849785327911377, + "logits/real": -2.4766674041748047, + "logps/generated": -163.40484619140625, + "logps/oppo_gen": -65.48351287841797, + "logps/oppo_real": -259.8980712890625, + "logps/real": -273.74273681640625, + "loss": -2.6208, + "loss/gen": 0.5979644656181335, + "loss/real": -2.596888542175293, + "rewards/accuracies": 1.0, + "rewards/generated": -97.92133331298828, + "rewards/margins": 84.07666015625, + "rewards/real": -13.844667434692383, + "step": 87 + }, + { + "epoch": 0.37, + "grad_norm": 1690.1221109730504, + "learning_rate": 4.846866096866097e-07, + "logits/generated": -2.439664602279663, + "logits/oppo_generated": -2.9616637229919434, + "logits/oppo_real": -2.8549320697784424, + "logits/real": -2.6093478202819824, + "logps/generated": -177.17694091796875, + "logps/oppo_gen": -66.1073226928711, + "logps/oppo_real": -297.0393981933594, + "logps/real": -275.50140380859375, + "loss": -7.118, + "loss/gen": 0.45209312438964844, + "loss/real": -3.4258365631103516, + "rewards/accuracies": 1.0, + "rewards/generated": -111.06962585449219, + "rewards/margins": 132.60760498046875, + "rewards/real": 21.537994384765625, + "step": 88 + }, + { + "epoch": 0.37, + "grad_norm": 74.6983974790457, + "learning_rate": 4.843304843304843e-07, + "logits/generated": -2.5593514442443848, + "logits/oppo_generated": -2.944060802459717, + "logits/oppo_real": -2.977362632751465, + "logits/real": -2.5549235343933105, + "logps/generated": -160.49493408203125, + "logps/oppo_gen": -49.032493591308594, + "logps/oppo_real": -197.13412475585938, + "logps/real": -235.08087158203125, + "loss": -2.3729, + "loss/gen": 0.49238741397857666, + "loss/real": -2.1076858043670654, + "rewards/accuracies": 0.875, + "rewards/generated": -111.46244812011719, + "rewards/margins": 73.51570892333984, + "rewards/real": -37.94673538208008, + "step": 89 + }, + { + "epoch": 0.38, + "grad_norm": 70.76835216372322, + "learning_rate": 4.839743589743589e-07, + "logits/generated": -2.497036933898926, + "logits/oppo_generated": -2.9935152530670166, + "logits/oppo_real": -2.782620906829834, + "logits/real": -2.689803123474121, + "logps/generated": -172.08953857421875, + "logps/oppo_gen": -79.41259002685547, + "logps/oppo_real": -304.58465576171875, + "logps/real": -297.86407470703125, + "loss": -2.6668, + "loss/gen": 0.9129126071929932, + "loss/real": -3.119077205657959, + "rewards/accuracies": 0.875, + "rewards/generated": -92.67694854736328, + "rewards/margins": 99.39753723144531, + "rewards/real": 6.720589637756348, + "step": 90 + }, + { + "epoch": 0.38, + "grad_norm": 68.6735548002741, + "learning_rate": 4.836182336182337e-07, + "logits/generated": -2.508333683013916, + "logits/oppo_generated": -3.0348973274230957, + "logits/oppo_real": -2.8550362586975098, + "logits/real": -2.699089527130127, + "logps/generated": -235.1026611328125, + "logps/oppo_gen": -147.11734008789062, + "logps/oppo_real": -324.0049743652344, + "logps/real": -307.71380615234375, + "loss": -2.9712, + "loss/gen": 0.950553297996521, + "loss/real": -3.3368782997131348, + "rewards/accuracies": 1.0, + "rewards/generated": -87.98532104492188, + "rewards/margins": 104.27648162841797, + "rewards/real": 16.291156768798828, + "step": 91 + }, + { + "epoch": 0.38, + "grad_norm": 70.78148799628862, + "learning_rate": 4.832621082621082e-07, + "logits/generated": -2.5086488723754883, + "logits/oppo_generated": -2.8708338737487793, + "logits/oppo_real": -2.8143606185913086, + "logits/real": -2.600031852722168, + "logps/generated": -205.0748291015625, + "logps/oppo_gen": -81.77798461914062, + "logps/oppo_real": -330.5220031738281, + "logps/real": -311.5235900878906, + "loss": -2.4969, + "loss/gen": 0.43894362449645996, + "loss/real": -3.3736114501953125, + "rewards/accuracies": 1.0, + "rewards/generated": -123.29684448242188, + "rewards/margins": 142.29525756835938, + "rewards/real": 18.99840545654297, + "step": 92 + }, + { + "epoch": 0.39, + "grad_norm": 76.29366705574257, + "learning_rate": 4.829059829059829e-07, + "logits/generated": -2.3690929412841797, + "logits/oppo_generated": -2.7298922538757324, + "logits/oppo_real": -2.698655605316162, + "logits/real": -2.4298644065856934, + "logps/generated": -173.87249755859375, + "logps/oppo_gen": -74.60616302490234, + "logps/oppo_real": -251.41427612304688, + "logps/real": -237.06617736816406, + "loss": -2.5674, + "loss/gen": 0.6722112894058228, + "loss/real": -3.512993335723877, + "rewards/accuracies": 0.875, + "rewards/generated": -99.26634216308594, + "rewards/margins": 113.61441802978516, + "rewards/real": 14.348082542419434, + "step": 93 + }, + { + "epoch": 0.39, + "grad_norm": 249.33590702353723, + "learning_rate": 4.825498575498575e-07, + "logits/generated": -2.570150375366211, + "logits/oppo_generated": -2.9584808349609375, + "logits/oppo_real": -2.8358330726623535, + "logits/real": -2.728276491165161, + "logps/generated": -160.3553466796875, + "logps/oppo_gen": -83.23335266113281, + "logps/oppo_real": -311.66064453125, + "logps/real": -289.5158996582031, + "loss": -3.7493, + "loss/gen": 1.0953956842422485, + "loss/real": -3.4564929008483887, + "rewards/accuracies": 1.0, + "rewards/generated": -77.12197875976562, + "rewards/margins": 99.26671600341797, + "rewards/real": 22.14473533630371, + "step": 94 + }, + { + "epoch": 0.4, + "grad_norm": 1622.4536202924262, + "learning_rate": 4.821937321937321e-07, + "logits/generated": -2.4929990768432617, + "logits/oppo_generated": -2.83894681930542, + "logits/oppo_real": -2.731696605682373, + "logits/real": -2.6017203330993652, + "logps/generated": -202.414306640625, + "logps/oppo_gen": -103.72628021240234, + "logps/oppo_real": -218.9561767578125, + "logps/real": -203.55921936035156, + "loss": -7.3232, + "loss/gen": 0.5733932256698608, + "loss/real": -3.2266201972961426, + "rewards/accuracies": 1.0, + "rewards/generated": -98.68803405761719, + "rewards/margins": 114.0849838256836, + "rewards/real": 15.396947860717773, + "step": 95 + }, + { + "epoch": 0.4, + "grad_norm": 66.96757975529091, + "learning_rate": 4.818376068376069e-07, + "logits/generated": -2.6456146240234375, + "logits/oppo_generated": -2.7633142471313477, + "logits/oppo_real": -2.9560418128967285, + "logits/real": -2.4849910736083984, + "logps/generated": -207.35745239257812, + "logps/oppo_gen": -74.91079711914062, + "logps/oppo_real": -299.2713623046875, + "logps/real": -269.4769287109375, + "loss": -2.824, + "loss/gen": 0.35913562774658203, + "loss/real": -3.8368678092956543, + "rewards/accuracies": 1.0, + "rewards/generated": -132.4466552734375, + "rewards/margins": 162.2410888671875, + "rewards/real": 29.7944393157959, + "step": 96 + }, + { + "epoch": 0.41, + "grad_norm": 702.0471182548932, + "learning_rate": 4.814814814814814e-07, + "logits/generated": -2.798750400543213, + "logits/oppo_generated": -2.8308515548706055, + "logits/oppo_real": -3.085522174835205, + "logits/real": -2.5982208251953125, + "logps/generated": -237.33787536621094, + "logps/oppo_gen": -134.01483154296875, + "logps/oppo_real": -442.37945556640625, + "logps/real": -406.73846435546875, + "loss": -1.4584, + "loss/gen": 0.540956437587738, + "loss/real": -3.9525904655456543, + "rewards/accuracies": 1.0, + "rewards/generated": -103.32305908203125, + "rewards/margins": 138.96401977539062, + "rewards/real": 35.640968322753906, + "step": 97 + }, + { + "epoch": 0.41, + "grad_norm": 54.58323990131952, + "learning_rate": 4.811253561253561e-07, + "logits/generated": -2.40437650680542, + "logits/oppo_generated": -2.8044867515563965, + "logits/oppo_real": -2.8060150146484375, + "logits/real": -2.5216751098632812, + "logps/generated": -167.4996337890625, + "logps/oppo_gen": -51.423309326171875, + "logps/oppo_real": -222.54879760742188, + "logps/real": -225.21975708007812, + "loss": -2.7112, + "loss/gen": 0.3818909823894501, + "loss/real": -2.9921202659606934, + "rewards/accuracies": 1.0, + "rewards/generated": -116.07633972167969, + "rewards/margins": 113.40538024902344, + "rewards/real": -2.670961380004883, + "step": 98 + }, + { + "epoch": 0.41, + "grad_norm": 45.42335040173446, + "learning_rate": 4.807692307692307e-07, + "logits/generated": -2.6010522842407227, + "logits/oppo_generated": -2.932793140411377, + "logits/oppo_real": -2.9959638118743896, + "logits/real": -2.675575017929077, + "logps/generated": -186.99935913085938, + "logps/oppo_gen": -68.20332336425781, + "logps/oppo_real": -376.541015625, + "logps/real": -360.2162170410156, + "loss": -2.6531, + "loss/gen": 0.3653205931186676, + "loss/real": -3.4390110969543457, + "rewards/accuracies": 1.0, + "rewards/generated": -118.7960205078125, + "rewards/margins": 135.12083435058594, + "rewards/real": 16.324806213378906, + "step": 99 + }, + { + "epoch": 0.42, + "grad_norm": 183.07126984797478, + "learning_rate": 4.804131054131054e-07, + "logits/generated": -2.4377760887145996, + "logits/oppo_generated": -2.780601739883423, + "logits/oppo_real": -2.8726038932800293, + "logits/real": -2.5523815155029297, + "logps/generated": -195.5144500732422, + "logps/oppo_gen": -75.83106994628906, + "logps/oppo_real": -327.609619140625, + "logps/real": -326.1234130859375, + "loss": -2.9695, + "loss/gen": 0.4366985857486725, + "loss/real": -2.959474563598633, + "rewards/accuracies": 1.0, + "rewards/generated": -119.68338012695312, + "rewards/margins": 121.16956329345703, + "rewards/real": 1.4861793518066406, + "step": 100 + }, + { + "epoch": 0.42, + "grad_norm": 549.6681100900797, + "learning_rate": 4.8005698005698e-07, + "logits/generated": -2.454486846923828, + "logits/oppo_generated": -2.91953706741333, + "logits/oppo_real": -2.820370674133301, + "logits/real": -2.6601805686950684, + "logps/generated": -184.12876892089844, + "logps/oppo_gen": -75.91517639160156, + "logps/oppo_real": -531.0400390625, + "logps/real": -524.9949340820312, + "loss": -4.2352, + "loss/gen": 0.5441170334815979, + "loss/real": -3.007982015609741, + "rewards/accuracies": 0.875, + "rewards/generated": -108.21359252929688, + "rewards/margins": 114.25873565673828, + "rewards/real": 6.045146942138672, + "step": 101 + }, + { + "epoch": 0.43, + "grad_norm": 60.524447459141456, + "learning_rate": 4.797008547008547e-07, + "logits/generated": -2.492274761199951, + "logits/oppo_generated": -2.927794933319092, + "logits/oppo_real": -2.8259315490722656, + "logits/real": -2.6615185737609863, + "logps/generated": -186.5205078125, + "logps/oppo_gen": -75.32722473144531, + "logps/oppo_real": -334.3116149902344, + "logps/real": -322.076904296875, + "loss": -2.7939, + "loss/gen": 0.4799632132053375, + "loss/real": -3.1898889541625977, + "rewards/accuracies": 1.0, + "rewards/generated": -111.19327545166016, + "rewards/margins": 123.42797088623047, + "rewards/real": 12.23469066619873, + "step": 102 + }, + { + "epoch": 0.43, + "grad_norm": 72.22195132995404, + "learning_rate": 4.793447293447293e-07, + "logits/generated": -2.681981325149536, + "logits/oppo_generated": -2.798323154449463, + "logits/oppo_real": -3.0827927589416504, + "logits/real": -2.6118640899658203, + "logps/generated": -193.87255859375, + "logps/oppo_gen": -85.98326110839844, + "logps/oppo_real": -484.7052001953125, + "logps/real": -468.4195556640625, + "loss": -2.7665, + "loss/gen": 0.48763740062713623, + "loss/real": -3.367074489593506, + "rewards/accuracies": 1.0, + "rewards/generated": -107.88929748535156, + "rewards/margins": 124.17497253417969, + "rewards/real": 16.285675048828125, + "step": 103 + }, + { + "epoch": 0.44, + "grad_norm": 120.03039979521854, + "learning_rate": 4.78988603988604e-07, + "logits/generated": -2.369338035583496, + "logits/oppo_generated": -2.820817232131958, + "logits/oppo_real": -2.7580766677856445, + "logits/real": -2.554074287414551, + "logps/generated": -224.9398651123047, + "logps/oppo_gen": -98.39456176757812, + "logps/oppo_real": -435.86871337890625, + "logps/real": -420.9836730957031, + "loss": -2.9534, + "loss/gen": 0.3016844391822815, + "loss/real": -3.4657280445098877, + "rewards/accuracies": 1.0, + "rewards/generated": -126.5452880859375, + "rewards/margins": 141.43032836914062, + "rewards/real": 14.885029792785645, + "step": 104 + }, + { + "epoch": 0.44, + "grad_norm": 1841.6121334309241, + "learning_rate": 4.786324786324786e-07, + "logits/generated": -2.6123902797698975, + "logits/oppo_generated": -2.991581439971924, + "logits/oppo_real": -3.002182960510254, + "logits/real": -2.710818290710449, + "logps/generated": -204.66802978515625, + "logps/oppo_gen": -81.12940216064453, + "logps/oppo_real": -296.61138916015625, + "logps/real": -273.9120788574219, + "loss": -8.6558, + "loss/gen": 0.40045416355133057, + "loss/real": -3.615565776824951, + "rewards/accuracies": 1.0, + "rewards/generated": -123.53861236572266, + "rewards/margins": 146.23793029785156, + "rewards/real": 22.69931411743164, + "step": 105 + }, + { + "epoch": 0.44, + "grad_norm": 50.14175949168393, + "learning_rate": 4.782763532763532e-07, + "logits/generated": -2.58475399017334, + "logits/oppo_generated": -2.8433456420898438, + "logits/oppo_real": -3.012195110321045, + "logits/real": -2.584439992904663, + "logps/generated": -161.61810302734375, + "logps/oppo_gen": -63.396881103515625, + "logps/oppo_real": -288.55780029296875, + "logps/real": -261.4425048828125, + "loss": -2.8566, + "loss/gen": 0.7563031315803528, + "loss/real": -3.606753349304199, + "rewards/accuracies": 1.0, + "rewards/generated": -98.22122192382812, + "rewards/margins": 125.3365249633789, + "rewards/real": 27.115306854248047, + "step": 106 + }, + { + "epoch": 0.45, + "grad_norm": 365.902095853522, + "learning_rate": 4.779202279202279e-07, + "logits/generated": -2.642536163330078, + "logits/oppo_generated": -2.75607967376709, + "logits/oppo_real": -3.044626235961914, + "logits/real": -2.5140504837036133, + "logps/generated": -215.69821166992188, + "logps/oppo_gen": -89.79308319091797, + "logps/oppo_real": -237.51071166992188, + "logps/real": -235.16732788085938, + "loss": -4.2838, + "loss/gen": 0.3768947720527649, + "loss/real": -2.9775023460388184, + "rewards/accuracies": 1.0, + "rewards/generated": -125.90511322021484, + "rewards/margins": 128.24850463867188, + "rewards/real": 2.3433871269226074, + "step": 107 + }, + { + "epoch": 0.45, + "grad_norm": 100.40770890630111, + "learning_rate": 4.775641025641026e-07, + "logits/generated": -2.698265790939331, + "logits/oppo_generated": -2.9334537982940674, + "logits/oppo_real": -3.0197911262512207, + "logits/real": -2.6614885330200195, + "logps/generated": -193.49476623535156, + "logps/oppo_gen": -86.25882720947266, + "logps/oppo_real": -171.73361206054688, + "logps/real": -154.22259521484375, + "loss": -3.0162, + "loss/gen": 0.695202112197876, + "loss/real": -3.3781354427337646, + "rewards/accuracies": 1.0, + "rewards/generated": -107.23593139648438, + "rewards/margins": 124.74696350097656, + "rewards/real": 17.511028289794922, + "step": 108 + }, + { + "epoch": 0.46, + "grad_norm": 95.44328629315685, + "learning_rate": 4.772079772079772e-07, + "logits/generated": -2.5307321548461914, + "logits/oppo_generated": -2.8885016441345215, + "logits/oppo_real": -2.9670629501342773, + "logits/real": -2.6184444427490234, + "logps/generated": -137.50279235839844, + "logps/oppo_gen": -52.36747741699219, + "logps/oppo_real": -234.88699340820312, + "logps/real": -211.22215270996094, + "loss": -2.7515, + "loss/gen": 1.0063905715942383, + "loss/real": -3.66544771194458, + "rewards/accuracies": 0.875, + "rewards/generated": -85.13531494140625, + "rewards/margins": 108.8001708984375, + "rewards/real": 23.664859771728516, + "step": 109 + }, + { + "epoch": 0.46, + "grad_norm": 92.31076504801594, + "learning_rate": 4.768518518518518e-07, + "logits/generated": -2.4487879276275635, + "logits/oppo_generated": -2.902094841003418, + "logits/oppo_real": -2.738150119781494, + "logits/real": -2.5988502502441406, + "logps/generated": -183.7650146484375, + "logps/oppo_gen": -71.77503967285156, + "logps/oppo_real": -226.59805297851562, + "logps/real": -210.24148559570312, + "loss": -2.6027, + "loss/gen": 0.4320296347141266, + "loss/real": -3.250072956085205, + "rewards/accuracies": 1.0, + "rewards/generated": -111.989990234375, + "rewards/margins": 128.34658813476562, + "rewards/real": 16.356592178344727, + "step": 110 + }, + { + "epoch": 0.46, + "grad_norm": 92.31076504801594, + "learning_rate": 4.768518518518518e-07, + "logits/generated": -2.4111056327819824, + "logits/oppo_generated": -2.78233003616333, + "logits/oppo_real": -2.810633420944214, + "logits/real": -2.52742075920105, + "logps/generated": -161.51727294921875, + "logps/oppo_gen": -51.96064758300781, + "logps/oppo_real": -160.8415069580078, + "logps/real": -171.3201446533203, + "loss": -128.9964, + "loss/gen": 0.43095916509628296, + "loss/real": -2.8278121948242188, + "rewards/accuracies": 1.0, + "rewards/generated": -109.556640625, + "rewards/margins": 99.07798767089844, + "rewards/real": -10.478641510009766, + "step": 111 + }, + { + "epoch": 0.47, + "grad_norm": 68.96497993207313, + "learning_rate": 4.764957264957264e-07, + "logits/generated": -2.3004653453826904, + "logits/oppo_generated": -2.7906460762023926, + "logits/oppo_real": -2.7454147338867188, + "logits/real": -2.5157923698425293, + "logps/generated": -148.928955078125, + "logps/oppo_gen": -67.77021789550781, + "logps/oppo_real": -355.9058837890625, + "logps/real": -322.17315673828125, + "loss": -2.7744, + "loss/gen": 0.9168766736984253, + "loss/real": -3.8560690879821777, + "rewards/accuracies": 1.0, + "rewards/generated": -81.15873718261719, + "rewards/margins": 114.89146423339844, + "rewards/real": 33.73272705078125, + "step": 112 + }, + { + "epoch": 0.47, + "grad_norm": 82.86539584244439, + "learning_rate": 4.761396011396011e-07, + "logits/generated": -2.309685707092285, + "logits/oppo_generated": -2.784420967102051, + "logits/oppo_real": -2.58797550201416, + "logits/real": -2.521721363067627, + "logps/generated": -174.80889892578125, + "logps/oppo_gen": -53.4489631652832, + "logps/oppo_real": -213.77337646484375, + "logps/real": -204.11801147460938, + "loss": -2.8077, + "loss/gen": 0.35567396879196167, + "loss/real": -3.218747138977051, + "rewards/accuracies": 1.0, + "rewards/generated": -121.35994720458984, + "rewards/margins": 131.01528930664062, + "rewards/real": 9.655345916748047, + "step": 113 + }, + { + "epoch": 0.48, + "grad_norm": 61.933964171274795, + "learning_rate": 4.7578347578347577e-07, + "logits/generated": -2.5905487537384033, + "logits/oppo_generated": -2.9693868160247803, + "logits/oppo_real": -2.897064208984375, + "logits/real": -2.695655345916748, + "logps/generated": -178.83404541015625, + "logps/oppo_gen": -65.07535552978516, + "logps/oppo_real": -380.3414306640625, + "logps/real": -379.77105712890625, + "loss": -2.6882, + "loss/gen": 0.38962453603744507, + "loss/real": -3.060286521911621, + "rewards/accuracies": 1.0, + "rewards/generated": -113.75869750976562, + "rewards/margins": 114.32907104492188, + "rewards/real": 0.5703763961791992, + "step": 114 + }, + { + "epoch": 0.48, + "grad_norm": 66.47038447097135, + "learning_rate": 4.754273504273504e-07, + "logits/generated": -2.6704883575439453, + "logits/oppo_generated": -2.8074076175689697, + "logits/oppo_real": -2.9744620323181152, + "logits/real": -2.5615124702453613, + "logps/generated": -175.31643676757812, + "logps/oppo_gen": -81.67523193359375, + "logps/oppo_real": -332.10321044921875, + "logps/real": -320.36962890625, + "loss": -2.8096, + "loss/gen": 1.0026687383651733, + "loss/real": -3.2720324993133545, + "rewards/accuracies": 0.875, + "rewards/generated": -93.64120483398438, + "rewards/margins": 105.37479400634766, + "rewards/real": 11.733586311340332, + "step": 115 + }, + { + "epoch": 0.49, + "grad_norm": 54.16138220416485, + "learning_rate": 4.7507122507122507e-07, + "logits/generated": -2.586947441101074, + "logits/oppo_generated": -2.8780970573425293, + "logits/oppo_real": -2.880333185195923, + "logits/real": -2.6156821250915527, + "logps/generated": -207.31790161132812, + "logps/oppo_gen": -83.72149658203125, + "logps/oppo_real": -272.17291259765625, + "logps/real": -258.2806701660156, + "loss": -2.7218, + "loss/gen": 0.3455054759979248, + "loss/real": -3.568074941635132, + "rewards/accuracies": 1.0, + "rewards/generated": -123.59640502929688, + "rewards/margins": 137.48866271972656, + "rewards/real": 13.892251968383789, + "step": 116 + }, + { + "epoch": 0.49, + "grad_norm": 64.72151128826876, + "learning_rate": 4.747150997150997e-07, + "logits/generated": -2.5787789821624756, + "logits/oppo_generated": -2.8689210414886475, + "logits/oppo_real": -3.036574602127075, + "logits/real": -2.5574660301208496, + "logps/generated": -197.68472290039062, + "logps/oppo_gen": -61.806739807128906, + "logps/oppo_real": -213.864013671875, + "logps/real": -206.0244140625, + "loss": -2.8435, + "loss/gen": 0.28257864713668823, + "loss/real": -3.154269218444824, + "rewards/accuracies": 1.0, + "rewards/generated": -135.8779754638672, + "rewards/margins": 143.7175750732422, + "rewards/real": 7.839590072631836, + "step": 117 + }, + { + "epoch": 0.49, + "grad_norm": 63.413190000311644, + "learning_rate": 4.743589743589743e-07, + "logits/generated": -2.537674903869629, + "logits/oppo_generated": -2.847443103790283, + "logits/oppo_real": -2.9110074043273926, + "logits/real": -2.5497055053710938, + "logps/generated": -195.18397521972656, + "logps/oppo_gen": -68.70259857177734, + "logps/oppo_real": -252.70947265625, + "logps/real": -234.55947875976562, + "loss": -2.7751, + "loss/gen": 0.33247071504592896, + "loss/real": -3.345210075378418, + "rewards/accuracies": 1.0, + "rewards/generated": -126.48136901855469, + "rewards/margins": 144.63134765625, + "rewards/real": 18.149982452392578, + "step": 118 + }, + { + "epoch": 0.5, + "grad_norm": 67.1754822100761, + "learning_rate": 4.74002849002849e-07, + "logits/generated": -2.6043078899383545, + "logits/oppo_generated": -2.850525140762329, + "logits/oppo_real": -2.9623799324035645, + "logits/real": -2.5423567295074463, + "logps/generated": -186.6025390625, + "logps/oppo_gen": -70.65492248535156, + "logps/oppo_real": -241.07968139648438, + "logps/real": -243.4227294921875, + "loss": -2.7129, + "loss/gen": 0.38259631395339966, + "loss/real": -2.913571357727051, + "rewards/accuracies": 0.875, + "rewards/generated": -115.94760131835938, + "rewards/margins": 113.60454559326172, + "rewards/real": -2.3430585861206055, + "step": 119 + }, + { + "epoch": 0.5, + "grad_norm": 64.89191358881898, + "learning_rate": 4.7364672364672366e-07, + "logits/generated": -2.326094150543213, + "logits/oppo_generated": -2.760641574859619, + "logits/oppo_real": -2.835960865020752, + "logits/real": -2.4331917762756348, + "logps/generated": -192.00738525390625, + "logps/oppo_gen": -77.80702209472656, + "logps/oppo_real": -309.97265625, + "logps/real": -309.2704162597656, + "loss": -2.821, + "loss/gen": 0.539390504360199, + "loss/real": -3.1048460006713867, + "rewards/accuracies": 1.0, + "rewards/generated": -114.20036315917969, + "rewards/margins": 114.902587890625, + "rewards/real": 0.7022085189819336, + "step": 120 + }, + { + "epoch": 0.51, + "grad_norm": 118.88704263396411, + "learning_rate": 4.7329059829059823e-07, + "logits/generated": -2.453880786895752, + "logits/oppo_generated": -2.762300491333008, + "logits/oppo_real": -2.91391658782959, + "logits/real": -2.4560084342956543, + "logps/generated": -197.6976318359375, + "logps/oppo_gen": -79.30331420898438, + "logps/oppo_real": -206.95407104492188, + "logps/real": -221.3215789794922, + "loss": -3.2051, + "loss/gen": 0.37958478927612305, + "loss/real": -2.6625490188598633, + "rewards/accuracies": 1.0, + "rewards/generated": -118.39431762695312, + "rewards/margins": 104.02679443359375, + "rewards/real": -14.367520332336426, + "step": 121 + }, + { + "epoch": 0.51, + "grad_norm": 118.88704263396411, + "learning_rate": 4.7329059829059823e-07, + "logits/generated": -2.224625825881958, + "logits/oppo_generated": -2.8723740577697754, + "logits/oppo_real": -2.730229139328003, + "logits/real": -2.5184946060180664, + "logps/generated": -221.04774475097656, + "logps/oppo_gen": -68.4917984008789, + "logps/oppo_real": -205.74790954589844, + "logps/real": -212.26702880859375, + "loss": -316.7941, + "loss/gen": 0.3251597583293915, + "loss/real": -2.7914090156555176, + "rewards/accuracies": 1.0, + "rewards/generated": -152.55593872070312, + "rewards/margins": 146.0368194580078, + "rewards/real": -6.51912784576416, + "step": 122 + }, + { + "epoch": 0.51, + "grad_norm": 1343.0752491949213, + "learning_rate": 4.729344729344729e-07, + "logits/generated": -2.4904122352600098, + "logits/oppo_generated": -2.833265781402588, + "logits/oppo_real": -2.8581643104553223, + "logits/real": -2.5306711196899414, + "logps/generated": -204.9275360107422, + "logps/oppo_gen": -72.44357299804688, + "logps/oppo_real": -294.85699462890625, + "logps/real": -290.6130065917969, + "loss": -4.5958, + "loss/gen": 0.28193220496177673, + "loss/real": -3.1163246631622314, + "rewards/accuracies": 1.0, + "rewards/generated": -132.48397827148438, + "rewards/margins": 136.72792053222656, + "rewards/real": 4.243948936462402, + "step": 123 + }, + { + "epoch": 0.52, + "grad_norm": 664.7720944052932, + "learning_rate": 4.725783475783476e-07, + "logits/generated": -2.349301815032959, + "logits/oppo_generated": -2.8131227493286133, + "logits/oppo_real": -2.815453052520752, + "logits/real": -2.5294294357299805, + "logps/generated": -248.4262237548828, + "logps/oppo_gen": -118.46414184570312, + "logps/oppo_real": -350.6376953125, + "logps/real": -330.9083557128906, + "loss": -5.5005, + "loss/gen": 0.29418256878852844, + "loss/real": -3.4984450340270996, + "rewards/accuracies": 1.0, + "rewards/generated": -129.9620819091797, + "rewards/margins": 149.69143676757812, + "rewards/real": 19.729347229003906, + "step": 124 + }, + { + "epoch": 0.52, + "grad_norm": 40.89130414749342, + "learning_rate": 4.722222222222222e-07, + "logits/generated": -2.5400872230529785, + "logits/oppo_generated": -2.868478775024414, + "logits/oppo_real": -2.87443208694458, + "logits/real": -2.5824098587036133, + "logps/generated": -186.3829345703125, + "logps/oppo_gen": -72.4801025390625, + "logps/oppo_real": -315.2503356933594, + "logps/real": -298.22100830078125, + "loss": -2.7752, + "loss/gen": 0.39562442898750305, + "loss/real": -3.2849442958831787, + "rewards/accuracies": 1.0, + "rewards/generated": -113.90283203125, + "rewards/margins": 130.93215942382812, + "rewards/real": 17.02932357788086, + "step": 125 + }, + { + "epoch": 0.53, + "grad_norm": 94.72539979956538, + "learning_rate": 4.7186609686609683e-07, + "logits/generated": -2.2407426834106445, + "logits/oppo_generated": -2.5010550022125244, + "logits/oppo_real": -2.635188102722168, + "logits/real": -2.2052998542785645, + "logps/generated": -200.30931091308594, + "logps/oppo_gen": -80.23007202148438, + "logps/oppo_real": -347.019287109375, + "logps/real": -327.673828125, + "loss": -2.9739, + "loss/gen": 0.40606454014778137, + "loss/real": -4.382803916931152, + "rewards/accuracies": 1.0, + "rewards/generated": -120.0792465209961, + "rewards/margins": 139.4246826171875, + "rewards/real": 19.345449447631836, + "step": 126 + }, + { + "epoch": 0.53, + "grad_norm": 60.16715201536986, + "learning_rate": 4.715099715099715e-07, + "logits/generated": -2.1603076457977295, + "logits/oppo_generated": -2.6126623153686523, + "logits/oppo_real": -2.6145567893981934, + "logits/real": -2.2864603996276855, + "logps/generated": -194.54769897460938, + "logps/oppo_gen": -73.5291748046875, + "logps/oppo_real": -317.5265808105469, + "logps/real": -296.9075012207031, + "loss": -2.8726, + "loss/gen": 0.3832360804080963, + "loss/real": -3.425445795059204, + "rewards/accuracies": 1.0, + "rewards/generated": -121.01852416992188, + "rewards/margins": 141.63758850097656, + "rewards/real": 20.619068145751953, + "step": 127 + }, + { + "epoch": 0.54, + "grad_norm": 99.14064346926541, + "learning_rate": 4.711538461538461e-07, + "logits/generated": -2.6689248085021973, + "logits/oppo_generated": -3.0297465324401855, + "logits/oppo_real": -3.101362705230713, + "logits/real": -2.7913174629211426, + "logps/generated": -225.02159118652344, + "logps/oppo_gen": -120.2161865234375, + "logps/oppo_real": -532.0965576171875, + "logps/real": -496.509033203125, + "loss": -2.6576, + "loss/gen": 0.5110812187194824, + "loss/real": -3.9389498233795166, + "rewards/accuracies": 1.0, + "rewards/generated": -104.80540466308594, + "rewards/margins": 140.39291381835938, + "rewards/real": 35.58751678466797, + "step": 128 + }, + { + "epoch": 0.54, + "grad_norm": 70.39239557984051, + "learning_rate": 4.707977207977208e-07, + "logits/generated": -2.5018911361694336, + "logits/oppo_generated": -2.4462087154388428, + "logits/oppo_real": -2.882254123687744, + "logits/real": -2.1281325817108154, + "logps/generated": -162.50857543945312, + "logps/oppo_gen": -74.71348571777344, + "logps/oppo_real": -324.086669921875, + "logps/real": -299.44586181640625, + "loss": -2.7935, + "loss/gen": 0.9241290092468262, + "loss/real": -3.661430597305298, + "rewards/accuracies": 1.0, + "rewards/generated": -87.79508972167969, + "rewards/margins": 112.4359130859375, + "rewards/real": 24.640825271606445, + "step": 129 + }, + { + "epoch": 0.54, + "grad_norm": 19246.478742876578, + "learning_rate": 4.7044159544159537e-07, + "logits/generated": -2.6820337772369385, + "logits/oppo_generated": -2.9427778720855713, + "logits/oppo_real": -2.9869794845581055, + "logits/real": -2.646888494491577, + "logps/generated": -157.2148895263672, + "logps/oppo_gen": -57.98387908935547, + "logps/oppo_real": -299.8202209472656, + "logps/real": -309.00274658203125, + "loss": -58.9365, + "loss/gen": 0.7235317230224609, + "loss/real": -2.887176990509033, + "rewards/accuracies": 1.0, + "rewards/generated": -99.23101806640625, + "rewards/margins": 90.04846954345703, + "rewards/real": -9.182544708251953, + "step": 130 + }, + { + "epoch": 0.55, + "grad_norm": 132.2171139995689, + "learning_rate": 4.7008547008547005e-07, + "logits/generated": -2.256178855895996, + "logits/oppo_generated": -2.462200880050659, + "logits/oppo_real": -2.7382378578186035, + "logits/real": -2.1479060649871826, + "logps/generated": -191.7882537841797, + "logps/oppo_gen": -109.31198120117188, + "logps/oppo_real": -333.22021484375, + "logps/real": -315.614013671875, + "loss": -3.274, + "loss/gen": 1.3347947597503662, + "loss/real": -3.325333595275879, + "rewards/accuracies": 1.0, + "rewards/generated": -82.47628021240234, + "rewards/margins": 100.08245849609375, + "rewards/real": 17.606182098388672, + "step": 131 + }, + { + "epoch": 0.55, + "grad_norm": 74.27982684269834, + "learning_rate": 4.697293447293447e-07, + "logits/generated": -2.584226131439209, + "logits/oppo_generated": -2.9814329147338867, + "logits/oppo_real": -2.8366198539733887, + "logits/real": -2.7369401454925537, + "logps/generated": -231.37564086914062, + "logps/oppo_gen": -117.97686767578125, + "logps/oppo_real": -333.4208679199219, + "logps/real": -292.571044921875, + "loss": -2.9361, + "loss/gen": 0.7850100994110107, + "loss/real": -4.297349452972412, + "rewards/accuracies": 1.0, + "rewards/generated": -113.39878845214844, + "rewards/margins": 154.24859619140625, + "rewards/real": 40.84980773925781, + "step": 132 + }, + { + "epoch": 0.56, + "grad_norm": 62.146904934749166, + "learning_rate": 4.6937321937321934e-07, + "logits/generated": -2.1917073726654053, + "logits/oppo_generated": -2.6781723499298096, + "logits/oppo_real": -2.516916513442993, + "logits/real": -2.4100053310394287, + "logps/generated": -191.9784393310547, + "logps/oppo_gen": -60.19814682006836, + "logps/oppo_real": -262.58551025390625, + "logps/real": -243.27468872070312, + "loss": -2.8222, + "loss/gen": 0.7389452457427979, + "loss/real": -3.3546838760375977, + "rewards/accuracies": 1.0, + "rewards/generated": -131.78028869628906, + "rewards/margins": 151.09109497070312, + "rewards/real": 19.31081771850586, + "step": 133 + }, + { + "epoch": 0.56, + "grad_norm": 62.146904934749166, + "learning_rate": 4.6937321937321934e-07, + "logits/generated": -2.438559055328369, + "logits/oppo_generated": -2.8787498474121094, + "logits/oppo_real": -2.805894374847412, + "logits/real": -2.6161952018737793, + "logps/generated": -236.9315185546875, + "logps/oppo_gen": -124.28936767578125, + "logps/oppo_real": -606.1627807617188, + "logps/real": -575.891845703125, + "loss": -30094.6035, + "loss/gen": 0.43543320894241333, + "loss/real": -4.007241725921631, + "rewards/accuracies": 1.0, + "rewards/generated": -112.64216613769531, + "rewards/margins": 142.91311645507812, + "rewards/real": 30.27095603942871, + "step": 134 + }, + { + "epoch": 0.56, + "grad_norm": 78.11314597568548, + "learning_rate": 4.69017094017094e-07, + "logits/generated": -2.526062488555908, + "logits/oppo_generated": -2.765538454055786, + "logits/oppo_real": -2.839543342590332, + "logits/real": -2.4912123680114746, + "logps/generated": -192.7285919189453, + "logps/oppo_gen": -83.72669982910156, + "logps/oppo_real": -361.6756591796875, + "logps/real": -346.46002197265625, + "loss": -3.0693, + "loss/gen": 0.47312071919441223, + "loss/real": -3.3567910194396973, + "rewards/accuracies": 1.0, + "rewards/generated": -109.00190734863281, + "rewards/margins": 124.21757507324219, + "rewards/real": 15.215664863586426, + "step": 135 + }, + { + "epoch": 0.57, + "grad_norm": 54.23171224736731, + "learning_rate": 4.6866096866096864e-07, + "logits/generated": -2.607853889465332, + "logits/oppo_generated": -2.7416014671325684, + "logits/oppo_real": -2.8941569328308105, + "logits/real": -2.4866786003112793, + "logps/generated": -161.49307250976562, + "logps/oppo_gen": -51.659912109375, + "logps/oppo_real": -267.5926513671875, + "logps/real": -247.1339111328125, + "loss": -2.9068, + "loss/gen": 0.421144962310791, + "loss/real": -3.4416115283966064, + "rewards/accuracies": 1.0, + "rewards/generated": -109.8331527709961, + "rewards/margins": 130.2919158935547, + "rewards/real": 20.458759307861328, + "step": 136 + }, + { + "epoch": 0.57, + "grad_norm": 72.46861187459885, + "learning_rate": 4.6830484330484326e-07, + "logits/generated": -2.2220163345336914, + "logits/oppo_generated": -2.609920024871826, + "logits/oppo_real": -2.5399818420410156, + "logits/real": -2.291043758392334, + "logps/generated": -210.66543579101562, + "logps/oppo_gen": -81.96345520019531, + "logps/oppo_real": -258.99554443359375, + "logps/real": -252.50823974609375, + "loss": -3.0398, + "loss/gen": 0.3264577388763428, + "loss/real": -3.0477218627929688, + "rewards/accuracies": 1.0, + "rewards/generated": -128.70199584960938, + "rewards/margins": 135.1892852783203, + "rewards/real": 6.4872941970825195, + "step": 137 + }, + { + "epoch": 0.58, + "grad_norm": 72.46861187459885, + "learning_rate": 4.6830484330484326e-07, + "logits/generated": -2.443568706512451, + "logits/oppo_generated": -2.89731502532959, + "logits/oppo_real": -2.861166000366211, + "logits/real": -2.563661575317383, + "logps/generated": -180.6520538330078, + "logps/oppo_gen": -61.10588073730469, + "logps/oppo_real": -297.8720703125, + "logps/real": -281.30902099609375, + "loss": -231011.7969, + "loss/gen": 0.3417486846446991, + "loss/real": -3.499724864959717, + "rewards/accuracies": 1.0, + "rewards/generated": -119.54617309570312, + "rewards/margins": 136.10922241210938, + "rewards/real": 16.563053131103516, + "step": 138 + }, + { + "epoch": 0.58, + "grad_norm": 68.7764082802926, + "learning_rate": 4.6794871794871794e-07, + "logits/generated": -2.4255595207214355, + "logits/oppo_generated": -2.8648695945739746, + "logits/oppo_real": -2.711393356323242, + "logits/real": -2.6028270721435547, + "logps/generated": -223.78445434570312, + "logps/oppo_gen": -111.59371948242188, + "logps/oppo_real": -521.255859375, + "logps/real": -493.6855773925781, + "loss": -2.7501, + "loss/gen": 0.418493390083313, + "loss/real": -3.638298749923706, + "rewards/accuracies": 1.0, + "rewards/generated": -112.19073486328125, + "rewards/margins": 139.76104736328125, + "rewards/real": 27.570310592651367, + "step": 139 + }, + { + "epoch": 0.59, + "grad_norm": 60.205945382287624, + "learning_rate": 4.675925925925926e-07, + "logits/generated": -2.525608539581299, + "logits/oppo_generated": -2.8064088821411133, + "logits/oppo_real": -2.845989227294922, + "logits/real": -2.5157408714294434, + "logps/generated": -162.0449676513672, + "logps/oppo_gen": -52.78784942626953, + "logps/oppo_real": -172.55088806152344, + "logps/real": -161.2114715576172, + "loss": -2.904, + "loss/gen": 0.4695492088794708, + "loss/real": -3.171067237854004, + "rewards/accuracies": 1.0, + "rewards/generated": -109.25712585449219, + "rewards/margins": 120.59654235839844, + "rewards/real": 11.339418411254883, + "step": 140 + }, + { + "epoch": 0.59, + "grad_norm": 70.03484065984587, + "learning_rate": 4.672364672364672e-07, + "logits/generated": -2.5758299827575684, + "logits/oppo_generated": -3.0264251232147217, + "logits/oppo_real": -2.836057186126709, + "logits/real": -2.738698959350586, + "logps/generated": -218.2009735107422, + "logps/oppo_gen": -74.337158203125, + "logps/oppo_real": -371.032470703125, + "logps/real": -321.6204833984375, + "loss": -2.9986, + "loss/gen": 0.24696138501167297, + "loss/real": -4.774725437164307, + "rewards/accuracies": 1.0, + "rewards/generated": -143.8638153076172, + "rewards/margins": 193.27581787109375, + "rewards/real": 49.41199493408203, + "step": 141 + }, + { + "epoch": 0.59, + "grad_norm": 7817.082320149891, + "learning_rate": 4.6688034188034186e-07, + "logits/generated": -2.6040773391723633, + "logits/oppo_generated": -2.876476764678955, + "logits/oppo_real": -2.912707805633545, + "logits/real": -2.6422886848449707, + "logps/generated": -201.4759063720703, + "logps/oppo_gen": -90.53692626953125, + "logps/oppo_real": -383.74615478515625, + "logps/real": -350.41131591796875, + "loss": -19.0115, + "loss/gen": 0.4651464819908142, + "loss/real": -3.848228931427002, + "rewards/accuracies": 1.0, + "rewards/generated": -110.93898010253906, + "rewards/margins": 144.27383422851562, + "rewards/real": 33.33485794067383, + "step": 142 + }, + { + "epoch": 0.6, + "grad_norm": 105.7367382346363, + "learning_rate": 4.6652421652421653e-07, + "logits/generated": -2.674943447113037, + "logits/oppo_generated": -2.9819746017456055, + "logits/oppo_real": -3.1959123611450195, + "logits/real": -2.714082717895508, + "logps/generated": -279.74652099609375, + "logps/oppo_gen": -152.70217895507812, + "logps/oppo_real": -483.54266357421875, + "logps/real": -443.05084228515625, + "loss": -3.1497, + "loss/gen": 0.3712007403373718, + "loss/real": -4.19202184677124, + "rewards/accuracies": 1.0, + "rewards/generated": -127.04434204101562, + "rewards/margins": 167.5361328125, + "rewards/real": 40.49180603027344, + "step": 143 + }, + { + "epoch": 0.6, + "grad_norm": 320.13193081134534, + "learning_rate": 4.6616809116809116e-07, + "logits/generated": -2.6575989723205566, + "logits/oppo_generated": -2.7378830909729004, + "logits/oppo_real": -3.110536813735962, + "logits/real": -2.483811378479004, + "logps/generated": -208.11447143554688, + "logps/oppo_gen": -86.0918960571289, + "logps/oppo_real": -447.7939147949219, + "logps/real": -449.82757568359375, + "loss": -3.1025, + "loss/gen": 0.3562849164009094, + "loss/real": -3.004971981048584, + "rewards/accuracies": 1.0, + "rewards/generated": -122.0225830078125, + "rewards/margins": 119.98893737792969, + "rewards/real": -2.033646583557129, + "step": 144 + }, + { + "epoch": 0.61, + "grad_norm": 56.81756199946017, + "learning_rate": 4.658119658119658e-07, + "logits/generated": -2.8450493812561035, + "logits/oppo_generated": -2.7491419315338135, + "logits/oppo_real": -3.191051483154297, + "logits/real": -2.4600586891174316, + "logps/generated": -215.31295776367188, + "logps/oppo_gen": -96.26548767089844, + "logps/oppo_real": -305.7531433105469, + "logps/real": -280.7630615234375, + "loss": -2.8775, + "loss/gen": 0.5062470436096191, + "loss/real": -3.553849220275879, + "rewards/accuracies": 1.0, + "rewards/generated": -119.04747009277344, + "rewards/margins": 144.03756713867188, + "rewards/real": 24.990097045898438, + "step": 145 + }, + { + "epoch": 0.61, + "grad_norm": 56.194760202520214, + "learning_rate": 4.654558404558404e-07, + "logits/generated": -2.451647996902466, + "logits/oppo_generated": -2.8662476539611816, + "logits/oppo_real": -2.7619881629943848, + "logits/real": -2.580970287322998, + "logps/generated": -190.9722900390625, + "logps/oppo_gen": -76.39656066894531, + "logps/oppo_real": -342.36138916015625, + "logps/real": -320.16766357421875, + "loss": -2.8042, + "loss/gen": 0.40042293071746826, + "loss/real": -3.5132551193237305, + "rewards/accuracies": 1.0, + "rewards/generated": -114.57573699951172, + "rewards/margins": 136.7694854736328, + "rewards/real": 22.193754196166992, + "step": 146 + }, + { + "epoch": 0.62, + "grad_norm": 54.320377107864644, + "learning_rate": 4.650997150997151e-07, + "logits/generated": -2.6203999519348145, + "logits/oppo_generated": -2.973456859588623, + "logits/oppo_real": -2.9541869163513184, + "logits/real": -2.606893539428711, + "logps/generated": -158.5798797607422, + "logps/oppo_gen": -58.52758026123047, + "logps/oppo_real": -196.6337127685547, + "logps/real": -194.17990112304688, + "loss": -3.0855, + "loss/gen": 0.5960197448730469, + "loss/real": -3.0248513221740723, + "rewards/accuracies": 1.0, + "rewards/generated": -100.05229949951172, + "rewards/margins": 102.50611877441406, + "rewards/real": 2.453828811645508, + "step": 147 + }, + { + "epoch": 0.62, + "grad_norm": 75.8143516622595, + "learning_rate": 4.6474358974358975e-07, + "logits/generated": -2.4562883377075195, + "logits/oppo_generated": -2.9579458236694336, + "logits/oppo_real": -2.8345115184783936, + "logits/real": -2.6457347869873047, + "logps/generated": -204.59548950195312, + "logps/oppo_gen": -86.37559509277344, + "logps/oppo_real": -329.4002685546875, + "logps/real": -317.261962890625, + "loss": -3.0377, + "loss/gen": 0.4204404056072235, + "loss/real": -3.355125904083252, + "rewards/accuracies": 1.0, + "rewards/generated": -118.21987915039062, + "rewards/margins": 130.35821533203125, + "rewards/real": 12.138343811035156, + "step": 148 + }, + { + "epoch": 0.62, + "grad_norm": 75.08773189056086, + "learning_rate": 4.643874643874643e-07, + "logits/generated": -2.1544671058654785, + "logits/oppo_generated": -2.4297678470611572, + "logits/oppo_real": -2.5349526405334473, + "logits/real": -2.043349027633667, + "logps/generated": -245.35202026367188, + "logps/oppo_gen": -139.25880432128906, + "logps/oppo_real": -366.9024658203125, + "logps/real": -337.96356201171875, + "loss": -2.9923, + "loss/gen": 0.5029778480529785, + "loss/real": -3.993164300918579, + "rewards/accuracies": 1.0, + "rewards/generated": -106.09322357177734, + "rewards/margins": 135.03216552734375, + "rewards/real": 28.938934326171875, + "step": 149 + }, + { + "epoch": 0.63, + "grad_norm": 576.2930027338524, + "learning_rate": 4.64031339031339e-07, + "logits/generated": -2.114830493927002, + "logits/oppo_generated": -2.59027099609375, + "logits/oppo_real": -2.5751681327819824, + "logits/real": -2.2125301361083984, + "logps/generated": -178.6730499267578, + "logps/oppo_gen": -44.13750076293945, + "logps/oppo_real": -126.39328002929688, + "logps/real": -146.06198120117188, + "loss": -4.0466, + "loss/gen": 0.268838495016098, + "loss/real": -2.595045566558838, + "rewards/accuracies": 0.875, + "rewards/generated": -134.53555297851562, + "rewards/margins": 114.86683654785156, + "rewards/real": -19.668712615966797, + "step": 150 + }, + { + "epoch": 0.63, + "grad_norm": 54.61241720782235, + "learning_rate": 4.6367521367521367e-07, + "logits/generated": -2.424686908721924, + "logits/oppo_generated": -2.8061888217926025, + "logits/oppo_real": -2.885352611541748, + "logits/real": -2.449500560760498, + "logps/generated": -222.48379516601562, + "logps/oppo_gen": -82.9956283569336, + "logps/oppo_real": -287.7582702636719, + "logps/real": -284.30731201171875, + "loss": -3.0876, + "loss/gen": 0.32225707173347473, + "loss/real": -2.8702611923217773, + "rewards/accuracies": 1.0, + "rewards/generated": -139.4881591796875, + "rewards/margins": 142.93911743164062, + "rewards/real": 3.450957775115967, + "step": 151 + }, + { + "epoch": 0.64, + "grad_norm": 85.11727319006701, + "learning_rate": 4.633190883190883e-07, + "logits/generated": -2.086930274963379, + "logits/oppo_generated": -2.6804826259613037, + "logits/oppo_real": -2.560675621032715, + "logits/real": -2.307936668395996, + "logps/generated": -248.29327392578125, + "logps/oppo_gen": -125.20469665527344, + "logps/oppo_real": -214.75454711914062, + "logps/real": -237.43409729003906, + "loss": -2.705, + "loss/gen": 0.33005163073539734, + "loss/real": -2.6607136726379395, + "rewards/accuracies": 0.875, + "rewards/generated": -123.08856201171875, + "rewards/margins": 100.40898895263672, + "rewards/real": -22.67957878112793, + "step": 152 + }, + { + "epoch": 0.64, + "grad_norm": 76.16983490857781, + "learning_rate": 4.6296296296296297e-07, + "logits/generated": -2.4300737380981445, + "logits/oppo_generated": -2.8161306381225586, + "logits/oppo_real": -2.873737096786499, + "logits/real": -2.3974549770355225, + "logps/generated": -211.3475341796875, + "logps/oppo_gen": -39.4675178527832, + "logps/oppo_real": -94.7720718383789, + "logps/real": -112.44818115234375, + "loss": -2.8425, + "loss/gen": 0.24527525901794434, + "loss/real": -2.5662083625793457, + "rewards/accuracies": 0.875, + "rewards/generated": -171.88002014160156, + "rewards/margins": 154.2039031982422, + "rewards/real": -17.676116943359375, + "step": 153 + }, + { + "epoch": 0.64, + "grad_norm": 52.66541092417774, + "learning_rate": 4.626068376068376e-07, + "logits/generated": -2.2791929244995117, + "logits/oppo_generated": -2.754338026046753, + "logits/oppo_real": -2.6611428260803223, + "logits/real": -2.365473747253418, + "logps/generated": -180.72125244140625, + "logps/oppo_gen": -53.64311981201172, + "logps/oppo_real": -189.60964965820312, + "logps/real": -185.12130737304688, + "loss": -2.8864, + "loss/gen": 0.31177347898483276, + "loss/real": -3.0672144889831543, + "rewards/accuracies": 1.0, + "rewards/generated": -127.07814025878906, + "rewards/margins": 131.56646728515625, + "rewards/real": 4.488343238830566, + "step": 154 + }, + { + "epoch": 0.65, + "grad_norm": 89.10240608160505, + "learning_rate": 4.622507122507122e-07, + "logits/generated": -2.525489568710327, + "logits/oppo_generated": -2.8700437545776367, + "logits/oppo_real": -3.012883186340332, + "logits/real": -2.459331512451172, + "logps/generated": -198.41644287109375, + "logps/oppo_gen": -64.43563842773438, + "logps/oppo_real": -366.68572998046875, + "logps/real": -346.22503662109375, + "loss": -3.2314, + "loss/gen": 0.27841734886169434, + "loss/real": -3.4703869819641113, + "rewards/accuracies": 1.0, + "rewards/generated": -133.98080444335938, + "rewards/margins": 154.4415283203125, + "rewards/real": 20.46072006225586, + "step": 155 + }, + { + "epoch": 0.65, + "grad_norm": 1254.2688109013952, + "learning_rate": 4.618945868945869e-07, + "logits/generated": -2.289478302001953, + "logits/oppo_generated": -2.896176338195801, + "logits/oppo_real": -2.7520911693573, + "logits/real": -2.514561653137207, + "logps/generated": -221.06515502929688, + "logps/oppo_gen": -94.6259765625, + "logps/oppo_real": -329.9571533203125, + "logps/real": -310.5443115234375, + "loss": -5.4989, + "loss/gen": 0.3454495668411255, + "loss/real": -3.5192689895629883, + "rewards/accuracies": 1.0, + "rewards/generated": -126.43919372558594, + "rewards/margins": 145.85203552246094, + "rewards/real": 19.412845611572266, + "step": 156 + }, + { + "epoch": 0.66, + "grad_norm": 49.926036602752426, + "learning_rate": 4.6153846153846156e-07, + "logits/generated": -2.4086711406707764, + "logits/oppo_generated": -2.72526478767395, + "logits/oppo_real": -2.760162591934204, + "logits/real": -2.3816709518432617, + "logps/generated": -189.68716430664062, + "logps/oppo_gen": -70.71673583984375, + "logps/oppo_real": -391.76458740234375, + "logps/real": -400.5279846191406, + "loss": -2.873, + "loss/gen": 0.38557717204093933, + "loss/real": -2.8944320678710938, + "rewards/accuracies": 0.875, + "rewards/generated": -118.97042846679688, + "rewards/margins": 110.20704650878906, + "rewards/real": -8.763385772705078, + "step": 157 + }, + { + "epoch": 0.66, + "grad_norm": 108.6453699255058, + "learning_rate": 4.6118233618233613e-07, + "logits/generated": -2.9071619510650635, + "logits/oppo_generated": -2.979785919189453, + "logits/oppo_real": -3.2641677856445312, + "logits/real": -2.598475933074951, + "logps/generated": -202.7903594970703, + "logps/oppo_gen": -92.89317321777344, + "logps/oppo_real": -330.3245849609375, + "logps/real": -319.0426025390625, + "loss": -3.0021, + "loss/gen": 0.49392998218536377, + "loss/real": -3.2759296894073486, + "rewards/accuracies": 1.0, + "rewards/generated": -109.89718627929688, + "rewards/margins": 121.17916870117188, + "rewards/real": 11.28197956085205, + "step": 158 + }, + { + "epoch": 0.67, + "grad_norm": 59.173970949148114, + "learning_rate": 4.608262108262108e-07, + "logits/generated": -2.1723835468292236, + "logits/oppo_generated": -2.775574207305908, + "logits/oppo_real": -2.598371744155884, + "logits/real": -2.396592140197754, + "logps/generated": -187.12681579589844, + "logps/oppo_gen": -65.71693420410156, + "logps/oppo_real": -220.19737243652344, + "logps/real": -200.814453125, + "loss": -3.0664, + "loss/gen": 0.3779526352882385, + "loss/real": -3.3624069690704346, + "rewards/accuracies": 1.0, + "rewards/generated": -121.40988159179688, + "rewards/margins": 140.79281616210938, + "rewards/real": 19.38292694091797, + "step": 159 + }, + { + "epoch": 0.67, + "grad_norm": 75.21973020757254, + "learning_rate": 4.6047008547008543e-07, + "logits/generated": -2.0699994564056396, + "logits/oppo_generated": -2.6892812252044678, + "logits/oppo_real": -2.527797222137451, + "logits/real": -2.2784643173217773, + "logps/generated": -172.57388305664062, + "logps/oppo_gen": -56.507102966308594, + "logps/oppo_real": -203.99942016601562, + "logps/real": -220.61575317382812, + "loss": -2.9974, + "loss/gen": 0.3955667018890381, + "loss/real": -2.5530004501342773, + "rewards/accuracies": 1.0, + "rewards/generated": -116.06678771972656, + "rewards/margins": 99.45044708251953, + "rewards/real": -16.6163387298584, + "step": 160 + }, + { + "epoch": 0.67, + "grad_norm": 69.13334884030283, + "learning_rate": 4.601139601139601e-07, + "logits/generated": -2.4354324340820312, + "logits/oppo_generated": -2.892515182495117, + "logits/oppo_real": -2.87583589553833, + "logits/real": -2.509371280670166, + "logps/generated": -193.70269775390625, + "logps/oppo_gen": -70.63409423828125, + "logps/oppo_real": -236.45480346679688, + "logps/real": -236.11924743652344, + "loss": -2.9182, + "loss/gen": 0.3297494649887085, + "loss/real": -3.017835855484009, + "rewards/accuracies": 0.875, + "rewards/generated": -123.068603515625, + "rewards/margins": 123.4041748046875, + "rewards/real": 0.3355722427368164, + "step": 161 + }, + { + "epoch": 0.68, + "grad_norm": 96.21606544647312, + "learning_rate": 4.5975783475783473e-07, + "logits/generated": -2.1324949264526367, + "logits/oppo_generated": -2.2372124195098877, + "logits/oppo_real": -2.6531500816345215, + "logits/real": -1.7291717529296875, + "logps/generated": -151.3114471435547, + "logps/oppo_gen": -49.9699821472168, + "logps/oppo_real": -257.7629699707031, + "logps/real": -268.58935546875, + "loss": -3.1964, + "loss/gen": 1.1664835214614868, + "loss/real": -2.592834234237671, + "rewards/accuracies": 0.875, + "rewards/generated": -101.34146118164062, + "rewards/margins": 90.51508331298828, + "rewards/real": -10.826382637023926, + "step": 162 + }, + { + "epoch": 0.68, + "grad_norm": 1100.3644850684705, + "learning_rate": 4.5940170940170935e-07, + "logits/generated": -2.265676736831665, + "logits/oppo_generated": -2.6594979763031006, + "logits/oppo_real": -2.72336483001709, + "logits/real": -2.2372395992279053, + "logps/generated": -186.74288940429688, + "logps/oppo_gen": -69.47285461425781, + "logps/oppo_real": -203.925048828125, + "logps/real": -171.45407104492188, + "loss": -4.8218, + "loss/gen": 0.37407732009887695, + "loss/real": -3.8044371604919434, + "rewards/accuracies": 1.0, + "rewards/generated": -117.27003479003906, + "rewards/margins": 149.74102783203125, + "rewards/real": 32.470985412597656, + "step": 163 + }, + { + "epoch": 0.69, + "grad_norm": 48.85153938608921, + "learning_rate": 4.59045584045584e-07, + "logits/generated": -2.573637008666992, + "logits/oppo_generated": -2.84741473197937, + "logits/oppo_real": -2.9322423934936523, + "logits/real": -2.466978073120117, + "logps/generated": -193.22213745117188, + "logps/oppo_gen": -72.28129577636719, + "logps/oppo_real": -342.0706787109375, + "logps/real": -368.0989990234375, + "loss": -2.7908, + "loss/gen": 0.3612110912799835, + "loss/real": -2.5483293533325195, + "rewards/accuracies": 0.875, + "rewards/generated": -120.94083404541016, + "rewards/margins": 94.91249084472656, + "rewards/real": -26.028343200683594, + "step": 164 + }, + { + "epoch": 0.69, + "grad_norm": 61.90407697473366, + "learning_rate": 4.586894586894587e-07, + "logits/generated": -2.5092499256134033, + "logits/oppo_generated": -2.8123486042022705, + "logits/oppo_real": -2.9484448432922363, + "logits/real": -2.4517569541931152, + "logps/generated": -189.08139038085938, + "logps/oppo_gen": -78.67784118652344, + "logps/oppo_real": -224.94638061523438, + "logps/real": -222.41128540039062, + "loss": -2.8246, + "loss/gen": 0.4234386086463928, + "loss/real": -3.0529117584228516, + "rewards/accuracies": 1.0, + "rewards/generated": -110.4035415649414, + "rewards/margins": 112.93864440917969, + "rewards/real": 2.5351076126098633, + "step": 165 + }, + { + "epoch": 0.69, + "grad_norm": 162.38187413278305, + "learning_rate": 4.5833333333333327e-07, + "logits/generated": -2.1803882122039795, + "logits/oppo_generated": -2.6430654525756836, + "logits/oppo_real": -2.7417783737182617, + "logits/real": -2.243424415588379, + "logps/generated": -164.8372802734375, + "logps/oppo_gen": -63.871150970458984, + "logps/oppo_real": -224.14703369140625, + "logps/real": -182.39511108398438, + "loss": -3.7083, + "loss/gen": 0.7786407470703125, + "loss/real": -4.343791961669922, + "rewards/accuracies": 0.875, + "rewards/generated": -100.96613311767578, + "rewards/margins": 142.7180633544922, + "rewards/real": 41.75192642211914, + "step": 166 + }, + { + "epoch": 0.7, + "grad_norm": 49.259806784569555, + "learning_rate": 4.5797720797720794e-07, + "logits/generated": -2.4166438579559326, + "logits/oppo_generated": -2.757966995239258, + "logits/oppo_real": -2.906935691833496, + "logits/real": -2.3613169193267822, + "logps/generated": -156.58056640625, + "logps/oppo_gen": -53.980133056640625, + "logps/oppo_real": -168.99293518066406, + "logps/real": -158.2866973876953, + "loss": -2.9404, + "loss/gen": 0.9939805269241333, + "loss/real": -3.2384204864501953, + "rewards/accuracies": 1.0, + "rewards/generated": -102.6004409790039, + "rewards/margins": 113.30667114257812, + "rewards/real": 10.706242561340332, + "step": 167 + }, + { + "epoch": 0.7, + "grad_norm": 49.259806784569555, + "learning_rate": 4.5797720797720794e-07, + "logits/generated": -2.1523754596710205, + "logits/oppo_generated": -2.34848690032959, + "logits/oppo_real": -2.549453020095825, + "logits/real": -1.9709889888763428, + "logps/generated": -172.17507934570312, + "logps/oppo_gen": -41.99907684326172, + "logps/oppo_real": -137.05735778808594, + "logps/real": -117.31524658203125, + "loss": -36278.4766, + "loss/gen": 0.308816134929657, + "loss/real": -3.7620303630828857, + "rewards/accuracies": 1.0, + "rewards/generated": -130.17599487304688, + "rewards/margins": 149.91812133789062, + "rewards/real": 19.742107391357422, + "step": 168 + }, + { + "epoch": 0.71, + "grad_norm": 58.361834787518674, + "learning_rate": 4.576210826210826e-07, + "logits/generated": -2.2917189598083496, + "logits/oppo_generated": -2.5094847679138184, + "logits/oppo_real": -2.6891722679138184, + "logits/real": -2.1305155754089355, + "logps/generated": -260.20501708984375, + "logps/oppo_gen": -68.40258026123047, + "logps/oppo_real": -223.42794799804688, + "logps/real": -207.68116760253906, + "loss": -2.9255, + "loss/gen": 0.7335460186004639, + "loss/real": -3.7286908626556396, + "rewards/accuracies": 1.0, + "rewards/generated": -191.8024444580078, + "rewards/margins": 207.54922485351562, + "rewards/real": 15.746776580810547, + "step": 169 + }, + { + "epoch": 0.71, + "grad_norm": 170.55447260332988, + "learning_rate": 4.5726495726495724e-07, + "logits/generated": -2.4084904193878174, + "logits/oppo_generated": -2.8935999870300293, + "logits/oppo_real": -2.775484561920166, + "logits/real": -2.5147581100463867, + "logps/generated": -170.41473388671875, + "logps/oppo_gen": -50.93283462524414, + "logps/oppo_real": -316.0002136230469, + "logps/real": -287.05810546875, + "loss": -3.0655, + "loss/gen": 0.39951732754707336, + "loss/real": -3.662087917327881, + "rewards/accuracies": 1.0, + "rewards/generated": -119.48190307617188, + "rewards/margins": 148.4240264892578, + "rewards/real": 28.942119598388672, + "step": 170 + }, + { + "epoch": 0.72, + "grad_norm": 2776.5446151646834, + "learning_rate": 4.569088319088319e-07, + "logits/generated": -2.689021110534668, + "logits/oppo_generated": -2.8526816368103027, + "logits/oppo_real": -3.2386014461517334, + "logits/real": -2.449385166168213, + "logps/generated": -224.29080200195312, + "logps/oppo_gen": -113.54923248291016, + "logps/oppo_real": -351.7125549316406, + "logps/real": -352.888427734375, + "loss": -9.9952, + "loss/gen": 0.5014157295227051, + "loss/real": -2.938952922821045, + "rewards/accuracies": 0.875, + "rewards/generated": -110.7415771484375, + "rewards/margins": 109.56568145751953, + "rewards/real": -1.1758899688720703, + "step": 171 + }, + { + "epoch": 0.72, + "grad_norm": 74.08580527313414, + "learning_rate": 4.5655270655270654e-07, + "logits/generated": -2.553030252456665, + "logits/oppo_generated": -2.9850940704345703, + "logits/oppo_real": -3.0315611362457275, + "logits/real": -2.5720577239990234, + "logps/generated": -181.4952392578125, + "logps/oppo_gen": -61.65489196777344, + "logps/oppo_real": -151.10653686523438, + "logps/real": -148.4203643798828, + "loss": -3.289, + "loss/gen": 0.38128042221069336, + "loss/real": -2.9334371089935303, + "rewards/accuracies": 1.0, + "rewards/generated": -119.84036254882812, + "rewards/margins": 122.52653503417969, + "rewards/real": 2.686166763305664, + "step": 172 + }, + { + "epoch": 0.72, + "grad_norm": 63.58691225524518, + "learning_rate": 4.5619658119658116e-07, + "logits/generated": -2.707376480102539, + "logits/oppo_generated": -2.891350746154785, + "logits/oppo_real": -3.0990657806396484, + "logits/real": -2.544325828552246, + "logps/generated": -313.1190490722656, + "logps/oppo_gen": -212.02532958984375, + "logps/oppo_real": -549.8078002929688, + "logps/real": -522.171142578125, + "loss": -3.0226, + "loss/gen": 0.6081419587135315, + "loss/real": -3.838395118713379, + "rewards/accuracies": 1.0, + "rewards/generated": -101.09373474121094, + "rewards/margins": 128.73036193847656, + "rewards/real": 27.636632919311523, + "step": 173 + }, + { + "epoch": 0.73, + "grad_norm": 54.7705752933886, + "learning_rate": 4.5584045584045584e-07, + "logits/generated": -2.31124210357666, + "logits/oppo_generated": -2.861656904220581, + "logits/oppo_real": -2.749734878540039, + "logits/real": -2.4704031944274902, + "logps/generated": -180.6676025390625, + "logps/oppo_gen": -52.08341598510742, + "logps/oppo_real": -268.2560119628906, + "logps/real": -232.18594360351562, + "loss": -3.0632, + "loss/gen": 0.29419511556625366, + "loss/real": -3.9939093589782715, + "rewards/accuracies": 1.0, + "rewards/generated": -128.5841827392578, + "rewards/margins": 164.6542510986328, + "rewards/real": 36.070064544677734, + "step": 174 + }, + { + "epoch": 0.73, + "grad_norm": 96.52861966309526, + "learning_rate": 4.5548433048433046e-07, + "logits/generated": -2.3479509353637695, + "logits/oppo_generated": -2.8331031799316406, + "logits/oppo_real": -2.8462958335876465, + "logits/real": -2.4217453002929688, + "logps/generated": -206.61517333984375, + "logps/oppo_gen": -78.92254638671875, + "logps/oppo_real": -224.86373901367188, + "logps/real": -216.6464385986328, + "loss": -3.2511, + "loss/gen": 0.3076120615005493, + "loss/real": -3.089078903198242, + "rewards/accuracies": 1.0, + "rewards/generated": -127.69261169433594, + "rewards/margins": 135.909912109375, + "rewards/real": 8.217292785644531, + "step": 175 + }, + { + "epoch": 0.74, + "grad_norm": 56.16997780175812, + "learning_rate": 4.551282051282051e-07, + "logits/generated": -2.3228254318237305, + "logits/oppo_generated": -2.879185199737549, + "logits/oppo_real": -2.873112678527832, + "logits/real": -2.4802536964416504, + "logps/generated": -157.61184692382812, + "logps/oppo_gen": -49.27460479736328, + "logps/oppo_real": -375.43463134765625, + "logps/real": -348.29833984375, + "loss": -2.954, + "loss/gen": 0.6059376001358032, + "loss/real": -3.6162662506103516, + "rewards/accuracies": 1.0, + "rewards/generated": -108.33724212646484, + "rewards/margins": 135.4735107421875, + "rewards/real": 27.136272430419922, + "step": 176 + }, + { + "epoch": 0.74, + "grad_norm": 47.747249184508, + "learning_rate": 4.5477207977207976e-07, + "logits/generated": -2.5688347816467285, + "logits/oppo_generated": -3.0462043285369873, + "logits/oppo_real": -3.1089582443237305, + "logits/real": -2.586811065673828, + "logps/generated": -204.5193634033203, + "logps/oppo_gen": -77.79332733154297, + "logps/oppo_real": -319.2231750488281, + "logps/real": -287.8105773925781, + "loss": -2.9293, + "loss/gen": 0.32107144594192505, + "loss/real": -3.8302066326141357, + "rewards/accuracies": 1.0, + "rewards/generated": -126.72602844238281, + "rewards/margins": 158.13864135742188, + "rewards/real": 31.412609100341797, + "step": 177 + }, + { + "epoch": 0.74, + "grad_norm": 56.68338256821835, + "learning_rate": 4.544159544159544e-07, + "logits/generated": -2.477287530899048, + "logits/oppo_generated": -2.815687656402588, + "logits/oppo_real": -2.9501237869262695, + "logits/real": -2.401658058166504, + "logps/generated": -221.77561950683594, + "logps/oppo_gen": -103.51431274414062, + "logps/oppo_real": -308.8333435058594, + "logps/real": -306.259521484375, + "loss": -2.9378, + "loss/gen": 0.3601089119911194, + "loss/real": -3.115224599838257, + "rewards/accuracies": 1.0, + "rewards/generated": -118.26129913330078, + "rewards/margins": 120.83515930175781, + "rewards/real": 2.5738563537597656, + "step": 178 + }, + { + "epoch": 0.75, + "grad_norm": 132.54113453471717, + "learning_rate": 4.5405982905982905e-07, + "logits/generated": -2.3259053230285645, + "logits/oppo_generated": -2.779146194458008, + "logits/oppo_real": -2.8336267471313477, + "logits/real": -2.3255388736724854, + "logps/generated": -202.4166259765625, + "logps/oppo_gen": -72.71639251708984, + "logps/oppo_real": -196.57557678222656, + "logps/real": -192.04830932617188, + "loss": -3.1315, + "loss/gen": 0.3035447299480438, + "loss/real": -3.032370090484619, + "rewards/accuracies": 1.0, + "rewards/generated": -129.70025634765625, + "rewards/margins": 134.2274932861328, + "rewards/real": 4.52725076675415, + "step": 179 + }, + { + "epoch": 0.75, + "grad_norm": 51.83515977656961, + "learning_rate": 4.537037037037037e-07, + "logits/generated": -2.5221238136291504, + "logits/oppo_generated": -2.8425636291503906, + "logits/oppo_real": -2.9093685150146484, + "logits/real": -2.4757275581359863, + "logps/generated": -224.296142578125, + "logps/oppo_gen": -95.93893432617188, + "logps/oppo_real": -207.11392211914062, + "logps/real": -186.50677490234375, + "loss": -3.0553, + "loss/gen": 0.6323412656784058, + "loss/real": -3.4342591762542725, + "rewards/accuracies": 1.0, + "rewards/generated": -128.35723876953125, + "rewards/margins": 148.96438598632812, + "rewards/real": 20.607158660888672, + "step": 180 + }, + { + "epoch": 0.76, + "grad_norm": 57.13147870810802, + "learning_rate": 4.533475783475783e-07, + "logits/generated": -2.2876157760620117, + "logits/oppo_generated": -2.8224010467529297, + "logits/oppo_real": -2.778409957885742, + "logits/real": -2.416560649871826, + "logps/generated": -224.8665313720703, + "logps/oppo_gen": -88.16463470458984, + "logps/oppo_real": -239.9169921875, + "logps/real": -240.70309448242188, + "loss": -3.0029, + "loss/gen": 0.25222891569137573, + "loss/real": -3.012547016143799, + "rewards/accuracies": 1.0, + "rewards/generated": -136.701904296875, + "rewards/margins": 135.91583251953125, + "rewards/real": -0.7860813140869141, + "step": 181 + }, + { + "epoch": 0.76, + "grad_norm": 89.10157585762724, + "learning_rate": 4.5299145299145297e-07, + "logits/generated": -2.4986462593078613, + "logits/oppo_generated": -2.9657952785491943, + "logits/oppo_real": -2.9425137042999268, + "logits/real": -2.509366035461426, + "logps/generated": -201.0703125, + "logps/oppo_gen": -76.42547607421875, + "logps/oppo_real": -261.8043518066406, + "logps/real": -241.64723205566406, + "loss": -3.1181, + "loss/gen": 0.3136594295501709, + "loss/real": -3.37326717376709, + "rewards/accuracies": 1.0, + "rewards/generated": -124.64483642578125, + "rewards/margins": 144.8019561767578, + "rewards/real": 20.157115936279297, + "step": 182 + }, + { + "epoch": 0.77, + "grad_norm": 59.75890689365896, + "learning_rate": 4.5263532763532765e-07, + "logits/generated": -1.9646540880203247, + "logits/oppo_generated": -2.6656646728515625, + "logits/oppo_real": -2.512063980102539, + "logits/real": -2.13295316696167, + "logps/generated": -146.4997100830078, + "logps/oppo_gen": -61.16596603393555, + "logps/oppo_real": -89.70797729492188, + "logps/real": -67.80735778808594, + "loss": -3.1443, + "loss/gen": 0.8715238571166992, + "loss/real": -3.4107680320739746, + "rewards/accuracies": 1.0, + "rewards/generated": -85.333740234375, + "rewards/margins": 107.23435974121094, + "rewards/real": 21.90062141418457, + "step": 183 + }, + { + "epoch": 0.77, + "grad_norm": 9260.308850141015, + "learning_rate": 4.522792022792022e-07, + "logits/generated": -2.074495792388916, + "logits/oppo_generated": -2.679591655731201, + "logits/oppo_real": -2.5152084827423096, + "logits/real": -2.2176570892333984, + "logps/generated": -301.60516357421875, + "logps/oppo_gen": -134.39280700683594, + "logps/oppo_real": -353.8466491699219, + "logps/real": -354.19549560546875, + "loss": -41.3972, + "loss/gen": 0.28073543310165405, + "loss/real": -2.867943525314331, + "rewards/accuracies": 1.0, + "rewards/generated": -167.2123565673828, + "rewards/margins": 166.86349487304688, + "rewards/real": -0.34885168075561523, + "step": 184 + }, + { + "epoch": 0.77, + "grad_norm": 57.11031975962729, + "learning_rate": 4.519230769230769e-07, + "logits/generated": -2.515873908996582, + "logits/oppo_generated": -2.8852622509002686, + "logits/oppo_real": -2.9888343811035156, + "logits/real": -2.4470033645629883, + "logps/generated": -221.6116485595703, + "logps/oppo_gen": -86.57408142089844, + "logps/oppo_real": -353.78594970703125, + "logps/real": -337.73291015625, + "loss": -3.0984, + "loss/gen": 0.3061344623565674, + "loss/real": -3.636414051055908, + "rewards/accuracies": 1.0, + "rewards/generated": -135.03756713867188, + "rewards/margins": 151.0906219482422, + "rewards/real": 16.053056716918945, + "step": 185 + }, + { + "epoch": 0.78, + "grad_norm": 242.2639153640096, + "learning_rate": 4.5156695156695157e-07, + "logits/generated": -2.5487115383148193, + "logits/oppo_generated": -2.894904136657715, + "logits/oppo_real": -2.8833250999450684, + "logits/real": -2.4917829036712646, + "logps/generated": -208.41036987304688, + "logps/oppo_gen": -97.552490234375, + "logps/oppo_real": -446.60357666015625, + "logps/real": -420.88616943359375, + "loss": -3.9814, + "loss/gen": 0.4838281273841858, + "loss/real": -3.5608205795288086, + "rewards/accuracies": 1.0, + "rewards/generated": -110.85787963867188, + "rewards/margins": 136.57525634765625, + "rewards/real": 25.71738052368164, + "step": 186 + }, + { + "epoch": 0.78, + "grad_norm": 61.190406675725114, + "learning_rate": 4.512108262108262e-07, + "logits/generated": -2.471825122833252, + "logits/oppo_generated": -2.9238195419311523, + "logits/oppo_real": -2.928109645843506, + "logits/real": -2.5521817207336426, + "logps/generated": -248.10580444335938, + "logps/oppo_gen": -99.34373474121094, + "logps/oppo_real": -381.1275634765625, + "logps/real": -371.27154541015625, + "loss": -3.1159, + "loss/gen": 0.34282225370407104, + "loss/real": -3.2633209228515625, + "rewards/accuracies": 1.0, + "rewards/generated": -148.7620849609375, + "rewards/margins": 158.61810302734375, + "rewards/real": 9.856016159057617, + "step": 187 + }, + { + "epoch": 0.79, + "grad_norm": 59.89950894519086, + "learning_rate": 4.5085470085470087e-07, + "logits/generated": -2.162811279296875, + "logits/oppo_generated": -2.7080626487731934, + "logits/oppo_real": -2.5767087936401367, + "logits/real": -2.30268931388855, + "logps/generated": -199.12115478515625, + "logps/oppo_gen": -46.502037048339844, + "logps/oppo_real": -149.05059814453125, + "logps/real": -149.08099365234375, + "loss": -2.8636, + "loss/gen": 0.46407026052474976, + "loss/real": -2.982802391052246, + "rewards/accuracies": 1.0, + "rewards/generated": -152.61912536621094, + "rewards/margins": 152.58872985839844, + "rewards/real": -0.030394554138183594, + "step": 188 + }, + { + "epoch": 0.79, + "grad_norm": 63.384206370258354, + "learning_rate": 4.5049857549857543e-07, + "logits/generated": -2.5746748447418213, + "logits/oppo_generated": -2.9217922687530518, + "logits/oppo_real": -3.0358145236968994, + "logits/real": -2.5657949447631836, + "logps/generated": -201.57012939453125, + "logps/oppo_gen": -72.13301849365234, + "logps/oppo_real": -295.51861572265625, + "logps/real": -298.053955078125, + "loss": -2.9233, + "loss/gen": 0.28429698944091797, + "loss/real": -3.108995199203491, + "rewards/accuracies": 1.0, + "rewards/generated": -129.4371337890625, + "rewards/margins": 126.90178680419922, + "rewards/real": -2.5353341102600098, + "step": 189 + }, + { + "epoch": 0.79, + "grad_norm": 94.99060911644108, + "learning_rate": 4.501424501424501e-07, + "logits/generated": -2.3787384033203125, + "logits/oppo_generated": -2.7406344413757324, + "logits/oppo_real": -2.799593925476074, + "logits/real": -2.3933238983154297, + "logps/generated": -222.70245361328125, + "logps/oppo_gen": -102.60955810546875, + "logps/oppo_real": -305.8299255371094, + "logps/real": -273.30157470703125, + "loss": -3.2776, + "loss/gen": 0.36470329761505127, + "loss/real": -3.8918604850769043, + "rewards/accuracies": 1.0, + "rewards/generated": -120.0928955078125, + "rewards/margins": 152.62124633789062, + "rewards/real": 32.52833938598633, + "step": 190 + }, + { + "epoch": 0.8, + "grad_norm": 350.9167124297445, + "learning_rate": 4.497863247863248e-07, + "logits/generated": -2.6413869857788086, + "logits/oppo_generated": -2.8220396041870117, + "logits/oppo_real": -3.0663821697235107, + "logits/real": -2.490924596786499, + "logps/generated": -178.68812561035156, + "logps/oppo_gen": -80.95722961425781, + "logps/oppo_real": -339.0364074707031, + "logps/real": -321.1693115234375, + "loss": -3.7641, + "loss/gen": 0.9792780876159668, + "loss/real": -3.2799935340881348, + "rewards/accuracies": 1.0, + "rewards/generated": -97.73089599609375, + "rewards/margins": 115.59801483154297, + "rewards/real": 17.867115020751953, + "step": 191 + }, + { + "epoch": 0.8, + "grad_norm": 568.1687689410603, + "learning_rate": 4.494301994301994e-07, + "logits/generated": -2.53959321975708, + "logits/oppo_generated": -2.8528313636779785, + "logits/oppo_real": -2.9469070434570312, + "logits/real": -2.4770290851593018, + "logps/generated": -186.40338134765625, + "logps/oppo_gen": -55.95906066894531, + "logps/oppo_real": -228.37322998046875, + "logps/real": -216.5171661376953, + "loss": -4.4415, + "loss/gen": 0.2824003994464874, + "loss/real": -3.352588653564453, + "rewards/accuracies": 1.0, + "rewards/generated": -130.44430541992188, + "rewards/margins": 142.30039978027344, + "rewards/real": 11.856078147888184, + "step": 192 + }, + { + "epoch": 0.81, + "grad_norm": 82.08388787354983, + "learning_rate": 4.4907407407407403e-07, + "logits/generated": -2.3993959426879883, + "logits/oppo_generated": -2.759657859802246, + "logits/oppo_real": -2.7739434242248535, + "logits/real": -2.4300918579101562, + "logps/generated": -195.13946533203125, + "logps/oppo_gen": -55.900001525878906, + "logps/oppo_real": -240.51673889160156, + "logps/real": -254.91921997070312, + "loss": -3.0531, + "loss/gen": 0.31808772683143616, + "loss/real": -2.974119186401367, + "rewards/accuracies": 0.875, + "rewards/generated": -139.23947143554688, + "rewards/margins": 124.83699035644531, + "rewards/real": -14.402481079101562, + "step": 193 + }, + { + "epoch": 0.81, + "grad_norm": 59.35182524704906, + "learning_rate": 4.487179487179487e-07, + "logits/generated": -2.422110080718994, + "logits/oppo_generated": -2.714049816131592, + "logits/oppo_real": -2.821863889694214, + "logits/real": -2.3553073406219482, + "logps/generated": -191.3336181640625, + "logps/oppo_gen": -61.66150665283203, + "logps/oppo_real": -281.81561279296875, + "logps/real": -268.0882873535156, + "loss": -2.9157, + "loss/gen": 0.2871388792991638, + "loss/real": -3.4819259643554688, + "rewards/accuracies": 1.0, + "rewards/generated": -129.67208862304688, + "rewards/margins": 143.39939880371094, + "rewards/real": 13.727313995361328, + "step": 194 + }, + { + "epoch": 0.82, + "grad_norm": 61.67208283154745, + "learning_rate": 4.4836182336182333e-07, + "logits/generated": -2.2326247692108154, + "logits/oppo_generated": -2.7336645126342773, + "logits/oppo_real": -2.6636435985565186, + "logits/real": -2.38464617729187, + "logps/generated": -191.60995483398438, + "logps/oppo_gen": -66.04891204833984, + "logps/oppo_real": -343.6158447265625, + "logps/real": -311.3708801269531, + "loss": -3.0524, + "loss/gen": 0.30880045890808105, + "loss/real": -4.188716888427734, + "rewards/accuracies": 1.0, + "rewards/generated": -125.56105041503906, + "rewards/margins": 157.8060302734375, + "rewards/real": 32.2449836730957, + "step": 195 + }, + { + "epoch": 0.82, + "grad_norm": 79.39509465129127, + "learning_rate": 4.48005698005698e-07, + "logits/generated": -2.459395408630371, + "logits/oppo_generated": -3.0542874336242676, + "logits/oppo_real": -2.803119659423828, + "logits/real": -2.7458314895629883, + "logps/generated": -202.55088806152344, + "logps/oppo_gen": -81.553955078125, + "logps/oppo_real": -376.17071533203125, + "logps/real": -320.265869140625, + "loss": -3.204, + "loss/gen": 0.3818941116333008, + "loss/real": -5.556370735168457, + "rewards/accuracies": 1.0, + "rewards/generated": -120.99693298339844, + "rewards/margins": 176.90176391601562, + "rewards/real": 55.90484619140625, + "step": 196 + }, + { + "epoch": 0.82, + "grad_norm": 64.34859516119019, + "learning_rate": 4.476495726495726e-07, + "logits/generated": -2.551626205444336, + "logits/oppo_generated": -2.791293144226074, + "logits/oppo_real": -2.8689441680908203, + "logits/real": -2.4949615001678467, + "logps/generated": -214.268310546875, + "logps/oppo_gen": -90.10079956054688, + "logps/oppo_real": -387.6597900390625, + "logps/real": -355.855712890625, + "loss": -2.9635, + "loss/gen": 0.7095820903778076, + "loss/real": -3.8604226112365723, + "rewards/accuracies": 1.0, + "rewards/generated": -124.16752624511719, + "rewards/margins": 155.97164916992188, + "rewards/real": 31.804113388061523, + "step": 197 + }, + { + "epoch": 0.83, + "grad_norm": 55.47510967063292, + "learning_rate": 4.4729344729344725e-07, + "logits/generated": -2.5076422691345215, + "logits/oppo_generated": -2.8356850147247314, + "logits/oppo_real": -2.917833089828491, + "logits/real": -2.4860076904296875, + "logps/generated": -208.80723571777344, + "logps/oppo_gen": -76.40264892578125, + "logps/oppo_real": -278.172607421875, + "logps/real": -251.12782287597656, + "loss": -3.1359, + "loss/gen": 0.3080252408981323, + "loss/real": -3.7138681411743164, + "rewards/accuracies": 1.0, + "rewards/generated": -132.4045867919922, + "rewards/margins": 159.44937133789062, + "rewards/real": 27.04478645324707, + "step": 198 + }, + { + "epoch": 0.83, + "grad_norm": 55.47510967063292, + "learning_rate": 4.4729344729344725e-07, + "logits/generated": -2.7056777477264404, + "logits/oppo_generated": -3.0011539459228516, + "logits/oppo_real": -3.069876194000244, + "logits/real": -2.696037530899048, + "logps/generated": -191.9580078125, + "logps/oppo_gen": -69.13575744628906, + "logps/oppo_real": -340.70343017578125, + "logps/real": -328.8914794921875, + "loss": -1322.4869, + "loss/gen": 0.4668487310409546, + "loss/real": -3.3937647342681885, + "rewards/accuracies": 1.0, + "rewards/generated": -122.82225036621094, + "rewards/margins": 134.6342010498047, + "rewards/real": 11.811951637268066, + "step": 199 + }, + { + "epoch": 0.84, + "grad_norm": 76.39824855854027, + "learning_rate": 4.469373219373219e-07, + "logits/generated": -2.6483988761901855, + "logits/oppo_generated": -2.821411609649658, + "logits/oppo_real": -2.9697532653808594, + "logits/real": -2.5232529640197754, + "logps/generated": -208.79844665527344, + "logps/oppo_gen": -94.25292205810547, + "logps/oppo_real": -449.1705322265625, + "logps/real": -411.73590087890625, + "loss": -3.1662, + "loss/gen": 0.6144514083862305, + "loss/real": -4.066596984863281, + "rewards/accuracies": 1.0, + "rewards/generated": -114.54552459716797, + "rewards/margins": 151.98013305664062, + "rewards/real": 37.434608459472656, + "step": 200 + }, + { + "epoch": 0.84, + "grad_norm": 80.08028198314489, + "learning_rate": 4.465811965811966e-07, + "logits/generated": -2.521160125732422, + "logits/oppo_generated": -2.9498441219329834, + "logits/oppo_real": -2.889374017715454, + "logits/real": -2.6443803310394287, + "logps/generated": -218.7113037109375, + "logps/oppo_gen": -93.28401184082031, + "logps/oppo_real": -446.9027099609375, + "logps/real": -420.687744140625, + "loss": -3.1837, + "loss/gen": 0.3799129128456116, + "loss/real": -3.5870652198791504, + "rewards/accuracies": 1.0, + "rewards/generated": -125.42729187011719, + "rewards/margins": 151.6422119140625, + "rewards/real": 26.214933395385742, + "step": 201 + }, + { + "epoch": 0.85, + "grad_norm": 85.4263034452065, + "learning_rate": 4.4622507122507117e-07, + "logits/generated": -2.0849194526672363, + "logits/oppo_generated": -2.5877699851989746, + "logits/oppo_real": -2.4145617485046387, + "logits/real": -2.3253278732299805, + "logps/generated": -175.31253051757812, + "logps/oppo_gen": -58.147544860839844, + "logps/oppo_real": -256.63494873046875, + "logps/real": -251.91702270507812, + "loss": -3.1424, + "loss/gen": 0.633264422416687, + "loss/real": -2.9826745986938477, + "rewards/accuracies": 1.0, + "rewards/generated": -117.16497802734375, + "rewards/margins": 121.88292694091797, + "rewards/real": 4.7179460525512695, + "step": 202 + }, + { + "epoch": 0.85, + "grad_norm": 61.56183771851885, + "learning_rate": 4.4586894586894584e-07, + "logits/generated": -2.5706653594970703, + "logits/oppo_generated": -2.825096607208252, + "logits/oppo_real": -2.919394016265869, + "logits/real": -2.5202863216400146, + "logps/generated": -173.32174682617188, + "logps/oppo_gen": -62.71122360229492, + "logps/oppo_real": -234.44354248046875, + "logps/real": -211.79794311523438, + "loss": -3.0867, + "loss/gen": 0.7330012321472168, + "loss/real": -3.4876270294189453, + "rewards/accuracies": 1.0, + "rewards/generated": -110.61052703857422, + "rewards/margins": 133.25613403320312, + "rewards/real": 22.645606994628906, + "step": 203 + }, + { + "epoch": 0.85, + "grad_norm": 55.0575072225623, + "learning_rate": 4.455128205128205e-07, + "logits/generated": -2.589749336242676, + "logits/oppo_generated": -2.681910276412964, + "logits/oppo_real": -2.8930723667144775, + "logits/real": -2.393826484680176, + "logps/generated": -184.09539794921875, + "logps/oppo_gen": -69.35714721679688, + "logps/oppo_real": -321.68878173828125, + "logps/real": -300.22052001953125, + "loss": -2.9885, + "loss/gen": 0.6509556770324707, + "loss/real": -3.5244717597961426, + "rewards/accuracies": 1.0, + "rewards/generated": -114.73826599121094, + "rewards/margins": 136.20651245117188, + "rewards/real": 21.46826171875, + "step": 204 + }, + { + "epoch": 0.86, + "grad_norm": 205.9144144875487, + "learning_rate": 4.4515669515669514e-07, + "logits/generated": -2.5176801681518555, + "logits/oppo_generated": -2.910146951675415, + "logits/oppo_real": -2.842686653137207, + "logits/real": -2.6160125732421875, + "logps/generated": -192.1990966796875, + "logps/oppo_gen": -55.29602813720703, + "logps/oppo_real": -188.457763671875, + "logps/real": -170.13516235351562, + "loss": -3.8694, + "loss/gen": 0.29164981842041016, + "loss/real": -3.5206615924835205, + "rewards/accuracies": 1.0, + "rewards/generated": -136.903076171875, + "rewards/margins": 155.22567749023438, + "rewards/real": 18.32259750366211, + "step": 205 + }, + { + "epoch": 0.86, + "grad_norm": 72.13692086277665, + "learning_rate": 4.448005698005698e-07, + "logits/generated": -2.6976194381713867, + "logits/oppo_generated": -2.9482345581054688, + "logits/oppo_real": -3.0109448432922363, + "logits/real": -2.652653455734253, + "logps/generated": -196.82077026367188, + "logps/oppo_gen": -70.6409912109375, + "logps/oppo_real": -375.189697265625, + "logps/real": -351.4737243652344, + "loss": -3.0215, + "loss/gen": 0.35152187943458557, + "loss/real": -3.49497127532959, + "rewards/accuracies": 1.0, + "rewards/generated": -126.1797866821289, + "rewards/margins": 149.89573669433594, + "rewards/real": 23.7159423828125, + "step": 206 + }, + { + "epoch": 0.87, + "grad_norm": 61.61398545074811, + "learning_rate": 4.444444444444444e-07, + "logits/generated": -2.5743110179901123, + "logits/oppo_generated": -2.7811834812164307, + "logits/oppo_real": -2.923962116241455, + "logits/real": -2.454921245574951, + "logps/generated": -196.2818603515625, + "logps/oppo_gen": -71.71026611328125, + "logps/oppo_real": -353.846923828125, + "logps/real": -345.35919189453125, + "loss": -3.0779, + "loss/gen": 0.32238900661468506, + "loss/real": -3.2966091632843018, + "rewards/accuracies": 1.0, + "rewards/generated": -124.57159423828125, + "rewards/margins": 133.05931091308594, + "rewards/real": 8.487724304199219, + "step": 207 + }, + { + "epoch": 0.87, + "grad_norm": 57.15490447432092, + "learning_rate": 4.4408831908831906e-07, + "logits/generated": -2.782914161682129, + "logits/oppo_generated": -2.8043360710144043, + "logits/oppo_real": -3.0211949348449707, + "logits/real": -2.5231986045837402, + "logps/generated": -194.0042724609375, + "logps/oppo_gen": -77.71004486083984, + "logps/oppo_real": -389.77301025390625, + "logps/real": -358.2027282714844, + "loss": -3.0323, + "loss/gen": 0.423714816570282, + "loss/real": -3.7564921379089355, + "rewards/accuracies": 1.0, + "rewards/generated": -116.29424285888672, + "rewards/margins": 147.8645477294922, + "rewards/real": 31.570310592651367, + "step": 208 + }, + { + "epoch": 0.87, + "grad_norm": 59.74080162590932, + "learning_rate": 4.4373219373219373e-07, + "logits/generated": -2.357008934020996, + "logits/oppo_generated": -2.7760987281799316, + "logits/oppo_real": -2.740163803100586, + "logits/real": -2.439347505569458, + "logps/generated": -205.67852783203125, + "logps/oppo_gen": -88.69313049316406, + "logps/oppo_real": -338.8006591796875, + "logps/real": -314.58441162109375, + "loss": -3.0479, + "loss/gen": 0.4672941565513611, + "loss/real": -3.506350040435791, + "rewards/accuracies": 1.0, + "rewards/generated": -116.98541259765625, + "rewards/margins": 141.20166015625, + "rewards/real": 24.21624755859375, + "step": 209 + }, + { + "epoch": 0.88, + "grad_norm": 1848.1626687064102, + "learning_rate": 4.4337606837606836e-07, + "logits/generated": -2.4638514518737793, + "logits/oppo_generated": -2.7127938270568848, + "logits/oppo_real": -2.803234577178955, + "logits/real": -2.38244366645813, + "logps/generated": -237.40353393554688, + "logps/oppo_gen": -85.75541687011719, + "logps/oppo_real": -242.4071807861328, + "logps/real": -237.742431640625, + "loss": -5.9552, + "loss/gen": 0.2063872367143631, + "loss/real": -3.0251030921936035, + "rewards/accuracies": 1.0, + "rewards/generated": -151.64813232421875, + "rewards/margins": 156.31288146972656, + "rewards/real": 4.664756774902344, + "step": 210 + }, + { + "epoch": 0.88, + "grad_norm": 400.5685997741211, + "learning_rate": 4.43019943019943e-07, + "logits/generated": -2.5298070907592773, + "logits/oppo_generated": -2.995426654815674, + "logits/oppo_real": -2.8803281784057617, + "logits/real": -2.6857643127441406, + "logps/generated": -184.82400512695312, + "logps/oppo_gen": -68.82854461669922, + "logps/oppo_real": -337.844482421875, + "logps/real": -296.872314453125, + "loss": -4.4013, + "loss/gen": 0.568313717842102, + "loss/real": -4.289045333862305, + "rewards/accuracies": 1.0, + "rewards/generated": -115.99545288085938, + "rewards/margins": 156.96762084960938, + "rewards/real": 40.97218322753906, + "step": 211 + }, + { + "epoch": 0.89, + "grad_norm": 40892.68061646241, + "learning_rate": 4.4266381766381765e-07, + "logits/generated": -2.705117702484131, + "logits/oppo_generated": -2.6126418113708496, + "logits/oppo_real": -3.0222294330596924, + "logits/real": -2.3141441345214844, + "logps/generated": -171.36810302734375, + "logps/oppo_gen": -56.36054992675781, + "logps/oppo_real": -325.3075256347656, + "logps/real": -308.48114013671875, + "loss": -129.61, + "loss/gen": 0.39068758487701416, + "loss/real": -3.74429988861084, + "rewards/accuracies": 1.0, + "rewards/generated": -115.00755310058594, + "rewards/margins": 131.83392333984375, + "rewards/real": 16.826370239257812, + "step": 212 + }, + { + "epoch": 0.89, + "grad_norm": 3590.9546084572, + "learning_rate": 4.423076923076923e-07, + "logits/generated": -2.696446418762207, + "logits/oppo_generated": -3.026592254638672, + "logits/oppo_real": -2.9974026679992676, + "logits/real": -2.68188214302063, + "logps/generated": -209.5288543701172, + "logps/oppo_gen": -81.62860107421875, + "logps/oppo_real": -354.01513671875, + "logps/real": -335.030517578125, + "loss": -13.5441, + "loss/gen": 0.3259393572807312, + "loss/real": -3.325887680053711, + "rewards/accuracies": 1.0, + "rewards/generated": -127.90025329589844, + "rewards/margins": 146.8848876953125, + "rewards/real": 18.9846248626709, + "step": 213 + }, + { + "epoch": 0.9, + "grad_norm": 62.96425038203723, + "learning_rate": 4.4195156695156695e-07, + "logits/generated": -2.4695000648498535, + "logits/oppo_generated": -2.86299991607666, + "logits/oppo_real": -2.897392749786377, + "logits/real": -2.52815580368042, + "logps/generated": -159.04405212402344, + "logps/oppo_gen": -55.654396057128906, + "logps/oppo_real": -286.4037170410156, + "logps/real": -273.60174560546875, + "loss": -3.259, + "loss/gen": 0.6177021265029907, + "loss/real": -3.30937123298645, + "rewards/accuracies": 1.0, + "rewards/generated": -103.38966369628906, + "rewards/margins": 116.19161224365234, + "rewards/real": 12.801952362060547, + "step": 214 + }, + { + "epoch": 0.9, + "grad_norm": 57.797468485057806, + "learning_rate": 4.4159544159544157e-07, + "logits/generated": -2.4351806640625, + "logits/oppo_generated": -2.8678367137908936, + "logits/oppo_real": -2.797013759613037, + "logits/real": -2.479971408843994, + "logps/generated": -265.5663757324219, + "logps/oppo_gen": -154.916748046875, + "logps/oppo_real": -268.4582824707031, + "logps/real": -245.14251708984375, + "loss": -3.1633, + "loss/gen": 0.5528236627578735, + "loss/real": -3.513732433319092, + "rewards/accuracies": 1.0, + "rewards/generated": -110.6496353149414, + "rewards/margins": 133.96539306640625, + "rewards/real": 23.315759658813477, + "step": 215 + }, + { + "epoch": 0.9, + "grad_norm": 80.14004161173895, + "learning_rate": 4.412393162393162e-07, + "logits/generated": -2.6762161254882812, + "logits/oppo_generated": -2.879833221435547, + "logits/oppo_real": -3.0112786293029785, + "logits/real": -2.480961799621582, + "logps/generated": -193.92112731933594, + "logps/oppo_gen": -96.10844421386719, + "logps/oppo_real": -492.59039306640625, + "logps/real": -480.1392517089844, + "loss": -2.9444, + "loss/gen": 1.0747777223587036, + "loss/real": -3.2505507469177246, + "rewards/accuracies": 0.875, + "rewards/generated": -97.81267547607422, + "rewards/margins": 110.26382446289062, + "rewards/real": 12.45114803314209, + "step": 216 + }, + { + "epoch": 0.91, + "grad_norm": 27165.010436221077, + "learning_rate": 4.4088319088319087e-07, + "logits/generated": -2.661689281463623, + "logits/oppo_generated": -2.855457305908203, + "logits/oppo_real": -3.161579132080078, + "logits/real": -2.475346565246582, + "logps/generated": -173.70448303222656, + "logps/oppo_gen": -79.04156494140625, + "logps/oppo_real": -508.73779296875, + "logps/real": -485.790283203125, + "loss": -63.2568, + "loss/gen": 0.8462377786636353, + "loss/real": -3.7985730171203613, + "rewards/accuracies": 0.875, + "rewards/generated": -94.66291809082031, + "rewards/margins": 117.61046600341797, + "rewards/real": 22.947547912597656, + "step": 217 + }, + { + "epoch": 0.91, + "grad_norm": 79.71609474703807, + "learning_rate": 4.4052706552706555e-07, + "logits/generated": -2.6024856567382812, + "logits/oppo_generated": -2.8270015716552734, + "logits/oppo_real": -2.9884450435638428, + "logits/real": -2.381761074066162, + "logps/generated": -193.1739959716797, + "logps/oppo_gen": -79.96229553222656, + "logps/oppo_real": -295.296630859375, + "logps/real": -280.0252685546875, + "loss": -3.1061, + "loss/gen": 0.8399382829666138, + "loss/real": -3.309168815612793, + "rewards/accuracies": 0.875, + "rewards/generated": -113.2116928100586, + "rewards/margins": 128.48306274414062, + "rewards/real": 15.27135944366455, + "step": 218 + }, + { + "epoch": 0.92, + "grad_norm": 117.95946155486003, + "learning_rate": 4.4017094017094017e-07, + "logits/generated": -2.274564266204834, + "logits/oppo_generated": -2.7040886878967285, + "logits/oppo_real": -2.816561698913574, + "logits/real": -2.2658865451812744, + "logps/generated": -187.32923889160156, + "logps/oppo_gen": -55.71031188964844, + "logps/oppo_real": -202.95962524414062, + "logps/real": -166.00265502929688, + "loss": -3.2002, + "loss/gen": 0.33614322543144226, + "loss/real": -4.0761213302612305, + "rewards/accuracies": 1.0, + "rewards/generated": -131.61892700195312, + "rewards/margins": 168.57589721679688, + "rewards/real": 36.956966400146484, + "step": 219 + }, + { + "epoch": 0.92, + "grad_norm": 88.19558620351276, + "learning_rate": 4.398148148148148e-07, + "logits/generated": -1.9364006519317627, + "logits/oppo_generated": -2.385345458984375, + "logits/oppo_real": -2.4835422039031982, + "logits/real": -1.808083415031433, + "logps/generated": -179.90750122070312, + "logps/oppo_gen": -75.58077239990234, + "logps/oppo_real": -339.3034973144531, + "logps/real": -272.0712585449219, + "loss": -3.2605, + "loss/gen": 0.8947268724441528, + "loss/real": -8.812257766723633, + "rewards/accuracies": 1.0, + "rewards/generated": -104.32673645019531, + "rewards/margins": 171.55897521972656, + "rewards/real": 67.23223876953125, + "step": 220 + }, + { + "epoch": 0.92, + "grad_norm": 5908.862834759325, + "learning_rate": 4.394586894586894e-07, + "logits/generated": -2.601797580718994, + "logits/oppo_generated": -3.011491060256958, + "logits/oppo_real": -3.0487937927246094, + "logits/real": -2.562220573425293, + "logps/generated": -246.65899658203125, + "logps/oppo_gen": -131.22396850585938, + "logps/oppo_real": -400.33868408203125, + "logps/real": -376.3664245605469, + "loss": -13.8663, + "loss/gen": 0.42373475432395935, + "loss/real": -3.5093624591827393, + "rewards/accuracies": 1.0, + "rewards/generated": -115.43504333496094, + "rewards/margins": 139.40728759765625, + "rewards/real": 23.972253799438477, + "step": 221 + }, + { + "epoch": 0.93, + "grad_norm": 95.4678242590141, + "learning_rate": 4.391025641025641e-07, + "logits/generated": -2.3840436935424805, + "logits/oppo_generated": -2.755108118057251, + "logits/oppo_real": -2.8694067001342773, + "logits/real": -2.2530529499053955, + "logps/generated": -177.37356567382812, + "logps/oppo_gen": -61.73572540283203, + "logps/oppo_real": -230.838134765625, + "logps/real": -215.96424865722656, + "loss": -3.0607, + "loss/gen": 0.6986711025238037, + "loss/real": -3.6220858097076416, + "rewards/accuracies": 0.875, + "rewards/generated": -115.63784790039062, + "rewards/margins": 130.51173400878906, + "rewards/real": 14.873891830444336, + "step": 222 + }, + { + "epoch": 0.93, + "grad_norm": 87.51268602233479, + "learning_rate": 4.3874643874643876e-07, + "logits/generated": -2.3197760581970215, + "logits/oppo_generated": -2.8574419021606445, + "logits/oppo_real": -2.923137903213501, + "logits/real": -2.3333818912506104, + "logps/generated": -231.46023559570312, + "logps/oppo_gen": -82.77210998535156, + "logps/oppo_real": -252.58892822265625, + "logps/real": -270.0004577636719, + "loss": -2.98, + "loss/gen": 0.21608535945415497, + "loss/real": -2.6946630477905273, + "rewards/accuracies": 0.875, + "rewards/generated": -148.6881103515625, + "rewards/margins": 131.2765655517578, + "rewards/real": -17.411537170410156, + "step": 223 + }, + { + "epoch": 0.94, + "grad_norm": 87.51268602233479, + "learning_rate": 4.3874643874643876e-07, + "logits/generated": -2.242218017578125, + "logits/oppo_generated": -2.994565010070801, + "logits/oppo_real": -2.8149280548095703, + "logits/real": -2.4609484672546387, + "logps/generated": -181.10708618164062, + "logps/oppo_gen": -48.2861213684082, + "logps/oppo_real": -137.37625122070312, + "logps/real": -162.7267608642578, + "loss": -7932.2959, + "loss/gen": 0.7526332139968872, + "loss/real": -2.542538642883301, + "rewards/accuracies": 0.875, + "rewards/generated": -132.82098388671875, + "rewards/margins": 107.47045135498047, + "rewards/real": -25.350521087646484, + "step": 224 + }, + { + "epoch": 0.94, + "grad_norm": 67.6441819490967, + "learning_rate": 4.3839031339031333e-07, + "logits/generated": -2.315286636352539, + "logits/oppo_generated": -2.816603422164917, + "logits/oppo_real": -2.9343314170837402, + "logits/real": -2.3111538887023926, + "logps/generated": -155.87979125976562, + "logps/oppo_gen": -30.44548988342285, + "logps/oppo_real": -174.9966278076172, + "logps/real": -169.0306854248047, + "loss": -2.8919, + "loss/gen": 0.31436973810195923, + "loss/real": -3.049879550933838, + "rewards/accuracies": 1.0, + "rewards/generated": -125.43431091308594, + "rewards/margins": 131.40023803710938, + "rewards/real": 5.965947151184082, + "step": 225 + }, + { + "epoch": 0.95, + "grad_norm": 61.01237787454485, + "learning_rate": 4.38034188034188e-07, + "logits/generated": -2.3058667182922363, + "logits/oppo_generated": -2.6415185928344727, + "logits/oppo_real": -3.0115818977355957, + "logits/real": -2.004304885864258, + "logps/generated": -215.7899627685547, + "logps/oppo_gen": -93.466064453125, + "logps/oppo_real": -340.529296875, + "logps/real": -337.6749572753906, + "loss": -3.1087, + "loss/gen": 0.5611802339553833, + "loss/real": -2.9437613487243652, + "rewards/accuracies": 1.0, + "rewards/generated": -122.32388305664062, + "rewards/margins": 125.1782455444336, + "rewards/real": 2.8543548583984375, + "step": 226 + }, + { + "epoch": 0.95, + "grad_norm": 68.83477018148241, + "learning_rate": 4.376780626780627e-07, + "logits/generated": -2.3567757606506348, + "logits/oppo_generated": -2.7984108924865723, + "logits/oppo_real": -2.9754528999328613, + "logits/real": -2.275757312774658, + "logps/generated": -208.89105224609375, + "logps/oppo_gen": -69.67858123779297, + "logps/oppo_real": -268.7974853515625, + "logps/real": -266.00445556640625, + "loss": -3.1351, + "loss/gen": 0.24598746001720428, + "loss/real": -3.0755763053894043, + "rewards/accuracies": 1.0, + "rewards/generated": -139.21246337890625, + "rewards/margins": 142.00546264648438, + "rewards/real": 2.7930030822753906, + "step": 227 + }, + { + "epoch": 0.95, + "grad_norm": 103.64887767723296, + "learning_rate": 4.373219373219373e-07, + "logits/generated": -2.2009589672088623, + "logits/oppo_generated": -2.7994847297668457, + "logits/oppo_real": -2.687981605529785, + "logits/real": -2.264253616333008, + "logps/generated": -200.60202026367188, + "logps/oppo_gen": -76.17577362060547, + "logps/oppo_real": -381.5020751953125, + "logps/real": -350.08245849609375, + "loss": -3.0609, + "loss/gen": 0.34163713455200195, + "loss/real": -4.447661399841309, + "rewards/accuracies": 1.0, + "rewards/generated": -124.42623138427734, + "rewards/margins": 155.8458251953125, + "rewards/real": 31.419601440429688, + "step": 228 + }, + { + "epoch": 0.96, + "grad_norm": 4236.734355609282, + "learning_rate": 4.3696581196581193e-07, + "logits/generated": -2.313900947570801, + "logits/oppo_generated": -2.8429031372070312, + "logits/oppo_real": -3.0224597454071045, + "logits/real": -2.2679154872894287, + "logps/generated": -197.47596740722656, + "logps/oppo_gen": -78.5534439086914, + "logps/oppo_real": -246.5026397705078, + "logps/real": -232.70751953125, + "loss": -7.8903, + "loss/gen": 0.3893076479434967, + "loss/real": -3.408906936645508, + "rewards/accuracies": 1.0, + "rewards/generated": -118.92252349853516, + "rewards/margins": 132.71763610839844, + "rewards/real": 13.795119285583496, + "step": 229 + }, + { + "epoch": 0.96, + "grad_norm": 65.91049876655653, + "learning_rate": 4.366096866096866e-07, + "logits/generated": -2.122530221939087, + "logits/oppo_generated": -2.5529236793518066, + "logits/oppo_real": -2.7146146297454834, + "logits/real": -1.90482759475708, + "logps/generated": -217.27114868164062, + "logps/oppo_gen": -79.70944213867188, + "logps/oppo_real": -106.01055145263672, + "logps/real": -128.87461853027344, + "loss": -3.0589, + "loss/gen": 0.2452090084552765, + "loss/real": -2.452601909637451, + "rewards/accuracies": 1.0, + "rewards/generated": -137.56170654296875, + "rewards/margins": 114.6976547241211, + "rewards/real": -22.864065170288086, + "step": 230 + }, + { + "epoch": 0.97, + "grad_norm": 100.04292047666341, + "learning_rate": 4.362535612535612e-07, + "logits/generated": -1.987313985824585, + "logits/oppo_generated": -2.5894346237182617, + "logits/oppo_real": -2.6849865913391113, + "logits/real": -2.029129981994629, + "logps/generated": -221.10101318359375, + "logps/oppo_gen": -67.09019470214844, + "logps/oppo_real": -256.4427185058594, + "logps/real": -237.23843383789062, + "loss": -3.213, + "loss/gen": 0.4131355285644531, + "loss/real": -3.562318801879883, + "rewards/accuracies": 1.0, + "rewards/generated": -154.0108184814453, + "rewards/margins": 173.215087890625, + "rewards/real": 19.204273223876953, + "step": 231 + }, + { + "epoch": 0.97, + "grad_norm": 79.75284391970766, + "learning_rate": 4.358974358974359e-07, + "logits/generated": -2.341341495513916, + "logits/oppo_generated": -2.959817886352539, + "logits/oppo_real": -2.9362192153930664, + "logits/real": -2.416731119155884, + "logps/generated": -216.04949951171875, + "logps/oppo_gen": -82.48292541503906, + "logps/oppo_real": -458.88818359375, + "logps/real": -485.8726806640625, + "loss": -2.8748, + "loss/gen": 0.28640565276145935, + "loss/real": -2.7164149284362793, + "rewards/accuracies": 0.75, + "rewards/generated": -133.5665740966797, + "rewards/margins": 106.58207702636719, + "rewards/real": -26.9844970703125, + "step": 232 + }, + { + "epoch": 0.97, + "grad_norm": 263.329314974615, + "learning_rate": 4.355413105413105e-07, + "logits/generated": -2.236466884613037, + "logits/oppo_generated": -2.7284858226776123, + "logits/oppo_real": -2.8326492309570312, + "logits/real": -2.176626205444336, + "logps/generated": -187.85845947265625, + "logps/oppo_gen": -60.89936828613281, + "logps/oppo_real": -245.58233642578125, + "logps/real": -235.84445190429688, + "loss": -3.2515, + "loss/gen": 0.3334371745586395, + "loss/real": -3.059086322784424, + "rewards/accuracies": 1.0, + "rewards/generated": -126.9590835571289, + "rewards/margins": 136.69699096679688, + "rewards/real": 9.737905502319336, + "step": 233 + }, + { + "epoch": 0.98, + "grad_norm": 130.99742811168088, + "learning_rate": 4.3518518518518514e-07, + "logits/generated": -2.2553353309631348, + "logits/oppo_generated": -2.884782075881958, + "logits/oppo_real": -3.007986545562744, + "logits/real": -2.3814938068389893, + "logps/generated": -198.6071319580078, + "logps/oppo_gen": -64.29571533203125, + "logps/oppo_real": -445.2386169433594, + "logps/real": -404.5937805175781, + "loss": -3.0983, + "loss/gen": 0.26874154806137085, + "loss/real": -4.306643962860107, + "rewards/accuracies": 1.0, + "rewards/generated": -134.31141662597656, + "rewards/margins": 174.95623779296875, + "rewards/real": 40.64482498168945, + "step": 234 + }, + { + "epoch": 0.98, + "grad_norm": 70.3636098964248, + "learning_rate": 4.348290598290598e-07, + "logits/generated": -2.1750454902648926, + "logits/oppo_generated": -2.8430304527282715, + "logits/oppo_real": -2.873483657836914, + "logits/real": -2.3181915283203125, + "logps/generated": -196.6973876953125, + "logps/oppo_gen": -68.79239654541016, + "logps/oppo_real": -391.89910888671875, + "logps/real": -368.2919616699219, + "loss": -2.8684, + "loss/gen": 0.313241183757782, + "loss/real": -3.5294508934020996, + "rewards/accuracies": 1.0, + "rewards/generated": -127.90498352050781, + "rewards/margins": 151.51217651367188, + "rewards/real": 23.607187271118164, + "step": 235 + }, + { + "epoch": 0.99, + "grad_norm": 69.64450397263053, + "learning_rate": 4.3447293447293444e-07, + "logits/generated": -2.3948874473571777, + "logits/oppo_generated": -2.8508265018463135, + "logits/oppo_real": -2.9677348136901855, + "logits/real": -2.3284974098205566, + "logps/generated": -211.0993194580078, + "logps/oppo_gen": -88.43344116210938, + "logps/oppo_real": -438.55322265625, + "logps/real": -395.72943115234375, + "loss": -3.1273, + "loss/gen": 0.3554914891719818, + "loss/real": -4.292209625244141, + "rewards/accuracies": 1.0, + "rewards/generated": -122.66587829589844, + "rewards/margins": 165.48968505859375, + "rewards/real": 42.82379913330078, + "step": 236 + }, + { + "epoch": 0.99, + "grad_norm": 287.0907957423923, + "learning_rate": 4.341168091168091e-07, + "logits/generated": -2.3292388916015625, + "logits/oppo_generated": -2.816070079803467, + "logits/oppo_real": -3.012850761413574, + "logits/real": -2.2773959636688232, + "logps/generated": -189.18850708007812, + "logps/oppo_gen": -55.2912483215332, + "logps/oppo_real": -255.20977783203125, + "logps/real": -236.0473175048828, + "loss": -3.7901, + "loss/gen": 0.27096259593963623, + "loss/real": -3.297393321990967, + "rewards/accuracies": 1.0, + "rewards/generated": -133.89724731445312, + "rewards/margins": 153.05970764160156, + "rewards/real": 19.162452697753906, + "step": 237 + }, + { + "epoch": 1.0, + "grad_norm": 68.14058737960438, + "learning_rate": 4.3376068376068374e-07, + "logits/generated": -2.3537933826446533, + "logits/oppo_generated": -2.701869487762451, + "logits/oppo_real": -2.963564872741699, + "logits/real": -2.1307592391967773, + "logps/generated": -207.35293579101562, + "logps/oppo_gen": -83.03327941894531, + "logps/oppo_real": -312.4057312011719, + "logps/real": -294.8611755371094, + "loss": -3.0868, + "loss/gen": 0.3351461589336395, + "loss/real": -3.4385178089141846, + "rewards/accuracies": 1.0, + "rewards/generated": -124.31964111328125, + "rewards/margins": 141.8642120361328, + "rewards/real": 17.544559478759766, + "step": 238 + }, + { + "epoch": 1.0, + "grad_norm": 66.31466478558872, + "learning_rate": 4.3340455840455836e-07, + "logits/generated": -2.291761636734009, + "logits/oppo_generated": -2.8546152114868164, + "logits/oppo_real": -3.036848545074463, + "logits/real": -2.2752645015716553, + "logps/generated": -213.76364135742188, + "logps/oppo_gen": -75.19477844238281, + "logps/oppo_real": -314.191162109375, + "logps/real": -290.7506103515625, + "loss": -3.1633, + "loss/gen": 0.2661153972148895, + "loss/real": -3.5240395069122314, + "rewards/accuracies": 1.0, + "rewards/generated": -138.56887817382812, + "rewards/margins": 162.0093994140625, + "rewards/real": 23.440532684326172, + "step": 239 + } + ], + "logging_steps": 1.0, + "max_steps": 1434, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}