zephyr_lsif_ao01s1_e1 / trainer_state.json
seablue's picture
Upload trainer_state.json with huggingface_hub
712c9ab verified
raw
history blame
182 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100.0,
"global_step": 239,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 0.0,
"learning_rate": 0.0,
"logits/generated": -3.130502223968506,
"logits/oppo_generated": -3.1088104248046875,
"logits/oppo_real": -3.130502223968506,
"logits/real": -3.1088104248046875,
"logps/generated": -99.40917205810547,
"logps/oppo_gen": -99.40917205810547,
"logps/oppo_real": -459.3097229003906,
"logps/real": -459.3097229003906,
"loss": 0.9762,
"loss/gen": 3.6945278644561768,
"loss/real": -2.7182817459106445,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 1
},
{
"epoch": 0.01,
"grad_norm": 0.0,
"learning_rate": 0.0,
"logits/generated": -3.0933988094329834,
"logits/oppo_generated": -2.919645309448242,
"logits/oppo_real": -3.0933988094329834,
"logits/real": -2.919645309448242,
"logps/generated": -103.65153503417969,
"logps/oppo_gen": -103.65153503417969,
"logps/oppo_real": -392.1358642578125,
"logps/real": -392.1358642578125,
"loss": 0.9762,
"loss/gen": 3.6945278644561768,
"loss/real": -2.7182817459106445,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 2
},
{
"epoch": 0.01,
"grad_norm": 0.0,
"learning_rate": 0.0,
"logits/generated": -2.6572537422180176,
"logits/oppo_generated": -2.8074941635131836,
"logits/oppo_real": -2.6572537422180176,
"logits/real": -2.8074941635131836,
"logps/generated": -72.88986206054688,
"logps/oppo_gen": -72.88986206054688,
"logps/oppo_real": -291.916748046875,
"logps/real": -291.916748046875,
"loss": 0.9762,
"loss/gen": 3.6945278644561768,
"loss/real": -2.7182817459106445,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 3
},
{
"epoch": 0.02,
"grad_norm": 0.0,
"learning_rate": 0.0,
"logits/generated": -2.8966193199157715,
"logits/oppo_generated": -2.768460273742676,
"logits/oppo_real": -2.8966193199157715,
"logits/real": -2.768460273742676,
"logps/generated": -64.05287170410156,
"logps/oppo_gen": -64.05287170410156,
"logps/oppo_real": -376.8367919921875,
"logps/real": -376.8367919921875,
"loss": 0.9762,
"loss/gen": 3.6945278644561768,
"loss/real": -2.7182817459106445,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 4
},
{
"epoch": 0.02,
"grad_norm": 0.0,
"learning_rate": 0.0,
"logits/generated": -2.889317512512207,
"logits/oppo_generated": -2.708950996398926,
"logits/oppo_real": -2.889317512512207,
"logits/real": -2.708950996398926,
"logps/generated": -48.29164123535156,
"logps/oppo_gen": -48.29164123535156,
"logps/oppo_real": -173.0751953125,
"logps/real": -173.0751953125,
"loss": 0.9762,
"loss/gen": 3.6945278644561768,
"loss/real": -2.7182817459106445,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 5
},
{
"epoch": 0.03,
"grad_norm": 0.0,
"learning_rate": 0.0,
"logits/generated": -2.957958698272705,
"logits/oppo_generated": -2.749436378479004,
"logits/oppo_real": -2.957958698272705,
"logits/real": -2.749436378479004,
"logps/generated": -48.84138488769531,
"logps/oppo_gen": -48.84138488769531,
"logps/oppo_real": -139.2998046875,
"logps/real": -139.2998046875,
"loss": 0.9762,
"loss/gen": 3.6945278644561768,
"loss/real": -2.7182817459106445,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 6
},
{
"epoch": 0.03,
"grad_norm": 140.3248950538535,
"learning_rate": 1.6666666666666667e-08,
"logits/generated": -3.1195316314697266,
"logits/oppo_generated": -2.9545342922210693,
"logits/oppo_real": -3.1195316314697266,
"logits/real": -2.9545342922210693,
"logps/generated": -163.2059783935547,
"logps/oppo_gen": -163.2059783935547,
"logps/oppo_real": -432.88226318359375,
"logps/real": -432.88226318359375,
"loss": 0.9762,
"loss/gen": 3.6945278644561768,
"loss/real": -2.7182817459106445,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 7
},
{
"epoch": 0.03,
"grad_norm": 140.3248950538535,
"learning_rate": 1.6666666666666667e-08,
"logits/generated": -2.910332441329956,
"logits/oppo_generated": -2.9416637420654297,
"logits/oppo_real": -2.910332441329956,
"logits/real": -2.9416637420654297,
"logps/generated": -69.29386901855469,
"logps/oppo_gen": -69.29386901855469,
"logps/oppo_real": -311.59619140625,
"logps/real": -311.59619140625,
"loss": 0.9762,
"loss/gen": 3.6945278644561768,
"loss/real": -2.7182817459106445,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 8
},
{
"epoch": 0.04,
"grad_norm": 144.39084058121554,
"learning_rate": 3.3333333333333334e-08,
"logits/generated": -2.409976005554199,
"logits/oppo_generated": -2.294548273086548,
"logits/oppo_real": -2.409976005554199,
"logits/real": -2.294548273086548,
"logps/generated": -82.20011138916016,
"logps/oppo_gen": -82.20011138916016,
"logps/oppo_real": -381.1852111816406,
"logps/real": -381.1852111816406,
"loss": 0.9762,
"loss/gen": 3.6945278644561768,
"loss/real": -2.7182817459106445,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 9
},
{
"epoch": 0.04,
"grad_norm": 144.39084058121554,
"learning_rate": 3.3333333333333334e-08,
"logits/generated": -2.963313579559326,
"logits/oppo_generated": -2.9239017963409424,
"logits/oppo_real": -2.963313579559326,
"logits/real": -2.9239017963409424,
"logps/generated": -93.09856414794922,
"logps/oppo_gen": -93.09856414794922,
"logps/oppo_real": -233.10401916503906,
"logps/real": -233.10401916503906,
"loss": 0.9762,
"loss/gen": 3.6945278644561768,
"loss/real": -2.7182817459106445,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 10
},
{
"epoch": 0.05,
"grad_norm": 147.09211346550842,
"learning_rate": 5e-08,
"logits/generated": -2.857771396636963,
"logits/oppo_generated": -2.837850570678711,
"logits/oppo_real": -2.857771396636963,
"logits/real": -2.837850570678711,
"logps/generated": -59.46293640136719,
"logps/oppo_gen": -59.46293640136719,
"logps/oppo_real": -142.69805908203125,
"logps/real": -142.69805908203125,
"loss": 0.9762,
"loss/gen": 3.6945278644561768,
"loss/real": -2.7182817459106445,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 11
},
{
"epoch": 0.05,
"grad_norm": 147.36241597037218,
"learning_rate": 6.666666666666667e-08,
"logits/generated": -2.8778512477874756,
"logits/oppo_generated": -2.7672762870788574,
"logits/oppo_real": -2.8780808448791504,
"logits/real": -2.766920328140259,
"logps/generated": -70.60530090332031,
"logps/oppo_gen": -70.58644104003906,
"logps/oppo_real": -343.4704284667969,
"logps/real": -343.4797058105469,
"loss": 0.9737,
"loss/gen": 3.693136215209961,
"loss/real": -2.718029499053955,
"rewards/accuracies": 0.5,
"rewards/generated": -0.01885223388671875,
"rewards/margins": 0.009566187858581543,
"rewards/real": -0.009286046028137207,
"step": 12
},
{
"epoch": 0.05,
"grad_norm": 141.20291665498627,
"learning_rate": 8.333333333333333e-08,
"logits/generated": -2.8214950561523438,
"logits/oppo_generated": -2.8374581336975098,
"logits/oppo_real": -2.822021961212158,
"logits/real": -2.836732864379883,
"logps/generated": -106.83735656738281,
"logps/oppo_gen": -106.73956298828125,
"logps/oppo_real": -280.41741943359375,
"logps/real": -280.4453430175781,
"loss": 0.9675,
"loss/gen": 3.6873114109039307,
"loss/real": -2.7175238132476807,
"rewards/accuracies": 0.875,
"rewards/generated": -0.09777355194091797,
"rewards/margins": 0.06986618041992188,
"rewards/real": -0.027907371520996094,
"step": 13
},
{
"epoch": 0.06,
"grad_norm": 147.7225611683097,
"learning_rate": 1e-07,
"logits/generated": -2.7692794799804688,
"logits/oppo_generated": -2.8255615234375,
"logits/oppo_real": -2.771684169769287,
"logits/real": -2.8233795166015625,
"logps/generated": -86.35212707519531,
"logps/oppo_gen": -85.86231994628906,
"logps/oppo_real": -289.01318359375,
"logps/real": -289.048095703125,
"loss": 0.9484,
"loss/gen": 3.6585421562194824,
"loss/real": -2.717336654663086,
"rewards/accuracies": 0.875,
"rewards/generated": -0.48981738090515137,
"rewards/margins": 0.4548964500427246,
"rewards/real": -0.03492093086242676,
"step": 14
},
{
"epoch": 0.06,
"grad_norm": 135.88560072492965,
"learning_rate": 1.1666666666666667e-07,
"logits/generated": -3.1533312797546387,
"logits/oppo_generated": -2.7394165992736816,
"logits/oppo_real": -3.1553921699523926,
"logits/real": -2.7368688583374023,
"logps/generated": -75.06793212890625,
"logps/oppo_gen": -74.47514343261719,
"logps/oppo_real": -366.370361328125,
"logps/real": -366.428466796875,
"loss": 0.9381,
"loss/gen": 3.6510140895843506,
"loss/real": -2.716707944869995,
"rewards/accuracies": 1.0,
"rewards/generated": -0.5927925109863281,
"rewards/margins": 0.5347006320953369,
"rewards/real": -0.05809187889099121,
"step": 15
},
{
"epoch": 0.07,
"grad_norm": 138.72056275567078,
"learning_rate": 1.3333333333333334e-07,
"logits/generated": -2.1314597129821777,
"logits/oppo_generated": -2.1468427181243896,
"logits/oppo_real": -2.142064094543457,
"logits/real": -2.137998580932617,
"logps/generated": -79.51522064208984,
"logps/oppo_gen": -78.08332824707031,
"logps/oppo_real": -437.152587890625,
"logps/real": -437.42681884765625,
"loss": 0.8706,
"loss/gen": 3.590456008911133,
"loss/real": -2.710862874984741,
"rewards/accuracies": 1.0,
"rewards/generated": -1.4318904876708984,
"rewards/margins": 1.1576709747314453,
"rewards/real": -0.2742195129394531,
"step": 16
},
{
"epoch": 0.07,
"grad_norm": 128.9259113793655,
"learning_rate": 1.5e-07,
"logits/generated": -2.9416465759277344,
"logits/oppo_generated": -2.902646064758301,
"logits/oppo_real": -2.953411817550659,
"logits/real": -2.887700080871582,
"logps/generated": -74.43273162841797,
"logps/oppo_gen": -72.53976440429688,
"logps/oppo_real": -310.7004089355469,
"logps/real": -310.87109375,
"loss": 0.8455,
"loss/gen": 3.557424545288086,
"loss/real": -2.7137060165405273,
"rewards/accuracies": 1.0,
"rewards/generated": -1.8929705619812012,
"rewards/margins": 1.7222943305969238,
"rewards/real": -0.17067623138427734,
"step": 17
},
{
"epoch": 0.08,
"grad_norm": 131.5700784634371,
"learning_rate": 1.6666666666666665e-07,
"logits/generated": -2.94179630279541,
"logits/oppo_generated": -2.947140693664551,
"logits/oppo_real": -2.9634807109832764,
"logits/real": -2.920558214187622,
"logps/generated": -79.8861083984375,
"logps/oppo_gen": -74.80116271972656,
"logps/oppo_real": -309.46124267578125,
"logps/real": -310.43719482421875,
"loss": 0.6796,
"loss/gen": 3.3385138511657715,
"loss/real": -2.6920909881591797,
"rewards/accuracies": 1.0,
"rewards/generated": -5.084942817687988,
"rewards/margins": 4.109025955200195,
"rewards/real": -0.9759171009063721,
"step": 18
},
{
"epoch": 0.08,
"grad_norm": 132.43793505119672,
"learning_rate": 1.833333333333333e-07,
"logits/generated": -2.4398093223571777,
"logits/oppo_generated": -2.6668543815612793,
"logits/oppo_real": -2.47564697265625,
"logits/real": -2.6329777240753174,
"logps/generated": -73.18605041503906,
"logps/oppo_gen": -67.190673828125,
"logps/oppo_real": -285.60797119140625,
"logps/real": -287.226806640625,
"loss": 0.5909,
"loss/gen": 3.2783122062683105,
"loss/real": -2.6751227378845215,
"rewards/accuracies": 1.0,
"rewards/generated": -5.995372772216797,
"rewards/margins": 4.3765668869018555,
"rewards/real": -1.6188058853149414,
"step": 19
},
{
"epoch": 0.08,
"grad_norm": 122.69142409669472,
"learning_rate": 2e-07,
"logits/generated": -3.083611011505127,
"logits/oppo_generated": -2.7376956939697266,
"logits/oppo_real": -3.1153059005737305,
"logits/real": -2.70223331451416,
"logps/generated": -102.092529296875,
"logps/oppo_gen": -93.65745544433594,
"logps/oppo_real": -173.968994140625,
"logps/real": -176.39892578125,
"loss": 0.5272,
"loss/gen": 3.124610185623169,
"loss/real": -2.653409957885742,
"rewards/accuracies": 1.0,
"rewards/generated": -8.435081481933594,
"rewards/margins": 6.005127429962158,
"rewards/real": -2.4299545288085938,
"step": 20
},
{
"epoch": 0.09,
"grad_norm": 107.67073560019686,
"learning_rate": 2.1666666666666667e-07,
"logits/generated": -2.8323276042938232,
"logits/oppo_generated": -2.6699156761169434,
"logits/oppo_real": -2.8930060863494873,
"logits/real": -2.6024348735809326,
"logps/generated": -59.84043884277344,
"logps/oppo_gen": -50.189754486083984,
"logps/oppo_real": -197.0562286376953,
"logps/real": -199.4170379638672,
"loss": 0.291,
"loss/gen": 3.0484681129455566,
"loss/real": -2.6573870182037354,
"rewards/accuracies": 1.0,
"rewards/generated": -9.650688171386719,
"rewards/margins": 7.289878845214844,
"rewards/real": -2.3608102798461914,
"step": 21
},
{
"epoch": 0.09,
"grad_norm": 97.9560398453689,
"learning_rate": 2.3333333333333333e-07,
"logits/generated": -2.9114887714385986,
"logits/oppo_generated": -2.8113152980804443,
"logits/oppo_real": -2.997610330581665,
"logits/real": -2.724991798400879,
"logps/generated": -74.77532196044922,
"logps/oppo_gen": -59.91856384277344,
"logps/oppo_real": -175.6089324951172,
"logps/real": -181.092529296875,
"loss": 0.1564,
"loss/gen": 2.7578284740448,
"loss/real": -2.5756349563598633,
"rewards/accuracies": 1.0,
"rewards/generated": -14.856756210327148,
"rewards/margins": 9.373159408569336,
"rewards/real": -5.4835968017578125,
"step": 22
},
{
"epoch": 0.1,
"grad_norm": 101.1866700089493,
"learning_rate": 2.5e-07,
"logits/generated": -2.738328218460083,
"logits/oppo_generated": -2.712057113647461,
"logits/oppo_real": -2.83805513381958,
"logits/real": -2.6210412979125977,
"logps/generated": -103.88157653808594,
"logps/oppo_gen": -84.5518798828125,
"logps/oppo_real": -331.96221923828125,
"logps/real": -338.4169616699219,
"loss": -0.0064,
"loss/gen": 2.5180134773254395,
"loss/real": -2.5506632328033447,
"rewards/accuracies": 1.0,
"rewards/generated": -19.329689025878906,
"rewards/margins": 12.874977111816406,
"rewards/real": -6.454712867736816,
"step": 23
},
{
"epoch": 0.1,
"grad_norm": 91.5968525574842,
"learning_rate": 2.6666666666666667e-07,
"logits/generated": -2.2416625022888184,
"logits/oppo_generated": -2.4313888549804688,
"logits/oppo_real": -2.3368191719055176,
"logits/real": -2.3420183658599854,
"logps/generated": -93.81153106689453,
"logps/oppo_gen": -70.7446060180664,
"logps/oppo_real": -186.56976318359375,
"logps/real": -194.1738739013672,
"loss": -0.0762,
"loss/gen": 2.358870029449463,
"loss/real": -2.5205307006835938,
"rewards/accuracies": 1.0,
"rewards/generated": -23.066925048828125,
"rewards/margins": 15.46281623840332,
"rewards/real": -7.604110240936279,
"step": 24
},
{
"epoch": 0.1,
"grad_norm": 91.5968525574842,
"learning_rate": 2.6666666666666667e-07,
"logits/generated": -2.808882474899292,
"logits/oppo_generated": -2.8222999572753906,
"logits/oppo_real": -2.956730842590332,
"logits/real": -2.6913347244262695,
"logps/generated": -77.1277847290039,
"logps/oppo_gen": -55.461936950683594,
"logps/oppo_real": -125.98847198486328,
"logps/real": -135.24478149414062,
"loss": -0.2171,
"loss/gen": 2.427062511444092,
"loss/real": -2.4810240268707275,
"rewards/accuracies": 0.875,
"rewards/generated": -21.665851593017578,
"rewards/margins": 12.40954875946045,
"rewards/real": -9.256302833557129,
"step": 25
},
{
"epoch": 0.11,
"grad_norm": 90.71163423694614,
"learning_rate": 2.833333333333333e-07,
"logits/generated": -2.524838447570801,
"logits/oppo_generated": -2.9076757431030273,
"logits/oppo_real": -2.661245822906494,
"logits/real": -2.7569193840026855,
"logps/generated": -94.57086181640625,
"logps/oppo_gen": -71.46342468261719,
"logps/oppo_real": -293.69677734375,
"logps/real": -298.1125183105469,
"loss": -0.199,
"loss/gen": 2.345475196838379,
"loss/real": -2.6118550300598145,
"rewards/accuracies": 1.0,
"rewards/generated": -23.107433319091797,
"rewards/margins": 18.69169807434082,
"rewards/real": -4.415735244750977,
"step": 26
},
{
"epoch": 0.11,
"grad_norm": 71.87999982609905,
"learning_rate": 3e-07,
"logits/generated": -2.6987175941467285,
"logits/oppo_generated": -3.018123149871826,
"logits/oppo_real": -2.837935447692871,
"logits/real": -2.857689142227173,
"logps/generated": -76.849853515625,
"logps/oppo_gen": -51.06623458862305,
"logps/oppo_real": -151.72972106933594,
"logps/real": -170.76156616210938,
"loss": -0.2983,
"loss/gen": 2.2412900924682617,
"loss/real": -2.2491354942321777,
"rewards/accuracies": 0.75,
"rewards/generated": -25.783626556396484,
"rewards/margins": 6.751780033111572,
"rewards/real": -19.031845092773438,
"step": 27
},
{
"epoch": 0.12,
"grad_norm": 77.17411637512444,
"learning_rate": 3.166666666666666e-07,
"logits/generated": -2.4447317123413086,
"logits/oppo_generated": -2.7700376510620117,
"logits/oppo_real": -2.6328747272491455,
"logits/real": -2.6101927757263184,
"logps/generated": -109.12590026855469,
"logps/oppo_gen": -72.09120178222656,
"logps/oppo_real": -411.427978515625,
"logps/real": -411.268310546875,
"loss": -0.5509,
"loss/gen": 1.7849677801132202,
"loss/real": -2.807443141937256,
"rewards/accuracies": 1.0,
"rewards/generated": -37.03469467163086,
"rewards/margins": 37.19430160522461,
"rewards/real": 0.1596088409423828,
"step": 28
},
{
"epoch": 0.12,
"grad_norm": 77.17411637512444,
"learning_rate": 3.166666666666666e-07,
"logits/generated": -2.7265658378601074,
"logits/oppo_generated": -2.91198468208313,
"logits/oppo_real": -2.9211230278015137,
"logits/real": -2.7112436294555664,
"logps/generated": -121.98545837402344,
"logps/oppo_gen": -82.21741485595703,
"logps/oppo_real": -301.3589172363281,
"logps/real": -312.0211181640625,
"loss": -0.7401,
"loss/gen": 1.702211618423462,
"loss/real": -2.461573600769043,
"rewards/accuracies": 0.875,
"rewards/generated": -39.768035888671875,
"rewards/margins": 29.1058292388916,
"rewards/real": -10.66220760345459,
"step": 29
},
{
"epoch": 0.13,
"grad_norm": 85.01337498171243,
"learning_rate": 3.333333333333333e-07,
"logits/generated": -2.7819857597351074,
"logits/oppo_generated": -2.4022648334503174,
"logits/oppo_real": -2.97650146484375,
"logits/real": -2.2471132278442383,
"logps/generated": -130.29348754882812,
"logps/oppo_gen": -99.30915832519531,
"logps/oppo_real": -226.3162841796875,
"logps/real": -240.45065307617188,
"loss": -0.6496,
"loss/gen": 2.034857749938965,
"loss/real": -2.3712759017944336,
"rewards/accuracies": 1.0,
"rewards/generated": -30.984325408935547,
"rewards/margins": 16.849956512451172,
"rewards/real": -14.134370803833008,
"step": 30
},
{
"epoch": 0.13,
"grad_norm": 58.31929890696561,
"learning_rate": 3.5e-07,
"logits/generated": -2.751274585723877,
"logits/oppo_generated": -2.854034900665283,
"logits/oppo_real": -2.9424033164978027,
"logits/real": -2.689624309539795,
"logps/generated": -90.87772369384766,
"logps/oppo_gen": -54.3837890625,
"logps/oppo_real": -252.91123962402344,
"logps/real": -270.4813537597656,
"loss": -0.6919,
"loss/gen": 1.8102836608886719,
"loss/real": -2.287971258163452,
"rewards/accuracies": 0.875,
"rewards/generated": -36.493934631347656,
"rewards/margins": 18.923805236816406,
"rewards/real": -17.570131301879883,
"step": 31
},
{
"epoch": 0.13,
"grad_norm": 49.377771381874105,
"learning_rate": 3.666666666666666e-07,
"logits/generated": -2.703281879425049,
"logits/oppo_generated": -2.9263906478881836,
"logits/oppo_real": -2.9535346031188965,
"logits/real": -2.689378261566162,
"logps/generated": -125.86929321289062,
"logps/oppo_gen": -78.93435668945312,
"logps/oppo_real": -298.2490234375,
"logps/real": -317.0907287597656,
"loss": -0.7566,
"loss/gen": 1.4581267833709717,
"loss/real": -2.2758255004882812,
"rewards/accuracies": 1.0,
"rewards/generated": -46.9349365234375,
"rewards/margins": 28.093202590942383,
"rewards/real": -18.841733932495117,
"step": 32
},
{
"epoch": 0.14,
"grad_norm": 47.132596888492415,
"learning_rate": 3.8333333333333335e-07,
"logits/generated": -2.8561768531799316,
"logits/oppo_generated": -2.9521539211273193,
"logits/oppo_real": -3.0699048042297363,
"logits/real": -2.701744794845581,
"logps/generated": -182.20703125,
"logps/oppo_gen": -136.80690002441406,
"logps/oppo_real": -344.64990234375,
"logps/real": -365.87115478515625,
"loss": -0.8056,
"loss/gen": 1.5829627513885498,
"loss/real": -2.2397522926330566,
"rewards/accuracies": 1.0,
"rewards/generated": -45.400123596191406,
"rewards/margins": 24.178863525390625,
"rewards/real": -21.22126007080078,
"step": 33
},
{
"epoch": 0.14,
"grad_norm": 47.136771677116634,
"learning_rate": 4e-07,
"logits/generated": -2.74558162689209,
"logits/oppo_generated": -2.8447458744049072,
"logits/oppo_real": -2.998192548751831,
"logits/real": -2.603461742401123,
"logps/generated": -134.50888061523438,
"logps/oppo_gen": -79.24800109863281,
"logps/oppo_real": -401.9757385253906,
"logps/real": -427.4682922363281,
"loss": -0.995,
"loss/gen": 1.2431423664093018,
"loss/real": -2.120981454849243,
"rewards/accuracies": 1.0,
"rewards/generated": -55.260887145996094,
"rewards/margins": 29.768321990966797,
"rewards/real": -25.492568969726562,
"step": 34
},
{
"epoch": 0.15,
"grad_norm": 47.136771677116634,
"learning_rate": 4e-07,
"logits/generated": -2.696960926055908,
"logits/oppo_generated": -2.942030906677246,
"logits/oppo_real": -2.9536867141723633,
"logits/real": -2.6742172241210938,
"logps/generated": -125.61725616455078,
"logps/oppo_gen": -62.21235656738281,
"logps/oppo_real": -296.8402404785156,
"logps/real": -324.08892822265625,
"loss": -5.4743,
"loss/gen": 1.1838252544403076,
"loss/real": -2.137930154800415,
"rewards/accuracies": 0.875,
"rewards/generated": -63.40489959716797,
"rewards/margins": 36.15622329711914,
"rewards/real": -27.248676300048828,
"step": 35
},
{
"epoch": 0.15,
"grad_norm": 38.149193463480486,
"learning_rate": 4.1666666666666667e-07,
"logits/generated": -2.420623779296875,
"logits/oppo_generated": -2.792217493057251,
"logits/oppo_real": -2.680948257446289,
"logits/real": -2.556100845336914,
"logps/generated": -105.52731323242188,
"logps/oppo_gen": -49.044715881347656,
"logps/oppo_real": -183.3726348876953,
"logps/real": -208.43609619140625,
"loss": -0.9189,
"loss/gen": 1.2789992094039917,
"loss/real": -2.150240182876587,
"rewards/accuracies": 0.75,
"rewards/generated": -56.48259735107422,
"rewards/margins": 31.41913604736328,
"rewards/real": -25.063465118408203,
"step": 36
},
{
"epoch": 0.15,
"grad_norm": 40.49399399669891,
"learning_rate": 4.3333333333333335e-07,
"logits/generated": -2.5155656337738037,
"logits/oppo_generated": -2.5968940258026123,
"logits/oppo_real": -2.84472393989563,
"logits/real": -2.363577127456665,
"logps/generated": -156.7322235107422,
"logps/oppo_gen": -96.46727752685547,
"logps/oppo_real": -441.2087097167969,
"logps/real": -452.6773681640625,
"loss": -0.993,
"loss/gen": 1.218324899673462,
"loss/real": -2.478170394897461,
"rewards/accuracies": 1.0,
"rewards/generated": -60.26493835449219,
"rewards/margins": 48.7962532043457,
"rewards/real": -11.4686861038208,
"step": 37
},
{
"epoch": 0.16,
"grad_norm": 38.69477383912377,
"learning_rate": 4.5e-07,
"logits/generated": -2.845750331878662,
"logits/oppo_generated": -3.097993850708008,
"logits/oppo_real": -3.161780834197998,
"logits/real": -2.804795742034912,
"logps/generated": -169.83187866210938,
"logps/oppo_gen": -86.33152770996094,
"logps/oppo_real": -374.5130615234375,
"logps/real": -400.9438171386719,
"loss": -1.0833,
"loss/gen": 0.779202401638031,
"loss/real": -2.109189987182617,
"rewards/accuracies": 0.875,
"rewards/generated": -83.50035095214844,
"rewards/margins": 57.069610595703125,
"rewards/real": -26.430742263793945,
"step": 38
},
{
"epoch": 0.16,
"grad_norm": 39.95823930895698,
"learning_rate": 4.6666666666666666e-07,
"logits/generated": -2.4254915714263916,
"logits/oppo_generated": -2.648486614227295,
"logits/oppo_real": -2.7488012313842773,
"logits/real": -2.350640296936035,
"logps/generated": -155.58016967773438,
"logps/oppo_gen": -78.30477142333984,
"logps/oppo_real": -363.86407470703125,
"logps/real": -395.0137939453125,
"loss": -1.1548,
"loss/gen": 0.8230071067810059,
"loss/real": -2.0378403663635254,
"rewards/accuracies": 1.0,
"rewards/generated": -77.275390625,
"rewards/margins": 46.12569046020508,
"rewards/real": -31.149703979492188,
"step": 39
},
{
"epoch": 0.17,
"grad_norm": 40.89272509652924,
"learning_rate": 4.833333333333333e-07,
"logits/generated": -2.461397409439087,
"logits/oppo_generated": -2.864193916320801,
"logits/oppo_real": -2.7761850357055664,
"logits/real": -2.5565099716186523,
"logps/generated": -136.72689819335938,
"logps/oppo_gen": -60.6450309753418,
"logps/oppo_real": -320.1565856933594,
"logps/real": -337.87396240234375,
"loss": -1.1347,
"loss/gen": 0.9023051857948303,
"loss/real": -2.3106727600097656,
"rewards/accuracies": 1.0,
"rewards/generated": -76.08185577392578,
"rewards/margins": 58.36448669433594,
"rewards/real": -17.717369079589844,
"step": 40
},
{
"epoch": 0.17,
"grad_norm": 89.36429678043967,
"learning_rate": 5e-07,
"logits/generated": -2.6618571281433105,
"logits/oppo_generated": -2.812058210372925,
"logits/oppo_real": -2.982236862182617,
"logits/real": -2.515589714050293,
"logps/generated": -162.60000610351562,
"logps/oppo_gen": -90.06674194335938,
"logps/oppo_real": -176.9713592529297,
"logps/real": -211.77285766601562,
"loss": -1.1724,
"loss/gen": 0.8981304168701172,
"loss/real": -1.989371657371521,
"rewards/accuracies": 1.0,
"rewards/generated": -72.53326416015625,
"rewards/margins": 37.731773376464844,
"rewards/real": -34.80148696899414,
"step": 41
},
{
"epoch": 0.18,
"grad_norm": 89.36429678043967,
"learning_rate": 5e-07,
"logits/generated": -2.5393388271331787,
"logits/oppo_generated": -2.9253015518188477,
"logits/oppo_real": -2.9079301357269287,
"logits/real": -2.55344557762146,
"logps/generated": -130.3933868408203,
"logps/oppo_gen": -54.79414367675781,
"logps/oppo_real": -186.92176818847656,
"logps/real": -235.45858764648438,
"loss": -5.6809,
"loss/gen": 0.8535439372062683,
"loss/real": -1.694696307182312,
"rewards/accuracies": 1.0,
"rewards/generated": -75.5992431640625,
"rewards/margins": 27.062450408935547,
"rewards/real": -48.53679275512695,
"step": 42
},
{
"epoch": 0.18,
"grad_norm": 43.34401895870049,
"learning_rate": 4.996438746438746e-07,
"logits/generated": -2.4617252349853516,
"logits/oppo_generated": -2.9949498176574707,
"logits/oppo_real": -2.9107003211975098,
"logits/real": -2.6696996688842773,
"logps/generated": -172.76368713378906,
"logps/oppo_gen": -79.9820785522461,
"logps/oppo_real": -404.1100158691406,
"logps/real": -422.02642822265625,
"loss": -1.2059,
"loss/gen": 0.6165514588356018,
"loss/real": -2.3567748069763184,
"rewards/accuracies": 1.0,
"rewards/generated": -92.7816162109375,
"rewards/margins": 74.865234375,
"rewards/real": -17.916383743286133,
"step": 43
},
{
"epoch": 0.18,
"grad_norm": 49.41177428002358,
"learning_rate": 4.992877492877492e-07,
"logits/generated": -2.026392698287964,
"logits/oppo_generated": -2.4440221786499023,
"logits/oppo_real": -2.3998050689697266,
"logits/real": -2.089980125427246,
"logps/generated": -209.39190673828125,
"logps/oppo_gen": -93.22187805175781,
"logps/oppo_real": -290.8685302734375,
"logps/real": -320.3968811035156,
"loss": -1.3765,
"loss/gen": 0.5650486350059509,
"loss/real": -2.131740093231201,
"rewards/accuracies": 1.0,
"rewards/generated": -116.17002868652344,
"rewards/margins": 86.64169311523438,
"rewards/real": -29.528339385986328,
"step": 44
},
{
"epoch": 0.19,
"grad_norm": 59.38357051631053,
"learning_rate": 4.98931623931624e-07,
"logits/generated": -2.3456368446350098,
"logits/oppo_generated": -2.9232547283172607,
"logits/oppo_real": -2.7114880084991455,
"logits/real": -2.5829110145568848,
"logps/generated": -168.85809326171875,
"logps/oppo_gen": -64.50846862792969,
"logps/oppo_real": -239.8323974609375,
"logps/real": -297.2595520019531,
"loss": -1.4436,
"loss/gen": 0.5499280691146851,
"loss/real": -1.58909273147583,
"rewards/accuracies": 0.875,
"rewards/generated": -104.34961700439453,
"rewards/margins": 46.92247009277344,
"rewards/real": -57.42715072631836,
"step": 45
},
{
"epoch": 0.19,
"grad_norm": 44.12861838917575,
"learning_rate": 4.985754985754986e-07,
"logits/generated": -2.6069109439849854,
"logits/oppo_generated": -2.741456985473633,
"logits/oppo_real": -2.9938759803771973,
"logits/real": -2.428788185119629,
"logps/generated": -149.2159423828125,
"logps/oppo_gen": -58.174400329589844,
"logps/oppo_real": -258.21685791015625,
"logps/real": -301.842041015625,
"loss": -1.4547,
"loss/gen": 0.676410973072052,
"loss/real": -1.867649793624878,
"rewards/accuracies": 1.0,
"rewards/generated": -91.04153442382812,
"rewards/margins": 47.41633605957031,
"rewards/real": -43.62519836425781,
"step": 46
},
{
"epoch": 0.2,
"grad_norm": 44.34332055817426,
"learning_rate": 4.982193732193732e-07,
"logits/generated": -2.587238311767578,
"logits/oppo_generated": -2.814079761505127,
"logits/oppo_real": -2.964923620223999,
"logits/real": -2.480611801147461,
"logps/generated": -175.05799865722656,
"logps/oppo_gen": -78.5189208984375,
"logps/oppo_real": -288.56396484375,
"logps/real": -318.1793518066406,
"loss": -1.5609,
"loss/gen": 0.7453894019126892,
"loss/real": -2.1940207481384277,
"rewards/accuracies": 0.875,
"rewards/generated": -96.53907775878906,
"rewards/margins": 66.9237060546875,
"rewards/real": -29.615373611450195,
"step": 47
},
{
"epoch": 0.2,
"grad_norm": 41.77245636004139,
"learning_rate": 4.978632478632478e-07,
"logits/generated": -2.5828328132629395,
"logits/oppo_generated": -2.7121076583862305,
"logits/oppo_real": -2.932806968688965,
"logits/real": -2.3821425437927246,
"logps/generated": -170.45315551757812,
"logps/oppo_gen": -72.10917663574219,
"logps/oppo_real": -299.3392333984375,
"logps/real": -351.05755615234375,
"loss": -1.5561,
"loss/gen": 0.6000806093215942,
"loss/real": -1.664915680885315,
"rewards/accuracies": 0.875,
"rewards/generated": -98.34397888183594,
"rewards/margins": 46.625675201416016,
"rewards/real": -51.718299865722656,
"step": 48
},
{
"epoch": 0.21,
"grad_norm": 40.92458049952987,
"learning_rate": 4.975071225071225e-07,
"logits/generated": -2.7721643447875977,
"logits/oppo_generated": -2.814209461212158,
"logits/oppo_real": -3.157527208328247,
"logits/real": -2.545376777648926,
"logps/generated": -190.73538208007812,
"logps/oppo_gen": -80.24543762207031,
"logps/oppo_real": -294.9969482421875,
"logps/real": -325.6192626953125,
"loss": -1.6521,
"loss/gen": 0.5949017405509949,
"loss/real": -2.104870319366455,
"rewards/accuracies": 1.0,
"rewards/generated": -110.48993682861328,
"rewards/margins": 79.86763000488281,
"rewards/real": -30.622314453125,
"step": 49
},
{
"epoch": 0.21,
"grad_norm": 40.13348241970552,
"learning_rate": 4.971509971509972e-07,
"logits/generated": -2.4653735160827637,
"logits/oppo_generated": -2.9343652725219727,
"logits/oppo_real": -2.7617945671081543,
"logits/real": -2.6601219177246094,
"logps/generated": -198.1933135986328,
"logps/oppo_gen": -82.74765014648438,
"logps/oppo_real": -315.32562255859375,
"logps/real": -342.7396240234375,
"loss": -1.6584,
"loss/gen": 0.4857123792171478,
"loss/real": -2.1372337341308594,
"rewards/accuracies": 1.0,
"rewards/generated": -115.44567108154297,
"rewards/margins": 88.03167724609375,
"rewards/real": -27.413999557495117,
"step": 50
},
{
"epoch": 0.21,
"grad_norm": 601.4569550267084,
"learning_rate": 4.967948717948718e-07,
"logits/generated": -2.5435636043548584,
"logits/oppo_generated": -2.805569648742676,
"logits/oppo_real": -2.7846250534057617,
"logits/real": -2.5612943172454834,
"logps/generated": -126.34326934814453,
"logps/oppo_gen": -45.456573486328125,
"logps/oppo_real": -161.39598083496094,
"logps/real": -196.76950073242188,
"loss": -2.1364,
"loss/gen": 0.7522258758544922,
"loss/real": -2.0092098712921143,
"rewards/accuracies": 1.0,
"rewards/generated": -80.88670349121094,
"rewards/margins": 45.51318359375,
"rewards/real": -35.3735237121582,
"step": 51
},
{
"epoch": 0.22,
"grad_norm": 38.65978819953379,
"learning_rate": 4.964387464387464e-07,
"logits/generated": -2.464820384979248,
"logits/oppo_generated": -2.7444612979888916,
"logits/oppo_real": -2.7595162391662598,
"logits/real": -2.45442271232605,
"logps/generated": -139.13876342773438,
"logps/oppo_gen": -50.193504333496094,
"logps/oppo_real": -148.25294494628906,
"logps/real": -181.2758026123047,
"loss": -1.7596,
"loss/gen": 0.6654144525527954,
"loss/real": -2.03794002532959,
"rewards/accuracies": 1.0,
"rewards/generated": -88.94526672363281,
"rewards/margins": 55.92240524291992,
"rewards/real": -33.022857666015625,
"step": 52
},
{
"epoch": 0.22,
"grad_norm": 167.4533750406363,
"learning_rate": 4.96082621082621e-07,
"logits/generated": -2.297238349914551,
"logits/oppo_generated": -2.660369396209717,
"logits/oppo_real": -2.6082496643066406,
"logits/real": -2.3693835735321045,
"logps/generated": -138.73458862304688,
"logps/oppo_gen": -55.80210876464844,
"logps/oppo_real": -201.49038696289062,
"logps/real": -226.22634887695312,
"loss": -2.0472,
"loss/gen": 0.7817223072052002,
"loss/real": -2.228806257247925,
"rewards/accuracies": 0.875,
"rewards/generated": -82.93248748779297,
"rewards/margins": 58.19652557373047,
"rewards/real": -24.735958099365234,
"step": 53
},
{
"epoch": 0.23,
"grad_norm": 38.20762371262606,
"learning_rate": 4.957264957264958e-07,
"logits/generated": -2.6939735412597656,
"logits/oppo_generated": -2.746832847595215,
"logits/oppo_real": -2.973560333251953,
"logits/real": -2.453509569168091,
"logps/generated": -155.50794982910156,
"logps/oppo_gen": -77.28608703613281,
"logps/oppo_real": -547.3628540039062,
"logps/real": -561.0299072265625,
"loss": -1.7775,
"loss/gen": 0.9930198192596436,
"loss/real": -2.6198465824127197,
"rewards/accuracies": 0.875,
"rewards/generated": -78.22187042236328,
"rewards/margins": 64.55480194091797,
"rewards/real": -13.66706657409668,
"step": 54
},
{
"epoch": 0.23,
"grad_norm": 111.84459258553808,
"learning_rate": 4.953703703703703e-07,
"logits/generated": -2.3411145210266113,
"logits/oppo_generated": -2.664555072784424,
"logits/oppo_real": -2.6400251388549805,
"logits/real": -2.3643062114715576,
"logps/generated": -196.82240295410156,
"logps/oppo_gen": -78.57785034179688,
"logps/oppo_real": -398.628662109375,
"logps/real": -393.2767333984375,
"loss": -2.0424,
"loss/gen": 0.5210200548171997,
"loss/real": -2.969128131866455,
"rewards/accuracies": 1.0,
"rewards/generated": -118.24455261230469,
"rewards/margins": 123.59645080566406,
"rewards/real": 5.351901054382324,
"step": 55
},
{
"epoch": 0.23,
"grad_norm": 50.014668258578155,
"learning_rate": 4.95014245014245e-07,
"logits/generated": -2.841848373413086,
"logits/oppo_generated": -2.638930320739746,
"logits/oppo_real": -3.1015210151672363,
"logits/real": -2.4049315452575684,
"logps/generated": -181.6864471435547,
"logps/oppo_gen": -84.6130599975586,
"logps/oppo_real": -310.54534912109375,
"logps/real": -329.8880615234375,
"loss": -1.8582,
"loss/gen": 0.7178683876991272,
"loss/real": -2.3166608810424805,
"rewards/accuracies": 0.875,
"rewards/generated": -97.0733871459961,
"rewards/margins": 77.73066711425781,
"rewards/real": -19.342731475830078,
"step": 56
},
{
"epoch": 0.24,
"grad_norm": 578.0217340204432,
"learning_rate": 4.946581196581196e-07,
"logits/generated": -2.5697083473205566,
"logits/oppo_generated": -2.9305167198181152,
"logits/oppo_real": -2.7986156940460205,
"logits/real": -2.666802406311035,
"logps/generated": -182.54356384277344,
"logps/oppo_gen": -55.247596740722656,
"logps/oppo_real": -159.6094970703125,
"logps/real": -191.58706665039062,
"loss": -3.1972,
"loss/gen": 0.5683310031890869,
"loss/real": -2.0497186183929443,
"rewards/accuracies": 1.0,
"rewards/generated": -127.29595947265625,
"rewards/margins": 95.31836700439453,
"rewards/real": -31.977588653564453,
"step": 57
},
{
"epoch": 0.24,
"grad_norm": 51.64103394316142,
"learning_rate": 4.943019943019943e-07,
"logits/generated": -2.674006462097168,
"logits/oppo_generated": -2.733177900314331,
"logits/oppo_real": -3.0261659622192383,
"logits/real": -2.440023899078369,
"logps/generated": -159.27865600585938,
"logps/oppo_gen": -77.4105453491211,
"logps/oppo_real": -291.50042724609375,
"logps/real": -305.1040954589844,
"loss": -1.8105,
"loss/gen": 0.7389234900474548,
"loss/real": -2.533874988555908,
"rewards/accuracies": 0.875,
"rewards/generated": -81.86811065673828,
"rewards/margins": 68.26446533203125,
"rewards/real": -13.60364055633545,
"step": 58
},
{
"epoch": 0.25,
"grad_norm": 93.81772033276816,
"learning_rate": 4.93945868945869e-07,
"logits/generated": -2.253323554992676,
"logits/oppo_generated": -2.70068359375,
"logits/oppo_real": -2.622352361679077,
"logits/real": -2.379178047180176,
"logps/generated": -198.3895263671875,
"logps/oppo_gen": -66.53448486328125,
"logps/oppo_real": -142.07913208007812,
"logps/real": -186.49630737304688,
"loss": -2.138,
"loss/gen": 0.36330240964889526,
"loss/real": -1.9270637035369873,
"rewards/accuracies": 1.0,
"rewards/generated": -131.85504150390625,
"rewards/margins": 87.43788146972656,
"rewards/real": -44.41715621948242,
"step": 59
},
{
"epoch": 0.25,
"grad_norm": 39.40381974811817,
"learning_rate": 4.935897435897436e-07,
"logits/generated": -2.8230233192443848,
"logits/oppo_generated": -3.0608558654785156,
"logits/oppo_real": -3.0881457328796387,
"logits/real": -2.815178394317627,
"logps/generated": -176.8870849609375,
"logps/oppo_gen": -78.30126953125,
"logps/oppo_real": -296.7585144042969,
"logps/real": -305.8564453125,
"loss": -1.9511,
"loss/gen": 0.5859768390655518,
"loss/real": -2.5944645404815674,
"rewards/accuracies": 1.0,
"rewards/generated": -98.58580780029297,
"rewards/margins": 89.48786926269531,
"rewards/real": -9.097940444946289,
"step": 60
},
{
"epoch": 0.26,
"grad_norm": 37.537286150739504,
"learning_rate": 4.932336182336182e-07,
"logits/generated": -2.67462158203125,
"logits/oppo_generated": -2.904336929321289,
"logits/oppo_real": -3.0007967948913574,
"logits/real": -2.706181526184082,
"logps/generated": -194.5768585205078,
"logps/oppo_gen": -78.76142883300781,
"logps/oppo_real": -321.17315673828125,
"logps/real": -332.7289733886719,
"loss": -2.0148,
"loss/gen": 0.5784947276115417,
"loss/real": -2.6833224296569824,
"rewards/accuracies": 0.875,
"rewards/generated": -115.8154296875,
"rewards/margins": 104.25957489013672,
"rewards/real": -11.5558500289917,
"step": 61
},
{
"epoch": 0.26,
"grad_norm": 52.37389057595874,
"learning_rate": 4.928774928774928e-07,
"logits/generated": -2.8456006050109863,
"logits/oppo_generated": -3.0246148109436035,
"logits/oppo_real": -3.155604839324951,
"logits/real": -2.7388291358947754,
"logps/generated": -199.48080444335938,
"logps/oppo_gen": -99.78816986083984,
"logps/oppo_real": -357.6624755859375,
"logps/real": -361.3135070800781,
"loss": -2.1519,
"loss/gen": 0.5312547087669373,
"loss/real": -2.7395927906036377,
"rewards/accuracies": 1.0,
"rewards/generated": -99.692626953125,
"rewards/margins": 96.04158020019531,
"rewards/real": -3.65103816986084,
"step": 62
},
{
"epoch": 0.26,
"grad_norm": 42.730668543561166,
"learning_rate": 4.925213675213676e-07,
"logits/generated": -2.5994668006896973,
"logits/oppo_generated": -2.718918800354004,
"logits/oppo_real": -2.8950438499450684,
"logits/real": -2.5016493797302246,
"logps/generated": -158.23098754882812,
"logps/oppo_gen": -73.73533630371094,
"logps/oppo_real": -276.2977294921875,
"logps/real": -278.3821105957031,
"loss": -2.1712,
"loss/gen": 0.7339967489242554,
"loss/real": -2.8307507038116455,
"rewards/accuracies": 0.875,
"rewards/generated": -84.49565124511719,
"rewards/margins": 82.41130065917969,
"rewards/real": -2.084348678588867,
"step": 63
},
{
"epoch": 0.27,
"grad_norm": 42.60172940894316,
"learning_rate": 4.921652421652421e-07,
"logits/generated": -2.6288089752197266,
"logits/oppo_generated": -2.7741386890411377,
"logits/oppo_real": -2.8905487060546875,
"logits/real": -2.5671515464782715,
"logps/generated": -164.08560180664062,
"logps/oppo_gen": -70.42605590820312,
"logps/oppo_real": -291.8798522949219,
"logps/real": -327.316650390625,
"loss": -2.0118,
"loss/gen": 0.6031943559646606,
"loss/real": -2.0373241901397705,
"rewards/accuracies": 1.0,
"rewards/generated": -93.65955352783203,
"rewards/margins": 58.222755432128906,
"rewards/real": -35.436798095703125,
"step": 64
},
{
"epoch": 0.27,
"grad_norm": 525.7316627805482,
"learning_rate": 4.918091168091168e-07,
"logits/generated": -2.4973931312561035,
"logits/oppo_generated": -2.731257438659668,
"logits/oppo_real": -2.804780960083008,
"logits/real": -2.5444960594177246,
"logps/generated": -230.39053344726562,
"logps/oppo_gen": -143.67832946777344,
"logps/oppo_real": -309.55450439453125,
"logps/real": -315.0069274902344,
"loss": -2.927,
"loss/gen": 0.7850175499916077,
"loss/real": -2.6631596088409424,
"rewards/accuracies": 1.0,
"rewards/generated": -86.71220397949219,
"rewards/margins": 81.25978088378906,
"rewards/real": -5.452421188354492,
"step": 65
},
{
"epoch": 0.28,
"grad_norm": 80.44494186631624,
"learning_rate": 4.914529914529914e-07,
"logits/generated": -2.6201300621032715,
"logits/oppo_generated": -2.710496664047241,
"logits/oppo_real": -2.980191707611084,
"logits/real": -2.4632492065429688,
"logps/generated": -194.9330291748047,
"logps/oppo_gen": -71.51214599609375,
"logps/oppo_real": -284.34765625,
"logps/real": -298.09637451171875,
"loss": -2.3734,
"loss/gen": 0.33017057180404663,
"loss/real": -2.5317859649658203,
"rewards/accuracies": 1.0,
"rewards/generated": -123.42086791992188,
"rewards/margins": 109.67212677001953,
"rewards/real": -13.74874210357666,
"step": 66
},
{
"epoch": 0.28,
"grad_norm": 45.73295767790172,
"learning_rate": 4.910968660968661e-07,
"logits/generated": -2.7911667823791504,
"logits/oppo_generated": -3.0934062004089355,
"logits/oppo_real": -3.077010154724121,
"logits/real": -2.8539376258850098,
"logps/generated": -222.52537536621094,
"logps/oppo_gen": -109.1805419921875,
"logps/oppo_real": -348.23834228515625,
"logps/real": -337.4581298828125,
"loss": -2.0979,
"loss/gen": 0.41786307096481323,
"loss/real": -3.0975918769836426,
"rewards/accuracies": 1.0,
"rewards/generated": -113.3448257446289,
"rewards/margins": 124.12504577636719,
"rewards/real": 10.780221939086914,
"step": 67
},
{
"epoch": 0.28,
"grad_norm": 79.69396419859851,
"learning_rate": 4.907407407407407e-07,
"logits/generated": -2.657637596130371,
"logits/oppo_generated": -2.838265895843506,
"logits/oppo_real": -3.01387357711792,
"logits/real": -2.6080217361450195,
"logps/generated": -174.41976928710938,
"logps/oppo_gen": -75.5096206665039,
"logps/oppo_real": -242.11915588378906,
"logps/real": -260.3476867675781,
"loss": -2.2245,
"loss/gen": 0.5530567765235901,
"loss/real": -2.540099620819092,
"rewards/accuracies": 0.875,
"rewards/generated": -98.91015625,
"rewards/margins": 80.68161010742188,
"rewards/real": -18.228544235229492,
"step": 68
},
{
"epoch": 0.29,
"grad_norm": 57.15850101499557,
"learning_rate": 4.903846153846153e-07,
"logits/generated": -2.718892812728882,
"logits/oppo_generated": -2.786154270172119,
"logits/oppo_real": -2.980445146560669,
"logits/real": -2.5882253646850586,
"logps/generated": -203.54293823242188,
"logps/oppo_gen": -78.40753173828125,
"logps/oppo_real": -188.29739379882812,
"logps/real": -220.8904571533203,
"loss": -2.1241,
"loss/gen": 0.3356163501739502,
"loss/real": -2.0496373176574707,
"rewards/accuracies": 1.0,
"rewards/generated": -125.13542175292969,
"rewards/margins": 92.5423583984375,
"rewards/real": -32.59306335449219,
"step": 69
},
{
"epoch": 0.29,
"grad_norm": 55.560476534442856,
"learning_rate": 4.9002849002849e-07,
"logits/generated": -2.484227180480957,
"logits/oppo_generated": -2.8353500366210938,
"logits/oppo_real": -2.788581371307373,
"logits/real": -2.584005832672119,
"logps/generated": -167.95159912109375,
"logps/oppo_gen": -74.27359008789062,
"logps/oppo_real": -262.4258728027344,
"logps/real": -275.72314453125,
"loss": -2.2186,
"loss/gen": 0.6950039863586426,
"loss/real": -2.613152027130127,
"rewards/accuracies": 0.875,
"rewards/generated": -93.67799377441406,
"rewards/margins": 80.38072967529297,
"rewards/real": -13.297256469726562,
"step": 70
},
{
"epoch": 0.3,
"grad_norm": 40.88330591021765,
"learning_rate": 4.896723646723647e-07,
"logits/generated": -2.44921612739563,
"logits/oppo_generated": -2.8188014030456543,
"logits/oppo_real": -2.757133960723877,
"logits/real": -2.499187469482422,
"logps/generated": -161.24481201171875,
"logps/oppo_gen": -55.317054748535156,
"logps/oppo_real": -178.10824584960938,
"logps/real": -189.52215576171875,
"loss": -2.1209,
"loss/gen": 0.4848253130912781,
"loss/real": -2.4801671504974365,
"rewards/accuracies": 1.0,
"rewards/generated": -105.9277572631836,
"rewards/margins": 94.51385498046875,
"rewards/real": -11.413912773132324,
"step": 71
},
{
"epoch": 0.3,
"grad_norm": 35.69595968854091,
"learning_rate": 4.893162393162393e-07,
"logits/generated": -2.509648323059082,
"logits/oppo_generated": -2.865746259689331,
"logits/oppo_real": -2.85042142868042,
"logits/real": -2.612628936767578,
"logps/generated": -207.73446655273438,
"logps/oppo_gen": -101.81581115722656,
"logps/oppo_real": -463.47314453125,
"logps/real": -449.06451416015625,
"loss": -2.2045,
"loss/gen": 0.5114428997039795,
"loss/real": -3.246914863586426,
"rewards/accuracies": 1.0,
"rewards/generated": -105.91865539550781,
"rewards/margins": 120.32732391357422,
"rewards/real": 14.408672332763672,
"step": 72
},
{
"epoch": 0.31,
"grad_norm": 46.60751808654372,
"learning_rate": 4.889601139601139e-07,
"logits/generated": -2.513535499572754,
"logits/oppo_generated": -2.9923882484436035,
"logits/oppo_real": -2.813816547393799,
"logits/real": -2.6687417030334473,
"logps/generated": -200.91436767578125,
"logps/oppo_gen": -78.51251220703125,
"logps/oppo_real": -286.4658508300781,
"logps/real": -272.64630126953125,
"loss": -2.3923,
"loss/gen": 0.3351864218711853,
"loss/real": -3.2229790687561035,
"rewards/accuracies": 1.0,
"rewards/generated": -122.40186309814453,
"rewards/margins": 136.22140502929688,
"rewards/real": 13.81955337524414,
"step": 73
},
{
"epoch": 0.31,
"grad_norm": 46.60751808654372,
"learning_rate": 4.889601139601139e-07,
"logits/generated": -2.667757987976074,
"logits/oppo_generated": -2.7725887298583984,
"logits/oppo_real": -3.063380002975464,
"logits/real": -2.553708076477051,
"logps/generated": -177.4560546875,
"logps/oppo_gen": -79.40229034423828,
"logps/oppo_real": -383.419677734375,
"logps/real": -384.32568359375,
"loss": -22602.7559,
"loss/gen": 0.5979279279708862,
"loss/real": -2.8606982231140137,
"rewards/accuracies": 0.875,
"rewards/generated": -98.05377960205078,
"rewards/margins": 97.14777374267578,
"rewards/real": -0.9059967994689941,
"step": 74
},
{
"epoch": 0.31,
"grad_norm": 41.5718210882534,
"learning_rate": 4.886039886039886e-07,
"logits/generated": -2.7659826278686523,
"logits/oppo_generated": -2.8321666717529297,
"logits/oppo_real": -3.1668171882629395,
"logits/real": -2.5931761264801025,
"logps/generated": -241.8350067138672,
"logps/oppo_gen": -99.83964538574219,
"logps/oppo_real": -322.6613464355469,
"logps/real": -311.7099914550781,
"loss": -2.2896,
"loss/gen": 0.5622150897979736,
"loss/real": -3.6310153007507324,
"rewards/accuracies": 1.0,
"rewards/generated": -141.99537658691406,
"rewards/margins": 152.9467315673828,
"rewards/real": 10.951353073120117,
"step": 75
},
{
"epoch": 0.32,
"grad_norm": 44.716500642240554,
"learning_rate": 4.882478632478633e-07,
"logits/generated": -2.7758758068084717,
"logits/oppo_generated": -3.000812530517578,
"logits/oppo_real": -3.1619484424591064,
"logits/real": -2.7301864624023438,
"logps/generated": -200.3653564453125,
"logps/oppo_gen": -83.82888793945312,
"logps/oppo_real": -441.3746337890625,
"logps/real": -431.2779541015625,
"loss": -2.3134,
"loss/gen": 0.3644047975540161,
"loss/real": -3.1670141220092773,
"rewards/accuracies": 1.0,
"rewards/generated": -116.53646850585938,
"rewards/margins": 126.63313293457031,
"rewards/real": 10.096664428710938,
"step": 76
},
{
"epoch": 0.32,
"grad_norm": 44.716500642240554,
"learning_rate": 4.882478632478633e-07,
"logits/generated": -2.254303455352783,
"logits/oppo_generated": -2.4111037254333496,
"logits/oppo_real": -2.622360944747925,
"logits/real": -2.1454672813415527,
"logps/generated": -177.43157958984375,
"logps/oppo_gen": -94.29784393310547,
"logps/oppo_real": -307.8828125,
"logps/real": -284.0107727050781,
"loss": -17.4644,
"loss/gen": 1.3658581972122192,
"loss/real": -3.9946789741516113,
"rewards/accuracies": 1.0,
"rewards/generated": -83.13372802734375,
"rewards/margins": 107.00576782226562,
"rewards/real": 23.872041702270508,
"step": 77
},
{
"epoch": 0.33,
"grad_norm": 44.716500642240554,
"learning_rate": 4.882478632478633e-07,
"logits/generated": -2.561386823654175,
"logits/oppo_generated": -2.7816574573516846,
"logits/oppo_real": -2.923349380493164,
"logits/real": -2.5139307975769043,
"logps/generated": -170.64508056640625,
"logps/oppo_gen": -70.22672271728516,
"logps/oppo_real": -286.0644836425781,
"logps/real": -304.5027160644531,
"loss": -51.313,
"loss/gen": 0.5637646317481995,
"loss/real": -2.394735813140869,
"rewards/accuracies": 0.875,
"rewards/generated": -100.41835021972656,
"rewards/margins": 81.98014831542969,
"rewards/real": -18.438209533691406,
"step": 78
},
{
"epoch": 0.33,
"grad_norm": 57.8592273155015,
"learning_rate": 4.878917378917379e-07,
"logits/generated": -2.341658115386963,
"logits/oppo_generated": -2.624129056930542,
"logits/oppo_real": -2.6314826011657715,
"logits/real": -2.3068737983703613,
"logps/generated": -137.337646484375,
"logps/oppo_gen": -48.185340881347656,
"logps/oppo_real": -148.66656494140625,
"logps/real": -167.26583862304688,
"loss": -2.4266,
"loss/gen": 0.7307255268096924,
"loss/real": -2.38840389251709,
"rewards/accuracies": 1.0,
"rewards/generated": -89.15231323242188,
"rewards/margins": 70.55303955078125,
"rewards/real": -18.599275588989258,
"step": 79
},
{
"epoch": 0.33,
"grad_norm": 61.02295290503402,
"learning_rate": 4.875356125356125e-07,
"logits/generated": -2.563333034515381,
"logits/oppo_generated": -2.668670177459717,
"logits/oppo_real": -2.9500231742858887,
"logits/real": -2.375744581222534,
"logps/generated": -193.91883850097656,
"logps/oppo_gen": -76.79248809814453,
"logps/oppo_real": -287.1414794921875,
"logps/real": -309.30792236328125,
"loss": -2.2982,
"loss/gen": 0.386036217212677,
"loss/real": -2.4096016883850098,
"rewards/accuracies": 1.0,
"rewards/generated": -117.1263427734375,
"rewards/margins": 94.95994567871094,
"rewards/real": -22.166412353515625,
"step": 80
},
{
"epoch": 0.34,
"grad_norm": 169.99358903352797,
"learning_rate": 4.871794871794871e-07,
"logits/generated": -2.613680601119995,
"logits/oppo_generated": -2.8624868392944336,
"logits/oppo_real": -3.0077338218688965,
"logits/real": -2.5658488273620605,
"logps/generated": -205.80078125,
"logps/oppo_gen": -103.01863861083984,
"logps/oppo_real": -484.10565185546875,
"logps/real": -483.44097900390625,
"loss": -3.0697,
"loss/gen": 0.7189458608627319,
"loss/real": -2.95969820022583,
"rewards/accuracies": 0.875,
"rewards/generated": -102.78215026855469,
"rewards/margins": 103.44681549072266,
"rewards/real": 0.6646575927734375,
"step": 81
},
{
"epoch": 0.34,
"grad_norm": 43.23124085134354,
"learning_rate": 4.868233618233618e-07,
"logits/generated": -2.5590624809265137,
"logits/oppo_generated": -2.976921796798706,
"logits/oppo_real": -3.0094780921936035,
"logits/real": -2.6058220863342285,
"logps/generated": -179.38499450683594,
"logps/oppo_gen": -66.51390075683594,
"logps/oppo_real": -174.39071655273438,
"logps/real": -176.55557250976562,
"loss": -2.4127,
"loss/gen": 0.44477635622024536,
"loss/real": -2.9173386096954346,
"rewards/accuracies": 1.0,
"rewards/generated": -112.87110137939453,
"rewards/margins": 110.70625305175781,
"rewards/real": -2.1648406982421875,
"step": 82
},
{
"epoch": 0.35,
"grad_norm": 58.48061786622663,
"learning_rate": 4.864672364672365e-07,
"logits/generated": -2.5132930278778076,
"logits/oppo_generated": -3.01529598236084,
"logits/oppo_real": -2.9185380935668945,
"logits/real": -2.643099308013916,
"logps/generated": -246.02755737304688,
"logps/oppo_gen": -86.220458984375,
"logps/oppo_real": -329.8023376464844,
"logps/real": -310.6354064941406,
"loss": -2.4253,
"loss/gen": 0.5145424008369446,
"loss/real": -3.408470392227173,
"rewards/accuracies": 1.0,
"rewards/generated": -159.80709838867188,
"rewards/margins": 178.97406005859375,
"rewards/real": 19.166940689086914,
"step": 83
},
{
"epoch": 0.35,
"grad_norm": 54.441314178233476,
"learning_rate": 4.861111111111111e-07,
"logits/generated": -2.3987717628479004,
"logits/oppo_generated": -2.864108085632324,
"logits/oppo_real": -2.8596436977386475,
"logits/real": -2.5680923461914062,
"logps/generated": -177.2393798828125,
"logps/oppo_gen": -79.35113525390625,
"logps/oppo_real": -357.43438720703125,
"logps/real": -336.9925537109375,
"loss": -2.5489,
"loss/gen": 0.5846430659294128,
"loss/real": -3.432420492172241,
"rewards/accuracies": 1.0,
"rewards/generated": -97.88824462890625,
"rewards/margins": 118.33008575439453,
"rewards/real": 20.441844940185547,
"step": 84
},
{
"epoch": 0.36,
"grad_norm": 66.89113415188145,
"learning_rate": 4.857549857549857e-07,
"logits/generated": -2.436213493347168,
"logits/oppo_generated": -2.635812282562256,
"logits/oppo_real": -2.784547805786133,
"logits/real": -2.3119587898254395,
"logps/generated": -188.01727294921875,
"logps/oppo_gen": -87.48421478271484,
"logps/oppo_real": -250.10626220703125,
"logps/real": -244.0000457763672,
"loss": -2.5429,
"loss/gen": 0.7033488154411316,
"loss/real": -2.972754955291748,
"rewards/accuracies": 1.0,
"rewards/generated": -100.53305053710938,
"rewards/margins": 106.6392593383789,
"rewards/real": 6.106204986572266,
"step": 85
},
{
"epoch": 0.36,
"grad_norm": 60.396710964360466,
"learning_rate": 4.853988603988603e-07,
"logits/generated": -2.538017749786377,
"logits/oppo_generated": -2.9845218658447266,
"logits/oppo_real": -3.016307830810547,
"logits/real": -2.62971830368042,
"logps/generated": -155.26116943359375,
"logps/oppo_gen": -55.523197174072266,
"logps/oppo_real": -291.81378173828125,
"logps/real": -305.18359375,
"loss": -2.5841,
"loss/gen": 0.6104675531387329,
"loss/real": -2.7806365489959717,
"rewards/accuracies": 0.875,
"rewards/generated": -99.73796081542969,
"rewards/margins": 86.36811828613281,
"rewards/real": -13.369840621948242,
"step": 86
},
{
"epoch": 0.36,
"grad_norm": 53.259132139474445,
"learning_rate": 4.850427350427351e-07,
"logits/generated": -2.45882511138916,
"logits/oppo_generated": -2.8317785263061523,
"logits/oppo_real": -2.849785327911377,
"logits/real": -2.4766674041748047,
"logps/generated": -163.40484619140625,
"logps/oppo_gen": -65.48351287841797,
"logps/oppo_real": -259.8980712890625,
"logps/real": -273.74273681640625,
"loss": -2.6208,
"loss/gen": 0.5979644656181335,
"loss/real": -2.596888542175293,
"rewards/accuracies": 1.0,
"rewards/generated": -97.92133331298828,
"rewards/margins": 84.07666015625,
"rewards/real": -13.844667434692383,
"step": 87
},
{
"epoch": 0.37,
"grad_norm": 1690.1221109730504,
"learning_rate": 4.846866096866097e-07,
"logits/generated": -2.439664602279663,
"logits/oppo_generated": -2.9616637229919434,
"logits/oppo_real": -2.8549320697784424,
"logits/real": -2.6093478202819824,
"logps/generated": -177.17694091796875,
"logps/oppo_gen": -66.1073226928711,
"logps/oppo_real": -297.0393981933594,
"logps/real": -275.50140380859375,
"loss": -7.118,
"loss/gen": 0.45209312438964844,
"loss/real": -3.4258365631103516,
"rewards/accuracies": 1.0,
"rewards/generated": -111.06962585449219,
"rewards/margins": 132.60760498046875,
"rewards/real": 21.537994384765625,
"step": 88
},
{
"epoch": 0.37,
"grad_norm": 74.6983974790457,
"learning_rate": 4.843304843304843e-07,
"logits/generated": -2.5593514442443848,
"logits/oppo_generated": -2.944060802459717,
"logits/oppo_real": -2.977362632751465,
"logits/real": -2.5549235343933105,
"logps/generated": -160.49493408203125,
"logps/oppo_gen": -49.032493591308594,
"logps/oppo_real": -197.13412475585938,
"logps/real": -235.08087158203125,
"loss": -2.3729,
"loss/gen": 0.49238741397857666,
"loss/real": -2.1076858043670654,
"rewards/accuracies": 0.875,
"rewards/generated": -111.46244812011719,
"rewards/margins": 73.51570892333984,
"rewards/real": -37.94673538208008,
"step": 89
},
{
"epoch": 0.38,
"grad_norm": 70.76835216372322,
"learning_rate": 4.839743589743589e-07,
"logits/generated": -2.497036933898926,
"logits/oppo_generated": -2.9935152530670166,
"logits/oppo_real": -2.782620906829834,
"logits/real": -2.689803123474121,
"logps/generated": -172.08953857421875,
"logps/oppo_gen": -79.41259002685547,
"logps/oppo_real": -304.58465576171875,
"logps/real": -297.86407470703125,
"loss": -2.6668,
"loss/gen": 0.9129126071929932,
"loss/real": -3.119077205657959,
"rewards/accuracies": 0.875,
"rewards/generated": -92.67694854736328,
"rewards/margins": 99.39753723144531,
"rewards/real": 6.720589637756348,
"step": 90
},
{
"epoch": 0.38,
"grad_norm": 68.6735548002741,
"learning_rate": 4.836182336182337e-07,
"logits/generated": -2.508333683013916,
"logits/oppo_generated": -3.0348973274230957,
"logits/oppo_real": -2.8550362586975098,
"logits/real": -2.699089527130127,
"logps/generated": -235.1026611328125,
"logps/oppo_gen": -147.11734008789062,
"logps/oppo_real": -324.0049743652344,
"logps/real": -307.71380615234375,
"loss": -2.9712,
"loss/gen": 0.950553297996521,
"loss/real": -3.3368782997131348,
"rewards/accuracies": 1.0,
"rewards/generated": -87.98532104492188,
"rewards/margins": 104.27648162841797,
"rewards/real": 16.291156768798828,
"step": 91
},
{
"epoch": 0.38,
"grad_norm": 70.78148799628862,
"learning_rate": 4.832621082621082e-07,
"logits/generated": -2.5086488723754883,
"logits/oppo_generated": -2.8708338737487793,
"logits/oppo_real": -2.8143606185913086,
"logits/real": -2.600031852722168,
"logps/generated": -205.0748291015625,
"logps/oppo_gen": -81.77798461914062,
"logps/oppo_real": -330.5220031738281,
"logps/real": -311.5235900878906,
"loss": -2.4969,
"loss/gen": 0.43894362449645996,
"loss/real": -3.3736114501953125,
"rewards/accuracies": 1.0,
"rewards/generated": -123.29684448242188,
"rewards/margins": 142.29525756835938,
"rewards/real": 18.99840545654297,
"step": 92
},
{
"epoch": 0.39,
"grad_norm": 76.29366705574257,
"learning_rate": 4.829059829059829e-07,
"logits/generated": -2.3690929412841797,
"logits/oppo_generated": -2.7298922538757324,
"logits/oppo_real": -2.698655605316162,
"logits/real": -2.4298644065856934,
"logps/generated": -173.87249755859375,
"logps/oppo_gen": -74.60616302490234,
"logps/oppo_real": -251.41427612304688,
"logps/real": -237.06617736816406,
"loss": -2.5674,
"loss/gen": 0.6722112894058228,
"loss/real": -3.512993335723877,
"rewards/accuracies": 0.875,
"rewards/generated": -99.26634216308594,
"rewards/margins": 113.61441802978516,
"rewards/real": 14.348082542419434,
"step": 93
},
{
"epoch": 0.39,
"grad_norm": 249.33590702353723,
"learning_rate": 4.825498575498575e-07,
"logits/generated": -2.570150375366211,
"logits/oppo_generated": -2.9584808349609375,
"logits/oppo_real": -2.8358330726623535,
"logits/real": -2.728276491165161,
"logps/generated": -160.3553466796875,
"logps/oppo_gen": -83.23335266113281,
"logps/oppo_real": -311.66064453125,
"logps/real": -289.5158996582031,
"loss": -3.7493,
"loss/gen": 1.0953956842422485,
"loss/real": -3.4564929008483887,
"rewards/accuracies": 1.0,
"rewards/generated": -77.12197875976562,
"rewards/margins": 99.26671600341797,
"rewards/real": 22.14473533630371,
"step": 94
},
{
"epoch": 0.4,
"grad_norm": 1622.4536202924262,
"learning_rate": 4.821937321937321e-07,
"logits/generated": -2.4929990768432617,
"logits/oppo_generated": -2.83894681930542,
"logits/oppo_real": -2.731696605682373,
"logits/real": -2.6017203330993652,
"logps/generated": -202.414306640625,
"logps/oppo_gen": -103.72628021240234,
"logps/oppo_real": -218.9561767578125,
"logps/real": -203.55921936035156,
"loss": -7.3232,
"loss/gen": 0.5733932256698608,
"loss/real": -3.2266201972961426,
"rewards/accuracies": 1.0,
"rewards/generated": -98.68803405761719,
"rewards/margins": 114.0849838256836,
"rewards/real": 15.396947860717773,
"step": 95
},
{
"epoch": 0.4,
"grad_norm": 66.96757975529091,
"learning_rate": 4.818376068376069e-07,
"logits/generated": -2.6456146240234375,
"logits/oppo_generated": -2.7633142471313477,
"logits/oppo_real": -2.9560418128967285,
"logits/real": -2.4849910736083984,
"logps/generated": -207.35745239257812,
"logps/oppo_gen": -74.91079711914062,
"logps/oppo_real": -299.2713623046875,
"logps/real": -269.4769287109375,
"loss": -2.824,
"loss/gen": 0.35913562774658203,
"loss/real": -3.8368678092956543,
"rewards/accuracies": 1.0,
"rewards/generated": -132.4466552734375,
"rewards/margins": 162.2410888671875,
"rewards/real": 29.7944393157959,
"step": 96
},
{
"epoch": 0.41,
"grad_norm": 702.0471182548932,
"learning_rate": 4.814814814814814e-07,
"logits/generated": -2.798750400543213,
"logits/oppo_generated": -2.8308515548706055,
"logits/oppo_real": -3.085522174835205,
"logits/real": -2.5982208251953125,
"logps/generated": -237.33787536621094,
"logps/oppo_gen": -134.01483154296875,
"logps/oppo_real": -442.37945556640625,
"logps/real": -406.73846435546875,
"loss": -1.4584,
"loss/gen": 0.540956437587738,
"loss/real": -3.9525904655456543,
"rewards/accuracies": 1.0,
"rewards/generated": -103.32305908203125,
"rewards/margins": 138.96401977539062,
"rewards/real": 35.640968322753906,
"step": 97
},
{
"epoch": 0.41,
"grad_norm": 54.58323990131952,
"learning_rate": 4.811253561253561e-07,
"logits/generated": -2.40437650680542,
"logits/oppo_generated": -2.8044867515563965,
"logits/oppo_real": -2.8060150146484375,
"logits/real": -2.5216751098632812,
"logps/generated": -167.4996337890625,
"logps/oppo_gen": -51.423309326171875,
"logps/oppo_real": -222.54879760742188,
"logps/real": -225.21975708007812,
"loss": -2.7112,
"loss/gen": 0.3818909823894501,
"loss/real": -2.9921202659606934,
"rewards/accuracies": 1.0,
"rewards/generated": -116.07633972167969,
"rewards/margins": 113.40538024902344,
"rewards/real": -2.670961380004883,
"step": 98
},
{
"epoch": 0.41,
"grad_norm": 45.42335040173446,
"learning_rate": 4.807692307692307e-07,
"logits/generated": -2.6010522842407227,
"logits/oppo_generated": -2.932793140411377,
"logits/oppo_real": -2.9959638118743896,
"logits/real": -2.675575017929077,
"logps/generated": -186.99935913085938,
"logps/oppo_gen": -68.20332336425781,
"logps/oppo_real": -376.541015625,
"logps/real": -360.2162170410156,
"loss": -2.6531,
"loss/gen": 0.3653205931186676,
"loss/real": -3.4390110969543457,
"rewards/accuracies": 1.0,
"rewards/generated": -118.7960205078125,
"rewards/margins": 135.12083435058594,
"rewards/real": 16.324806213378906,
"step": 99
},
{
"epoch": 0.42,
"grad_norm": 183.07126984797478,
"learning_rate": 4.804131054131054e-07,
"logits/generated": -2.4377760887145996,
"logits/oppo_generated": -2.780601739883423,
"logits/oppo_real": -2.8726038932800293,
"logits/real": -2.5523815155029297,
"logps/generated": -195.5144500732422,
"logps/oppo_gen": -75.83106994628906,
"logps/oppo_real": -327.609619140625,
"logps/real": -326.1234130859375,
"loss": -2.9695,
"loss/gen": 0.4366985857486725,
"loss/real": -2.959474563598633,
"rewards/accuracies": 1.0,
"rewards/generated": -119.68338012695312,
"rewards/margins": 121.16956329345703,
"rewards/real": 1.4861793518066406,
"step": 100
},
{
"epoch": 0.42,
"grad_norm": 549.6681100900797,
"learning_rate": 4.8005698005698e-07,
"logits/generated": -2.454486846923828,
"logits/oppo_generated": -2.91953706741333,
"logits/oppo_real": -2.820370674133301,
"logits/real": -2.6601805686950684,
"logps/generated": -184.12876892089844,
"logps/oppo_gen": -75.91517639160156,
"logps/oppo_real": -531.0400390625,
"logps/real": -524.9949340820312,
"loss": -4.2352,
"loss/gen": 0.5441170334815979,
"loss/real": -3.007982015609741,
"rewards/accuracies": 0.875,
"rewards/generated": -108.21359252929688,
"rewards/margins": 114.25873565673828,
"rewards/real": 6.045146942138672,
"step": 101
},
{
"epoch": 0.43,
"grad_norm": 60.524447459141456,
"learning_rate": 4.797008547008547e-07,
"logits/generated": -2.492274761199951,
"logits/oppo_generated": -2.927794933319092,
"logits/oppo_real": -2.8259315490722656,
"logits/real": -2.6615185737609863,
"logps/generated": -186.5205078125,
"logps/oppo_gen": -75.32722473144531,
"logps/oppo_real": -334.3116149902344,
"logps/real": -322.076904296875,
"loss": -2.7939,
"loss/gen": 0.4799632132053375,
"loss/real": -3.1898889541625977,
"rewards/accuracies": 1.0,
"rewards/generated": -111.19327545166016,
"rewards/margins": 123.42797088623047,
"rewards/real": 12.23469066619873,
"step": 102
},
{
"epoch": 0.43,
"grad_norm": 72.22195132995404,
"learning_rate": 4.793447293447293e-07,
"logits/generated": -2.681981325149536,
"logits/oppo_generated": -2.798323154449463,
"logits/oppo_real": -3.0827927589416504,
"logits/real": -2.6118640899658203,
"logps/generated": -193.87255859375,
"logps/oppo_gen": -85.98326110839844,
"logps/oppo_real": -484.7052001953125,
"logps/real": -468.4195556640625,
"loss": -2.7665,
"loss/gen": 0.48763740062713623,
"loss/real": -3.367074489593506,
"rewards/accuracies": 1.0,
"rewards/generated": -107.88929748535156,
"rewards/margins": 124.17497253417969,
"rewards/real": 16.285675048828125,
"step": 103
},
{
"epoch": 0.44,
"grad_norm": 120.03039979521854,
"learning_rate": 4.78988603988604e-07,
"logits/generated": -2.369338035583496,
"logits/oppo_generated": -2.820817232131958,
"logits/oppo_real": -2.7580766677856445,
"logits/real": -2.554074287414551,
"logps/generated": -224.9398651123047,
"logps/oppo_gen": -98.39456176757812,
"logps/oppo_real": -435.86871337890625,
"logps/real": -420.9836730957031,
"loss": -2.9534,
"loss/gen": 0.3016844391822815,
"loss/real": -3.4657280445098877,
"rewards/accuracies": 1.0,
"rewards/generated": -126.5452880859375,
"rewards/margins": 141.43032836914062,
"rewards/real": 14.885029792785645,
"step": 104
},
{
"epoch": 0.44,
"grad_norm": 1841.6121334309241,
"learning_rate": 4.786324786324786e-07,
"logits/generated": -2.6123902797698975,
"logits/oppo_generated": -2.991581439971924,
"logits/oppo_real": -3.002182960510254,
"logits/real": -2.710818290710449,
"logps/generated": -204.66802978515625,
"logps/oppo_gen": -81.12940216064453,
"logps/oppo_real": -296.61138916015625,
"logps/real": -273.9120788574219,
"loss": -8.6558,
"loss/gen": 0.40045416355133057,
"loss/real": -3.615565776824951,
"rewards/accuracies": 1.0,
"rewards/generated": -123.53861236572266,
"rewards/margins": 146.23793029785156,
"rewards/real": 22.69931411743164,
"step": 105
},
{
"epoch": 0.44,
"grad_norm": 50.14175949168393,
"learning_rate": 4.782763532763532e-07,
"logits/generated": -2.58475399017334,
"logits/oppo_generated": -2.8433456420898438,
"logits/oppo_real": -3.012195110321045,
"logits/real": -2.584439992904663,
"logps/generated": -161.61810302734375,
"logps/oppo_gen": -63.396881103515625,
"logps/oppo_real": -288.55780029296875,
"logps/real": -261.4425048828125,
"loss": -2.8566,
"loss/gen": 0.7563031315803528,
"loss/real": -3.606753349304199,
"rewards/accuracies": 1.0,
"rewards/generated": -98.22122192382812,
"rewards/margins": 125.3365249633789,
"rewards/real": 27.115306854248047,
"step": 106
},
{
"epoch": 0.45,
"grad_norm": 365.902095853522,
"learning_rate": 4.779202279202279e-07,
"logits/generated": -2.642536163330078,
"logits/oppo_generated": -2.75607967376709,
"logits/oppo_real": -3.044626235961914,
"logits/real": -2.5140504837036133,
"logps/generated": -215.69821166992188,
"logps/oppo_gen": -89.79308319091797,
"logps/oppo_real": -237.51071166992188,
"logps/real": -235.16732788085938,
"loss": -4.2838,
"loss/gen": 0.3768947720527649,
"loss/real": -2.9775023460388184,
"rewards/accuracies": 1.0,
"rewards/generated": -125.90511322021484,
"rewards/margins": 128.24850463867188,
"rewards/real": 2.3433871269226074,
"step": 107
},
{
"epoch": 0.45,
"grad_norm": 100.40770890630111,
"learning_rate": 4.775641025641026e-07,
"logits/generated": -2.698265790939331,
"logits/oppo_generated": -2.9334537982940674,
"logits/oppo_real": -3.0197911262512207,
"logits/real": -2.6614885330200195,
"logps/generated": -193.49476623535156,
"logps/oppo_gen": -86.25882720947266,
"logps/oppo_real": -171.73361206054688,
"logps/real": -154.22259521484375,
"loss": -3.0162,
"loss/gen": 0.695202112197876,
"loss/real": -3.3781354427337646,
"rewards/accuracies": 1.0,
"rewards/generated": -107.23593139648438,
"rewards/margins": 124.74696350097656,
"rewards/real": 17.511028289794922,
"step": 108
},
{
"epoch": 0.46,
"grad_norm": 95.44328629315685,
"learning_rate": 4.772079772079772e-07,
"logits/generated": -2.5307321548461914,
"logits/oppo_generated": -2.8885016441345215,
"logits/oppo_real": -2.9670629501342773,
"logits/real": -2.6184444427490234,
"logps/generated": -137.50279235839844,
"logps/oppo_gen": -52.36747741699219,
"logps/oppo_real": -234.88699340820312,
"logps/real": -211.22215270996094,
"loss": -2.7515,
"loss/gen": 1.0063905715942383,
"loss/real": -3.66544771194458,
"rewards/accuracies": 0.875,
"rewards/generated": -85.13531494140625,
"rewards/margins": 108.8001708984375,
"rewards/real": 23.664859771728516,
"step": 109
},
{
"epoch": 0.46,
"grad_norm": 92.31076504801594,
"learning_rate": 4.768518518518518e-07,
"logits/generated": -2.4487879276275635,
"logits/oppo_generated": -2.902094841003418,
"logits/oppo_real": -2.738150119781494,
"logits/real": -2.5988502502441406,
"logps/generated": -183.7650146484375,
"logps/oppo_gen": -71.77503967285156,
"logps/oppo_real": -226.59805297851562,
"logps/real": -210.24148559570312,
"loss": -2.6027,
"loss/gen": 0.4320296347141266,
"loss/real": -3.250072956085205,
"rewards/accuracies": 1.0,
"rewards/generated": -111.989990234375,
"rewards/margins": 128.34658813476562,
"rewards/real": 16.356592178344727,
"step": 110
},
{
"epoch": 0.46,
"grad_norm": 92.31076504801594,
"learning_rate": 4.768518518518518e-07,
"logits/generated": -2.4111056327819824,
"logits/oppo_generated": -2.78233003616333,
"logits/oppo_real": -2.810633420944214,
"logits/real": -2.52742075920105,
"logps/generated": -161.51727294921875,
"logps/oppo_gen": -51.96064758300781,
"logps/oppo_real": -160.8415069580078,
"logps/real": -171.3201446533203,
"loss": -128.9964,
"loss/gen": 0.43095916509628296,
"loss/real": -2.8278121948242188,
"rewards/accuracies": 1.0,
"rewards/generated": -109.556640625,
"rewards/margins": 99.07798767089844,
"rewards/real": -10.478641510009766,
"step": 111
},
{
"epoch": 0.47,
"grad_norm": 68.96497993207313,
"learning_rate": 4.764957264957264e-07,
"logits/generated": -2.3004653453826904,
"logits/oppo_generated": -2.7906460762023926,
"logits/oppo_real": -2.7454147338867188,
"logits/real": -2.5157923698425293,
"logps/generated": -148.928955078125,
"logps/oppo_gen": -67.77021789550781,
"logps/oppo_real": -355.9058837890625,
"logps/real": -322.17315673828125,
"loss": -2.7744,
"loss/gen": 0.9168766736984253,
"loss/real": -3.8560690879821777,
"rewards/accuracies": 1.0,
"rewards/generated": -81.15873718261719,
"rewards/margins": 114.89146423339844,
"rewards/real": 33.73272705078125,
"step": 112
},
{
"epoch": 0.47,
"grad_norm": 82.86539584244439,
"learning_rate": 4.761396011396011e-07,
"logits/generated": -2.309685707092285,
"logits/oppo_generated": -2.784420967102051,
"logits/oppo_real": -2.58797550201416,
"logits/real": -2.521721363067627,
"logps/generated": -174.80889892578125,
"logps/oppo_gen": -53.4489631652832,
"logps/oppo_real": -213.77337646484375,
"logps/real": -204.11801147460938,
"loss": -2.8077,
"loss/gen": 0.35567396879196167,
"loss/real": -3.218747138977051,
"rewards/accuracies": 1.0,
"rewards/generated": -121.35994720458984,
"rewards/margins": 131.01528930664062,
"rewards/real": 9.655345916748047,
"step": 113
},
{
"epoch": 0.48,
"grad_norm": 61.933964171274795,
"learning_rate": 4.7578347578347577e-07,
"logits/generated": -2.5905487537384033,
"logits/oppo_generated": -2.9693868160247803,
"logits/oppo_real": -2.897064208984375,
"logits/real": -2.695655345916748,
"logps/generated": -178.83404541015625,
"logps/oppo_gen": -65.07535552978516,
"logps/oppo_real": -380.3414306640625,
"logps/real": -379.77105712890625,
"loss": -2.6882,
"loss/gen": 0.38962453603744507,
"loss/real": -3.060286521911621,
"rewards/accuracies": 1.0,
"rewards/generated": -113.75869750976562,
"rewards/margins": 114.32907104492188,
"rewards/real": 0.5703763961791992,
"step": 114
},
{
"epoch": 0.48,
"grad_norm": 66.47038447097135,
"learning_rate": 4.754273504273504e-07,
"logits/generated": -2.6704883575439453,
"logits/oppo_generated": -2.8074076175689697,
"logits/oppo_real": -2.9744620323181152,
"logits/real": -2.5615124702453613,
"logps/generated": -175.31643676757812,
"logps/oppo_gen": -81.67523193359375,
"logps/oppo_real": -332.10321044921875,
"logps/real": -320.36962890625,
"loss": -2.8096,
"loss/gen": 1.0026687383651733,
"loss/real": -3.2720324993133545,
"rewards/accuracies": 0.875,
"rewards/generated": -93.64120483398438,
"rewards/margins": 105.37479400634766,
"rewards/real": 11.733586311340332,
"step": 115
},
{
"epoch": 0.49,
"grad_norm": 54.16138220416485,
"learning_rate": 4.7507122507122507e-07,
"logits/generated": -2.586947441101074,
"logits/oppo_generated": -2.8780970573425293,
"logits/oppo_real": -2.880333185195923,
"logits/real": -2.6156821250915527,
"logps/generated": -207.31790161132812,
"logps/oppo_gen": -83.72149658203125,
"logps/oppo_real": -272.17291259765625,
"logps/real": -258.2806701660156,
"loss": -2.7218,
"loss/gen": 0.3455054759979248,
"loss/real": -3.568074941635132,
"rewards/accuracies": 1.0,
"rewards/generated": -123.59640502929688,
"rewards/margins": 137.48866271972656,
"rewards/real": 13.892251968383789,
"step": 116
},
{
"epoch": 0.49,
"grad_norm": 64.72151128826876,
"learning_rate": 4.747150997150997e-07,
"logits/generated": -2.5787789821624756,
"logits/oppo_generated": -2.8689210414886475,
"logits/oppo_real": -3.036574602127075,
"logits/real": -2.5574660301208496,
"logps/generated": -197.68472290039062,
"logps/oppo_gen": -61.806739807128906,
"logps/oppo_real": -213.864013671875,
"logps/real": -206.0244140625,
"loss": -2.8435,
"loss/gen": 0.28257864713668823,
"loss/real": -3.154269218444824,
"rewards/accuracies": 1.0,
"rewards/generated": -135.8779754638672,
"rewards/margins": 143.7175750732422,
"rewards/real": 7.839590072631836,
"step": 117
},
{
"epoch": 0.49,
"grad_norm": 63.413190000311644,
"learning_rate": 4.743589743589743e-07,
"logits/generated": -2.537674903869629,
"logits/oppo_generated": -2.847443103790283,
"logits/oppo_real": -2.9110074043273926,
"logits/real": -2.5497055053710938,
"logps/generated": -195.18397521972656,
"logps/oppo_gen": -68.70259857177734,
"logps/oppo_real": -252.70947265625,
"logps/real": -234.55947875976562,
"loss": -2.7751,
"loss/gen": 0.33247071504592896,
"loss/real": -3.345210075378418,
"rewards/accuracies": 1.0,
"rewards/generated": -126.48136901855469,
"rewards/margins": 144.63134765625,
"rewards/real": 18.149982452392578,
"step": 118
},
{
"epoch": 0.5,
"grad_norm": 67.1754822100761,
"learning_rate": 4.74002849002849e-07,
"logits/generated": -2.6043078899383545,
"logits/oppo_generated": -2.850525140762329,
"logits/oppo_real": -2.9623799324035645,
"logits/real": -2.5423567295074463,
"logps/generated": -186.6025390625,
"logps/oppo_gen": -70.65492248535156,
"logps/oppo_real": -241.07968139648438,
"logps/real": -243.4227294921875,
"loss": -2.7129,
"loss/gen": 0.38259631395339966,
"loss/real": -2.913571357727051,
"rewards/accuracies": 0.875,
"rewards/generated": -115.94760131835938,
"rewards/margins": 113.60454559326172,
"rewards/real": -2.3430585861206055,
"step": 119
},
{
"epoch": 0.5,
"grad_norm": 64.89191358881898,
"learning_rate": 4.7364672364672366e-07,
"logits/generated": -2.326094150543213,
"logits/oppo_generated": -2.760641574859619,
"logits/oppo_real": -2.835960865020752,
"logits/real": -2.4331917762756348,
"logps/generated": -192.00738525390625,
"logps/oppo_gen": -77.80702209472656,
"logps/oppo_real": -309.97265625,
"logps/real": -309.2704162597656,
"loss": -2.821,
"loss/gen": 0.539390504360199,
"loss/real": -3.1048460006713867,
"rewards/accuracies": 1.0,
"rewards/generated": -114.20036315917969,
"rewards/margins": 114.902587890625,
"rewards/real": 0.7022085189819336,
"step": 120
},
{
"epoch": 0.51,
"grad_norm": 118.88704263396411,
"learning_rate": 4.7329059829059823e-07,
"logits/generated": -2.453880786895752,
"logits/oppo_generated": -2.762300491333008,
"logits/oppo_real": -2.91391658782959,
"logits/real": -2.4560084342956543,
"logps/generated": -197.6976318359375,
"logps/oppo_gen": -79.30331420898438,
"logps/oppo_real": -206.95407104492188,
"logps/real": -221.3215789794922,
"loss": -3.2051,
"loss/gen": 0.37958478927612305,
"loss/real": -2.6625490188598633,
"rewards/accuracies": 1.0,
"rewards/generated": -118.39431762695312,
"rewards/margins": 104.02679443359375,
"rewards/real": -14.367520332336426,
"step": 121
},
{
"epoch": 0.51,
"grad_norm": 118.88704263396411,
"learning_rate": 4.7329059829059823e-07,
"logits/generated": -2.224625825881958,
"logits/oppo_generated": -2.8723740577697754,
"logits/oppo_real": -2.730229139328003,
"logits/real": -2.5184946060180664,
"logps/generated": -221.04774475097656,
"logps/oppo_gen": -68.4917984008789,
"logps/oppo_real": -205.74790954589844,
"logps/real": -212.26702880859375,
"loss": -316.7941,
"loss/gen": 0.3251597583293915,
"loss/real": -2.7914090156555176,
"rewards/accuracies": 1.0,
"rewards/generated": -152.55593872070312,
"rewards/margins": 146.0368194580078,
"rewards/real": -6.51912784576416,
"step": 122
},
{
"epoch": 0.51,
"grad_norm": 1343.0752491949213,
"learning_rate": 4.729344729344729e-07,
"logits/generated": -2.4904122352600098,
"logits/oppo_generated": -2.833265781402588,
"logits/oppo_real": -2.8581643104553223,
"logits/real": -2.5306711196899414,
"logps/generated": -204.9275360107422,
"logps/oppo_gen": -72.44357299804688,
"logps/oppo_real": -294.85699462890625,
"logps/real": -290.6130065917969,
"loss": -4.5958,
"loss/gen": 0.28193220496177673,
"loss/real": -3.1163246631622314,
"rewards/accuracies": 1.0,
"rewards/generated": -132.48397827148438,
"rewards/margins": 136.72792053222656,
"rewards/real": 4.243948936462402,
"step": 123
},
{
"epoch": 0.52,
"grad_norm": 664.7720944052932,
"learning_rate": 4.725783475783476e-07,
"logits/generated": -2.349301815032959,
"logits/oppo_generated": -2.8131227493286133,
"logits/oppo_real": -2.815453052520752,
"logits/real": -2.5294294357299805,
"logps/generated": -248.4262237548828,
"logps/oppo_gen": -118.46414184570312,
"logps/oppo_real": -350.6376953125,
"logps/real": -330.9083557128906,
"loss": -5.5005,
"loss/gen": 0.29418256878852844,
"loss/real": -3.4984450340270996,
"rewards/accuracies": 1.0,
"rewards/generated": -129.9620819091797,
"rewards/margins": 149.69143676757812,
"rewards/real": 19.729347229003906,
"step": 124
},
{
"epoch": 0.52,
"grad_norm": 40.89130414749342,
"learning_rate": 4.722222222222222e-07,
"logits/generated": -2.5400872230529785,
"logits/oppo_generated": -2.868478775024414,
"logits/oppo_real": -2.87443208694458,
"logits/real": -2.5824098587036133,
"logps/generated": -186.3829345703125,
"logps/oppo_gen": -72.4801025390625,
"logps/oppo_real": -315.2503356933594,
"logps/real": -298.22100830078125,
"loss": -2.7752,
"loss/gen": 0.39562442898750305,
"loss/real": -3.2849442958831787,
"rewards/accuracies": 1.0,
"rewards/generated": -113.90283203125,
"rewards/margins": 130.93215942382812,
"rewards/real": 17.02932357788086,
"step": 125
},
{
"epoch": 0.53,
"grad_norm": 94.72539979956538,
"learning_rate": 4.7186609686609683e-07,
"logits/generated": -2.2407426834106445,
"logits/oppo_generated": -2.5010550022125244,
"logits/oppo_real": -2.635188102722168,
"logits/real": -2.2052998542785645,
"logps/generated": -200.30931091308594,
"logps/oppo_gen": -80.23007202148438,
"logps/oppo_real": -347.019287109375,
"logps/real": -327.673828125,
"loss": -2.9739,
"loss/gen": 0.40606454014778137,
"loss/real": -4.382803916931152,
"rewards/accuracies": 1.0,
"rewards/generated": -120.0792465209961,
"rewards/margins": 139.4246826171875,
"rewards/real": 19.345449447631836,
"step": 126
},
{
"epoch": 0.53,
"grad_norm": 60.16715201536986,
"learning_rate": 4.715099715099715e-07,
"logits/generated": -2.1603076457977295,
"logits/oppo_generated": -2.6126623153686523,
"logits/oppo_real": -2.6145567893981934,
"logits/real": -2.2864603996276855,
"logps/generated": -194.54769897460938,
"logps/oppo_gen": -73.5291748046875,
"logps/oppo_real": -317.5265808105469,
"logps/real": -296.9075012207031,
"loss": -2.8726,
"loss/gen": 0.3832360804080963,
"loss/real": -3.425445795059204,
"rewards/accuracies": 1.0,
"rewards/generated": -121.01852416992188,
"rewards/margins": 141.63758850097656,
"rewards/real": 20.619068145751953,
"step": 127
},
{
"epoch": 0.54,
"grad_norm": 99.14064346926541,
"learning_rate": 4.711538461538461e-07,
"logits/generated": -2.6689248085021973,
"logits/oppo_generated": -3.0297465324401855,
"logits/oppo_real": -3.101362705230713,
"logits/real": -2.7913174629211426,
"logps/generated": -225.02159118652344,
"logps/oppo_gen": -120.2161865234375,
"logps/oppo_real": -532.0965576171875,
"logps/real": -496.509033203125,
"loss": -2.6576,
"loss/gen": 0.5110812187194824,
"loss/real": -3.9389498233795166,
"rewards/accuracies": 1.0,
"rewards/generated": -104.80540466308594,
"rewards/margins": 140.39291381835938,
"rewards/real": 35.58751678466797,
"step": 128
},
{
"epoch": 0.54,
"grad_norm": 70.39239557984051,
"learning_rate": 4.707977207977208e-07,
"logits/generated": -2.5018911361694336,
"logits/oppo_generated": -2.4462087154388428,
"logits/oppo_real": -2.882254123687744,
"logits/real": -2.1281325817108154,
"logps/generated": -162.50857543945312,
"logps/oppo_gen": -74.71348571777344,
"logps/oppo_real": -324.086669921875,
"logps/real": -299.44586181640625,
"loss": -2.7935,
"loss/gen": 0.9241290092468262,
"loss/real": -3.661430597305298,
"rewards/accuracies": 1.0,
"rewards/generated": -87.79508972167969,
"rewards/margins": 112.4359130859375,
"rewards/real": 24.640825271606445,
"step": 129
},
{
"epoch": 0.54,
"grad_norm": 19246.478742876578,
"learning_rate": 4.7044159544159537e-07,
"logits/generated": -2.6820337772369385,
"logits/oppo_generated": -2.9427778720855713,
"logits/oppo_real": -2.9869794845581055,
"logits/real": -2.646888494491577,
"logps/generated": -157.2148895263672,
"logps/oppo_gen": -57.98387908935547,
"logps/oppo_real": -299.8202209472656,
"logps/real": -309.00274658203125,
"loss": -58.9365,
"loss/gen": 0.7235317230224609,
"loss/real": -2.887176990509033,
"rewards/accuracies": 1.0,
"rewards/generated": -99.23101806640625,
"rewards/margins": 90.04846954345703,
"rewards/real": -9.182544708251953,
"step": 130
},
{
"epoch": 0.55,
"grad_norm": 132.2171139995689,
"learning_rate": 4.7008547008547005e-07,
"logits/generated": -2.256178855895996,
"logits/oppo_generated": -2.462200880050659,
"logits/oppo_real": -2.7382378578186035,
"logits/real": -2.1479060649871826,
"logps/generated": -191.7882537841797,
"logps/oppo_gen": -109.31198120117188,
"logps/oppo_real": -333.22021484375,
"logps/real": -315.614013671875,
"loss": -3.274,
"loss/gen": 1.3347947597503662,
"loss/real": -3.325333595275879,
"rewards/accuracies": 1.0,
"rewards/generated": -82.47628021240234,
"rewards/margins": 100.08245849609375,
"rewards/real": 17.606182098388672,
"step": 131
},
{
"epoch": 0.55,
"grad_norm": 74.27982684269834,
"learning_rate": 4.697293447293447e-07,
"logits/generated": -2.584226131439209,
"logits/oppo_generated": -2.9814329147338867,
"logits/oppo_real": -2.8366198539733887,
"logits/real": -2.7369401454925537,
"logps/generated": -231.37564086914062,
"logps/oppo_gen": -117.97686767578125,
"logps/oppo_real": -333.4208679199219,
"logps/real": -292.571044921875,
"loss": -2.9361,
"loss/gen": 0.7850100994110107,
"loss/real": -4.297349452972412,
"rewards/accuracies": 1.0,
"rewards/generated": -113.39878845214844,
"rewards/margins": 154.24859619140625,
"rewards/real": 40.84980773925781,
"step": 132
},
{
"epoch": 0.56,
"grad_norm": 62.146904934749166,
"learning_rate": 4.6937321937321934e-07,
"logits/generated": -2.1917073726654053,
"logits/oppo_generated": -2.6781723499298096,
"logits/oppo_real": -2.516916513442993,
"logits/real": -2.4100053310394287,
"logps/generated": -191.9784393310547,
"logps/oppo_gen": -60.19814682006836,
"logps/oppo_real": -262.58551025390625,
"logps/real": -243.27468872070312,
"loss": -2.8222,
"loss/gen": 0.7389452457427979,
"loss/real": -3.3546838760375977,
"rewards/accuracies": 1.0,
"rewards/generated": -131.78028869628906,
"rewards/margins": 151.09109497070312,
"rewards/real": 19.31081771850586,
"step": 133
},
{
"epoch": 0.56,
"grad_norm": 62.146904934749166,
"learning_rate": 4.6937321937321934e-07,
"logits/generated": -2.438559055328369,
"logits/oppo_generated": -2.8787498474121094,
"logits/oppo_real": -2.805894374847412,
"logits/real": -2.6161952018737793,
"logps/generated": -236.9315185546875,
"logps/oppo_gen": -124.28936767578125,
"logps/oppo_real": -606.1627807617188,
"logps/real": -575.891845703125,
"loss": -30094.6035,
"loss/gen": 0.43543320894241333,
"loss/real": -4.007241725921631,
"rewards/accuracies": 1.0,
"rewards/generated": -112.64216613769531,
"rewards/margins": 142.91311645507812,
"rewards/real": 30.27095603942871,
"step": 134
},
{
"epoch": 0.56,
"grad_norm": 78.11314597568548,
"learning_rate": 4.69017094017094e-07,
"logits/generated": -2.526062488555908,
"logits/oppo_generated": -2.765538454055786,
"logits/oppo_real": -2.839543342590332,
"logits/real": -2.4912123680114746,
"logps/generated": -192.7285919189453,
"logps/oppo_gen": -83.72669982910156,
"logps/oppo_real": -361.6756591796875,
"logps/real": -346.46002197265625,
"loss": -3.0693,
"loss/gen": 0.47312071919441223,
"loss/real": -3.3567910194396973,
"rewards/accuracies": 1.0,
"rewards/generated": -109.00190734863281,
"rewards/margins": 124.21757507324219,
"rewards/real": 15.215664863586426,
"step": 135
},
{
"epoch": 0.57,
"grad_norm": 54.23171224736731,
"learning_rate": 4.6866096866096864e-07,
"logits/generated": -2.607853889465332,
"logits/oppo_generated": -2.7416014671325684,
"logits/oppo_real": -2.8941569328308105,
"logits/real": -2.4866786003112793,
"logps/generated": -161.49307250976562,
"logps/oppo_gen": -51.659912109375,
"logps/oppo_real": -267.5926513671875,
"logps/real": -247.1339111328125,
"loss": -2.9068,
"loss/gen": 0.421144962310791,
"loss/real": -3.4416115283966064,
"rewards/accuracies": 1.0,
"rewards/generated": -109.8331527709961,
"rewards/margins": 130.2919158935547,
"rewards/real": 20.458759307861328,
"step": 136
},
{
"epoch": 0.57,
"grad_norm": 72.46861187459885,
"learning_rate": 4.6830484330484326e-07,
"logits/generated": -2.2220163345336914,
"logits/oppo_generated": -2.609920024871826,
"logits/oppo_real": -2.5399818420410156,
"logits/real": -2.291043758392334,
"logps/generated": -210.66543579101562,
"logps/oppo_gen": -81.96345520019531,
"logps/oppo_real": -258.99554443359375,
"logps/real": -252.50823974609375,
"loss": -3.0398,
"loss/gen": 0.3264577388763428,
"loss/real": -3.0477218627929688,
"rewards/accuracies": 1.0,
"rewards/generated": -128.70199584960938,
"rewards/margins": 135.1892852783203,
"rewards/real": 6.4872941970825195,
"step": 137
},
{
"epoch": 0.58,
"grad_norm": 72.46861187459885,
"learning_rate": 4.6830484330484326e-07,
"logits/generated": -2.443568706512451,
"logits/oppo_generated": -2.89731502532959,
"logits/oppo_real": -2.861166000366211,
"logits/real": -2.563661575317383,
"logps/generated": -180.6520538330078,
"logps/oppo_gen": -61.10588073730469,
"logps/oppo_real": -297.8720703125,
"logps/real": -281.30902099609375,
"loss": -231011.7969,
"loss/gen": 0.3417486846446991,
"loss/real": -3.499724864959717,
"rewards/accuracies": 1.0,
"rewards/generated": -119.54617309570312,
"rewards/margins": 136.10922241210938,
"rewards/real": 16.563053131103516,
"step": 138
},
{
"epoch": 0.58,
"grad_norm": 68.7764082802926,
"learning_rate": 4.6794871794871794e-07,
"logits/generated": -2.4255595207214355,
"logits/oppo_generated": -2.8648695945739746,
"logits/oppo_real": -2.711393356323242,
"logits/real": -2.6028270721435547,
"logps/generated": -223.78445434570312,
"logps/oppo_gen": -111.59371948242188,
"logps/oppo_real": -521.255859375,
"logps/real": -493.6855773925781,
"loss": -2.7501,
"loss/gen": 0.418493390083313,
"loss/real": -3.638298749923706,
"rewards/accuracies": 1.0,
"rewards/generated": -112.19073486328125,
"rewards/margins": 139.76104736328125,
"rewards/real": 27.570310592651367,
"step": 139
},
{
"epoch": 0.59,
"grad_norm": 60.205945382287624,
"learning_rate": 4.675925925925926e-07,
"logits/generated": -2.525608539581299,
"logits/oppo_generated": -2.8064088821411133,
"logits/oppo_real": -2.845989227294922,
"logits/real": -2.5157408714294434,
"logps/generated": -162.0449676513672,
"logps/oppo_gen": -52.78784942626953,
"logps/oppo_real": -172.55088806152344,
"logps/real": -161.2114715576172,
"loss": -2.904,
"loss/gen": 0.4695492088794708,
"loss/real": -3.171067237854004,
"rewards/accuracies": 1.0,
"rewards/generated": -109.25712585449219,
"rewards/margins": 120.59654235839844,
"rewards/real": 11.339418411254883,
"step": 140
},
{
"epoch": 0.59,
"grad_norm": 70.03484065984587,
"learning_rate": 4.672364672364672e-07,
"logits/generated": -2.5758299827575684,
"logits/oppo_generated": -3.0264251232147217,
"logits/oppo_real": -2.836057186126709,
"logits/real": -2.738698959350586,
"logps/generated": -218.2009735107422,
"logps/oppo_gen": -74.337158203125,
"logps/oppo_real": -371.032470703125,
"logps/real": -321.6204833984375,
"loss": -2.9986,
"loss/gen": 0.24696138501167297,
"loss/real": -4.774725437164307,
"rewards/accuracies": 1.0,
"rewards/generated": -143.8638153076172,
"rewards/margins": 193.27581787109375,
"rewards/real": 49.41199493408203,
"step": 141
},
{
"epoch": 0.59,
"grad_norm": 7817.082320149891,
"learning_rate": 4.6688034188034186e-07,
"logits/generated": -2.6040773391723633,
"logits/oppo_generated": -2.876476764678955,
"logits/oppo_real": -2.912707805633545,
"logits/real": -2.6422886848449707,
"logps/generated": -201.4759063720703,
"logps/oppo_gen": -90.53692626953125,
"logps/oppo_real": -383.74615478515625,
"logps/real": -350.41131591796875,
"loss": -19.0115,
"loss/gen": 0.4651464819908142,
"loss/real": -3.848228931427002,
"rewards/accuracies": 1.0,
"rewards/generated": -110.93898010253906,
"rewards/margins": 144.27383422851562,
"rewards/real": 33.33485794067383,
"step": 142
},
{
"epoch": 0.6,
"grad_norm": 105.7367382346363,
"learning_rate": 4.6652421652421653e-07,
"logits/generated": -2.674943447113037,
"logits/oppo_generated": -2.9819746017456055,
"logits/oppo_real": -3.1959123611450195,
"logits/real": -2.714082717895508,
"logps/generated": -279.74652099609375,
"logps/oppo_gen": -152.70217895507812,
"logps/oppo_real": -483.54266357421875,
"logps/real": -443.05084228515625,
"loss": -3.1497,
"loss/gen": 0.3712007403373718,
"loss/real": -4.19202184677124,
"rewards/accuracies": 1.0,
"rewards/generated": -127.04434204101562,
"rewards/margins": 167.5361328125,
"rewards/real": 40.49180603027344,
"step": 143
},
{
"epoch": 0.6,
"grad_norm": 320.13193081134534,
"learning_rate": 4.6616809116809116e-07,
"logits/generated": -2.6575989723205566,
"logits/oppo_generated": -2.7378830909729004,
"logits/oppo_real": -3.110536813735962,
"logits/real": -2.483811378479004,
"logps/generated": -208.11447143554688,
"logps/oppo_gen": -86.0918960571289,
"logps/oppo_real": -447.7939147949219,
"logps/real": -449.82757568359375,
"loss": -3.1025,
"loss/gen": 0.3562849164009094,
"loss/real": -3.004971981048584,
"rewards/accuracies": 1.0,
"rewards/generated": -122.0225830078125,
"rewards/margins": 119.98893737792969,
"rewards/real": -2.033646583557129,
"step": 144
},
{
"epoch": 0.61,
"grad_norm": 56.81756199946017,
"learning_rate": 4.658119658119658e-07,
"logits/generated": -2.8450493812561035,
"logits/oppo_generated": -2.7491419315338135,
"logits/oppo_real": -3.191051483154297,
"logits/real": -2.4600586891174316,
"logps/generated": -215.31295776367188,
"logps/oppo_gen": -96.26548767089844,
"logps/oppo_real": -305.7531433105469,
"logps/real": -280.7630615234375,
"loss": -2.8775,
"loss/gen": 0.5062470436096191,
"loss/real": -3.553849220275879,
"rewards/accuracies": 1.0,
"rewards/generated": -119.04747009277344,
"rewards/margins": 144.03756713867188,
"rewards/real": 24.990097045898438,
"step": 145
},
{
"epoch": 0.61,
"grad_norm": 56.194760202520214,
"learning_rate": 4.654558404558404e-07,
"logits/generated": -2.451647996902466,
"logits/oppo_generated": -2.8662476539611816,
"logits/oppo_real": -2.7619881629943848,
"logits/real": -2.580970287322998,
"logps/generated": -190.9722900390625,
"logps/oppo_gen": -76.39656066894531,
"logps/oppo_real": -342.36138916015625,
"logps/real": -320.16766357421875,
"loss": -2.8042,
"loss/gen": 0.40042293071746826,
"loss/real": -3.5132551193237305,
"rewards/accuracies": 1.0,
"rewards/generated": -114.57573699951172,
"rewards/margins": 136.7694854736328,
"rewards/real": 22.193754196166992,
"step": 146
},
{
"epoch": 0.62,
"grad_norm": 54.320377107864644,
"learning_rate": 4.650997150997151e-07,
"logits/generated": -2.6203999519348145,
"logits/oppo_generated": -2.973456859588623,
"logits/oppo_real": -2.9541869163513184,
"logits/real": -2.606893539428711,
"logps/generated": -158.5798797607422,
"logps/oppo_gen": -58.52758026123047,
"logps/oppo_real": -196.6337127685547,
"logps/real": -194.17990112304688,
"loss": -3.0855,
"loss/gen": 0.5960197448730469,
"loss/real": -3.0248513221740723,
"rewards/accuracies": 1.0,
"rewards/generated": -100.05229949951172,
"rewards/margins": 102.50611877441406,
"rewards/real": 2.453828811645508,
"step": 147
},
{
"epoch": 0.62,
"grad_norm": 75.8143516622595,
"learning_rate": 4.6474358974358975e-07,
"logits/generated": -2.4562883377075195,
"logits/oppo_generated": -2.9579458236694336,
"logits/oppo_real": -2.8345115184783936,
"logits/real": -2.6457347869873047,
"logps/generated": -204.59548950195312,
"logps/oppo_gen": -86.37559509277344,
"logps/oppo_real": -329.4002685546875,
"logps/real": -317.261962890625,
"loss": -3.0377,
"loss/gen": 0.4204404056072235,
"loss/real": -3.355125904083252,
"rewards/accuracies": 1.0,
"rewards/generated": -118.21987915039062,
"rewards/margins": 130.35821533203125,
"rewards/real": 12.138343811035156,
"step": 148
},
{
"epoch": 0.62,
"grad_norm": 75.08773189056086,
"learning_rate": 4.643874643874643e-07,
"logits/generated": -2.1544671058654785,
"logits/oppo_generated": -2.4297678470611572,
"logits/oppo_real": -2.5349526405334473,
"logits/real": -2.043349027633667,
"logps/generated": -245.35202026367188,
"logps/oppo_gen": -139.25880432128906,
"logps/oppo_real": -366.9024658203125,
"logps/real": -337.96356201171875,
"loss": -2.9923,
"loss/gen": 0.5029778480529785,
"loss/real": -3.993164300918579,
"rewards/accuracies": 1.0,
"rewards/generated": -106.09322357177734,
"rewards/margins": 135.03216552734375,
"rewards/real": 28.938934326171875,
"step": 149
},
{
"epoch": 0.63,
"grad_norm": 576.2930027338524,
"learning_rate": 4.64031339031339e-07,
"logits/generated": -2.114830493927002,
"logits/oppo_generated": -2.59027099609375,
"logits/oppo_real": -2.5751681327819824,
"logits/real": -2.2125301361083984,
"logps/generated": -178.6730499267578,
"logps/oppo_gen": -44.13750076293945,
"logps/oppo_real": -126.39328002929688,
"logps/real": -146.06198120117188,
"loss": -4.0466,
"loss/gen": 0.268838495016098,
"loss/real": -2.595045566558838,
"rewards/accuracies": 0.875,
"rewards/generated": -134.53555297851562,
"rewards/margins": 114.86683654785156,
"rewards/real": -19.668712615966797,
"step": 150
},
{
"epoch": 0.63,
"grad_norm": 54.61241720782235,
"learning_rate": 4.6367521367521367e-07,
"logits/generated": -2.424686908721924,
"logits/oppo_generated": -2.8061888217926025,
"logits/oppo_real": -2.885352611541748,
"logits/real": -2.449500560760498,
"logps/generated": -222.48379516601562,
"logps/oppo_gen": -82.9956283569336,
"logps/oppo_real": -287.7582702636719,
"logps/real": -284.30731201171875,
"loss": -3.0876,
"loss/gen": 0.32225707173347473,
"loss/real": -2.8702611923217773,
"rewards/accuracies": 1.0,
"rewards/generated": -139.4881591796875,
"rewards/margins": 142.93911743164062,
"rewards/real": 3.450957775115967,
"step": 151
},
{
"epoch": 0.64,
"grad_norm": 85.11727319006701,
"learning_rate": 4.633190883190883e-07,
"logits/generated": -2.086930274963379,
"logits/oppo_generated": -2.6804826259613037,
"logits/oppo_real": -2.560675621032715,
"logits/real": -2.307936668395996,
"logps/generated": -248.29327392578125,
"logps/oppo_gen": -125.20469665527344,
"logps/oppo_real": -214.75454711914062,
"logps/real": -237.43409729003906,
"loss": -2.705,
"loss/gen": 0.33005163073539734,
"loss/real": -2.6607136726379395,
"rewards/accuracies": 0.875,
"rewards/generated": -123.08856201171875,
"rewards/margins": 100.40898895263672,
"rewards/real": -22.67957878112793,
"step": 152
},
{
"epoch": 0.64,
"grad_norm": 76.16983490857781,
"learning_rate": 4.6296296296296297e-07,
"logits/generated": -2.4300737380981445,
"logits/oppo_generated": -2.8161306381225586,
"logits/oppo_real": -2.873737096786499,
"logits/real": -2.3974549770355225,
"logps/generated": -211.3475341796875,
"logps/oppo_gen": -39.4675178527832,
"logps/oppo_real": -94.7720718383789,
"logps/real": -112.44818115234375,
"loss": -2.8425,
"loss/gen": 0.24527525901794434,
"loss/real": -2.5662083625793457,
"rewards/accuracies": 0.875,
"rewards/generated": -171.88002014160156,
"rewards/margins": 154.2039031982422,
"rewards/real": -17.676116943359375,
"step": 153
},
{
"epoch": 0.64,
"grad_norm": 52.66541092417774,
"learning_rate": 4.626068376068376e-07,
"logits/generated": -2.2791929244995117,
"logits/oppo_generated": -2.754338026046753,
"logits/oppo_real": -2.6611428260803223,
"logits/real": -2.365473747253418,
"logps/generated": -180.72125244140625,
"logps/oppo_gen": -53.64311981201172,
"logps/oppo_real": -189.60964965820312,
"logps/real": -185.12130737304688,
"loss": -2.8864,
"loss/gen": 0.31177347898483276,
"loss/real": -3.0672144889831543,
"rewards/accuracies": 1.0,
"rewards/generated": -127.07814025878906,
"rewards/margins": 131.56646728515625,
"rewards/real": 4.488343238830566,
"step": 154
},
{
"epoch": 0.65,
"grad_norm": 89.10240608160505,
"learning_rate": 4.622507122507122e-07,
"logits/generated": -2.525489568710327,
"logits/oppo_generated": -2.8700437545776367,
"logits/oppo_real": -3.012883186340332,
"logits/real": -2.459331512451172,
"logps/generated": -198.41644287109375,
"logps/oppo_gen": -64.43563842773438,
"logps/oppo_real": -366.68572998046875,
"logps/real": -346.22503662109375,
"loss": -3.2314,
"loss/gen": 0.27841734886169434,
"loss/real": -3.4703869819641113,
"rewards/accuracies": 1.0,
"rewards/generated": -133.98080444335938,
"rewards/margins": 154.4415283203125,
"rewards/real": 20.46072006225586,
"step": 155
},
{
"epoch": 0.65,
"grad_norm": 1254.2688109013952,
"learning_rate": 4.618945868945869e-07,
"logits/generated": -2.289478302001953,
"logits/oppo_generated": -2.896176338195801,
"logits/oppo_real": -2.7520911693573,
"logits/real": -2.514561653137207,
"logps/generated": -221.06515502929688,
"logps/oppo_gen": -94.6259765625,
"logps/oppo_real": -329.9571533203125,
"logps/real": -310.5443115234375,
"loss": -5.4989,
"loss/gen": 0.3454495668411255,
"loss/real": -3.5192689895629883,
"rewards/accuracies": 1.0,
"rewards/generated": -126.43919372558594,
"rewards/margins": 145.85203552246094,
"rewards/real": 19.412845611572266,
"step": 156
},
{
"epoch": 0.66,
"grad_norm": 49.926036602752426,
"learning_rate": 4.6153846153846156e-07,
"logits/generated": -2.4086711406707764,
"logits/oppo_generated": -2.72526478767395,
"logits/oppo_real": -2.760162591934204,
"logits/real": -2.3816709518432617,
"logps/generated": -189.68716430664062,
"logps/oppo_gen": -70.71673583984375,
"logps/oppo_real": -391.76458740234375,
"logps/real": -400.5279846191406,
"loss": -2.873,
"loss/gen": 0.38557717204093933,
"loss/real": -2.8944320678710938,
"rewards/accuracies": 0.875,
"rewards/generated": -118.97042846679688,
"rewards/margins": 110.20704650878906,
"rewards/real": -8.763385772705078,
"step": 157
},
{
"epoch": 0.66,
"grad_norm": 108.6453699255058,
"learning_rate": 4.6118233618233613e-07,
"logits/generated": -2.9071619510650635,
"logits/oppo_generated": -2.979785919189453,
"logits/oppo_real": -3.2641677856445312,
"logits/real": -2.598475933074951,
"logps/generated": -202.7903594970703,
"logps/oppo_gen": -92.89317321777344,
"logps/oppo_real": -330.3245849609375,
"logps/real": -319.0426025390625,
"loss": -3.0021,
"loss/gen": 0.49392998218536377,
"loss/real": -3.2759296894073486,
"rewards/accuracies": 1.0,
"rewards/generated": -109.89718627929688,
"rewards/margins": 121.17916870117188,
"rewards/real": 11.28197956085205,
"step": 158
},
{
"epoch": 0.67,
"grad_norm": 59.173970949148114,
"learning_rate": 4.608262108262108e-07,
"logits/generated": -2.1723835468292236,
"logits/oppo_generated": -2.775574207305908,
"logits/oppo_real": -2.598371744155884,
"logits/real": -2.396592140197754,
"logps/generated": -187.12681579589844,
"logps/oppo_gen": -65.71693420410156,
"logps/oppo_real": -220.19737243652344,
"logps/real": -200.814453125,
"loss": -3.0664,
"loss/gen": 0.3779526352882385,
"loss/real": -3.3624069690704346,
"rewards/accuracies": 1.0,
"rewards/generated": -121.40988159179688,
"rewards/margins": 140.79281616210938,
"rewards/real": 19.38292694091797,
"step": 159
},
{
"epoch": 0.67,
"grad_norm": 75.21973020757254,
"learning_rate": 4.6047008547008543e-07,
"logits/generated": -2.0699994564056396,
"logits/oppo_generated": -2.6892812252044678,
"logits/oppo_real": -2.527797222137451,
"logits/real": -2.2784643173217773,
"logps/generated": -172.57388305664062,
"logps/oppo_gen": -56.507102966308594,
"logps/oppo_real": -203.99942016601562,
"logps/real": -220.61575317382812,
"loss": -2.9974,
"loss/gen": 0.3955667018890381,
"loss/real": -2.5530004501342773,
"rewards/accuracies": 1.0,
"rewards/generated": -116.06678771972656,
"rewards/margins": 99.45044708251953,
"rewards/real": -16.6163387298584,
"step": 160
},
{
"epoch": 0.67,
"grad_norm": 69.13334884030283,
"learning_rate": 4.601139601139601e-07,
"logits/generated": -2.4354324340820312,
"logits/oppo_generated": -2.892515182495117,
"logits/oppo_real": -2.87583589553833,
"logits/real": -2.509371280670166,
"logps/generated": -193.70269775390625,
"logps/oppo_gen": -70.63409423828125,
"logps/oppo_real": -236.45480346679688,
"logps/real": -236.11924743652344,
"loss": -2.9182,
"loss/gen": 0.3297494649887085,
"loss/real": -3.017835855484009,
"rewards/accuracies": 0.875,
"rewards/generated": -123.068603515625,
"rewards/margins": 123.4041748046875,
"rewards/real": 0.3355722427368164,
"step": 161
},
{
"epoch": 0.68,
"grad_norm": 96.21606544647312,
"learning_rate": 4.5975783475783473e-07,
"logits/generated": -2.1324949264526367,
"logits/oppo_generated": -2.2372124195098877,
"logits/oppo_real": -2.6531500816345215,
"logits/real": -1.7291717529296875,
"logps/generated": -151.3114471435547,
"logps/oppo_gen": -49.9699821472168,
"logps/oppo_real": -257.7629699707031,
"logps/real": -268.58935546875,
"loss": -3.1964,
"loss/gen": 1.1664835214614868,
"loss/real": -2.592834234237671,
"rewards/accuracies": 0.875,
"rewards/generated": -101.34146118164062,
"rewards/margins": 90.51508331298828,
"rewards/real": -10.826382637023926,
"step": 162
},
{
"epoch": 0.68,
"grad_norm": 1100.3644850684705,
"learning_rate": 4.5940170940170935e-07,
"logits/generated": -2.265676736831665,
"logits/oppo_generated": -2.6594979763031006,
"logits/oppo_real": -2.72336483001709,
"logits/real": -2.2372395992279053,
"logps/generated": -186.74288940429688,
"logps/oppo_gen": -69.47285461425781,
"logps/oppo_real": -203.925048828125,
"logps/real": -171.45407104492188,
"loss": -4.8218,
"loss/gen": 0.37407732009887695,
"loss/real": -3.8044371604919434,
"rewards/accuracies": 1.0,
"rewards/generated": -117.27003479003906,
"rewards/margins": 149.74102783203125,
"rewards/real": 32.470985412597656,
"step": 163
},
{
"epoch": 0.69,
"grad_norm": 48.85153938608921,
"learning_rate": 4.59045584045584e-07,
"logits/generated": -2.573637008666992,
"logits/oppo_generated": -2.84741473197937,
"logits/oppo_real": -2.9322423934936523,
"logits/real": -2.466978073120117,
"logps/generated": -193.22213745117188,
"logps/oppo_gen": -72.28129577636719,
"logps/oppo_real": -342.0706787109375,
"logps/real": -368.0989990234375,
"loss": -2.7908,
"loss/gen": 0.3612110912799835,
"loss/real": -2.5483293533325195,
"rewards/accuracies": 0.875,
"rewards/generated": -120.94083404541016,
"rewards/margins": 94.91249084472656,
"rewards/real": -26.028343200683594,
"step": 164
},
{
"epoch": 0.69,
"grad_norm": 61.90407697473366,
"learning_rate": 4.586894586894587e-07,
"logits/generated": -2.5092499256134033,
"logits/oppo_generated": -2.8123486042022705,
"logits/oppo_real": -2.9484448432922363,
"logits/real": -2.4517569541931152,
"logps/generated": -189.08139038085938,
"logps/oppo_gen": -78.67784118652344,
"logps/oppo_real": -224.94638061523438,
"logps/real": -222.41128540039062,
"loss": -2.8246,
"loss/gen": 0.4234386086463928,
"loss/real": -3.0529117584228516,
"rewards/accuracies": 1.0,
"rewards/generated": -110.4035415649414,
"rewards/margins": 112.93864440917969,
"rewards/real": 2.5351076126098633,
"step": 165
},
{
"epoch": 0.69,
"grad_norm": 162.38187413278305,
"learning_rate": 4.5833333333333327e-07,
"logits/generated": -2.1803882122039795,
"logits/oppo_generated": -2.6430654525756836,
"logits/oppo_real": -2.7417783737182617,
"logits/real": -2.243424415588379,
"logps/generated": -164.8372802734375,
"logps/oppo_gen": -63.871150970458984,
"logps/oppo_real": -224.14703369140625,
"logps/real": -182.39511108398438,
"loss": -3.7083,
"loss/gen": 0.7786407470703125,
"loss/real": -4.343791961669922,
"rewards/accuracies": 0.875,
"rewards/generated": -100.96613311767578,
"rewards/margins": 142.7180633544922,
"rewards/real": 41.75192642211914,
"step": 166
},
{
"epoch": 0.7,
"grad_norm": 49.259806784569555,
"learning_rate": 4.5797720797720794e-07,
"logits/generated": -2.4166438579559326,
"logits/oppo_generated": -2.757966995239258,
"logits/oppo_real": -2.906935691833496,
"logits/real": -2.3613169193267822,
"logps/generated": -156.58056640625,
"logps/oppo_gen": -53.980133056640625,
"logps/oppo_real": -168.99293518066406,
"logps/real": -158.2866973876953,
"loss": -2.9404,
"loss/gen": 0.9939805269241333,
"loss/real": -3.2384204864501953,
"rewards/accuracies": 1.0,
"rewards/generated": -102.6004409790039,
"rewards/margins": 113.30667114257812,
"rewards/real": 10.706242561340332,
"step": 167
},
{
"epoch": 0.7,
"grad_norm": 49.259806784569555,
"learning_rate": 4.5797720797720794e-07,
"logits/generated": -2.1523754596710205,
"logits/oppo_generated": -2.34848690032959,
"logits/oppo_real": -2.549453020095825,
"logits/real": -1.9709889888763428,
"logps/generated": -172.17507934570312,
"logps/oppo_gen": -41.99907684326172,
"logps/oppo_real": -137.05735778808594,
"logps/real": -117.31524658203125,
"loss": -36278.4766,
"loss/gen": 0.308816134929657,
"loss/real": -3.7620303630828857,
"rewards/accuracies": 1.0,
"rewards/generated": -130.17599487304688,
"rewards/margins": 149.91812133789062,
"rewards/real": 19.742107391357422,
"step": 168
},
{
"epoch": 0.71,
"grad_norm": 58.361834787518674,
"learning_rate": 4.576210826210826e-07,
"logits/generated": -2.2917189598083496,
"logits/oppo_generated": -2.5094847679138184,
"logits/oppo_real": -2.6891722679138184,
"logits/real": -2.1305155754089355,
"logps/generated": -260.20501708984375,
"logps/oppo_gen": -68.40258026123047,
"logps/oppo_real": -223.42794799804688,
"logps/real": -207.68116760253906,
"loss": -2.9255,
"loss/gen": 0.7335460186004639,
"loss/real": -3.7286908626556396,
"rewards/accuracies": 1.0,
"rewards/generated": -191.8024444580078,
"rewards/margins": 207.54922485351562,
"rewards/real": 15.746776580810547,
"step": 169
},
{
"epoch": 0.71,
"grad_norm": 170.55447260332988,
"learning_rate": 4.5726495726495724e-07,
"logits/generated": -2.4084904193878174,
"logits/oppo_generated": -2.8935999870300293,
"logits/oppo_real": -2.775484561920166,
"logits/real": -2.5147581100463867,
"logps/generated": -170.41473388671875,
"logps/oppo_gen": -50.93283462524414,
"logps/oppo_real": -316.0002136230469,
"logps/real": -287.05810546875,
"loss": -3.0655,
"loss/gen": 0.39951732754707336,
"loss/real": -3.662087917327881,
"rewards/accuracies": 1.0,
"rewards/generated": -119.48190307617188,
"rewards/margins": 148.4240264892578,
"rewards/real": 28.942119598388672,
"step": 170
},
{
"epoch": 0.72,
"grad_norm": 2776.5446151646834,
"learning_rate": 4.569088319088319e-07,
"logits/generated": -2.689021110534668,
"logits/oppo_generated": -2.8526816368103027,
"logits/oppo_real": -3.2386014461517334,
"logits/real": -2.449385166168213,
"logps/generated": -224.29080200195312,
"logps/oppo_gen": -113.54923248291016,
"logps/oppo_real": -351.7125549316406,
"logps/real": -352.888427734375,
"loss": -9.9952,
"loss/gen": 0.5014157295227051,
"loss/real": -2.938952922821045,
"rewards/accuracies": 0.875,
"rewards/generated": -110.7415771484375,
"rewards/margins": 109.56568145751953,
"rewards/real": -1.1758899688720703,
"step": 171
},
{
"epoch": 0.72,
"grad_norm": 74.08580527313414,
"learning_rate": 4.5655270655270654e-07,
"logits/generated": -2.553030252456665,
"logits/oppo_generated": -2.9850940704345703,
"logits/oppo_real": -3.0315611362457275,
"logits/real": -2.5720577239990234,
"logps/generated": -181.4952392578125,
"logps/oppo_gen": -61.65489196777344,
"logps/oppo_real": -151.10653686523438,
"logps/real": -148.4203643798828,
"loss": -3.289,
"loss/gen": 0.38128042221069336,
"loss/real": -2.9334371089935303,
"rewards/accuracies": 1.0,
"rewards/generated": -119.84036254882812,
"rewards/margins": 122.52653503417969,
"rewards/real": 2.686166763305664,
"step": 172
},
{
"epoch": 0.72,
"grad_norm": 63.58691225524518,
"learning_rate": 4.5619658119658116e-07,
"logits/generated": -2.707376480102539,
"logits/oppo_generated": -2.891350746154785,
"logits/oppo_real": -3.0990657806396484,
"logits/real": -2.544325828552246,
"logps/generated": -313.1190490722656,
"logps/oppo_gen": -212.02532958984375,
"logps/oppo_real": -549.8078002929688,
"logps/real": -522.171142578125,
"loss": -3.0226,
"loss/gen": 0.6081419587135315,
"loss/real": -3.838395118713379,
"rewards/accuracies": 1.0,
"rewards/generated": -101.09373474121094,
"rewards/margins": 128.73036193847656,
"rewards/real": 27.636632919311523,
"step": 173
},
{
"epoch": 0.73,
"grad_norm": 54.7705752933886,
"learning_rate": 4.5584045584045584e-07,
"logits/generated": -2.31124210357666,
"logits/oppo_generated": -2.861656904220581,
"logits/oppo_real": -2.749734878540039,
"logits/real": -2.4704031944274902,
"logps/generated": -180.6676025390625,
"logps/oppo_gen": -52.08341598510742,
"logps/oppo_real": -268.2560119628906,
"logps/real": -232.18594360351562,
"loss": -3.0632,
"loss/gen": 0.29419511556625366,
"loss/real": -3.9939093589782715,
"rewards/accuracies": 1.0,
"rewards/generated": -128.5841827392578,
"rewards/margins": 164.6542510986328,
"rewards/real": 36.070064544677734,
"step": 174
},
{
"epoch": 0.73,
"grad_norm": 96.52861966309526,
"learning_rate": 4.5548433048433046e-07,
"logits/generated": -2.3479509353637695,
"logits/oppo_generated": -2.8331031799316406,
"logits/oppo_real": -2.8462958335876465,
"logits/real": -2.4217453002929688,
"logps/generated": -206.61517333984375,
"logps/oppo_gen": -78.92254638671875,
"logps/oppo_real": -224.86373901367188,
"logps/real": -216.6464385986328,
"loss": -3.2511,
"loss/gen": 0.3076120615005493,
"loss/real": -3.089078903198242,
"rewards/accuracies": 1.0,
"rewards/generated": -127.69261169433594,
"rewards/margins": 135.909912109375,
"rewards/real": 8.217292785644531,
"step": 175
},
{
"epoch": 0.74,
"grad_norm": 56.16997780175812,
"learning_rate": 4.551282051282051e-07,
"logits/generated": -2.3228254318237305,
"logits/oppo_generated": -2.879185199737549,
"logits/oppo_real": -2.873112678527832,
"logits/real": -2.4802536964416504,
"logps/generated": -157.61184692382812,
"logps/oppo_gen": -49.27460479736328,
"logps/oppo_real": -375.43463134765625,
"logps/real": -348.29833984375,
"loss": -2.954,
"loss/gen": 0.6059376001358032,
"loss/real": -3.6162662506103516,
"rewards/accuracies": 1.0,
"rewards/generated": -108.33724212646484,
"rewards/margins": 135.4735107421875,
"rewards/real": 27.136272430419922,
"step": 176
},
{
"epoch": 0.74,
"grad_norm": 47.747249184508,
"learning_rate": 4.5477207977207976e-07,
"logits/generated": -2.5688347816467285,
"logits/oppo_generated": -3.0462043285369873,
"logits/oppo_real": -3.1089582443237305,
"logits/real": -2.586811065673828,
"logps/generated": -204.5193634033203,
"logps/oppo_gen": -77.79332733154297,
"logps/oppo_real": -319.2231750488281,
"logps/real": -287.8105773925781,
"loss": -2.9293,
"loss/gen": 0.32107144594192505,
"loss/real": -3.8302066326141357,
"rewards/accuracies": 1.0,
"rewards/generated": -126.72602844238281,
"rewards/margins": 158.13864135742188,
"rewards/real": 31.412609100341797,
"step": 177
},
{
"epoch": 0.74,
"grad_norm": 56.68338256821835,
"learning_rate": 4.544159544159544e-07,
"logits/generated": -2.477287530899048,
"logits/oppo_generated": -2.815687656402588,
"logits/oppo_real": -2.9501237869262695,
"logits/real": -2.401658058166504,
"logps/generated": -221.77561950683594,
"logps/oppo_gen": -103.51431274414062,
"logps/oppo_real": -308.8333435058594,
"logps/real": -306.259521484375,
"loss": -2.9378,
"loss/gen": 0.3601089119911194,
"loss/real": -3.115224599838257,
"rewards/accuracies": 1.0,
"rewards/generated": -118.26129913330078,
"rewards/margins": 120.83515930175781,
"rewards/real": 2.5738563537597656,
"step": 178
},
{
"epoch": 0.75,
"grad_norm": 132.54113453471717,
"learning_rate": 4.5405982905982905e-07,
"logits/generated": -2.3259053230285645,
"logits/oppo_generated": -2.779146194458008,
"logits/oppo_real": -2.8336267471313477,
"logits/real": -2.3255388736724854,
"logps/generated": -202.4166259765625,
"logps/oppo_gen": -72.71639251708984,
"logps/oppo_real": -196.57557678222656,
"logps/real": -192.04830932617188,
"loss": -3.1315,
"loss/gen": 0.3035447299480438,
"loss/real": -3.032370090484619,
"rewards/accuracies": 1.0,
"rewards/generated": -129.70025634765625,
"rewards/margins": 134.2274932861328,
"rewards/real": 4.52725076675415,
"step": 179
},
{
"epoch": 0.75,
"grad_norm": 51.83515977656961,
"learning_rate": 4.537037037037037e-07,
"logits/generated": -2.5221238136291504,
"logits/oppo_generated": -2.8425636291503906,
"logits/oppo_real": -2.9093685150146484,
"logits/real": -2.4757275581359863,
"logps/generated": -224.296142578125,
"logps/oppo_gen": -95.93893432617188,
"logps/oppo_real": -207.11392211914062,
"logps/real": -186.50677490234375,
"loss": -3.0553,
"loss/gen": 0.6323412656784058,
"loss/real": -3.4342591762542725,
"rewards/accuracies": 1.0,
"rewards/generated": -128.35723876953125,
"rewards/margins": 148.96438598632812,
"rewards/real": 20.607158660888672,
"step": 180
},
{
"epoch": 0.76,
"grad_norm": 57.13147870810802,
"learning_rate": 4.533475783475783e-07,
"logits/generated": -2.2876157760620117,
"logits/oppo_generated": -2.8224010467529297,
"logits/oppo_real": -2.778409957885742,
"logits/real": -2.416560649871826,
"logps/generated": -224.8665313720703,
"logps/oppo_gen": -88.16463470458984,
"logps/oppo_real": -239.9169921875,
"logps/real": -240.70309448242188,
"loss": -3.0029,
"loss/gen": 0.25222891569137573,
"loss/real": -3.012547016143799,
"rewards/accuracies": 1.0,
"rewards/generated": -136.701904296875,
"rewards/margins": 135.91583251953125,
"rewards/real": -0.7860813140869141,
"step": 181
},
{
"epoch": 0.76,
"grad_norm": 89.10157585762724,
"learning_rate": 4.5299145299145297e-07,
"logits/generated": -2.4986462593078613,
"logits/oppo_generated": -2.9657952785491943,
"logits/oppo_real": -2.9425137042999268,
"logits/real": -2.509366035461426,
"logps/generated": -201.0703125,
"logps/oppo_gen": -76.42547607421875,
"logps/oppo_real": -261.8043518066406,
"logps/real": -241.64723205566406,
"loss": -3.1181,
"loss/gen": 0.3136594295501709,
"loss/real": -3.37326717376709,
"rewards/accuracies": 1.0,
"rewards/generated": -124.64483642578125,
"rewards/margins": 144.8019561767578,
"rewards/real": 20.157115936279297,
"step": 182
},
{
"epoch": 0.77,
"grad_norm": 59.75890689365896,
"learning_rate": 4.5263532763532765e-07,
"logits/generated": -1.9646540880203247,
"logits/oppo_generated": -2.6656646728515625,
"logits/oppo_real": -2.512063980102539,
"logits/real": -2.13295316696167,
"logps/generated": -146.4997100830078,
"logps/oppo_gen": -61.16596603393555,
"logps/oppo_real": -89.70797729492188,
"logps/real": -67.80735778808594,
"loss": -3.1443,
"loss/gen": 0.8715238571166992,
"loss/real": -3.4107680320739746,
"rewards/accuracies": 1.0,
"rewards/generated": -85.333740234375,
"rewards/margins": 107.23435974121094,
"rewards/real": 21.90062141418457,
"step": 183
},
{
"epoch": 0.77,
"grad_norm": 9260.308850141015,
"learning_rate": 4.522792022792022e-07,
"logits/generated": -2.074495792388916,
"logits/oppo_generated": -2.679591655731201,
"logits/oppo_real": -2.5152084827423096,
"logits/real": -2.2176570892333984,
"logps/generated": -301.60516357421875,
"logps/oppo_gen": -134.39280700683594,
"logps/oppo_real": -353.8466491699219,
"logps/real": -354.19549560546875,
"loss": -41.3972,
"loss/gen": 0.28073543310165405,
"loss/real": -2.867943525314331,
"rewards/accuracies": 1.0,
"rewards/generated": -167.2123565673828,
"rewards/margins": 166.86349487304688,
"rewards/real": -0.34885168075561523,
"step": 184
},
{
"epoch": 0.77,
"grad_norm": 57.11031975962729,
"learning_rate": 4.519230769230769e-07,
"logits/generated": -2.515873908996582,
"logits/oppo_generated": -2.8852622509002686,
"logits/oppo_real": -2.9888343811035156,
"logits/real": -2.4470033645629883,
"logps/generated": -221.6116485595703,
"logps/oppo_gen": -86.57408142089844,
"logps/oppo_real": -353.78594970703125,
"logps/real": -337.73291015625,
"loss": -3.0984,
"loss/gen": 0.3061344623565674,
"loss/real": -3.636414051055908,
"rewards/accuracies": 1.0,
"rewards/generated": -135.03756713867188,
"rewards/margins": 151.0906219482422,
"rewards/real": 16.053056716918945,
"step": 185
},
{
"epoch": 0.78,
"grad_norm": 242.2639153640096,
"learning_rate": 4.5156695156695157e-07,
"logits/generated": -2.5487115383148193,
"logits/oppo_generated": -2.894904136657715,
"logits/oppo_real": -2.8833250999450684,
"logits/real": -2.4917829036712646,
"logps/generated": -208.41036987304688,
"logps/oppo_gen": -97.552490234375,
"logps/oppo_real": -446.60357666015625,
"logps/real": -420.88616943359375,
"loss": -3.9814,
"loss/gen": 0.4838281273841858,
"loss/real": -3.5608205795288086,
"rewards/accuracies": 1.0,
"rewards/generated": -110.85787963867188,
"rewards/margins": 136.57525634765625,
"rewards/real": 25.71738052368164,
"step": 186
},
{
"epoch": 0.78,
"grad_norm": 61.190406675725114,
"learning_rate": 4.512108262108262e-07,
"logits/generated": -2.471825122833252,
"logits/oppo_generated": -2.9238195419311523,
"logits/oppo_real": -2.928109645843506,
"logits/real": -2.5521817207336426,
"logps/generated": -248.10580444335938,
"logps/oppo_gen": -99.34373474121094,
"logps/oppo_real": -381.1275634765625,
"logps/real": -371.27154541015625,
"loss": -3.1159,
"loss/gen": 0.34282225370407104,
"loss/real": -3.2633209228515625,
"rewards/accuracies": 1.0,
"rewards/generated": -148.7620849609375,
"rewards/margins": 158.61810302734375,
"rewards/real": 9.856016159057617,
"step": 187
},
{
"epoch": 0.79,
"grad_norm": 59.89950894519086,
"learning_rate": 4.5085470085470087e-07,
"logits/generated": -2.162811279296875,
"logits/oppo_generated": -2.7080626487731934,
"logits/oppo_real": -2.5767087936401367,
"logits/real": -2.30268931388855,
"logps/generated": -199.12115478515625,
"logps/oppo_gen": -46.502037048339844,
"logps/oppo_real": -149.05059814453125,
"logps/real": -149.08099365234375,
"loss": -2.8636,
"loss/gen": 0.46407026052474976,
"loss/real": -2.982802391052246,
"rewards/accuracies": 1.0,
"rewards/generated": -152.61912536621094,
"rewards/margins": 152.58872985839844,
"rewards/real": -0.030394554138183594,
"step": 188
},
{
"epoch": 0.79,
"grad_norm": 63.384206370258354,
"learning_rate": 4.5049857549857543e-07,
"logits/generated": -2.5746748447418213,
"logits/oppo_generated": -2.9217922687530518,
"logits/oppo_real": -3.0358145236968994,
"logits/real": -2.5657949447631836,
"logps/generated": -201.57012939453125,
"logps/oppo_gen": -72.13301849365234,
"logps/oppo_real": -295.51861572265625,
"logps/real": -298.053955078125,
"loss": -2.9233,
"loss/gen": 0.28429698944091797,
"loss/real": -3.108995199203491,
"rewards/accuracies": 1.0,
"rewards/generated": -129.4371337890625,
"rewards/margins": 126.90178680419922,
"rewards/real": -2.5353341102600098,
"step": 189
},
{
"epoch": 0.79,
"grad_norm": 94.99060911644108,
"learning_rate": 4.501424501424501e-07,
"logits/generated": -2.3787384033203125,
"logits/oppo_generated": -2.7406344413757324,
"logits/oppo_real": -2.799593925476074,
"logits/real": -2.3933238983154297,
"logps/generated": -222.70245361328125,
"logps/oppo_gen": -102.60955810546875,
"logps/oppo_real": -305.8299255371094,
"logps/real": -273.30157470703125,
"loss": -3.2776,
"loss/gen": 0.36470329761505127,
"loss/real": -3.8918604850769043,
"rewards/accuracies": 1.0,
"rewards/generated": -120.0928955078125,
"rewards/margins": 152.62124633789062,
"rewards/real": 32.52833938598633,
"step": 190
},
{
"epoch": 0.8,
"grad_norm": 350.9167124297445,
"learning_rate": 4.497863247863248e-07,
"logits/generated": -2.6413869857788086,
"logits/oppo_generated": -2.8220396041870117,
"logits/oppo_real": -3.0663821697235107,
"logits/real": -2.490924596786499,
"logps/generated": -178.68812561035156,
"logps/oppo_gen": -80.95722961425781,
"logps/oppo_real": -339.0364074707031,
"logps/real": -321.1693115234375,
"loss": -3.7641,
"loss/gen": 0.9792780876159668,
"loss/real": -3.2799935340881348,
"rewards/accuracies": 1.0,
"rewards/generated": -97.73089599609375,
"rewards/margins": 115.59801483154297,
"rewards/real": 17.867115020751953,
"step": 191
},
{
"epoch": 0.8,
"grad_norm": 568.1687689410603,
"learning_rate": 4.494301994301994e-07,
"logits/generated": -2.53959321975708,
"logits/oppo_generated": -2.8528313636779785,
"logits/oppo_real": -2.9469070434570312,
"logits/real": -2.4770290851593018,
"logps/generated": -186.40338134765625,
"logps/oppo_gen": -55.95906066894531,
"logps/oppo_real": -228.37322998046875,
"logps/real": -216.5171661376953,
"loss": -4.4415,
"loss/gen": 0.2824003994464874,
"loss/real": -3.352588653564453,
"rewards/accuracies": 1.0,
"rewards/generated": -130.44430541992188,
"rewards/margins": 142.30039978027344,
"rewards/real": 11.856078147888184,
"step": 192
},
{
"epoch": 0.81,
"grad_norm": 82.08388787354983,
"learning_rate": 4.4907407407407403e-07,
"logits/generated": -2.3993959426879883,
"logits/oppo_generated": -2.759657859802246,
"logits/oppo_real": -2.7739434242248535,
"logits/real": -2.4300918579101562,
"logps/generated": -195.13946533203125,
"logps/oppo_gen": -55.900001525878906,
"logps/oppo_real": -240.51673889160156,
"logps/real": -254.91921997070312,
"loss": -3.0531,
"loss/gen": 0.31808772683143616,
"loss/real": -2.974119186401367,
"rewards/accuracies": 0.875,
"rewards/generated": -139.23947143554688,
"rewards/margins": 124.83699035644531,
"rewards/real": -14.402481079101562,
"step": 193
},
{
"epoch": 0.81,
"grad_norm": 59.35182524704906,
"learning_rate": 4.487179487179487e-07,
"logits/generated": -2.422110080718994,
"logits/oppo_generated": -2.714049816131592,
"logits/oppo_real": -2.821863889694214,
"logits/real": -2.3553073406219482,
"logps/generated": -191.3336181640625,
"logps/oppo_gen": -61.66150665283203,
"logps/oppo_real": -281.81561279296875,
"logps/real": -268.0882873535156,
"loss": -2.9157,
"loss/gen": 0.2871388792991638,
"loss/real": -3.4819259643554688,
"rewards/accuracies": 1.0,
"rewards/generated": -129.67208862304688,
"rewards/margins": 143.39939880371094,
"rewards/real": 13.727313995361328,
"step": 194
},
{
"epoch": 0.82,
"grad_norm": 61.67208283154745,
"learning_rate": 4.4836182336182333e-07,
"logits/generated": -2.2326247692108154,
"logits/oppo_generated": -2.7336645126342773,
"logits/oppo_real": -2.6636435985565186,
"logits/real": -2.38464617729187,
"logps/generated": -191.60995483398438,
"logps/oppo_gen": -66.04891204833984,
"logps/oppo_real": -343.6158447265625,
"logps/real": -311.3708801269531,
"loss": -3.0524,
"loss/gen": 0.30880045890808105,
"loss/real": -4.188716888427734,
"rewards/accuracies": 1.0,
"rewards/generated": -125.56105041503906,
"rewards/margins": 157.8060302734375,
"rewards/real": 32.2449836730957,
"step": 195
},
{
"epoch": 0.82,
"grad_norm": 79.39509465129127,
"learning_rate": 4.48005698005698e-07,
"logits/generated": -2.459395408630371,
"logits/oppo_generated": -3.0542874336242676,
"logits/oppo_real": -2.803119659423828,
"logits/real": -2.7458314895629883,
"logps/generated": -202.55088806152344,
"logps/oppo_gen": -81.553955078125,
"logps/oppo_real": -376.17071533203125,
"logps/real": -320.265869140625,
"loss": -3.204,
"loss/gen": 0.3818941116333008,
"loss/real": -5.556370735168457,
"rewards/accuracies": 1.0,
"rewards/generated": -120.99693298339844,
"rewards/margins": 176.90176391601562,
"rewards/real": 55.90484619140625,
"step": 196
},
{
"epoch": 0.82,
"grad_norm": 64.34859516119019,
"learning_rate": 4.476495726495726e-07,
"logits/generated": -2.551626205444336,
"logits/oppo_generated": -2.791293144226074,
"logits/oppo_real": -2.8689441680908203,
"logits/real": -2.4949615001678467,
"logps/generated": -214.268310546875,
"logps/oppo_gen": -90.10079956054688,
"logps/oppo_real": -387.6597900390625,
"logps/real": -355.855712890625,
"loss": -2.9635,
"loss/gen": 0.7095820903778076,
"loss/real": -3.8604226112365723,
"rewards/accuracies": 1.0,
"rewards/generated": -124.16752624511719,
"rewards/margins": 155.97164916992188,
"rewards/real": 31.804113388061523,
"step": 197
},
{
"epoch": 0.83,
"grad_norm": 55.47510967063292,
"learning_rate": 4.4729344729344725e-07,
"logits/generated": -2.5076422691345215,
"logits/oppo_generated": -2.8356850147247314,
"logits/oppo_real": -2.917833089828491,
"logits/real": -2.4860076904296875,
"logps/generated": -208.80723571777344,
"logps/oppo_gen": -76.40264892578125,
"logps/oppo_real": -278.172607421875,
"logps/real": -251.12782287597656,
"loss": -3.1359,
"loss/gen": 0.3080252408981323,
"loss/real": -3.7138681411743164,
"rewards/accuracies": 1.0,
"rewards/generated": -132.4045867919922,
"rewards/margins": 159.44937133789062,
"rewards/real": 27.04478645324707,
"step": 198
},
{
"epoch": 0.83,
"grad_norm": 55.47510967063292,
"learning_rate": 4.4729344729344725e-07,
"logits/generated": -2.7056777477264404,
"logits/oppo_generated": -3.0011539459228516,
"logits/oppo_real": -3.069876194000244,
"logits/real": -2.696037530899048,
"logps/generated": -191.9580078125,
"logps/oppo_gen": -69.13575744628906,
"logps/oppo_real": -340.70343017578125,
"logps/real": -328.8914794921875,
"loss": -1322.4869,
"loss/gen": 0.4668487310409546,
"loss/real": -3.3937647342681885,
"rewards/accuracies": 1.0,
"rewards/generated": -122.82225036621094,
"rewards/margins": 134.6342010498047,
"rewards/real": 11.811951637268066,
"step": 199
},
{
"epoch": 0.84,
"grad_norm": 76.39824855854027,
"learning_rate": 4.469373219373219e-07,
"logits/generated": -2.6483988761901855,
"logits/oppo_generated": -2.821411609649658,
"logits/oppo_real": -2.9697532653808594,
"logits/real": -2.5232529640197754,
"logps/generated": -208.79844665527344,
"logps/oppo_gen": -94.25292205810547,
"logps/oppo_real": -449.1705322265625,
"logps/real": -411.73590087890625,
"loss": -3.1662,
"loss/gen": 0.6144514083862305,
"loss/real": -4.066596984863281,
"rewards/accuracies": 1.0,
"rewards/generated": -114.54552459716797,
"rewards/margins": 151.98013305664062,
"rewards/real": 37.434608459472656,
"step": 200
},
{
"epoch": 0.84,
"grad_norm": 80.08028198314489,
"learning_rate": 4.465811965811966e-07,
"logits/generated": -2.521160125732422,
"logits/oppo_generated": -2.9498441219329834,
"logits/oppo_real": -2.889374017715454,
"logits/real": -2.6443803310394287,
"logps/generated": -218.7113037109375,
"logps/oppo_gen": -93.28401184082031,
"logps/oppo_real": -446.9027099609375,
"logps/real": -420.687744140625,
"loss": -3.1837,
"loss/gen": 0.3799129128456116,
"loss/real": -3.5870652198791504,
"rewards/accuracies": 1.0,
"rewards/generated": -125.42729187011719,
"rewards/margins": 151.6422119140625,
"rewards/real": 26.214933395385742,
"step": 201
},
{
"epoch": 0.85,
"grad_norm": 85.4263034452065,
"learning_rate": 4.4622507122507117e-07,
"logits/generated": -2.0849194526672363,
"logits/oppo_generated": -2.5877699851989746,
"logits/oppo_real": -2.4145617485046387,
"logits/real": -2.3253278732299805,
"logps/generated": -175.31253051757812,
"logps/oppo_gen": -58.147544860839844,
"logps/oppo_real": -256.63494873046875,
"logps/real": -251.91702270507812,
"loss": -3.1424,
"loss/gen": 0.633264422416687,
"loss/real": -2.9826745986938477,
"rewards/accuracies": 1.0,
"rewards/generated": -117.16497802734375,
"rewards/margins": 121.88292694091797,
"rewards/real": 4.7179460525512695,
"step": 202
},
{
"epoch": 0.85,
"grad_norm": 61.56183771851885,
"learning_rate": 4.4586894586894584e-07,
"logits/generated": -2.5706653594970703,
"logits/oppo_generated": -2.825096607208252,
"logits/oppo_real": -2.919394016265869,
"logits/real": -2.5202863216400146,
"logps/generated": -173.32174682617188,
"logps/oppo_gen": -62.71122360229492,
"logps/oppo_real": -234.44354248046875,
"logps/real": -211.79794311523438,
"loss": -3.0867,
"loss/gen": 0.7330012321472168,
"loss/real": -3.4876270294189453,
"rewards/accuracies": 1.0,
"rewards/generated": -110.61052703857422,
"rewards/margins": 133.25613403320312,
"rewards/real": 22.645606994628906,
"step": 203
},
{
"epoch": 0.85,
"grad_norm": 55.0575072225623,
"learning_rate": 4.455128205128205e-07,
"logits/generated": -2.589749336242676,
"logits/oppo_generated": -2.681910276412964,
"logits/oppo_real": -2.8930723667144775,
"logits/real": -2.393826484680176,
"logps/generated": -184.09539794921875,
"logps/oppo_gen": -69.35714721679688,
"logps/oppo_real": -321.68878173828125,
"logps/real": -300.22052001953125,
"loss": -2.9885,
"loss/gen": 0.6509556770324707,
"loss/real": -3.5244717597961426,
"rewards/accuracies": 1.0,
"rewards/generated": -114.73826599121094,
"rewards/margins": 136.20651245117188,
"rewards/real": 21.46826171875,
"step": 204
},
{
"epoch": 0.86,
"grad_norm": 205.9144144875487,
"learning_rate": 4.4515669515669514e-07,
"logits/generated": -2.5176801681518555,
"logits/oppo_generated": -2.910146951675415,
"logits/oppo_real": -2.842686653137207,
"logits/real": -2.6160125732421875,
"logps/generated": -192.1990966796875,
"logps/oppo_gen": -55.29602813720703,
"logps/oppo_real": -188.457763671875,
"logps/real": -170.13516235351562,
"loss": -3.8694,
"loss/gen": 0.29164981842041016,
"loss/real": -3.5206615924835205,
"rewards/accuracies": 1.0,
"rewards/generated": -136.903076171875,
"rewards/margins": 155.22567749023438,
"rewards/real": 18.32259750366211,
"step": 205
},
{
"epoch": 0.86,
"grad_norm": 72.13692086277665,
"learning_rate": 4.448005698005698e-07,
"logits/generated": -2.6976194381713867,
"logits/oppo_generated": -2.9482345581054688,
"logits/oppo_real": -3.0109448432922363,
"logits/real": -2.652653455734253,
"logps/generated": -196.82077026367188,
"logps/oppo_gen": -70.6409912109375,
"logps/oppo_real": -375.189697265625,
"logps/real": -351.4737243652344,
"loss": -3.0215,
"loss/gen": 0.35152187943458557,
"loss/real": -3.49497127532959,
"rewards/accuracies": 1.0,
"rewards/generated": -126.1797866821289,
"rewards/margins": 149.89573669433594,
"rewards/real": 23.7159423828125,
"step": 206
},
{
"epoch": 0.87,
"grad_norm": 61.61398545074811,
"learning_rate": 4.444444444444444e-07,
"logits/generated": -2.5743110179901123,
"logits/oppo_generated": -2.7811834812164307,
"logits/oppo_real": -2.923962116241455,
"logits/real": -2.454921245574951,
"logps/generated": -196.2818603515625,
"logps/oppo_gen": -71.71026611328125,
"logps/oppo_real": -353.846923828125,
"logps/real": -345.35919189453125,
"loss": -3.0779,
"loss/gen": 0.32238900661468506,
"loss/real": -3.2966091632843018,
"rewards/accuracies": 1.0,
"rewards/generated": -124.57159423828125,
"rewards/margins": 133.05931091308594,
"rewards/real": 8.487724304199219,
"step": 207
},
{
"epoch": 0.87,
"grad_norm": 57.15490447432092,
"learning_rate": 4.4408831908831906e-07,
"logits/generated": -2.782914161682129,
"logits/oppo_generated": -2.8043360710144043,
"logits/oppo_real": -3.0211949348449707,
"logits/real": -2.5231986045837402,
"logps/generated": -194.0042724609375,
"logps/oppo_gen": -77.71004486083984,
"logps/oppo_real": -389.77301025390625,
"logps/real": -358.2027282714844,
"loss": -3.0323,
"loss/gen": 0.423714816570282,
"loss/real": -3.7564921379089355,
"rewards/accuracies": 1.0,
"rewards/generated": -116.29424285888672,
"rewards/margins": 147.8645477294922,
"rewards/real": 31.570310592651367,
"step": 208
},
{
"epoch": 0.87,
"grad_norm": 59.74080162590932,
"learning_rate": 4.4373219373219373e-07,
"logits/generated": -2.357008934020996,
"logits/oppo_generated": -2.7760987281799316,
"logits/oppo_real": -2.740163803100586,
"logits/real": -2.439347505569458,
"logps/generated": -205.67852783203125,
"logps/oppo_gen": -88.69313049316406,
"logps/oppo_real": -338.8006591796875,
"logps/real": -314.58441162109375,
"loss": -3.0479,
"loss/gen": 0.4672941565513611,
"loss/real": -3.506350040435791,
"rewards/accuracies": 1.0,
"rewards/generated": -116.98541259765625,
"rewards/margins": 141.20166015625,
"rewards/real": 24.21624755859375,
"step": 209
},
{
"epoch": 0.88,
"grad_norm": 1848.1626687064102,
"learning_rate": 4.4337606837606836e-07,
"logits/generated": -2.4638514518737793,
"logits/oppo_generated": -2.7127938270568848,
"logits/oppo_real": -2.803234577178955,
"logits/real": -2.38244366645813,
"logps/generated": -237.40353393554688,
"logps/oppo_gen": -85.75541687011719,
"logps/oppo_real": -242.4071807861328,
"logps/real": -237.742431640625,
"loss": -5.9552,
"loss/gen": 0.2063872367143631,
"loss/real": -3.0251030921936035,
"rewards/accuracies": 1.0,
"rewards/generated": -151.64813232421875,
"rewards/margins": 156.31288146972656,
"rewards/real": 4.664756774902344,
"step": 210
},
{
"epoch": 0.88,
"grad_norm": 400.5685997741211,
"learning_rate": 4.43019943019943e-07,
"logits/generated": -2.5298070907592773,
"logits/oppo_generated": -2.995426654815674,
"logits/oppo_real": -2.8803281784057617,
"logits/real": -2.6857643127441406,
"logps/generated": -184.82400512695312,
"logps/oppo_gen": -68.82854461669922,
"logps/oppo_real": -337.844482421875,
"logps/real": -296.872314453125,
"loss": -4.4013,
"loss/gen": 0.568313717842102,
"loss/real": -4.289045333862305,
"rewards/accuracies": 1.0,
"rewards/generated": -115.99545288085938,
"rewards/margins": 156.96762084960938,
"rewards/real": 40.97218322753906,
"step": 211
},
{
"epoch": 0.89,
"grad_norm": 40892.68061646241,
"learning_rate": 4.4266381766381765e-07,
"logits/generated": -2.705117702484131,
"logits/oppo_generated": -2.6126418113708496,
"logits/oppo_real": -3.0222294330596924,
"logits/real": -2.3141441345214844,
"logps/generated": -171.36810302734375,
"logps/oppo_gen": -56.36054992675781,
"logps/oppo_real": -325.3075256347656,
"logps/real": -308.48114013671875,
"loss": -129.61,
"loss/gen": 0.39068758487701416,
"loss/real": -3.74429988861084,
"rewards/accuracies": 1.0,
"rewards/generated": -115.00755310058594,
"rewards/margins": 131.83392333984375,
"rewards/real": 16.826370239257812,
"step": 212
},
{
"epoch": 0.89,
"grad_norm": 3590.9546084572,
"learning_rate": 4.423076923076923e-07,
"logits/generated": -2.696446418762207,
"logits/oppo_generated": -3.026592254638672,
"logits/oppo_real": -2.9974026679992676,
"logits/real": -2.68188214302063,
"logps/generated": -209.5288543701172,
"logps/oppo_gen": -81.62860107421875,
"logps/oppo_real": -354.01513671875,
"logps/real": -335.030517578125,
"loss": -13.5441,
"loss/gen": 0.3259393572807312,
"loss/real": -3.325887680053711,
"rewards/accuracies": 1.0,
"rewards/generated": -127.90025329589844,
"rewards/margins": 146.8848876953125,
"rewards/real": 18.9846248626709,
"step": 213
},
{
"epoch": 0.9,
"grad_norm": 62.96425038203723,
"learning_rate": 4.4195156695156695e-07,
"logits/generated": -2.4695000648498535,
"logits/oppo_generated": -2.86299991607666,
"logits/oppo_real": -2.897392749786377,
"logits/real": -2.52815580368042,
"logps/generated": -159.04405212402344,
"logps/oppo_gen": -55.654396057128906,
"logps/oppo_real": -286.4037170410156,
"logps/real": -273.60174560546875,
"loss": -3.259,
"loss/gen": 0.6177021265029907,
"loss/real": -3.30937123298645,
"rewards/accuracies": 1.0,
"rewards/generated": -103.38966369628906,
"rewards/margins": 116.19161224365234,
"rewards/real": 12.801952362060547,
"step": 214
},
{
"epoch": 0.9,
"grad_norm": 57.797468485057806,
"learning_rate": 4.4159544159544157e-07,
"logits/generated": -2.4351806640625,
"logits/oppo_generated": -2.8678367137908936,
"logits/oppo_real": -2.797013759613037,
"logits/real": -2.479971408843994,
"logps/generated": -265.5663757324219,
"logps/oppo_gen": -154.916748046875,
"logps/oppo_real": -268.4582824707031,
"logps/real": -245.14251708984375,
"loss": -3.1633,
"loss/gen": 0.5528236627578735,
"loss/real": -3.513732433319092,
"rewards/accuracies": 1.0,
"rewards/generated": -110.6496353149414,
"rewards/margins": 133.96539306640625,
"rewards/real": 23.315759658813477,
"step": 215
},
{
"epoch": 0.9,
"grad_norm": 80.14004161173895,
"learning_rate": 4.412393162393162e-07,
"logits/generated": -2.6762161254882812,
"logits/oppo_generated": -2.879833221435547,
"logits/oppo_real": -3.0112786293029785,
"logits/real": -2.480961799621582,
"logps/generated": -193.92112731933594,
"logps/oppo_gen": -96.10844421386719,
"logps/oppo_real": -492.59039306640625,
"logps/real": -480.1392517089844,
"loss": -2.9444,
"loss/gen": 1.0747777223587036,
"loss/real": -3.2505507469177246,
"rewards/accuracies": 0.875,
"rewards/generated": -97.81267547607422,
"rewards/margins": 110.26382446289062,
"rewards/real": 12.45114803314209,
"step": 216
},
{
"epoch": 0.91,
"grad_norm": 27165.010436221077,
"learning_rate": 4.4088319088319087e-07,
"logits/generated": -2.661689281463623,
"logits/oppo_generated": -2.855457305908203,
"logits/oppo_real": -3.161579132080078,
"logits/real": -2.475346565246582,
"logps/generated": -173.70448303222656,
"logps/oppo_gen": -79.04156494140625,
"logps/oppo_real": -508.73779296875,
"logps/real": -485.790283203125,
"loss": -63.2568,
"loss/gen": 0.8462377786636353,
"loss/real": -3.7985730171203613,
"rewards/accuracies": 0.875,
"rewards/generated": -94.66291809082031,
"rewards/margins": 117.61046600341797,
"rewards/real": 22.947547912597656,
"step": 217
},
{
"epoch": 0.91,
"grad_norm": 79.71609474703807,
"learning_rate": 4.4052706552706555e-07,
"logits/generated": -2.6024856567382812,
"logits/oppo_generated": -2.8270015716552734,
"logits/oppo_real": -2.9884450435638428,
"logits/real": -2.381761074066162,
"logps/generated": -193.1739959716797,
"logps/oppo_gen": -79.96229553222656,
"logps/oppo_real": -295.296630859375,
"logps/real": -280.0252685546875,
"loss": -3.1061,
"loss/gen": 0.8399382829666138,
"loss/real": -3.309168815612793,
"rewards/accuracies": 0.875,
"rewards/generated": -113.2116928100586,
"rewards/margins": 128.48306274414062,
"rewards/real": 15.27135944366455,
"step": 218
},
{
"epoch": 0.92,
"grad_norm": 117.95946155486003,
"learning_rate": 4.4017094017094017e-07,
"logits/generated": -2.274564266204834,
"logits/oppo_generated": -2.7040886878967285,
"logits/oppo_real": -2.816561698913574,
"logits/real": -2.2658865451812744,
"logps/generated": -187.32923889160156,
"logps/oppo_gen": -55.71031188964844,
"logps/oppo_real": -202.95962524414062,
"logps/real": -166.00265502929688,
"loss": -3.2002,
"loss/gen": 0.33614322543144226,
"loss/real": -4.0761213302612305,
"rewards/accuracies": 1.0,
"rewards/generated": -131.61892700195312,
"rewards/margins": 168.57589721679688,
"rewards/real": 36.956966400146484,
"step": 219
},
{
"epoch": 0.92,
"grad_norm": 88.19558620351276,
"learning_rate": 4.398148148148148e-07,
"logits/generated": -1.9364006519317627,
"logits/oppo_generated": -2.385345458984375,
"logits/oppo_real": -2.4835422039031982,
"logits/real": -1.808083415031433,
"logps/generated": -179.90750122070312,
"logps/oppo_gen": -75.58077239990234,
"logps/oppo_real": -339.3034973144531,
"logps/real": -272.0712585449219,
"loss": -3.2605,
"loss/gen": 0.8947268724441528,
"loss/real": -8.812257766723633,
"rewards/accuracies": 1.0,
"rewards/generated": -104.32673645019531,
"rewards/margins": 171.55897521972656,
"rewards/real": 67.23223876953125,
"step": 220
},
{
"epoch": 0.92,
"grad_norm": 5908.862834759325,
"learning_rate": 4.394586894586894e-07,
"logits/generated": -2.601797580718994,
"logits/oppo_generated": -3.011491060256958,
"logits/oppo_real": -3.0487937927246094,
"logits/real": -2.562220573425293,
"logps/generated": -246.65899658203125,
"logps/oppo_gen": -131.22396850585938,
"logps/oppo_real": -400.33868408203125,
"logps/real": -376.3664245605469,
"loss": -13.8663,
"loss/gen": 0.42373475432395935,
"loss/real": -3.5093624591827393,
"rewards/accuracies": 1.0,
"rewards/generated": -115.43504333496094,
"rewards/margins": 139.40728759765625,
"rewards/real": 23.972253799438477,
"step": 221
},
{
"epoch": 0.93,
"grad_norm": 95.4678242590141,
"learning_rate": 4.391025641025641e-07,
"logits/generated": -2.3840436935424805,
"logits/oppo_generated": -2.755108118057251,
"logits/oppo_real": -2.8694067001342773,
"logits/real": -2.2530529499053955,
"logps/generated": -177.37356567382812,
"logps/oppo_gen": -61.73572540283203,
"logps/oppo_real": -230.838134765625,
"logps/real": -215.96424865722656,
"loss": -3.0607,
"loss/gen": 0.6986711025238037,
"loss/real": -3.6220858097076416,
"rewards/accuracies": 0.875,
"rewards/generated": -115.63784790039062,
"rewards/margins": 130.51173400878906,
"rewards/real": 14.873891830444336,
"step": 222
},
{
"epoch": 0.93,
"grad_norm": 87.51268602233479,
"learning_rate": 4.3874643874643876e-07,
"logits/generated": -2.3197760581970215,
"logits/oppo_generated": -2.8574419021606445,
"logits/oppo_real": -2.923137903213501,
"logits/real": -2.3333818912506104,
"logps/generated": -231.46023559570312,
"logps/oppo_gen": -82.77210998535156,
"logps/oppo_real": -252.58892822265625,
"logps/real": -270.0004577636719,
"loss": -2.98,
"loss/gen": 0.21608535945415497,
"loss/real": -2.6946630477905273,
"rewards/accuracies": 0.875,
"rewards/generated": -148.6881103515625,
"rewards/margins": 131.2765655517578,
"rewards/real": -17.411537170410156,
"step": 223
},
{
"epoch": 0.94,
"grad_norm": 87.51268602233479,
"learning_rate": 4.3874643874643876e-07,
"logits/generated": -2.242218017578125,
"logits/oppo_generated": -2.994565010070801,
"logits/oppo_real": -2.8149280548095703,
"logits/real": -2.4609484672546387,
"logps/generated": -181.10708618164062,
"logps/oppo_gen": -48.2861213684082,
"logps/oppo_real": -137.37625122070312,
"logps/real": -162.7267608642578,
"loss": -7932.2959,
"loss/gen": 0.7526332139968872,
"loss/real": -2.542538642883301,
"rewards/accuracies": 0.875,
"rewards/generated": -132.82098388671875,
"rewards/margins": 107.47045135498047,
"rewards/real": -25.350521087646484,
"step": 224
},
{
"epoch": 0.94,
"grad_norm": 67.6441819490967,
"learning_rate": 4.3839031339031333e-07,
"logits/generated": -2.315286636352539,
"logits/oppo_generated": -2.816603422164917,
"logits/oppo_real": -2.9343314170837402,
"logits/real": -2.3111538887023926,
"logps/generated": -155.87979125976562,
"logps/oppo_gen": -30.44548988342285,
"logps/oppo_real": -174.9966278076172,
"logps/real": -169.0306854248047,
"loss": -2.8919,
"loss/gen": 0.31436973810195923,
"loss/real": -3.049879550933838,
"rewards/accuracies": 1.0,
"rewards/generated": -125.43431091308594,
"rewards/margins": 131.40023803710938,
"rewards/real": 5.965947151184082,
"step": 225
},
{
"epoch": 0.95,
"grad_norm": 61.01237787454485,
"learning_rate": 4.38034188034188e-07,
"logits/generated": -2.3058667182922363,
"logits/oppo_generated": -2.6415185928344727,
"logits/oppo_real": -3.0115818977355957,
"logits/real": -2.004304885864258,
"logps/generated": -215.7899627685547,
"logps/oppo_gen": -93.466064453125,
"logps/oppo_real": -340.529296875,
"logps/real": -337.6749572753906,
"loss": -3.1087,
"loss/gen": 0.5611802339553833,
"loss/real": -2.9437613487243652,
"rewards/accuracies": 1.0,
"rewards/generated": -122.32388305664062,
"rewards/margins": 125.1782455444336,
"rewards/real": 2.8543548583984375,
"step": 226
},
{
"epoch": 0.95,
"grad_norm": 68.83477018148241,
"learning_rate": 4.376780626780627e-07,
"logits/generated": -2.3567757606506348,
"logits/oppo_generated": -2.7984108924865723,
"logits/oppo_real": -2.9754528999328613,
"logits/real": -2.275757312774658,
"logps/generated": -208.89105224609375,
"logps/oppo_gen": -69.67858123779297,
"logps/oppo_real": -268.7974853515625,
"logps/real": -266.00445556640625,
"loss": -3.1351,
"loss/gen": 0.24598746001720428,
"loss/real": -3.0755763053894043,
"rewards/accuracies": 1.0,
"rewards/generated": -139.21246337890625,
"rewards/margins": 142.00546264648438,
"rewards/real": 2.7930030822753906,
"step": 227
},
{
"epoch": 0.95,
"grad_norm": 103.64887767723296,
"learning_rate": 4.373219373219373e-07,
"logits/generated": -2.2009589672088623,
"logits/oppo_generated": -2.7994847297668457,
"logits/oppo_real": -2.687981605529785,
"logits/real": -2.264253616333008,
"logps/generated": -200.60202026367188,
"logps/oppo_gen": -76.17577362060547,
"logps/oppo_real": -381.5020751953125,
"logps/real": -350.08245849609375,
"loss": -3.0609,
"loss/gen": 0.34163713455200195,
"loss/real": -4.447661399841309,
"rewards/accuracies": 1.0,
"rewards/generated": -124.42623138427734,
"rewards/margins": 155.8458251953125,
"rewards/real": 31.419601440429688,
"step": 228
},
{
"epoch": 0.96,
"grad_norm": 4236.734355609282,
"learning_rate": 4.3696581196581193e-07,
"logits/generated": -2.313900947570801,
"logits/oppo_generated": -2.8429031372070312,
"logits/oppo_real": -3.0224597454071045,
"logits/real": -2.2679154872894287,
"logps/generated": -197.47596740722656,
"logps/oppo_gen": -78.5534439086914,
"logps/oppo_real": -246.5026397705078,
"logps/real": -232.70751953125,
"loss": -7.8903,
"loss/gen": 0.3893076479434967,
"loss/real": -3.408906936645508,
"rewards/accuracies": 1.0,
"rewards/generated": -118.92252349853516,
"rewards/margins": 132.71763610839844,
"rewards/real": 13.795119285583496,
"step": 229
},
{
"epoch": 0.96,
"grad_norm": 65.91049876655653,
"learning_rate": 4.366096866096866e-07,
"logits/generated": -2.122530221939087,
"logits/oppo_generated": -2.5529236793518066,
"logits/oppo_real": -2.7146146297454834,
"logits/real": -1.90482759475708,
"logps/generated": -217.27114868164062,
"logps/oppo_gen": -79.70944213867188,
"logps/oppo_real": -106.01055145263672,
"logps/real": -128.87461853027344,
"loss": -3.0589,
"loss/gen": 0.2452090084552765,
"loss/real": -2.452601909637451,
"rewards/accuracies": 1.0,
"rewards/generated": -137.56170654296875,
"rewards/margins": 114.6976547241211,
"rewards/real": -22.864065170288086,
"step": 230
},
{
"epoch": 0.97,
"grad_norm": 100.04292047666341,
"learning_rate": 4.362535612535612e-07,
"logits/generated": -1.987313985824585,
"logits/oppo_generated": -2.5894346237182617,
"logits/oppo_real": -2.6849865913391113,
"logits/real": -2.029129981994629,
"logps/generated": -221.10101318359375,
"logps/oppo_gen": -67.09019470214844,
"logps/oppo_real": -256.4427185058594,
"logps/real": -237.23843383789062,
"loss": -3.213,
"loss/gen": 0.4131355285644531,
"loss/real": -3.562318801879883,
"rewards/accuracies": 1.0,
"rewards/generated": -154.0108184814453,
"rewards/margins": 173.215087890625,
"rewards/real": 19.204273223876953,
"step": 231
},
{
"epoch": 0.97,
"grad_norm": 79.75284391970766,
"learning_rate": 4.358974358974359e-07,
"logits/generated": -2.341341495513916,
"logits/oppo_generated": -2.959817886352539,
"logits/oppo_real": -2.9362192153930664,
"logits/real": -2.416731119155884,
"logps/generated": -216.04949951171875,
"logps/oppo_gen": -82.48292541503906,
"logps/oppo_real": -458.88818359375,
"logps/real": -485.8726806640625,
"loss": -2.8748,
"loss/gen": 0.28640565276145935,
"loss/real": -2.7164149284362793,
"rewards/accuracies": 0.75,
"rewards/generated": -133.5665740966797,
"rewards/margins": 106.58207702636719,
"rewards/real": -26.9844970703125,
"step": 232
},
{
"epoch": 0.97,
"grad_norm": 263.329314974615,
"learning_rate": 4.355413105413105e-07,
"logits/generated": -2.236466884613037,
"logits/oppo_generated": -2.7284858226776123,
"logits/oppo_real": -2.8326492309570312,
"logits/real": -2.176626205444336,
"logps/generated": -187.85845947265625,
"logps/oppo_gen": -60.89936828613281,
"logps/oppo_real": -245.58233642578125,
"logps/real": -235.84445190429688,
"loss": -3.2515,
"loss/gen": 0.3334371745586395,
"loss/real": -3.059086322784424,
"rewards/accuracies": 1.0,
"rewards/generated": -126.9590835571289,
"rewards/margins": 136.69699096679688,
"rewards/real": 9.737905502319336,
"step": 233
},
{
"epoch": 0.98,
"grad_norm": 130.99742811168088,
"learning_rate": 4.3518518518518514e-07,
"logits/generated": -2.2553353309631348,
"logits/oppo_generated": -2.884782075881958,
"logits/oppo_real": -3.007986545562744,
"logits/real": -2.3814938068389893,
"logps/generated": -198.6071319580078,
"logps/oppo_gen": -64.29571533203125,
"logps/oppo_real": -445.2386169433594,
"logps/real": -404.5937805175781,
"loss": -3.0983,
"loss/gen": 0.26874154806137085,
"loss/real": -4.306643962860107,
"rewards/accuracies": 1.0,
"rewards/generated": -134.31141662597656,
"rewards/margins": 174.95623779296875,
"rewards/real": 40.64482498168945,
"step": 234
},
{
"epoch": 0.98,
"grad_norm": 70.3636098964248,
"learning_rate": 4.348290598290598e-07,
"logits/generated": -2.1750454902648926,
"logits/oppo_generated": -2.8430304527282715,
"logits/oppo_real": -2.873483657836914,
"logits/real": -2.3181915283203125,
"logps/generated": -196.6973876953125,
"logps/oppo_gen": -68.79239654541016,
"logps/oppo_real": -391.89910888671875,
"logps/real": -368.2919616699219,
"loss": -2.8684,
"loss/gen": 0.313241183757782,
"loss/real": -3.5294508934020996,
"rewards/accuracies": 1.0,
"rewards/generated": -127.90498352050781,
"rewards/margins": 151.51217651367188,
"rewards/real": 23.607187271118164,
"step": 235
},
{
"epoch": 0.99,
"grad_norm": 69.64450397263053,
"learning_rate": 4.3447293447293444e-07,
"logits/generated": -2.3948874473571777,
"logits/oppo_generated": -2.8508265018463135,
"logits/oppo_real": -2.9677348136901855,
"logits/real": -2.3284974098205566,
"logps/generated": -211.0993194580078,
"logps/oppo_gen": -88.43344116210938,
"logps/oppo_real": -438.55322265625,
"logps/real": -395.72943115234375,
"loss": -3.1273,
"loss/gen": 0.3554914891719818,
"loss/real": -4.292209625244141,
"rewards/accuracies": 1.0,
"rewards/generated": -122.66587829589844,
"rewards/margins": 165.48968505859375,
"rewards/real": 42.82379913330078,
"step": 236
},
{
"epoch": 0.99,
"grad_norm": 287.0907957423923,
"learning_rate": 4.341168091168091e-07,
"logits/generated": -2.3292388916015625,
"logits/oppo_generated": -2.816070079803467,
"logits/oppo_real": -3.012850761413574,
"logits/real": -2.2773959636688232,
"logps/generated": -189.18850708007812,
"logps/oppo_gen": -55.2912483215332,
"logps/oppo_real": -255.20977783203125,
"logps/real": -236.0473175048828,
"loss": -3.7901,
"loss/gen": 0.27096259593963623,
"loss/real": -3.297393321990967,
"rewards/accuracies": 1.0,
"rewards/generated": -133.89724731445312,
"rewards/margins": 153.05970764160156,
"rewards/real": 19.162452697753906,
"step": 237
},
{
"epoch": 1.0,
"grad_norm": 68.14058737960438,
"learning_rate": 4.3376068376068374e-07,
"logits/generated": -2.3537933826446533,
"logits/oppo_generated": -2.701869487762451,
"logits/oppo_real": -2.963564872741699,
"logits/real": -2.1307592391967773,
"logps/generated": -207.35293579101562,
"logps/oppo_gen": -83.03327941894531,
"logps/oppo_real": -312.4057312011719,
"logps/real": -294.8611755371094,
"loss": -3.0868,
"loss/gen": 0.3351461589336395,
"loss/real": -3.4385178089141846,
"rewards/accuracies": 1.0,
"rewards/generated": -124.31964111328125,
"rewards/margins": 141.8642120361328,
"rewards/real": 17.544559478759766,
"step": 238
},
{
"epoch": 1.0,
"grad_norm": 66.31466478558872,
"learning_rate": 4.3340455840455836e-07,
"logits/generated": -2.291761636734009,
"logits/oppo_generated": -2.8546152114868164,
"logits/oppo_real": -3.036848545074463,
"logits/real": -2.2752645015716553,
"logps/generated": -213.76364135742188,
"logps/oppo_gen": -75.19477844238281,
"logps/oppo_real": -314.191162109375,
"logps/real": -290.7506103515625,
"loss": -3.1633,
"loss/gen": 0.2661153972148895,
"loss/real": -3.5240395069122314,
"rewards/accuracies": 1.0,
"rewards/generated": -138.56887817382812,
"rewards/margins": 162.0093994140625,
"rewards/real": 23.440532684326172,
"step": 239
}
],
"logging_steps": 1.0,
"max_steps": 1434,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}