{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 4689, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.066098081023454e-09, "logits/generated": -3.009117841720581, "logits/real": -3.035973310470581, "logps/generated": -135.85076904296875, "logps/real": -392.24298095703125, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.0660980810234541e-08, "logits/generated": -3.0000903606414795, "logits/real": -3.033531427383423, "logps/generated": -123.82107543945312, "logps/real": -288.15521240234375, "loss": 0.6915, "rewards/accuracies": 0.4027777910232544, "rewards/generated": -0.00034361134748905897, "rewards/margins": 0.0022192317992448807, "rewards/real": 0.0018756203353404999, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.1321961620469082e-08, "logits/generated": -2.994983673095703, "logits/real": -3.063678026199341, "logps/generated": -100.84471130371094, "logps/real": -199.7611541748047, "loss": 0.6709, "rewards/accuracies": 0.8125, "rewards/generated": -0.03251934424042702, "rewards/margins": 0.0452335849404335, "rewards/real": 0.01271424163132906, "step": 20 }, { "epoch": 0.02, "learning_rate": 3.1982942430703625e-08, "logits/generated": -2.9915778636932373, "logits/real": -3.0394511222839355, "logps/generated": -110.52748107910156, "logps/real": -247.39794921875, "loss": 0.5963, "rewards/accuracies": 0.949999988079071, "rewards/generated": -0.15009155869483948, "rewards/margins": 0.23985818028450012, "rewards/real": 0.08976660668849945, "step": 30 }, { "epoch": 0.03, "learning_rate": 4.2643923240938164e-08, "logits/generated": -2.9606637954711914, "logits/real": -3.029853343963623, "logps/generated": -108.22274017333984, "logps/real": -235.333740234375, "loss": 0.4724, "rewards/accuracies": 0.9375, "rewards/generated": -0.39079219102859497, "rewards/margins": 0.5763204097747803, "rewards/real": 0.18552818894386292, "step": 40 }, { "epoch": 0.03, "learning_rate": 5.3304904051172704e-08, "logits/generated": -2.973432779312134, "logits/real": -3.049670457839966, "logps/generated": -111.00482177734375, "logps/real": -233.86471557617188, "loss": 0.386, "rewards/accuracies": 0.9375, "rewards/generated": -0.5961061120033264, "rewards/margins": 0.8909885287284851, "rewards/real": 0.2948824167251587, "step": 50 }, { "epoch": 0.04, "learning_rate": 6.396588486140725e-08, "logits/generated": -2.8845043182373047, "logits/real": -3.0369975566864014, "logps/generated": -117.0033187866211, "logps/real": -240.541259765625, "loss": 0.2917, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -0.9591042399406433, "rewards/margins": 1.4262845516204834, "rewards/real": 0.4671803414821625, "step": 60 }, { "epoch": 0.04, "learning_rate": 7.462686567164178e-08, "logits/generated": -2.90877103805542, "logits/real": -3.014878273010254, "logps/generated": -116.1484146118164, "logps/real": -237.03872680664062, "loss": 0.2178, "rewards/accuracies": 1.0, "rewards/generated": -1.303546667098999, "rewards/margins": 1.947913408279419, "rewards/real": 0.6443666815757751, "step": 70 }, { "epoch": 0.05, "learning_rate": 8.528784648187633e-08, "logits/generated": -2.89373517036438, "logits/real": -3.015916347503662, "logps/generated": -122.8466796875, "logps/real": -240.26565551757812, "loss": 0.2029, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.497593641281128, "rewards/margins": 2.123328447341919, "rewards/real": 0.6257346868515015, "step": 80 }, { "epoch": 0.06, "learning_rate": 9.594882729211087e-08, "logits/generated": -2.8569562435150146, "logits/real": -3.0172367095947266, "logps/generated": -127.4751205444336, "logps/real": -212.5527801513672, "loss": 0.1732, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.9681780338287354, "rewards/margins": 2.7209601402282715, "rewards/real": 0.7527822256088257, "step": 90 }, { "epoch": 0.06, "learning_rate": 1.0660980810234541e-07, "logits/generated": -2.8705780506134033, "logits/real": -2.9818317890167236, "logps/generated": -121.19868469238281, "logps/real": -237.79733276367188, "loss": 0.1566, "rewards/accuracies": 1.0, "rewards/generated": -1.7731469869613647, "rewards/margins": 2.562190294265747, "rewards/real": 0.7890429496765137, "step": 100 }, { "epoch": 0.07, "learning_rate": 1.1727078891257995e-07, "logits/generated": -2.848050832748413, "logits/real": -2.976525068283081, "logps/generated": -132.4097137451172, "logps/real": -262.47882080078125, "loss": 0.1559, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.003335475921631, "rewards/margins": 2.8780131340026855, "rewards/real": 0.874677836894989, "step": 110 }, { "epoch": 0.08, "learning_rate": 1.279317697228145e-07, "logits/generated": -2.82133150100708, "logits/real": -2.963655710220337, "logps/generated": -137.30862426757812, "logps/real": -256.87567138671875, "loss": 0.1414, "rewards/accuracies": 1.0, "rewards/generated": -2.5247628688812256, "rewards/margins": 3.375014066696167, "rewards/real": 0.8502515554428101, "step": 120 }, { "epoch": 0.08, "learning_rate": 1.3859275053304903e-07, "logits/generated": -2.808093309402466, "logits/real": -2.9520535469055176, "logps/generated": -125.69123840332031, "logps/real": -206.429931640625, "loss": 0.1105, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.6616690158843994, "rewards/margins": 3.4152088165283203, "rewards/real": 0.7535400390625, "step": 130 }, { "epoch": 0.09, "learning_rate": 1.4925373134328355e-07, "logits/generated": -2.779547691345215, "logits/real": -2.948424816131592, "logps/generated": -141.017822265625, "logps/real": -251.13095092773438, "loss": 0.1126, "rewards/accuracies": 1.0, "rewards/generated": -3.277395248413086, "rewards/margins": 4.223907470703125, "rewards/real": 0.9465125203132629, "step": 140 }, { "epoch": 0.1, "learning_rate": 1.5991471215351813e-07, "logits/generated": -2.783216714859009, "logits/real": -2.95442533493042, "logps/generated": -149.49667358398438, "logps/real": -203.93063354492188, "loss": 0.0934, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.4615890979766846, "rewards/margins": 4.269316673278809, "rewards/real": 0.8077276349067688, "step": 150 }, { "epoch": 0.1, "learning_rate": 1.7057569296375266e-07, "logits/generated": -2.7575511932373047, "logits/real": -2.9458460807800293, "logps/generated": -144.13388061523438, "logps/real": -260.1390686035156, "loss": 0.087, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.196132183074951, "rewards/margins": 4.332037448883057, "rewards/real": 1.1359055042266846, "step": 160 }, { "epoch": 0.11, "learning_rate": 1.8123667377398718e-07, "logits/generated": -2.7579033374786377, "logits/real": -2.943512439727783, "logps/generated": -153.94261169433594, "logps/real": -213.04147338867188, "loss": 0.0943, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.8576176166534424, "rewards/margins": 4.797067642211914, "rewards/real": 0.9394500851631165, "step": 170 }, { "epoch": 0.12, "learning_rate": 1.9189765458422174e-07, "logits/generated": -2.7721304893493652, "logits/real": -2.879235029220581, "logps/generated": -147.11483764648438, "logps/real": -197.543212890625, "loss": 0.0739, "rewards/accuracies": 1.0, "rewards/generated": -4.2951579093933105, "rewards/margins": 5.02254581451416, "rewards/real": 0.7273877859115601, "step": 180 }, { "epoch": 0.12, "learning_rate": 2.025586353944563e-07, "logits/generated": -2.7546629905700684, "logits/real": -2.9070351123809814, "logps/generated": -143.64361572265625, "logps/real": -201.7648468017578, "loss": 0.0929, "rewards/accuracies": 0.987500011920929, "rewards/generated": -4.264950752258301, "rewards/margins": 5.123804092407227, "rewards/real": 0.8588531613349915, "step": 190 }, { "epoch": 0.13, "learning_rate": 2.1321961620469082e-07, "logits/generated": -2.704319477081299, "logits/real": -2.8970680236816406, "logps/generated": -142.53402709960938, "logps/real": -203.28872680664062, "loss": 0.0702, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -4.170575141906738, "rewards/margins": 5.0223588943481445, "rewards/real": 0.8517836332321167, "step": 200 }, { "epoch": 0.13, "learning_rate": 2.2388059701492537e-07, "logits/generated": -2.714703321456909, "logits/real": -2.902454137802124, "logps/generated": -155.59530639648438, "logps/real": -206.8930206298828, "loss": 0.0655, "rewards/accuracies": 1.0, "rewards/generated": -5.102675437927246, "rewards/margins": 5.6945576667785645, "rewards/real": 0.5918816328048706, "step": 210 }, { "epoch": 0.14, "learning_rate": 2.345415778251599e-07, "logits/generated": -2.7051188945770264, "logits/real": -2.8684916496276855, "logps/generated": -158.3416290283203, "logps/real": -230.9740447998047, "loss": 0.0747, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -5.212487697601318, "rewards/margins": 6.16409158706665, "rewards/real": 0.9516040682792664, "step": 220 }, { "epoch": 0.15, "learning_rate": 2.452025586353944e-07, "logits/generated": -2.7530570030212402, "logits/real": -2.8788464069366455, "logps/generated": -161.42819213867188, "logps/real": -214.85202026367188, "loss": 0.0543, "rewards/accuracies": 0.987500011920929, "rewards/generated": -5.426655292510986, "rewards/margins": 5.798591136932373, "rewards/real": 0.37193647027015686, "step": 230 }, { "epoch": 0.15, "learning_rate": 2.55863539445629e-07, "logits/generated": -2.6958775520324707, "logits/real": -2.8694121837615967, "logps/generated": -161.2700958251953, "logps/real": -178.91891479492188, "loss": 0.0453, "rewards/accuracies": 1.0, "rewards/generated": -5.468385696411133, "rewards/margins": 5.783989906311035, "rewards/real": 0.315604031085968, "step": 240 }, { "epoch": 0.16, "learning_rate": 2.665245202558635e-07, "logits/generated": -2.712916851043701, "logits/real": -2.84846568107605, "logps/generated": -161.986328125, "logps/real": -215.94033813476562, "loss": 0.0668, "rewards/accuracies": 0.987500011920929, "rewards/generated": -6.006771087646484, "rewards/margins": 6.370474815368652, "rewards/real": 0.363704115152359, "step": 250 }, { "epoch": 0.17, "learning_rate": 2.7718550106609805e-07, "logits/generated": -2.6760001182556152, "logits/real": -2.8535640239715576, "logps/generated": -159.93551635742188, "logps/real": -228.80801391601562, "loss": 0.0695, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -5.786929130554199, "rewards/margins": 5.9885358810424805, "rewards/real": 0.20160651206970215, "step": 260 }, { "epoch": 0.17, "learning_rate": 2.878464818763326e-07, "logits/generated": -2.61970591545105, "logits/real": -2.8495993614196777, "logps/generated": -177.95175170898438, "logps/real": -246.32052612304688, "loss": 0.0479, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -6.833677768707275, "rewards/margins": 7.163332462310791, "rewards/real": 0.32965537905693054, "step": 270 }, { "epoch": 0.18, "learning_rate": 2.985074626865671e-07, "logits/generated": -2.6257996559143066, "logits/real": -2.8162739276885986, "logps/generated": -172.50180053710938, "logps/real": -239.02352905273438, "loss": 0.0459, "rewards/accuracies": 0.987500011920929, "rewards/generated": -7.3265485763549805, "rewards/margins": 7.394839286804199, "rewards/real": 0.06829099357128143, "step": 280 }, { "epoch": 0.19, "learning_rate": 3.0916844349680174e-07, "logits/generated": -2.6942734718322754, "logits/real": -2.8194546699523926, "logps/generated": -199.2237548828125, "logps/real": -242.6060791015625, "loss": 0.0424, "rewards/accuracies": 0.987500011920929, "rewards/generated": -8.099980354309082, "rewards/margins": 7.564375877380371, "rewards/real": -0.5356050729751587, "step": 290 }, { "epoch": 0.19, "learning_rate": 3.1982942430703626e-07, "logits/generated": -2.6435790061950684, "logits/real": -2.797001838684082, "logps/generated": -198.12205505371094, "logps/real": -245.3691864013672, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/generated": -8.79419231414795, "rewards/margins": 8.475809097290039, "rewards/real": -0.3183833062648773, "step": 300 }, { "epoch": 0.2, "learning_rate": 3.304904051172708e-07, "logits/generated": -2.620265483856201, "logits/real": -2.802964925765991, "logps/generated": -201.2637939453125, "logps/real": -251.5539093017578, "loss": 0.0469, "rewards/accuracies": 0.987500011920929, "rewards/generated": -8.044418334960938, "rewards/margins": 8.085213661193848, "rewards/real": 0.04079418629407883, "step": 310 }, { "epoch": 0.2, "learning_rate": 3.411513859275053e-07, "logits/generated": -2.5675623416900635, "logits/real": -2.8209726810455322, "logps/generated": -212.57968139648438, "logps/real": -236.8415985107422, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/generated": -10.560102462768555, "rewards/margins": 9.677938461303711, "rewards/real": -0.8821651339530945, "step": 320 }, { "epoch": 0.21, "learning_rate": 3.5181236673773984e-07, "logits/generated": -2.532376766204834, "logits/real": -2.7409865856170654, "logps/generated": -189.15724182128906, "logps/real": -281.72003173828125, "loss": 0.0543, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -8.800742149353027, "rewards/margins": 8.003158569335938, "rewards/real": -0.7975834608078003, "step": 330 }, { "epoch": 0.22, "learning_rate": 3.6247334754797437e-07, "logits/generated": -2.565183401107788, "logits/real": -2.7490944862365723, "logps/generated": -187.1248321533203, "logps/real": -226.8002166748047, "loss": 0.0525, "rewards/accuracies": 0.987500011920929, "rewards/generated": -8.799155235290527, "rewards/margins": 7.27255916595459, "rewards/real": -1.526595950126648, "step": 340 }, { "epoch": 0.22, "learning_rate": 3.7313432835820895e-07, "logits/generated": -2.5572450160980225, "logits/real": -2.779174327850342, "logps/generated": -200.363525390625, "logps/real": -253.7314453125, "loss": 0.0535, "rewards/accuracies": 0.987500011920929, "rewards/generated": -9.488149642944336, "rewards/margins": 8.699429512023926, "rewards/real": -0.7887213230133057, "step": 350 }, { "epoch": 0.23, "learning_rate": 3.8379530916844347e-07, "logits/generated": -2.5597970485687256, "logits/real": -2.734602928161621, "logps/generated": -207.70529174804688, "logps/real": -247.5392608642578, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/generated": -9.628881454467773, "rewards/margins": 8.560700416564941, "rewards/real": -1.0681811571121216, "step": 360 }, { "epoch": 0.24, "learning_rate": 3.9445628997867805e-07, "logits/generated": -2.5730834007263184, "logits/real": -2.7402100563049316, "logps/generated": -206.32864379882812, "logps/real": -205.1163330078125, "loss": 0.0677, "rewards/accuracies": 0.9375, "rewards/generated": -9.762407302856445, "rewards/margins": 8.460587501525879, "rewards/real": -1.3018196821212769, "step": 370 }, { "epoch": 0.24, "learning_rate": 4.051172707889126e-07, "logits/generated": -2.5265259742736816, "logits/real": -2.725435972213745, "logps/generated": -212.57608032226562, "logps/real": -223.85104370117188, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/generated": -10.966038703918457, "rewards/margins": 10.222475051879883, "rewards/real": -0.7435646653175354, "step": 380 }, { "epoch": 0.25, "learning_rate": 4.157782515991471e-07, "logits/generated": -2.44181227684021, "logits/real": -2.7299582958221436, "logps/generated": -221.58920288085938, "logps/real": -276.7346496582031, "loss": 0.0388, "rewards/accuracies": 0.987500011920929, "rewards/generated": -11.691991806030273, "rewards/margins": 11.166677474975586, "rewards/real": -0.5253145098686218, "step": 390 }, { "epoch": 0.26, "learning_rate": 4.2643923240938163e-07, "logits/generated": -2.507145404815674, "logits/real": -2.689694881439209, "logps/generated": -230.5036163330078, "logps/real": -235.1736602783203, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/generated": -11.178986549377441, "rewards/margins": 9.90050220489502, "rewards/real": -1.2784844636917114, "step": 400 }, { "epoch": 0.26, "learning_rate": 4.371002132196162e-07, "logits/generated": -2.502819061279297, "logits/real": -2.6965878009796143, "logps/generated": -234.39993286132812, "logps/real": -237.62744140625, "loss": 0.0323, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -12.532038688659668, "rewards/margins": 10.712557792663574, "rewards/real": -1.819482445716858, "step": 410 }, { "epoch": 0.27, "learning_rate": 4.4776119402985074e-07, "logits/generated": -2.4594552516937256, "logits/real": -2.703640937805176, "logps/generated": -215.9478302001953, "logps/real": -272.41522216796875, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/generated": -11.244471549987793, "rewards/margins": 10.069063186645508, "rewards/real": -1.1754099130630493, "step": 420 }, { "epoch": 0.28, "learning_rate": 4.5842217484008526e-07, "logits/generated": -2.4474716186523438, "logits/real": -2.6229748725891113, "logps/generated": -228.8519744873047, "logps/real": -245.61215209960938, "loss": 0.0219, "rewards/accuracies": 0.987500011920929, "rewards/generated": -12.294897079467773, "rewards/margins": 10.74439811706543, "rewards/real": -1.5504984855651855, "step": 430 }, { "epoch": 0.28, "learning_rate": 4.690831556503198e-07, "logits/generated": -2.515381336212158, "logits/real": -2.632841110229492, "logps/generated": -221.27975463867188, "logps/real": -260.13641357421875, "loss": 0.0353, "rewards/accuracies": 0.987500011920929, "rewards/generated": -11.185568809509277, "rewards/margins": 9.645071983337402, "rewards/real": -1.54049813747406, "step": 440 }, { "epoch": 0.29, "learning_rate": 4.797441364605543e-07, "logits/generated": -2.4365837574005127, "logits/real": -2.6345643997192383, "logps/generated": -214.77029418945312, "logps/real": -274.2986755371094, "loss": 0.0363, "rewards/accuracies": 0.987500011920929, "rewards/generated": -11.434799194335938, "rewards/margins": 10.464271545410156, "rewards/real": -0.9705268740653992, "step": 450 }, { "epoch": 0.29, "learning_rate": 4.904051172707888e-07, "logits/generated": -2.4966938495635986, "logits/real": -2.577971935272217, "logps/generated": -224.62466430664062, "logps/real": -238.035400390625, "loss": 0.0624, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -12.316131591796875, "rewards/margins": 10.328946113586426, "rewards/real": -1.987186074256897, "step": 460 }, { "epoch": 0.3, "learning_rate": 4.998815165876776e-07, "logits/generated": -2.5350544452667236, "logits/real": -2.603980302810669, "logps/generated": -231.7646484375, "logps/real": -275.49993896484375, "loss": 0.0361, "rewards/accuracies": 0.987500011920929, "rewards/generated": -12.520462989807129, "rewards/margins": 10.306425094604492, "rewards/real": -2.214038372039795, "step": 470 }, { "epoch": 0.31, "learning_rate": 4.98696682464455e-07, "logits/generated": -2.4045310020446777, "logits/real": -2.608654499053955, "logps/generated": -233.0770263671875, "logps/real": -309.36322021484375, "loss": 0.0336, "rewards/accuracies": 0.987500011920929, "rewards/generated": -12.922886848449707, "rewards/margins": 10.215009689331055, "rewards/real": -2.707876682281494, "step": 480 }, { "epoch": 0.31, "learning_rate": 4.975118483412322e-07, "logits/generated": -2.4344067573547363, "logits/real": -2.6011695861816406, "logps/generated": -244.77059936523438, "logps/real": -258.57525634765625, "loss": 0.0324, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -14.29552173614502, "rewards/margins": 10.700136184692383, "rewards/real": -3.595385789871216, "step": 490 }, { "epoch": 0.32, "learning_rate": 4.963270142180094e-07, "logits/generated": -2.4870104789733887, "logits/real": -2.630181074142456, "logps/generated": -239.10421752929688, "logps/real": -282.9891662597656, "loss": 0.0334, "rewards/accuracies": 1.0, "rewards/generated": -13.501965522766113, "rewards/margins": 10.855205535888672, "rewards/real": -2.6467597484588623, "step": 500 }, { "epoch": 0.33, "learning_rate": 4.951421800947867e-07, "logits/generated": -2.450176954269409, "logits/real": -2.5793440341949463, "logps/generated": -230.175048828125, "logps/real": -299.5987243652344, "loss": 0.0386, "rewards/accuracies": 0.987500011920929, "rewards/generated": -12.763737678527832, "rewards/margins": 10.467870712280273, "rewards/real": -2.295866012573242, "step": 510 }, { "epoch": 0.33, "learning_rate": 4.93957345971564e-07, "logits/generated": -2.473119020462036, "logits/real": -2.5927822589874268, "logps/generated": -239.88040161132812, "logps/real": -218.50906372070312, "loss": 0.0299, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -13.901123046875, "rewards/margins": 10.96774673461914, "rewards/real": -2.9333770275115967, "step": 520 }, { "epoch": 0.34, "learning_rate": 4.927725118483413e-07, "logits/generated": -2.455239772796631, "logits/real": -2.552358388900757, "logps/generated": -256.57354736328125, "logps/real": -221.6819610595703, "loss": 0.0191, "rewards/accuracies": 0.987500011920929, "rewards/generated": -14.927894592285156, "rewards/margins": 11.572967529296875, "rewards/real": -3.3549275398254395, "step": 530 }, { "epoch": 0.35, "learning_rate": 4.915876777251184e-07, "logits/generated": -2.435835599899292, "logits/real": -2.5121445655822754, "logps/generated": -263.62420654296875, "logps/real": -271.3969421386719, "loss": 0.029, "rewards/accuracies": 0.987500011920929, "rewards/generated": -15.717508316040039, "rewards/margins": 12.2947998046875, "rewards/real": -3.422708034515381, "step": 540 }, { "epoch": 0.35, "learning_rate": 4.904028436018957e-07, "logits/generated": -2.4802205562591553, "logits/real": -2.5491814613342285, "logps/generated": -241.066650390625, "logps/real": -222.16323852539062, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/generated": -13.833559036254883, "rewards/margins": 10.92573356628418, "rewards/real": -2.907824993133545, "step": 550 }, { "epoch": 0.36, "learning_rate": 4.892180094786729e-07, "logits/generated": -2.429196357727051, "logits/real": -2.5694682598114014, "logps/generated": -249.312255859375, "logps/real": -228.79592895507812, "loss": 0.0602, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -14.980000495910645, "rewards/margins": 11.490592002868652, "rewards/real": -3.4894092082977295, "step": 560 }, { "epoch": 0.36, "learning_rate": 4.880331753554502e-07, "logits/generated": -2.584810972213745, "logits/real": -2.7082631587982178, "logps/generated": -221.3469696044922, "logps/real": -252.34561157226562, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/generated": -12.08858871459961, "rewards/margins": 11.336016654968262, "rewards/real": -0.7525719404220581, "step": 570 }, { "epoch": 0.37, "learning_rate": 4.868483412322275e-07, "logits/generated": -2.567894458770752, "logits/real": -2.661423921585083, "logps/generated": -239.905517578125, "logps/real": -263.59918212890625, "loss": 0.0388, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -12.732629776000977, "rewards/margins": 11.67176628112793, "rewards/real": -1.0608632564544678, "step": 580 }, { "epoch": 0.38, "learning_rate": 4.856635071090047e-07, "logits/generated": -2.523099422454834, "logits/real": -2.6167566776275635, "logps/generated": -228.9334259033203, "logps/real": -236.6662139892578, "loss": 0.0337, "rewards/accuracies": 0.987500011920929, "rewards/generated": -11.951663970947266, "rewards/margins": 11.008401870727539, "rewards/real": -0.9432622194290161, "step": 590 }, { "epoch": 0.38, "learning_rate": 4.84478672985782e-07, "logits/generated": -2.4970996379852295, "logits/real": -2.646541118621826, "logps/generated": -250.40011596679688, "logps/real": -237.4553680419922, "loss": 0.0465, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -13.550500869750977, "rewards/margins": 11.562530517578125, "rewards/real": -1.9879701137542725, "step": 600 }, { "epoch": 0.39, "learning_rate": 4.832938388625591e-07, "logits/generated": -2.4692635536193848, "logits/real": -2.5598652362823486, "logps/generated": -243.6393585205078, "logps/real": -216.361083984375, "loss": 0.0315, "rewards/accuracies": 1.0, "rewards/generated": -14.87501049041748, "rewards/margins": 12.408052444458008, "rewards/real": -2.466959238052368, "step": 610 }, { "epoch": 0.4, "learning_rate": 4.821090047393365e-07, "logits/generated": -2.459730625152588, "logits/real": -2.5611660480499268, "logps/generated": -250.59909057617188, "logps/real": -267.4476623535156, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/generated": -14.998074531555176, "rewards/margins": 11.819680213928223, "rewards/real": -3.1783957481384277, "step": 620 }, { "epoch": 0.4, "learning_rate": 4.809241706161137e-07, "logits/generated": -2.427530288696289, "logits/real": -2.5768373012542725, "logps/generated": -273.51495361328125, "logps/real": -301.49310302734375, "loss": 0.0314, "rewards/accuracies": 0.987500011920929, "rewards/generated": -15.543191909790039, "rewards/margins": 12.9131441116333, "rewards/real": -2.6300482749938965, "step": 630 }, { "epoch": 0.41, "learning_rate": 4.79739336492891e-07, "logits/generated": -2.455411672592163, "logits/real": -2.5930774211883545, "logps/generated": -288.97998046875, "logps/real": -279.2014465332031, "loss": 0.0389, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -17.09014320373535, "rewards/margins": 14.309709548950195, "rewards/real": -2.7804324626922607, "step": 640 }, { "epoch": 0.42, "learning_rate": 4.785545023696682e-07, "logits/generated": -2.3936052322387695, "logits/real": -2.58086895942688, "logps/generated": -263.72552490234375, "logps/real": -258.2544250488281, "loss": 0.0273, "rewards/accuracies": 0.987500011920929, "rewards/generated": -16.33547592163086, "rewards/margins": 12.990063667297363, "rewards/real": -3.3454136848449707, "step": 650 }, { "epoch": 0.42, "learning_rate": 4.773696682464455e-07, "logits/generated": -2.4560112953186035, "logits/real": -2.57716703414917, "logps/generated": -265.07843017578125, "logps/real": -228.6409912109375, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/generated": -15.906695365905762, "rewards/margins": 12.9684476852417, "rewards/real": -2.9382481575012207, "step": 660 }, { "epoch": 0.43, "learning_rate": 4.7618483412322273e-07, "logits/generated": -2.3757667541503906, "logits/real": -2.5697007179260254, "logps/generated": -273.296142578125, "logps/real": -280.37835693359375, "loss": 0.022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.565967559814453, "rewards/margins": 14.934137344360352, "rewards/real": -2.6318306922912598, "step": 670 }, { "epoch": 0.44, "learning_rate": 4.7499999999999995e-07, "logits/generated": -2.3877642154693604, "logits/real": -2.4737722873687744, "logps/generated": -270.6985168457031, "logps/real": -216.703369140625, "loss": 0.0445, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -16.873882293701172, "rewards/margins": 13.297744750976562, "rewards/real": -3.576136827468872, "step": 680 }, { "epoch": 0.44, "learning_rate": 4.738151658767772e-07, "logits/generated": -2.357905149459839, "logits/real": -2.5049002170562744, "logps/generated": -250.86215209960938, "logps/real": -245.96664428710938, "loss": 0.0277, "rewards/accuracies": 0.987500011920929, "rewards/generated": -15.14183521270752, "rewards/margins": 13.497465133666992, "rewards/real": -1.6443710327148438, "step": 690 }, { "epoch": 0.45, "learning_rate": 4.726303317535545e-07, "logits/generated": -2.3780131340026855, "logits/real": -2.5022144317626953, "logps/generated": -256.3135070800781, "logps/real": -263.6549377441406, "loss": 0.0301, "rewards/accuracies": 0.987500011920929, "rewards/generated": -15.227795600891113, "rewards/margins": 13.609758377075195, "rewards/real": -1.618038535118103, "step": 700 }, { "epoch": 0.45, "learning_rate": 4.7144549763033177e-07, "logits/generated": -2.4752538204193115, "logits/real": -2.518859386444092, "logps/generated": -264.70703125, "logps/real": -244.23629760742188, "loss": 0.0386, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -15.996221542358398, "rewards/margins": 13.332077026367188, "rewards/real": -2.664144992828369, "step": 710 }, { "epoch": 0.46, "learning_rate": 4.70260663507109e-07, "logits/generated": -2.468376874923706, "logits/real": -2.4994096755981445, "logps/generated": -264.22625732421875, "logps/real": -266.2322998046875, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/generated": -15.571908950805664, "rewards/margins": 13.056289672851562, "rewards/real": -2.5156185626983643, "step": 720 }, { "epoch": 0.47, "learning_rate": 4.690758293838862e-07, "logits/generated": -2.4043707847595215, "logits/real": -2.4540791511535645, "logps/generated": -286.14727783203125, "logps/real": -247.5970001220703, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/generated": -17.904537200927734, "rewards/margins": 14.721110343933105, "rewards/real": -3.1834263801574707, "step": 730 }, { "epoch": 0.47, "learning_rate": 4.678909952606635e-07, "logits/generated": -2.445075035095215, "logits/real": -2.420685291290283, "logps/generated": -282.981689453125, "logps/real": -268.9815673828125, "loss": 0.0255, "rewards/accuracies": 0.987500011920929, "rewards/generated": -16.945833206176758, "rewards/margins": 14.0308198928833, "rewards/real": -2.915013074874878, "step": 740 }, { "epoch": 0.48, "learning_rate": 4.667061611374407e-07, "logits/generated": -2.4320383071899414, "logits/real": -2.505674362182617, "logps/generated": -259.02423095703125, "logps/real": -264.7741394042969, "loss": 0.0232, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -15.274149894714355, "rewards/margins": 12.89158821105957, "rewards/real": -2.382561683654785, "step": 750 }, { "epoch": 0.49, "learning_rate": 4.65521327014218e-07, "logits/generated": -2.450739860534668, "logits/real": -2.5071914196014404, "logps/generated": -255.146484375, "logps/real": -270.22210693359375, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/generated": -15.33598804473877, "rewards/margins": 13.024540901184082, "rewards/real": -2.3114476203918457, "step": 760 }, { "epoch": 0.49, "learning_rate": 4.6433649289099525e-07, "logits/generated": -2.4163241386413574, "logits/real": -2.4942569732666016, "logps/generated": -252.5504913330078, "logps/real": -287.49334716796875, "loss": 0.0382, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -14.969491958618164, "rewards/margins": 12.219428062438965, "rewards/real": -2.750063896179199, "step": 770 }, { "epoch": 0.5, "learning_rate": 4.631516587677725e-07, "logits/generated": -2.455021858215332, "logits/real": -2.4540677070617676, "logps/generated": -276.6857604980469, "logps/real": -255.8550262451172, "loss": 0.0156, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.444377899169922, "rewards/margins": 14.266420364379883, "rewards/real": -3.177957773208618, "step": 780 }, { "epoch": 0.51, "learning_rate": 4.6196682464454974e-07, "logits/generated": -2.4093470573425293, "logits/real": -2.470271348953247, "logps/generated": -253.94662475585938, "logps/real": -264.2749938964844, "loss": 0.0347, "rewards/accuracies": 0.987500011920929, "rewards/generated": -15.776174545288086, "rewards/margins": 13.000862121582031, "rewards/real": -2.775312900543213, "step": 790 }, { "epoch": 0.51, "learning_rate": 4.60781990521327e-07, "logits/generated": -2.470078229904175, "logits/real": -2.5724174976348877, "logps/generated": -267.72296142578125, "logps/real": -302.47650146484375, "loss": 0.0769, "rewards/accuracies": 0.987500011920929, "rewards/generated": -15.174127578735352, "rewards/margins": 11.917966842651367, "rewards/real": -3.256159543991089, "step": 800 }, { "epoch": 0.52, "learning_rate": 4.5959715639810423e-07, "logits/generated": -2.540130138397217, "logits/real": -2.6211256980895996, "logps/generated": -233.63998413085938, "logps/real": -288.25494384765625, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/generated": -12.888757705688477, "rewards/margins": 10.385416030883789, "rewards/real": -2.503340005874634, "step": 810 }, { "epoch": 0.52, "learning_rate": 4.5841232227488145e-07, "logits/generated": -2.538295269012451, "logits/real": -2.5893173217773438, "logps/generated": -249.40536499023438, "logps/real": -262.49261474609375, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/generated": -14.566215515136719, "rewards/margins": 11.135366439819336, "rewards/real": -3.4308483600616455, "step": 820 }, { "epoch": 0.53, "learning_rate": 4.5722748815165873e-07, "logits/generated": -2.488826274871826, "logits/real": -2.6115565299987793, "logps/generated": -265.40972900390625, "logps/real": -316.6865539550781, "loss": 0.0239, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -15.062512397766113, "rewards/margins": 11.956504821777344, "rewards/real": -3.1060070991516113, "step": 830 }, { "epoch": 0.54, "learning_rate": 4.56042654028436e-07, "logits/generated": -2.3590731620788574, "logits/real": -2.495044469833374, "logps/generated": -269.42681884765625, "logps/real": -309.6129455566406, "loss": 0.0397, "rewards/accuracies": 0.987500011920929, "rewards/generated": -15.904958724975586, "rewards/margins": 12.251577377319336, "rewards/real": -3.6533825397491455, "step": 840 }, { "epoch": 0.54, "learning_rate": 4.5485781990521327e-07, "logits/generated": -2.339799642562866, "logits/real": -2.4667954444885254, "logps/generated": -293.05267333984375, "logps/real": -278.82855224609375, "loss": 0.0349, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.821535110473633, "rewards/margins": 14.51972770690918, "rewards/real": -4.301807403564453, "step": 850 }, { "epoch": 0.55, "learning_rate": 4.536729857819905e-07, "logits/generated": -2.430203437805176, "logits/real": -2.537501573562622, "logps/generated": -272.36602783203125, "logps/real": -316.49383544921875, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/generated": -15.430384635925293, "rewards/margins": 12.318005561828613, "rewards/real": -3.1123790740966797, "step": 860 }, { "epoch": 0.56, "learning_rate": 4.5248815165876776e-07, "logits/generated": -2.2888057231903076, "logits/real": -2.4335665702819824, "logps/generated": -282.5766906738281, "logps/real": -279.062255859375, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/generated": -17.430299758911133, "rewards/margins": 13.197275161743164, "rewards/real": -4.233025550842285, "step": 870 }, { "epoch": 0.56, "learning_rate": 4.5130331753554504e-07, "logits/generated": -2.3086037635803223, "logits/real": -2.4613966941833496, "logps/generated": -259.74395751953125, "logps/real": -214.3597869873047, "loss": 0.041, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -16.724639892578125, "rewards/margins": 12.922261238098145, "rewards/real": -3.8023808002471924, "step": 880 }, { "epoch": 0.57, "learning_rate": 4.5011848341232226e-07, "logits/generated": -2.339136838912964, "logits/real": -2.4579660892486572, "logps/generated": -270.0120544433594, "logps/real": -232.5855712890625, "loss": 0.022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -16.816797256469727, "rewards/margins": 13.385887145996094, "rewards/real": -3.4309089183807373, "step": 890 }, { "epoch": 0.58, "learning_rate": 4.489336492890995e-07, "logits/generated": -2.2905521392822266, "logits/real": -2.361793279647827, "logps/generated": -292.40521240234375, "logps/real": -229.45394897460938, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/generated": -18.920324325561523, "rewards/margins": 14.62226390838623, "rewards/real": -4.298060417175293, "step": 900 }, { "epoch": 0.58, "learning_rate": 4.4774881516587675e-07, "logits/generated": -2.34869122505188, "logits/real": -2.428506374359131, "logps/generated": -275.98199462890625, "logps/real": -236.7742462158203, "loss": 0.0458, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -16.499164581298828, "rewards/margins": 11.50661563873291, "rewards/real": -4.992548942565918, "step": 910 }, { "epoch": 0.59, "learning_rate": 4.46563981042654e-07, "logits/generated": -2.2736713886260986, "logits/real": -2.445061206817627, "logps/generated": -305.16497802734375, "logps/real": -278.02374267578125, "loss": 0.0338, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -20.268054962158203, "rewards/margins": 13.73902416229248, "rewards/real": -6.529031276702881, "step": 920 }, { "epoch": 0.6, "learning_rate": 4.4537914691943124e-07, "logits/generated": -2.2922310829162598, "logits/real": -2.4935543537139893, "logps/generated": -277.3129577636719, "logps/real": -288.2353515625, "loss": 0.0433, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -17.90709686279297, "rewards/margins": 11.381673812866211, "rewards/real": -6.525424003601074, "step": 930 }, { "epoch": 0.6, "learning_rate": 4.441943127962085e-07, "logits/generated": -2.3361358642578125, "logits/real": -2.5857903957366943, "logps/generated": -313.67254638671875, "logps/real": -327.71337890625, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/generated": -19.346548080444336, "rewards/margins": 13.977258682250977, "rewards/real": -5.369288444519043, "step": 940 }, { "epoch": 0.61, "learning_rate": 4.430094786729858e-07, "logits/generated": -2.338799476623535, "logits/real": -2.565807819366455, "logps/generated": -290.45855712890625, "logps/real": -251.903076171875, "loss": 0.0231, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.45851707458496, "rewards/margins": 14.581155776977539, "rewards/real": -4.877361297607422, "step": 950 }, { "epoch": 0.61, "learning_rate": 4.4182464454976306e-07, "logits/generated": -2.3942151069641113, "logits/real": -2.580857992172241, "logps/generated": -293.68487548828125, "logps/real": -248.95877075195312, "loss": 0.0311, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.443933486938477, "rewards/margins": 14.078336715698242, "rewards/real": -5.365598201751709, "step": 960 }, { "epoch": 0.62, "learning_rate": 4.4063981042654023e-07, "logits/generated": -2.3892123699188232, "logits/real": -2.667109727859497, "logps/generated": -273.72802734375, "logps/real": -326.7926025390625, "loss": 0.0217, "rewards/accuracies": 0.987500011920929, "rewards/generated": -16.955928802490234, "rewards/margins": 13.641815185546875, "rewards/real": -3.3141121864318848, "step": 970 }, { "epoch": 0.63, "learning_rate": 4.394549763033175e-07, "logits/generated": -2.3923146724700928, "logits/real": -2.559901714324951, "logps/generated": -286.25128173828125, "logps/real": -302.48834228515625, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/generated": -18.069164276123047, "rewards/margins": 13.043965339660645, "rewards/real": -5.025198936462402, "step": 980 }, { "epoch": 0.63, "learning_rate": 4.382701421800948e-07, "logits/generated": -2.410012722015381, "logits/real": -2.62170147895813, "logps/generated": -307.0192565917969, "logps/real": -260.86248779296875, "loss": 0.0173, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.955039978027344, "rewards/margins": 15.444422721862793, "rewards/real": -4.510618686676025, "step": 990 }, { "epoch": 0.64, "learning_rate": 4.37085308056872e-07, "logits/generated": -2.387606143951416, "logits/real": -2.554452419281006, "logps/generated": -283.7722473144531, "logps/real": -292.525390625, "loss": 0.054, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -17.216604232788086, "rewards/margins": 12.737438201904297, "rewards/real": -4.4791669845581055, "step": 1000 }, { "epoch": 0.65, "learning_rate": 4.3590047393364927e-07, "logits/generated": -2.4104082584381104, "logits/real": -2.5261144638061523, "logps/generated": -282.57440185546875, "logps/real": -258.09112548828125, "loss": 0.0461, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -17.301794052124023, "rewards/margins": 12.839556694030762, "rewards/real": -4.462237358093262, "step": 1010 }, { "epoch": 0.65, "learning_rate": 4.3471563981042654e-07, "logits/generated": -2.3187150955200195, "logits/real": -2.5287089347839355, "logps/generated": -286.00872802734375, "logps/real": -295.44097900390625, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/generated": -18.19363021850586, "rewards/margins": 14.453539848327637, "rewards/real": -3.7400927543640137, "step": 1020 }, { "epoch": 0.66, "learning_rate": 4.335308056872038e-07, "logits/generated": -2.332850694656372, "logits/real": -2.53454327583313, "logps/generated": -293.3934631347656, "logps/real": -295.30621337890625, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/generated": -18.05362892150879, "rewards/margins": 13.677447319030762, "rewards/real": -4.376180171966553, "step": 1030 }, { "epoch": 0.67, "learning_rate": 4.32345971563981e-07, "logits/generated": -2.2965195178985596, "logits/real": -2.4986045360565186, "logps/generated": -274.46112060546875, "logps/real": -285.53778076171875, "loss": 0.0384, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.356571197509766, "rewards/margins": 12.720812797546387, "rewards/real": -4.635758399963379, "step": 1040 }, { "epoch": 0.67, "learning_rate": 4.3116113744075825e-07, "logits/generated": -2.300463914871216, "logits/real": -2.478001117706299, "logps/generated": -303.2227783203125, "logps/real": -287.7460021972656, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/generated": -19.46930694580078, "rewards/margins": 14.814462661743164, "rewards/real": -4.654845237731934, "step": 1050 }, { "epoch": 0.68, "learning_rate": 4.299763033175355e-07, "logits/generated": -2.3426880836486816, "logits/real": -2.5291225910186768, "logps/generated": -289.17120361328125, "logps/real": -266.672119140625, "loss": 0.0375, "rewards/accuracies": 0.949999988079071, "rewards/generated": -18.12887191772461, "rewards/margins": 13.657282829284668, "rewards/real": -4.471587657928467, "step": 1060 }, { "epoch": 0.68, "learning_rate": 4.2879146919431274e-07, "logits/generated": -2.3856160640716553, "logits/real": -2.5741703510284424, "logps/generated": -296.04132080078125, "logps/real": -313.6421813964844, "loss": 0.0326, "rewards/accuracies": 1.0, "rewards/generated": -18.098491668701172, "rewards/margins": 14.236276626586914, "rewards/real": -3.862215042114258, "step": 1070 }, { "epoch": 0.69, "learning_rate": 4.2760663507109e-07, "logits/generated": -2.3564302921295166, "logits/real": -2.53045916557312, "logps/generated": -310.1300964355469, "logps/real": -310.7464599609375, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/generated": -19.677143096923828, "rewards/margins": 15.148625373840332, "rewards/real": -4.528520584106445, "step": 1080 }, { "epoch": 0.7, "learning_rate": 4.264218009478673e-07, "logits/generated": -2.399268388748169, "logits/real": -2.4992451667785645, "logps/generated": -290.4671936035156, "logps/real": -260.6993713378906, "loss": 0.0119, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.11050033569336, "rewards/margins": 14.06385326385498, "rewards/real": -5.0466485023498535, "step": 1090 }, { "epoch": 0.7, "learning_rate": 4.2523696682464456e-07, "logits/generated": -2.289309501647949, "logits/real": -2.5127501487731934, "logps/generated": -306.51641845703125, "logps/real": -326.2106018066406, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/generated": -19.816822052001953, "rewards/margins": 14.96058177947998, "rewards/real": -4.856239318847656, "step": 1100 }, { "epoch": 0.71, "learning_rate": 4.240521327014218e-07, "logits/generated": -2.3970062732696533, "logits/real": -2.5096230506896973, "logps/generated": -272.3966064453125, "logps/real": -278.3008728027344, "loss": 0.036, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -16.609127044677734, "rewards/margins": 12.158090591430664, "rewards/real": -4.451037406921387, "step": 1110 }, { "epoch": 0.72, "learning_rate": 4.22867298578199e-07, "logits/generated": -2.376469135284424, "logits/real": -2.57863187789917, "logps/generated": -291.18463134765625, "logps/real": -284.26727294921875, "loss": 0.0263, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -18.04172134399414, "rewards/margins": 13.597633361816406, "rewards/real": -4.444087028503418, "step": 1120 }, { "epoch": 0.72, "learning_rate": 4.216824644549763e-07, "logits/generated": -2.399825096130371, "logits/real": -2.5337142944335938, "logps/generated": -279.63177490234375, "logps/real": -290.47589111328125, "loss": 0.0298, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.438886642456055, "rewards/margins": 12.631416320800781, "rewards/real": -4.807469844818115, "step": 1130 }, { "epoch": 0.73, "learning_rate": 4.2049763033175355e-07, "logits/generated": -2.3421072959899902, "logits/real": -2.4822893142700195, "logps/generated": -308.11932373046875, "logps/real": -330.6546936035156, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/generated": -18.39755630493164, "rewards/margins": 13.66209888458252, "rewards/real": -4.735455513000488, "step": 1140 }, { "epoch": 0.74, "learning_rate": 4.1931279620853077e-07, "logits/generated": -2.342663288116455, "logits/real": -2.5248234272003174, "logps/generated": -309.02117919921875, "logps/real": -296.07037353515625, "loss": 0.0355, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -20.545312881469727, "rewards/margins": 15.586636543273926, "rewards/real": -4.958677768707275, "step": 1150 }, { "epoch": 0.74, "learning_rate": 4.1812796208530804e-07, "logits/generated": -2.337934970855713, "logits/real": -2.4495997428894043, "logps/generated": -301.7442626953125, "logps/real": -266.92401123046875, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/generated": -19.53786849975586, "rewards/margins": 13.502288818359375, "rewards/real": -6.03557825088501, "step": 1160 }, { "epoch": 0.75, "learning_rate": 4.169431279620853e-07, "logits/generated": -2.2867414951324463, "logits/real": -2.378726005554199, "logps/generated": -292.9967346191406, "logps/real": -269.1019287109375, "loss": 0.032, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.068912506103516, "rewards/margins": 13.344259262084961, "rewards/real": -5.7246527671813965, "step": 1170 }, { "epoch": 0.75, "learning_rate": 4.1575829383886253e-07, "logits/generated": -2.275810718536377, "logits/real": -2.4016122817993164, "logps/generated": -312.2940979003906, "logps/real": -276.72027587890625, "loss": 0.0155, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.791011810302734, "rewards/margins": 15.633366584777832, "rewards/real": -5.157645225524902, "step": 1180 }, { "epoch": 0.76, "learning_rate": 4.145734597156398e-07, "logits/generated": -2.264380931854248, "logits/real": -2.3693957328796387, "logps/generated": -299.9407653808594, "logps/real": -277.1905517578125, "loss": 0.0168, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.110647201538086, "rewards/margins": 14.023755073547363, "rewards/real": -6.086895942687988, "step": 1190 }, { "epoch": 0.77, "learning_rate": 4.1338862559241703e-07, "logits/generated": -2.276496410369873, "logits/real": -2.4009838104248047, "logps/generated": -344.5177307128906, "logps/real": -310.5501708984375, "loss": 0.0237, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.132648468017578, "rewards/margins": 16.120880126953125, "rewards/real": -7.011769771575928, "step": 1200 }, { "epoch": 0.77, "learning_rate": 4.122037914691943e-07, "logits/generated": -2.283618211746216, "logits/real": -2.4024503231048584, "logps/generated": -287.85919189453125, "logps/real": -278.244140625, "loss": 0.0222, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.882511138916016, "rewards/margins": 13.156415939331055, "rewards/real": -5.726097106933594, "step": 1210 }, { "epoch": 0.78, "learning_rate": 4.110189573459715e-07, "logits/generated": -2.295532703399658, "logits/real": -2.3860344886779785, "logps/generated": -302.39312744140625, "logps/real": -255.3650665283203, "loss": 0.0232, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -20.025859832763672, "rewards/margins": 14.425395011901855, "rewards/real": -5.600464820861816, "step": 1220 }, { "epoch": 0.79, "learning_rate": 4.098341232227488e-07, "logits/generated": -2.3488059043884277, "logits/real": -2.474379062652588, "logps/generated": -315.18756103515625, "logps/real": -282.739990234375, "loss": 0.1158, "rewards/accuracies": 1.0, "rewards/generated": -19.212299346923828, "rewards/margins": 14.532743453979492, "rewards/real": -4.679556369781494, "step": 1230 }, { "epoch": 0.79, "learning_rate": 4.0864928909952607e-07, "logits/generated": -2.3911032676696777, "logits/real": -2.5036733150482178, "logps/generated": -285.2294921875, "logps/real": -312.811279296875, "loss": 0.0261, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.23556900024414, "rewards/margins": 11.865394592285156, "rewards/real": -5.370173931121826, "step": 1240 }, { "epoch": 0.8, "learning_rate": 4.074644549763033e-07, "logits/generated": -2.255256414413452, "logits/real": -2.4088189601898193, "logps/generated": -281.2724914550781, "logps/real": -258.9282531738281, "loss": 0.0448, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.890933990478516, "rewards/margins": 12.679600715637207, "rewards/real": -6.211331844329834, "step": 1250 }, { "epoch": 0.81, "learning_rate": 4.0627962085308056e-07, "logits/generated": -2.271714448928833, "logits/real": -2.46032977104187, "logps/generated": -288.73077392578125, "logps/real": -320.3868713378906, "loss": 0.0413, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -18.242067337036133, "rewards/margins": 13.210156440734863, "rewards/real": -5.0319108963012695, "step": 1260 }, { "epoch": 0.81, "learning_rate": 4.0509478672985783e-07, "logits/generated": -2.35395884513855, "logits/real": -2.448251724243164, "logps/generated": -300.24700927734375, "logps/real": -272.71044921875, "loss": 0.0568, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -18.208026885986328, "rewards/margins": 12.991012573242188, "rewards/real": -5.21701717376709, "step": 1270 }, { "epoch": 0.82, "learning_rate": 4.0390995260663505e-07, "logits/generated": -2.374265193939209, "logits/real": -2.5217158794403076, "logps/generated": -290.9504699707031, "logps/real": -316.7360534667969, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/generated": -18.148794174194336, "rewards/margins": 13.157681465148926, "rewards/real": -4.99111270904541, "step": 1280 }, { "epoch": 0.83, "learning_rate": 4.0272511848341227e-07, "logits/generated": -2.316471576690674, "logits/real": -2.527329206466675, "logps/generated": -287.37139892578125, "logps/real": -346.442626953125, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/generated": -17.83603286743164, "rewards/margins": 12.730929374694824, "rewards/real": -5.105101585388184, "step": 1290 }, { "epoch": 0.83, "learning_rate": 4.0154028436018954e-07, "logits/generated": -2.3851349353790283, "logits/real": -2.5525565147399902, "logps/generated": -287.569091796875, "logps/real": -334.72625732421875, "loss": 0.0247, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.36513900756836, "rewards/margins": 12.514284133911133, "rewards/real": -4.850854396820068, "step": 1300 }, { "epoch": 0.84, "learning_rate": 4.003554502369668e-07, "logits/generated": -2.350069522857666, "logits/real": -2.5130248069763184, "logps/generated": -290.7829895019531, "logps/real": -285.68975830078125, "loss": 0.0513, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -18.050655364990234, "rewards/margins": 12.557219505310059, "rewards/real": -5.493437767028809, "step": 1310 }, { "epoch": 0.84, "learning_rate": 3.991706161137441e-07, "logits/generated": -2.3853585720062256, "logits/real": -2.5433051586151123, "logps/generated": -280.49371337890625, "logps/real": -309.3614807128906, "loss": 0.0172, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.24584197998047, "rewards/margins": 11.380583763122559, "rewards/real": -5.865257740020752, "step": 1320 }, { "epoch": 0.85, "learning_rate": 3.979857819905213e-07, "logits/generated": -2.2883365154266357, "logits/real": -2.509340763092041, "logps/generated": -281.46185302734375, "logps/real": -317.1385192871094, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/generated": -17.596721649169922, "rewards/margins": 12.168859481811523, "rewards/real": -5.427859783172607, "step": 1330 }, { "epoch": 0.86, "learning_rate": 3.968009478672986e-07, "logits/generated": -2.276294469833374, "logits/real": -2.529151201248169, "logps/generated": -275.62384033203125, "logps/real": -320.17840576171875, "loss": 0.0174, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.126468658447266, "rewards/margins": 12.034772872924805, "rewards/real": -5.091695785522461, "step": 1340 }, { "epoch": 0.86, "learning_rate": 3.9561611374407585e-07, "logits/generated": -2.397404432296753, "logits/real": -2.5299489498138428, "logps/generated": -279.8531799316406, "logps/real": -248.511474609375, "loss": 0.0293, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.51804542541504, "rewards/margins": 12.492974281311035, "rewards/real": -5.025073051452637, "step": 1350 }, { "epoch": 0.87, "learning_rate": 3.94431279620853e-07, "logits/generated": -2.3150904178619385, "logits/real": -2.5204906463623047, "logps/generated": -291.2157897949219, "logps/real": -299.140380859375, "loss": 0.0278, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.304561614990234, "rewards/margins": 13.8828763961792, "rewards/real": -4.421683311462402, "step": 1360 }, { "epoch": 0.88, "learning_rate": 3.932464454976303e-07, "logits/generated": -2.308262825012207, "logits/real": -2.466447591781616, "logps/generated": -288.3290100097656, "logps/real": -261.1494140625, "loss": 0.0338, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.667638778686523, "rewards/margins": 13.086454391479492, "rewards/real": -5.581185340881348, "step": 1370 }, { "epoch": 0.88, "learning_rate": 3.9206161137440757e-07, "logits/generated": -2.327383041381836, "logits/real": -2.471449851989746, "logps/generated": -295.0485534667969, "logps/real": -267.4327087402344, "loss": 0.0226, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -18.104694366455078, "rewards/margins": 12.788865089416504, "rewards/real": -5.315830707550049, "step": 1380 }, { "epoch": 0.89, "learning_rate": 3.9087677725118484e-07, "logits/generated": -2.2615890502929688, "logits/real": -2.4707770347595215, "logps/generated": -306.2514343261719, "logps/real": -302.1688232421875, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/generated": -19.19952964782715, "rewards/margins": 14.145184516906738, "rewards/real": -5.054343223571777, "step": 1390 }, { "epoch": 0.9, "learning_rate": 3.8969194312796206e-07, "logits/generated": -2.276962995529175, "logits/real": -2.43565034866333, "logps/generated": -311.103515625, "logps/real": -271.02191162109375, "loss": 0.0308, "rewards/accuracies": 1.0, "rewards/generated": -20.81112289428711, "rewards/margins": 14.516395568847656, "rewards/real": -6.2947282791137695, "step": 1400 }, { "epoch": 0.9, "learning_rate": 3.8850710900473933e-07, "logits/generated": -2.2672057151794434, "logits/real": -2.4533634185791016, "logps/generated": -306.622802734375, "logps/real": -320.2618408203125, "loss": 0.0143, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.772937774658203, "rewards/margins": 14.028053283691406, "rewards/real": -5.744885444641113, "step": 1410 }, { "epoch": 0.91, "learning_rate": 3.873222748815166e-07, "logits/generated": -2.372107744216919, "logits/real": -2.481254816055298, "logps/generated": -281.7245788574219, "logps/real": -283.98748779296875, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/generated": -17.529558181762695, "rewards/margins": 11.68727970123291, "rewards/real": -5.842276096343994, "step": 1420 }, { "epoch": 0.91, "learning_rate": 3.8613744075829377e-07, "logits/generated": -2.2598013877868652, "logits/real": -2.47208833694458, "logps/generated": -285.4142150878906, "logps/real": -285.35162353515625, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/generated": -18.84803581237793, "rewards/margins": 12.526016235351562, "rewards/real": -6.322018623352051, "step": 1430 }, { "epoch": 0.92, "learning_rate": 3.8495260663507104e-07, "logits/generated": -2.3580639362335205, "logits/real": -2.4859871864318848, "logps/generated": -307.646240234375, "logps/real": -323.43414306640625, "loss": 0.0195, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.400405883789062, "rewards/margins": 13.40168285369873, "rewards/real": -5.998722553253174, "step": 1440 }, { "epoch": 0.93, "learning_rate": 3.837677725118483e-07, "logits/generated": -2.2192952632904053, "logits/real": -2.428776979446411, "logps/generated": -305.4685974121094, "logps/real": -296.8509216308594, "loss": 0.0205, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.33831024169922, "rewards/margins": 13.525497436523438, "rewards/real": -5.812812805175781, "step": 1450 }, { "epoch": 0.93, "learning_rate": 3.825829383886256e-07, "logits/generated": -2.2133755683898926, "logits/real": -2.446166753768921, "logps/generated": -308.5284729003906, "logps/real": -311.6910095214844, "loss": 0.0166, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.147266387939453, "rewards/margins": 14.619921684265137, "rewards/real": -5.52734375, "step": 1460 }, { "epoch": 0.94, "learning_rate": 3.813981042654028e-07, "logits/generated": -2.251068592071533, "logits/real": -2.3732194900512695, "logps/generated": -301.8031311035156, "logps/real": -263.5353698730469, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/generated": -20.869558334350586, "rewards/margins": 13.65925407409668, "rewards/real": -7.210305213928223, "step": 1470 }, { "epoch": 0.95, "learning_rate": 3.802132701421801e-07, "logits/generated": -2.2254586219787598, "logits/real": -2.400791645050049, "logps/generated": -323.98382568359375, "logps/real": -320.86419677734375, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/generated": -21.44746971130371, "rewards/margins": 15.346704483032227, "rewards/real": -6.100764751434326, "step": 1480 }, { "epoch": 0.95, "learning_rate": 3.7902843601895736e-07, "logits/generated": -2.26737117767334, "logits/real": -2.390167713165283, "logps/generated": -312.045166015625, "logps/real": -311.6873474121094, "loss": 0.0164, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.39379119873047, "rewards/margins": 14.455484390258789, "rewards/real": -5.938305854797363, "step": 1490 }, { "epoch": 0.96, "learning_rate": 3.778436018957346e-07, "logits/generated": -2.294706344604492, "logits/real": -2.4145607948303223, "logps/generated": -298.1540222167969, "logps/real": -306.29913330078125, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/generated": -19.42539405822754, "rewards/margins": 14.029606819152832, "rewards/real": -5.395786762237549, "step": 1500 }, { "epoch": 0.97, "learning_rate": 3.766587677725118e-07, "logits/generated": -2.3276476860046387, "logits/real": -2.3730132579803467, "logps/generated": -314.5223083496094, "logps/real": -288.64404296875, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/generated": -20.662466049194336, "rewards/margins": 14.94616985321045, "rewards/real": -5.716297626495361, "step": 1510 }, { "epoch": 0.97, "learning_rate": 3.7547393364928907e-07, "logits/generated": -2.2427303791046143, "logits/real": -2.3749637603759766, "logps/generated": -315.2216796875, "logps/real": -287.2800598144531, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/generated": -20.293941497802734, "rewards/margins": 14.373272895812988, "rewards/real": -5.920670509338379, "step": 1520 }, { "epoch": 0.98, "learning_rate": 3.7428909952606634e-07, "logits/generated": -2.350670337677002, "logits/real": -2.3875861167907715, "logps/generated": -317.49090576171875, "logps/real": -294.496337890625, "loss": 0.0287, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -20.363561630249023, "rewards/margins": 13.375410079956055, "rewards/real": -6.988152503967285, "step": 1530 }, { "epoch": 0.99, "learning_rate": 3.7310426540284356e-07, "logits/generated": -2.2628579139709473, "logits/real": -2.4144299030303955, "logps/generated": -286.41558837890625, "logps/real": -317.0753479003906, "loss": 0.0267, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -18.326913833618164, "rewards/margins": 13.711787223815918, "rewards/real": -4.6151275634765625, "step": 1540 }, { "epoch": 0.99, "learning_rate": 3.7191943127962083e-07, "logits/generated": -2.2808165550231934, "logits/real": -2.3928608894348145, "logps/generated": -290.4697265625, "logps/real": -281.3714904785156, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/generated": -18.250659942626953, "rewards/margins": 12.909965515136719, "rewards/real": -5.340696811676025, "step": 1550 }, { "epoch": 1.0, "learning_rate": 3.707345971563981e-07, "logits/generated": -2.2403323650360107, "logits/real": -2.375622272491455, "logps/generated": -299.19879150390625, "logps/real": -264.7336730957031, "loss": 0.0178, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.26840591430664, "rewards/margins": 14.170907974243164, "rewards/real": -5.097498416900635, "step": 1560 }, { "epoch": 1.0, "learning_rate": 3.695497630331754e-07, "logits/generated": -2.2569198608398438, "logits/real": -2.3728420734405518, "logps/generated": -313.026123046875, "logps/real": -280.13641357421875, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/generated": -19.841373443603516, "rewards/margins": 15.321266174316406, "rewards/real": -4.520107269287109, "step": 1570 }, { "epoch": 1.01, "learning_rate": 3.683649289099526e-07, "logits/generated": -2.2165145874023438, "logits/real": -2.3671815395355225, "logps/generated": -299.0692138671875, "logps/real": -294.7959899902344, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -19.413881301879883, "rewards/margins": 15.00433349609375, "rewards/real": -4.409549713134766, "step": 1580 }, { "epoch": 1.02, "learning_rate": 3.671800947867298e-07, "logits/generated": -2.136179208755493, "logits/real": -2.3207507133483887, "logps/generated": -303.35137939453125, "logps/real": -287.4952392578125, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/generated": -20.405773162841797, "rewards/margins": 15.668429374694824, "rewards/real": -4.73734188079834, "step": 1590 }, { "epoch": 1.02, "learning_rate": 3.659952606635071e-07, "logits/generated": -2.13120698928833, "logits/real": -2.320891857147217, "logps/generated": -306.2853698730469, "logps/real": -283.64691162109375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -19.937503814697266, "rewards/margins": 15.81896686553955, "rewards/real": -4.11853551864624, "step": 1600 }, { "epoch": 1.03, "learning_rate": 3.648104265402843e-07, "logits/generated": -2.1870360374450684, "logits/real": -2.368260622024536, "logps/generated": -300.4111328125, "logps/real": -296.3978576660156, "loss": 0.0066, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.767839431762695, "rewards/margins": 14.85081958770752, "rewards/real": -4.917020797729492, "step": 1610 }, { "epoch": 1.04, "learning_rate": 3.636255924170616e-07, "logits/generated": -2.1743369102478027, "logits/real": -2.3282134532928467, "logps/generated": -321.19610595703125, "logps/real": -281.8503723144531, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/generated": -21.900375366210938, "rewards/margins": 16.69029426574707, "rewards/real": -5.210080623626709, "step": 1620 }, { "epoch": 1.04, "learning_rate": 3.6244075829383886e-07, "logits/generated": -2.220449447631836, "logits/real": -2.344844341278076, "logps/generated": -303.0509948730469, "logps/real": -268.18267822265625, "loss": 0.0111, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.523670196533203, "rewards/margins": 14.576342582702637, "rewards/real": -4.947329044342041, "step": 1630 }, { "epoch": 1.05, "learning_rate": 3.6125592417061613e-07, "logits/generated": -2.132232904434204, "logits/real": -2.3197970390319824, "logps/generated": -314.4263916015625, "logps/real": -292.0470275878906, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/generated": -20.414146423339844, "rewards/margins": 16.534496307373047, "rewards/real": -3.879650592803955, "step": 1640 }, { "epoch": 1.06, "learning_rate": 3.6007109004739335e-07, "logits/generated": -2.2374231815338135, "logits/real": -2.352515697479248, "logps/generated": -316.6592102050781, "logps/real": -296.956787109375, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/generated": -20.78524398803711, "rewards/margins": 16.528079986572266, "rewards/real": -4.257164001464844, "step": 1650 }, { "epoch": 1.06, "learning_rate": 3.588862559241706e-07, "logits/generated": -2.1662914752960205, "logits/real": -2.328010082244873, "logps/generated": -304.8622741699219, "logps/real": -291.980712890625, "loss": 0.0142, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.998083114624023, "rewards/margins": 15.650113105773926, "rewards/real": -4.347971439361572, "step": 1660 }, { "epoch": 1.07, "learning_rate": 3.5770142180094784e-07, "logits/generated": -2.306222438812256, "logits/real": -2.3590731620788574, "logps/generated": -312.7652282714844, "logps/real": -287.0503234863281, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/generated": -20.211284637451172, "rewards/margins": 15.283581733703613, "rewards/real": -4.927702903747559, "step": 1670 }, { "epoch": 1.07, "learning_rate": 3.5651658767772506e-07, "logits/generated": -2.223875045776367, "logits/real": -2.365973472595215, "logps/generated": -300.7914733886719, "logps/real": -289.1277770996094, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/generated": -19.49875259399414, "rewards/margins": 15.722567558288574, "rewards/real": -3.7761855125427246, "step": 1680 }, { "epoch": 1.08, "learning_rate": 3.5533175355450234e-07, "logits/generated": -2.167297840118408, "logits/real": -2.2996826171875, "logps/generated": -310.1976013183594, "logps/real": -246.65658569335938, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/generated": -21.07327651977539, "rewards/margins": 16.01577377319336, "rewards/real": -5.057503700256348, "step": 1690 }, { "epoch": 1.09, "learning_rate": 3.541469194312796e-07, "logits/generated": -2.1909658908843994, "logits/real": -2.3059241771698, "logps/generated": -312.3503112792969, "logps/real": -264.6876220703125, "loss": 0.0061, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.395238876342773, "rewards/margins": 15.531048774719238, "rewards/real": -4.864190578460693, "step": 1700 }, { "epoch": 1.09, "learning_rate": 3.529620853080569e-07, "logits/generated": -2.2165889739990234, "logits/real": -2.3362364768981934, "logps/generated": -315.99798583984375, "logps/real": -290.00384521484375, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/generated": -20.464693069458008, "rewards/margins": 16.096210479736328, "rewards/real": -4.3684821128845215, "step": 1710 }, { "epoch": 1.1, "learning_rate": 3.517772511848341e-07, "logits/generated": -2.1913318634033203, "logits/real": -2.2931675910949707, "logps/generated": -293.7994689941406, "logps/real": -273.39923095703125, "loss": 0.0174, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.379470825195312, "rewards/margins": 14.768470764160156, "rewards/real": -4.61099910736084, "step": 1720 }, { "epoch": 1.11, "learning_rate": 3.505924170616114e-07, "logits/generated": -2.169776201248169, "logits/real": -2.252234935760498, "logps/generated": -335.75946044921875, "logps/real": -265.8716735839844, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/generated": -23.035526275634766, "rewards/margins": 17.36758041381836, "rewards/real": -5.667943477630615, "step": 1730 }, { "epoch": 1.11, "learning_rate": 3.4940758293838865e-07, "logits/generated": -2.240286111831665, "logits/real": -2.3108315467834473, "logps/generated": -301.48870849609375, "logps/real": -264.96429443359375, "loss": 0.0108, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.83957862854004, "rewards/margins": 14.789782524108887, "rewards/real": -5.049793720245361, "step": 1740 }, { "epoch": 1.12, "learning_rate": 3.482227488151658e-07, "logits/generated": -2.24094295501709, "logits/real": -2.2868783473968506, "logps/generated": -319.02813720703125, "logps/real": -294.453369140625, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/generated": -21.49105453491211, "rewards/margins": 16.97692108154297, "rewards/real": -4.51413106918335, "step": 1750 }, { "epoch": 1.13, "learning_rate": 3.470379146919431e-07, "logits/generated": -2.2272396087646484, "logits/real": -2.369088888168335, "logps/generated": -295.22247314453125, "logps/real": -313.66143798828125, "loss": 0.0102, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.529632568359375, "rewards/margins": 14.83338737487793, "rewards/real": -3.69624662399292, "step": 1760 }, { "epoch": 1.13, "learning_rate": 3.4585308056872036e-07, "logits/generated": -2.240562677383423, "logits/real": -2.347003698348999, "logps/generated": -324.1022644042969, "logps/real": -263.2792053222656, "loss": 0.0035, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.202388763427734, "rewards/margins": 15.57550048828125, "rewards/real": -4.626888751983643, "step": 1770 }, { "epoch": 1.14, "learning_rate": 3.4466824644549763e-07, "logits/generated": -2.2323672771453857, "logits/real": -2.3367342948913574, "logps/generated": -296.6525573730469, "logps/real": -260.25048828125, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/generated": -19.346256256103516, "rewards/margins": 14.571496963500977, "rewards/real": -4.7747626304626465, "step": 1780 }, { "epoch": 1.15, "learning_rate": 3.4348341232227485e-07, "logits/generated": -2.1286113262176514, "logits/real": -2.3490428924560547, "logps/generated": -309.1128234863281, "logps/real": -294.2756042480469, "loss": 0.0134, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.084026336669922, "rewards/margins": 15.66749095916748, "rewards/real": -4.416535377502441, "step": 1790 }, { "epoch": 1.15, "learning_rate": 3.422985781990521e-07, "logits/generated": -2.293304443359375, "logits/real": -2.436685800552368, "logps/generated": -314.29083251953125, "logps/real": -287.0198669433594, "loss": 0.0138, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.650161743164062, "rewards/margins": 15.928117752075195, "rewards/real": -4.722043991088867, "step": 1800 }, { "epoch": 1.16, "learning_rate": 3.411137440758294e-07, "logits/generated": -2.3594422340393066, "logits/real": -2.5309062004089355, "logps/generated": -283.93548583984375, "logps/real": -290.99139404296875, "loss": 0.0074, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.6586856842041, "rewards/margins": 13.922680854797363, "rewards/real": -3.7360050678253174, "step": 1810 }, { "epoch": 1.16, "learning_rate": 3.3992890995260667e-07, "logits/generated": -2.2194488048553467, "logits/real": -2.469252824783325, "logps/generated": -310.2274169921875, "logps/real": -270.26385498046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -20.600818634033203, "rewards/margins": 15.282350540161133, "rewards/real": -5.318469524383545, "step": 1820 }, { "epoch": 1.17, "learning_rate": 3.3874407582938384e-07, "logits/generated": -2.2475979328155518, "logits/real": -2.450146436691284, "logps/generated": -330.9019775390625, "logps/real": -325.27789306640625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -21.632858276367188, "rewards/margins": 16.27450942993164, "rewards/real": -5.358347415924072, "step": 1830 }, { "epoch": 1.18, "learning_rate": 3.375592417061611e-07, "logits/generated": -2.2602717876434326, "logits/real": -2.5164554119110107, "logps/generated": -302.0518798828125, "logps/real": -302.34881591796875, "loss": 0.0114, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -19.150955200195312, "rewards/margins": 15.48846435546875, "rewards/real": -3.6624884605407715, "step": 1840 }, { "epoch": 1.18, "learning_rate": 3.363744075829384e-07, "logits/generated": -2.328455924987793, "logits/real": -2.5169782638549805, "logps/generated": -301.71575927734375, "logps/real": -282.5423583984375, "loss": 0.0094, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.608911514282227, "rewards/margins": 15.527392387390137, "rewards/real": -4.081518173217773, "step": 1850 }, { "epoch": 1.19, "learning_rate": 3.351895734597156e-07, "logits/generated": -2.3264639377593994, "logits/real": -2.420229196548462, "logps/generated": -290.0008544921875, "logps/real": -266.5218505859375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/generated": -19.041057586669922, "rewards/margins": 14.385503768920898, "rewards/real": -4.655551910400391, "step": 1860 }, { "epoch": 1.2, "learning_rate": 3.340047393364929e-07, "logits/generated": -2.231644630432129, "logits/real": -2.3841605186462402, "logps/generated": -315.7859802246094, "logps/real": -264.2816467285156, "loss": 0.0096, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.186296463012695, "rewards/margins": 16.85459327697754, "rewards/real": -4.33170223236084, "step": 1870 }, { "epoch": 1.2, "learning_rate": 3.3281990521327015e-07, "logits/generated": -2.28442120552063, "logits/real": -2.4339089393615723, "logps/generated": -305.69390869140625, "logps/real": -332.723388671875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/generated": -20.054880142211914, "rewards/margins": 14.866659164428711, "rewards/real": -5.18821907043457, "step": 1880 }, { "epoch": 1.21, "learning_rate": 3.316350710900474e-07, "logits/generated": -2.1956193447113037, "logits/real": -2.391106605529785, "logps/generated": -304.96148681640625, "logps/real": -269.6791687011719, "loss": 0.0043, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.582901000976562, "rewards/margins": 16.172176361083984, "rewards/real": -4.410725116729736, "step": 1890 }, { "epoch": 1.22, "learning_rate": 3.304502369668246e-07, "logits/generated": -2.1891417503356934, "logits/real": -2.4029784202575684, "logps/generated": -323.70172119140625, "logps/real": -313.7615966796875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/generated": -21.427139282226562, "rewards/margins": 16.72661590576172, "rewards/real": -4.700521945953369, "step": 1900 }, { "epoch": 1.22, "learning_rate": 3.2926540284360186e-07, "logits/generated": -2.2477095127105713, "logits/real": -2.407269239425659, "logps/generated": -326.52703857421875, "logps/real": -317.97808837890625, "loss": 0.0081, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.259418487548828, "rewards/margins": 17.468002319335938, "rewards/real": -3.791417360305786, "step": 1910 }, { "epoch": 1.23, "learning_rate": 3.2808056872037913e-07, "logits/generated": -2.2605175971984863, "logits/real": -2.4191315174102783, "logps/generated": -315.5021667480469, "logps/real": -311.63360595703125, "loss": 0.0162, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.560211181640625, "rewards/margins": 15.904121398925781, "rewards/real": -4.6560869216918945, "step": 1920 }, { "epoch": 1.23, "learning_rate": 3.2689573459715635e-07, "logits/generated": -2.133112668991089, "logits/real": -2.3952369689941406, "logps/generated": -330.35308837890625, "logps/real": -308.2007141113281, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -21.54788589477539, "rewards/margins": 16.650882720947266, "rewards/real": -4.897005558013916, "step": 1930 }, { "epoch": 1.24, "learning_rate": 3.2571090047393363e-07, "logits/generated": -2.223424196243286, "logits/real": -2.3665499687194824, "logps/generated": -338.943115234375, "logps/real": -278.33984375, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/generated": -22.474721908569336, "rewards/margins": 16.395320892333984, "rewards/real": -6.079402446746826, "step": 1940 }, { "epoch": 1.25, "learning_rate": 3.245260663507109e-07, "logits/generated": -2.159966468811035, "logits/real": -2.3640992641448975, "logps/generated": -356.1936950683594, "logps/real": -290.65447998046875, "loss": 0.0075, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.283023834228516, "rewards/margins": 18.25802230834961, "rewards/real": -6.0250043869018555, "step": 1950 }, { "epoch": 1.25, "learning_rate": 3.2334123222748817e-07, "logits/generated": -2.139967918395996, "logits/real": -2.2997608184814453, "logps/generated": -333.8506774902344, "logps/real": -259.5197448730469, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/generated": -23.21651840209961, "rewards/margins": 17.41876792907715, "rewards/real": -5.797752857208252, "step": 1960 }, { "epoch": 1.26, "learning_rate": 3.221563981042654e-07, "logits/generated": -2.023646116256714, "logits/real": -2.324492931365967, "logps/generated": -348.760986328125, "logps/real": -314.42315673828125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/generated": -24.4593448638916, "rewards/margins": 18.718902587890625, "rewards/real": -5.740442276000977, "step": 1970 }, { "epoch": 1.27, "learning_rate": 3.209715639810426e-07, "logits/generated": -2.134831666946411, "logits/real": -2.299773693084717, "logps/generated": -347.0093688964844, "logps/real": -295.0484619140625, "loss": 0.0048, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.788387298583984, "rewards/margins": 17.44914436340332, "rewards/real": -6.3392462730407715, "step": 1980 }, { "epoch": 1.27, "learning_rate": 3.197867298578199e-07, "logits/generated": -2.0692691802978516, "logits/real": -2.304626941680908, "logps/generated": -326.07366943359375, "logps/real": -276.44549560546875, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/generated": -22.178640365600586, "rewards/margins": 17.356496810913086, "rewards/real": -4.822144508361816, "step": 1990 }, { "epoch": 1.28, "learning_rate": 3.186018957345971e-07, "logits/generated": -2.078138589859009, "logits/real": -2.3022000789642334, "logps/generated": -337.51348876953125, "logps/real": -269.2457275390625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/generated": -22.881973266601562, "rewards/margins": 17.953664779663086, "rewards/real": -4.928309917449951, "step": 2000 }, { "epoch": 1.29, "learning_rate": 3.174170616113744e-07, "logits/generated": -2.1155338287353516, "logits/real": -2.3254213333129883, "logps/generated": -311.26519775390625, "logps/real": -275.30755615234375, "loss": 0.0037, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.179630279541016, "rewards/margins": 15.785786628723145, "rewards/real": -5.3938446044921875, "step": 2010 }, { "epoch": 1.29, "learning_rate": 3.1623222748815165e-07, "logits/generated": -2.102132797241211, "logits/real": -2.2949185371398926, "logps/generated": -329.10809326171875, "logps/real": -266.596435546875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -22.795299530029297, "rewards/margins": 17.099586486816406, "rewards/real": -5.695716857910156, "step": 2020 }, { "epoch": 1.3, "learning_rate": 3.150473933649289e-07, "logits/generated": -2.1224253177642822, "logits/real": -2.2488582134246826, "logps/generated": -374.454833984375, "logps/real": -301.51336669921875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/generated": -26.284189224243164, "rewards/margins": 20.073284149169922, "rewards/real": -6.210905075073242, "step": 2030 }, { "epoch": 1.31, "learning_rate": 3.1386255924170614e-07, "logits/generated": -1.984297752380371, "logits/real": -2.2786407470703125, "logps/generated": -347.9783020019531, "logps/real": -304.0118713378906, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -23.826396942138672, "rewards/margins": 18.487462997436523, "rewards/real": -5.338932037353516, "step": 2040 }, { "epoch": 1.31, "learning_rate": 3.126777251184834e-07, "logits/generated": -2.0077693462371826, "logits/real": -2.2613766193389893, "logps/generated": -348.8348388671875, "logps/real": -292.101806640625, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/generated": -24.404682159423828, "rewards/margins": 19.08761978149414, "rewards/real": -5.317059516906738, "step": 2050 }, { "epoch": 1.32, "learning_rate": 3.1149289099526064e-07, "logits/generated": -2.02852201461792, "logits/real": -2.29878306388855, "logps/generated": -344.41717529296875, "logps/real": -307.5559387207031, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/generated": -22.812238693237305, "rewards/margins": 18.065067291259766, "rewards/real": -4.747171878814697, "step": 2060 }, { "epoch": 1.32, "learning_rate": 3.103080568720379e-07, "logits/generated": -2.0517935752868652, "logits/real": -2.2768099308013916, "logps/generated": -323.74639892578125, "logps/real": -269.71893310546875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/generated": -22.311723709106445, "rewards/margins": 17.752567291259766, "rewards/real": -4.559154033660889, "step": 2070 }, { "epoch": 1.33, "learning_rate": 3.0912322274881513e-07, "logits/generated": -1.9986320734024048, "logits/real": -2.286355972290039, "logps/generated": -342.7187805175781, "logps/real": -335.91143798828125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -23.347644805908203, "rewards/margins": 18.038827896118164, "rewards/real": -5.308821201324463, "step": 2080 }, { "epoch": 1.34, "learning_rate": 3.079383886255924e-07, "logits/generated": -2.086455821990967, "logits/real": -2.2344307899475098, "logps/generated": -373.5683288574219, "logps/real": -286.02728271484375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -26.34897232055664, "rewards/margins": 21.176847457885742, "rewards/real": -5.172126770019531, "step": 2090 }, { "epoch": 1.34, "learning_rate": 3.067535545023697e-07, "logits/generated": -2.0571374893188477, "logits/real": -2.2799127101898193, "logps/generated": -333.6913146972656, "logps/real": -269.0575866699219, "loss": 0.0039, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.993635177612305, "rewards/margins": 17.178089141845703, "rewards/real": -5.815545082092285, "step": 2100 }, { "epoch": 1.35, "learning_rate": 3.055687203791469e-07, "logits/generated": -2.0613174438476562, "logits/real": -2.289515256881714, "logps/generated": -354.09716796875, "logps/real": -322.1733093261719, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -24.417530059814453, "rewards/margins": 18.71674346923828, "rewards/real": -5.700786113739014, "step": 2110 }, { "epoch": 1.36, "learning_rate": 3.0438388625592417e-07, "logits/generated": -2.080763101577759, "logits/real": -2.314450740814209, "logps/generated": -338.1839599609375, "logps/real": -306.02886962890625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -22.660348892211914, "rewards/margins": 16.645235061645508, "rewards/real": -6.01511287689209, "step": 2120 }, { "epoch": 1.36, "learning_rate": 3.0319905213270144e-07, "logits/generated": -2.092369794845581, "logits/real": -2.2747347354888916, "logps/generated": -386.8224792480469, "logps/real": -286.4530334472656, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -27.519500732421875, "rewards/margins": 21.448610305786133, "rewards/real": -6.070888996124268, "step": 2130 }, { "epoch": 1.37, "learning_rate": 3.0201421800947866e-07, "logits/generated": -2.027413845062256, "logits/real": -2.234687328338623, "logps/generated": -377.666015625, "logps/real": -314.5318908691406, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -26.735797882080078, "rewards/margins": 20.168197631835938, "rewards/real": -6.567601680755615, "step": 2140 }, { "epoch": 1.38, "learning_rate": 3.008293838862559e-07, "logits/generated": -2.048600912094116, "logits/real": -2.174879550933838, "logps/generated": -389.923828125, "logps/real": -308.6571960449219, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/generated": -27.314075469970703, "rewards/margins": 19.176753997802734, "rewards/real": -8.137316703796387, "step": 2150 }, { "epoch": 1.38, "learning_rate": 2.9964454976303315e-07, "logits/generated": -2.0499486923217773, "logits/real": -2.1856703758239746, "logps/generated": -381.08001708984375, "logps/real": -319.88995361328125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -26.891727447509766, "rewards/margins": 19.29157257080078, "rewards/real": -7.60015344619751, "step": 2160 }, { "epoch": 1.39, "learning_rate": 2.984597156398104e-07, "logits/generated": -1.9547226428985596, "logits/real": -2.22841215133667, "logps/generated": -346.93646240234375, "logps/real": -305.8375244140625, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/generated": -24.429630279541016, "rewards/margins": 17.81489372253418, "rewards/real": -6.6147356033325195, "step": 2170 }, { "epoch": 1.39, "learning_rate": 2.9727488151658765e-07, "logits/generated": -1.9613704681396484, "logits/real": -2.265411853790283, "logps/generated": -342.83575439453125, "logps/real": -336.58807373046875, "loss": 0.0099, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.710336685180664, "rewards/margins": 18.872779846191406, "rewards/real": -4.837557792663574, "step": 2180 }, { "epoch": 1.4, "learning_rate": 2.960900473933649e-07, "logits/generated": -1.9236023426055908, "logits/real": -2.200582504272461, "logps/generated": -348.8911437988281, "logps/real": -265.9325256347656, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/generated": -24.7271671295166, "rewards/margins": 18.50712013244629, "rewards/real": -6.220047950744629, "step": 2190 }, { "epoch": 1.41, "learning_rate": 2.949052132701422e-07, "logits/generated": -1.9877769947052002, "logits/real": -2.205859899520874, "logps/generated": -351.3087463378906, "logps/real": -295.75042724609375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/generated": -24.641620635986328, "rewards/margins": 18.743465423583984, "rewards/real": -5.89815616607666, "step": 2200 }, { "epoch": 1.41, "learning_rate": 2.9372037914691946e-07, "logits/generated": -1.9616267681121826, "logits/real": -2.109819173812866, "logps/generated": -381.1598205566406, "logps/real": -299.94085693359375, "loss": 0.0114, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.39129066467285, "rewards/margins": 20.818603515625, "rewards/real": -6.572684288024902, "step": 2210 }, { "epoch": 1.42, "learning_rate": 2.9253554502369663e-07, "logits/generated": -1.94171941280365, "logits/real": -2.114952564239502, "logps/generated": -389.1252136230469, "logps/real": -274.3523864746094, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -28.433679580688477, "rewards/margins": 21.05464744567871, "rewards/real": -7.379031181335449, "step": 2220 }, { "epoch": 1.43, "learning_rate": 2.913507109004739e-07, "logits/generated": -2.000523090362549, "logits/real": -2.2058169841766357, "logps/generated": -385.126953125, "logps/real": -307.17401123046875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/generated": -27.793697357177734, "rewards/margins": 20.554990768432617, "rewards/real": -7.238706111907959, "step": 2230 }, { "epoch": 1.43, "learning_rate": 2.901658767772512e-07, "logits/generated": -2.1331028938293457, "logits/real": -2.261775255203247, "logps/generated": -308.9983215332031, "logps/real": -257.3055114746094, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/generated": -20.865665435791016, "rewards/margins": 15.690821647644043, "rewards/real": -5.174844264984131, "step": 2240 }, { "epoch": 1.44, "learning_rate": 2.889810426540284e-07, "logits/generated": -2.1631789207458496, "logits/real": -2.312990665435791, "logps/generated": -308.62359619140625, "logps/real": -290.2818298339844, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/generated": -20.118532180786133, "rewards/margins": 15.512298583984375, "rewards/real": -4.606230735778809, "step": 2250 }, { "epoch": 1.45, "learning_rate": 2.8779620853080567e-07, "logits/generated": -2.306006669998169, "logits/real": -2.395618438720703, "logps/generated": -288.3228454589844, "logps/real": -268.11212158203125, "loss": 0.0263, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.302173614501953, "rewards/margins": 14.114949226379395, "rewards/real": -3.1872246265411377, "step": 2260 }, { "epoch": 1.45, "learning_rate": 2.8661137440758294e-07, "logits/generated": -2.1571130752563477, "logits/real": -2.408663511276245, "logps/generated": -290.388671875, "logps/real": -312.73199462890625, "loss": 0.0048, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.008201599121094, "rewards/margins": 15.325651168823242, "rewards/real": -2.682548761367798, "step": 2270 }, { "epoch": 1.46, "learning_rate": 2.854265402843602e-07, "logits/generated": -2.1750426292419434, "logits/real": -2.3797481060028076, "logps/generated": -287.82562255859375, "logps/real": -281.77349853515625, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/generated": -18.010038375854492, "rewards/margins": 14.384750366210938, "rewards/real": -3.625290632247925, "step": 2280 }, { "epoch": 1.47, "learning_rate": 2.842417061611374e-07, "logits/generated": -2.150526523590088, "logits/real": -2.3643288612365723, "logps/generated": -306.185546875, "logps/real": -303.93426513671875, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/generated": -19.840198516845703, "rewards/margins": 16.938533782958984, "rewards/real": -2.901662826538086, "step": 2290 }, { "epoch": 1.47, "learning_rate": 2.8305687203791465e-07, "logits/generated": -2.22916841506958, "logits/real": -2.382570743560791, "logps/generated": -298.77288818359375, "logps/real": -287.53533935546875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -18.984249114990234, "rewards/margins": 14.85334587097168, "rewards/real": -4.130903244018555, "step": 2300 }, { "epoch": 1.48, "learning_rate": 2.8187203791469193e-07, "logits/generated": -2.1493663787841797, "logits/real": -2.3570504188537598, "logps/generated": -296.1643371582031, "logps/real": -278.86376953125, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/generated": -18.78934097290039, "rewards/margins": 15.320466995239258, "rewards/real": -3.4688727855682373, "step": 2310 }, { "epoch": 1.48, "learning_rate": 2.806872037914692e-07, "logits/generated": -2.055908679962158, "logits/real": -2.341315984725952, "logps/generated": -322.4883728027344, "logps/real": -305.29534912109375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/generated": -20.715917587280273, "rewards/margins": 16.561243057250977, "rewards/real": -4.154674053192139, "step": 2320 }, { "epoch": 1.49, "learning_rate": 2.795023696682464e-07, "logits/generated": -2.1408910751342773, "logits/real": -2.2490763664245605, "logps/generated": -323.20245361328125, "logps/real": -231.9637908935547, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -22.67896842956543, "rewards/margins": 16.87398910522461, "rewards/real": -5.804980278015137, "step": 2330 }, { "epoch": 1.5, "learning_rate": 2.783175355450237e-07, "logits/generated": -2.1436820030212402, "logits/real": -2.2497756481170654, "logps/generated": -315.02166748046875, "logps/real": -274.26824951171875, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/generated": -21.036762237548828, "rewards/margins": 16.998281478881836, "rewards/real": -4.038480758666992, "step": 2340 }, { "epoch": 1.5, "learning_rate": 2.7713270142180097e-07, "logits/generated": -2.0521388053894043, "logits/real": -2.257903575897217, "logps/generated": -323.6043395996094, "logps/real": -284.04071044921875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/generated": -21.63604164123535, "rewards/margins": 16.72218132019043, "rewards/real": -4.913861274719238, "step": 2350 }, { "epoch": 1.51, "learning_rate": 2.759478672985782e-07, "logits/generated": -2.0604171752929688, "logits/real": -2.2787575721740723, "logps/generated": -322.8190612792969, "logps/real": -262.122802734375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/generated": -22.032209396362305, "rewards/margins": 16.762222290039062, "rewards/real": -5.269987106323242, "step": 2360 }, { "epoch": 1.52, "learning_rate": 2.747630331753554e-07, "logits/generated": -2.072727680206299, "logits/real": -2.246783494949341, "logps/generated": -317.24407958984375, "logps/real": -293.20184326171875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -20.969411849975586, "rewards/margins": 15.59190845489502, "rewards/real": -5.377503395080566, "step": 2370 }, { "epoch": 1.52, "learning_rate": 2.735781990521327e-07, "logits/generated": -2.0133070945739746, "logits/real": -2.256195306777954, "logps/generated": -333.7389831542969, "logps/real": -275.0140686035156, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/generated": -23.798381805419922, "rewards/margins": 17.914201736450195, "rewards/real": -5.88417911529541, "step": 2380 }, { "epoch": 1.53, "learning_rate": 2.7239336492890995e-07, "logits/generated": -2.0671088695526123, "logits/real": -2.2632241249084473, "logps/generated": -326.7292175292969, "logps/real": -265.8837890625, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/generated": -22.720916748046875, "rewards/margins": 16.91278839111328, "rewards/real": -5.80812931060791, "step": 2390 }, { "epoch": 1.54, "learning_rate": 2.7120853080568717e-07, "logits/generated": -2.159471273422241, "logits/real": -2.258662700653076, "logps/generated": -320.0276184082031, "logps/real": -258.6063232421875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -20.94855308532715, "rewards/margins": 15.71070384979248, "rewards/real": -5.237849712371826, "step": 2400 }, { "epoch": 1.54, "learning_rate": 2.7002369668246444e-07, "logits/generated": -2.0678138732910156, "logits/real": -2.2282309532165527, "logps/generated": -323.46160888671875, "logps/real": -293.63043212890625, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/generated": -21.792064666748047, "rewards/margins": 16.06268310546875, "rewards/real": -5.729379653930664, "step": 2410 }, { "epoch": 1.55, "learning_rate": 2.688388625592417e-07, "logits/generated": -2.0311505794525146, "logits/real": -2.227372169494629, "logps/generated": -331.828857421875, "logps/real": -310.86114501953125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -22.839397430419922, "rewards/margins": 16.84860610961914, "rewards/real": -5.990791320800781, "step": 2420 }, { "epoch": 1.55, "learning_rate": 2.6765402843601894e-07, "logits/generated": -2.0514676570892334, "logits/real": -2.1547234058380127, "logps/generated": -336.21075439453125, "logps/real": -264.7240295410156, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -23.538625717163086, "rewards/margins": 16.194805145263672, "rewards/real": -7.3438215255737305, "step": 2430 }, { "epoch": 1.56, "learning_rate": 2.664691943127962e-07, "logits/generated": -2.0216307640075684, "logits/real": -2.211293935775757, "logps/generated": -358.1768798828125, "logps/real": -316.6245422363281, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -24.672779083251953, "rewards/margins": 19.19417381286621, "rewards/real": -5.478603363037109, "step": 2440 }, { "epoch": 1.57, "learning_rate": 2.6528436018957343e-07, "logits/generated": -2.041341543197632, "logits/real": -2.10255765914917, "logps/generated": -344.2445373535156, "logps/real": -247.41921997070312, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/generated": -24.607763290405273, "rewards/margins": 17.24384307861328, "rewards/real": -7.36392068862915, "step": 2450 }, { "epoch": 1.57, "learning_rate": 2.640995260663507e-07, "logits/generated": -2.0063443183898926, "logits/real": -2.1921615600585938, "logps/generated": -352.2701721191406, "logps/real": -318.3006896972656, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/generated": -24.384395599365234, "rewards/margins": 18.296855926513672, "rewards/real": -6.0875396728515625, "step": 2460 }, { "epoch": 1.58, "learning_rate": 2.629146919431279e-07, "logits/generated": -2.015026092529297, "logits/real": -2.216576099395752, "logps/generated": -364.0120849609375, "logps/real": -310.111083984375, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/generated": -26.099964141845703, "rewards/margins": 19.072391510009766, "rewards/real": -7.0275726318359375, "step": 2470 }, { "epoch": 1.59, "learning_rate": 2.617298578199052e-07, "logits/generated": -2.0228731632232666, "logits/real": -2.152249813079834, "logps/generated": -367.5391845703125, "logps/real": -313.62969970703125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/generated": -25.40250015258789, "rewards/margins": 18.70431900024414, "rewards/real": -6.698182582855225, "step": 2480 }, { "epoch": 1.59, "learning_rate": 2.6054502369668247e-07, "logits/generated": -2.0362861156463623, "logits/real": -2.1551527976989746, "logps/generated": -319.7622375488281, "logps/real": -263.19439697265625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/generated": -21.64724349975586, "rewards/margins": 17.379783630371094, "rewards/real": -4.267460823059082, "step": 2490 }, { "epoch": 1.6, "learning_rate": 2.5936018957345974e-07, "logits/generated": -1.9363447427749634, "logits/real": -2.1170012950897217, "logps/generated": -337.524658203125, "logps/real": -263.98779296875, "loss": 0.0045, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.839879989624023, "rewards/margins": 19.158344268798828, "rewards/real": -4.681534767150879, "step": 2500 }, { "epoch": 1.61, "learning_rate": 2.5817535545023696e-07, "logits/generated": -1.991236686706543, "logits/real": -2.1649136543273926, "logps/generated": -323.94805908203125, "logps/real": -315.3453674316406, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -21.72148323059082, "rewards/margins": 16.818408966064453, "rewards/real": -4.903075218200684, "step": 2510 }, { "epoch": 1.61, "learning_rate": 2.5699052132701423e-07, "logits/generated": -2.0180463790893555, "logits/real": -2.0788466930389404, "logps/generated": -311.48992919921875, "logps/real": -245.39083862304688, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/generated": -21.404287338256836, "rewards/margins": 16.352306365966797, "rewards/real": -5.051980972290039, "step": 2520 }, { "epoch": 1.62, "learning_rate": 2.5580568720379145e-07, "logits/generated": -2.0104432106018066, "logits/real": -2.135164737701416, "logps/generated": -333.90020751953125, "logps/real": -293.7203674316406, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -23.03586196899414, "rewards/margins": 18.168453216552734, "rewards/real": -4.867411136627197, "step": 2530 }, { "epoch": 1.63, "learning_rate": 2.5462085308056867e-07, "logits/generated": -2.0322206020355225, "logits/real": -2.1355350017547607, "logps/generated": -348.94915771484375, "logps/real": -311.4462585449219, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/generated": -23.815948486328125, "rewards/margins": 18.45013427734375, "rewards/real": -5.365814208984375, "step": 2540 }, { "epoch": 1.63, "learning_rate": 2.5343601895734595e-07, "logits/generated": -2.015996217727661, "logits/real": -2.027782440185547, "logps/generated": -354.2010192871094, "logps/real": -256.85198974609375, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/generated": -24.54047966003418, "rewards/margins": 19.149431228637695, "rewards/real": -5.391049385070801, "step": 2550 }, { "epoch": 1.64, "learning_rate": 2.522511848341232e-07, "logits/generated": -1.9637119770050049, "logits/real": -2.039952278137207, "logps/generated": -347.145263671875, "logps/real": -247.9694366455078, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/generated": -24.598966598510742, "rewards/margins": 18.92831802368164, "rewards/real": -5.670650005340576, "step": 2560 }, { "epoch": 1.64, "learning_rate": 2.510663507109005e-07, "logits/generated": -1.9824374914169312, "logits/real": -2.1313223838806152, "logps/generated": -318.1536865234375, "logps/real": -312.9999084472656, "loss": 0.0114, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.444355010986328, "rewards/margins": 16.674604415893555, "rewards/real": -4.769750595092773, "step": 2570 }, { "epoch": 1.65, "learning_rate": 2.498815165876777e-07, "logits/generated": -2.0012238025665283, "logits/real": -2.179154872894287, "logps/generated": -295.0409851074219, "logps/real": -306.1881103515625, "loss": 0.0101, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -18.802841186523438, "rewards/margins": 15.604291915893555, "rewards/real": -3.1985487937927246, "step": 2580 }, { "epoch": 1.66, "learning_rate": 2.48696682464455e-07, "logits/generated": -1.9817421436309814, "logits/real": -2.15266489982605, "logps/generated": -314.5645751953125, "logps/real": -322.54107666015625, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/generated": -20.512224197387695, "rewards/margins": 17.625017166137695, "rewards/real": -2.887207269668579, "step": 2590 }, { "epoch": 1.66, "learning_rate": 2.475118483412322e-07, "logits/generated": -1.8838014602661133, "logits/real": -2.065337896347046, "logps/generated": -348.7995300292969, "logps/real": -263.181640625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -24.87447166442871, "rewards/margins": 21.053264617919922, "rewards/real": -3.821207046508789, "step": 2600 }, { "epoch": 1.67, "learning_rate": 2.463270142180095e-07, "logits/generated": -1.9176208972930908, "logits/real": -2.047250747680664, "logps/generated": -333.9322204589844, "logps/real": -290.37420654296875, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/generated": -22.631237030029297, "rewards/margins": 18.093896865844727, "rewards/real": -4.537338733673096, "step": 2610 }, { "epoch": 1.68, "learning_rate": 2.451421800947867e-07, "logits/generated": -1.869368314743042, "logits/real": -2.067248821258545, "logps/generated": -328.75384521484375, "logps/real": -284.6513671875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/generated": -21.998384475708008, "rewards/margins": 18.069828033447266, "rewards/real": -3.928557872772217, "step": 2620 }, { "epoch": 1.68, "learning_rate": 2.4395734597156397e-07, "logits/generated": -1.9721105098724365, "logits/real": -1.9859319925308228, "logps/generated": -322.07733154296875, "logps/real": -214.5552520751953, "loss": 0.0086, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.10195541381836, "rewards/margins": 18.13933753967285, "rewards/real": -3.962615489959717, "step": 2630 }, { "epoch": 1.69, "learning_rate": 2.4277251184834124e-07, "logits/generated": -1.9935197830200195, "logits/real": -2.0847668647766113, "logps/generated": -336.79388427734375, "logps/real": -272.2264404296875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/generated": -22.606184005737305, "rewards/margins": 18.493946075439453, "rewards/real": -4.112237453460693, "step": 2640 }, { "epoch": 1.7, "learning_rate": 2.4158767772511846e-07, "logits/generated": -1.9344911575317383, "logits/real": -2.0520946979522705, "logps/generated": -325.8318786621094, "logps/real": -301.53857421875, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/generated": -21.865243911743164, "rewards/margins": 17.928768157958984, "rewards/real": -3.936476230621338, "step": 2650 }, { "epoch": 1.7, "learning_rate": 2.4040284360189573e-07, "logits/generated": -1.8800468444824219, "logits/real": -2.0344691276550293, "logps/generated": -332.1402282714844, "logps/real": -301.62042236328125, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/generated": -22.578327178955078, "rewards/margins": 18.19384765625, "rewards/real": -4.384476661682129, "step": 2660 }, { "epoch": 1.71, "learning_rate": 2.39218009478673e-07, "logits/generated": -1.901908278465271, "logits/real": -2.0253829956054688, "logps/generated": -334.27960205078125, "logps/real": -262.5256652832031, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -24.076019287109375, "rewards/margins": 19.42727279663086, "rewards/real": -4.64874267578125, "step": 2670 }, { "epoch": 1.71, "learning_rate": 2.3803317535545023e-07, "logits/generated": -1.9247627258300781, "logits/real": -2.078843593597412, "logps/generated": -337.6062927246094, "logps/real": -317.027099609375, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/generated": -22.987529754638672, "rewards/margins": 18.43692398071289, "rewards/real": -4.550606727600098, "step": 2680 }, { "epoch": 1.72, "learning_rate": 2.3684834123222747e-07, "logits/generated": -1.9171488285064697, "logits/real": -2.0078930854797363, "logps/generated": -360.25799560546875, "logps/real": -293.09429931640625, "loss": 0.0072, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.852455139160156, "rewards/margins": 19.05927848815918, "rewards/real": -5.793177127838135, "step": 2690 }, { "epoch": 1.73, "learning_rate": 2.3566350710900475e-07, "logits/generated": -1.9496829509735107, "logits/real": -2.0645487308502197, "logps/generated": -333.0656433105469, "logps/real": -307.91094970703125, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -23.068565368652344, "rewards/margins": 16.719371795654297, "rewards/real": -6.3491926193237305, "step": 2700 }, { "epoch": 1.73, "learning_rate": 2.3447867298578197e-07, "logits/generated": -1.86553156375885, "logits/real": -2.009887933731079, "logps/generated": -384.51861572265625, "logps/real": -254.32211303710938, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -27.905101776123047, "rewards/margins": 22.367870330810547, "rewards/real": -5.537230014801025, "step": 2710 }, { "epoch": 1.74, "learning_rate": 2.3329383886255924e-07, "logits/generated": -1.8696056604385376, "logits/real": -2.008697509765625, "logps/generated": -332.2572937011719, "logps/real": -264.5096130371094, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -23.768789291381836, "rewards/margins": 18.116247177124023, "rewards/real": -5.652542591094971, "step": 2720 }, { "epoch": 1.75, "learning_rate": 2.3210900473933649e-07, "logits/generated": -1.9646952152252197, "logits/real": -2.0399162769317627, "logps/generated": -362.4119873046875, "logps/real": -262.9007568359375, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/generated": -24.729812622070312, "rewards/margins": 19.58879852294922, "rewards/real": -5.1410112380981445, "step": 2730 }, { "epoch": 1.75, "learning_rate": 2.3092417061611373e-07, "logits/generated": -1.8820825815200806, "logits/real": -2.106609344482422, "logps/generated": -345.9579162597656, "logps/real": -277.6873779296875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/generated": -24.09268569946289, "rewards/margins": 19.987350463867188, "rewards/real": -4.1053361892700195, "step": 2740 }, { "epoch": 1.76, "learning_rate": 2.2973933649289098e-07, "logits/generated": -1.8273859024047852, "logits/real": -2.048422336578369, "logps/generated": -351.7268981933594, "logps/real": -271.568115234375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/generated": -24.743534088134766, "rewards/margins": 20.314531326293945, "rewards/real": -4.429001808166504, "step": 2750 }, { "epoch": 1.77, "learning_rate": 2.2855450236966822e-07, "logits/generated": -1.8855018615722656, "logits/real": -2.0111851692199707, "logps/generated": -360.0576171875, "logps/real": -229.41171264648438, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/generated": -26.014429092407227, "rewards/margins": 22.26042938232422, "rewards/real": -3.753999710083008, "step": 2760 }, { "epoch": 1.77, "learning_rate": 2.273696682464455e-07, "logits/generated": -1.9568793773651123, "logits/real": -2.0069072246551514, "logps/generated": -325.590576171875, "logps/real": -239.29653930664062, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/generated": -22.475872039794922, "rewards/margins": 18.081890106201172, "rewards/real": -4.393985748291016, "step": 2770 }, { "epoch": 1.78, "learning_rate": 2.2618483412322272e-07, "logits/generated": -1.8930606842041016, "logits/real": -2.063122272491455, "logps/generated": -332.5451965332031, "logps/real": -307.4909362792969, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -22.505624771118164, "rewards/margins": 18.05331039428711, "rewards/real": -4.452314853668213, "step": 2780 }, { "epoch": 1.79, "learning_rate": 2.25e-07, "logits/generated": -1.8584035634994507, "logits/real": -2.032623291015625, "logps/generated": -357.2879333496094, "logps/real": -272.9072265625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -25.67281723022461, "rewards/margins": 22.030052185058594, "rewards/real": -3.642765760421753, "step": 2790 }, { "epoch": 1.79, "learning_rate": 2.2381516587677724e-07, "logits/generated": -1.986790418624878, "logits/real": -2.0681300163269043, "logps/generated": -320.0332946777344, "logps/real": -247.047119140625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/generated": -21.96512794494629, "rewards/margins": 17.276538848876953, "rewards/real": -4.688588619232178, "step": 2800 }, { "epoch": 1.8, "learning_rate": 2.226303317535545e-07, "logits/generated": -1.8750879764556885, "logits/real": -2.0300040245056152, "logps/generated": -353.9804992675781, "logps/real": -279.1330261230469, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/generated": -24.2004337310791, "rewards/margins": 19.79877471923828, "rewards/real": -4.4016571044921875, "step": 2810 }, { "epoch": 1.8, "learning_rate": 2.2144549763033173e-07, "logits/generated": -1.9594194889068604, "logits/real": -2.0362954139709473, "logps/generated": -349.21844482421875, "logps/real": -291.8232727050781, "loss": 0.0067, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -23.7236385345459, "rewards/margins": 18.65010643005371, "rewards/real": -5.073533058166504, "step": 2820 }, { "epoch": 1.81, "learning_rate": 2.20260663507109e-07, "logits/generated": -1.9532238245010376, "logits/real": -2.059518814086914, "logps/generated": -358.15362548828125, "logps/real": -295.75555419921875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -24.933134078979492, "rewards/margins": 19.796754837036133, "rewards/real": -5.136380195617676, "step": 2830 }, { "epoch": 1.82, "learning_rate": 2.1907582938388625e-07, "logits/generated": -1.911240816116333, "logits/real": -2.035658121109009, "logps/generated": -355.701171875, "logps/real": -262.9356689453125, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/generated": -25.080434799194336, "rewards/margins": 20.165552139282227, "rewards/real": -4.914883136749268, "step": 2840 }, { "epoch": 1.82, "learning_rate": 2.178909952606635e-07, "logits/generated": -1.9019253253936768, "logits/real": -1.9727287292480469, "logps/generated": -339.832763671875, "logps/real": -244.0312957763672, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -24.016708374023438, "rewards/margins": 19.056392669677734, "rewards/real": -4.9603142738342285, "step": 2850 }, { "epoch": 1.83, "learning_rate": 2.1670616113744074e-07, "logits/generated": -1.8738031387329102, "logits/real": -1.9713420867919922, "logps/generated": -358.6424865722656, "logps/real": -236.83349609375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/generated": -25.90484046936035, "rewards/margins": 20.887779235839844, "rewards/real": -5.017061710357666, "step": 2860 }, { "epoch": 1.84, "learning_rate": 2.15521327014218e-07, "logits/generated": -1.9246352910995483, "logits/real": -2.0558464527130127, "logps/generated": -349.736083984375, "logps/real": -273.07879638671875, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/generated": -24.154857635498047, "rewards/margins": 19.935977935791016, "rewards/real": -4.218877792358398, "step": 2870 }, { "epoch": 1.84, "learning_rate": 2.1433649289099526e-07, "logits/generated": -1.8406873941421509, "logits/real": -2.04058575630188, "logps/generated": -361.99090576171875, "logps/real": -329.4713439941406, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/generated": -25.09902572631836, "rewards/margins": 20.601699829101562, "rewards/real": -4.4973249435424805, "step": 2880 }, { "epoch": 1.85, "learning_rate": 2.131516587677725e-07, "logits/generated": -1.9347474575042725, "logits/real": -2.107963800430298, "logps/generated": -338.20220947265625, "logps/real": -339.88177490234375, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/generated": -22.654552459716797, "rewards/margins": 17.851367950439453, "rewards/real": -4.80318546295166, "step": 2890 }, { "epoch": 1.86, "learning_rate": 2.1196682464454975e-07, "logits/generated": -1.9360589981079102, "logits/real": -2.094698190689087, "logps/generated": -345.03936767578125, "logps/real": -347.36328125, "loss": 0.005, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.069780349731445, "rewards/margins": 18.0810489654541, "rewards/real": -4.988730430603027, "step": 2900 }, { "epoch": 1.86, "learning_rate": 2.10781990521327e-07, "logits/generated": -1.8973820209503174, "logits/real": -2.0721583366394043, "logps/generated": -354.6455993652344, "logps/real": -321.29388427734375, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/generated": -24.49210548400879, "rewards/margins": 20.05221939086914, "rewards/real": -4.439886569976807, "step": 2910 }, { "epoch": 1.87, "learning_rate": 2.0959715639810427e-07, "logits/generated": -1.8905102014541626, "logits/real": -2.0525612831115723, "logps/generated": -350.294189453125, "logps/real": -321.59576416015625, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/generated": -24.51646614074707, "rewards/margins": 18.570537567138672, "rewards/real": -5.945926189422607, "step": 2920 }, { "epoch": 1.87, "learning_rate": 2.0841232227488152e-07, "logits/generated": -1.8868262767791748, "logits/real": -2.0915586948394775, "logps/generated": -346.82244873046875, "logps/real": -324.75360107421875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/generated": -23.300960540771484, "rewards/margins": 18.860183715820312, "rewards/real": -4.440775394439697, "step": 2930 }, { "epoch": 1.88, "learning_rate": 2.0722748815165874e-07, "logits/generated": -2.009647846221924, "logits/real": -2.112830400466919, "logps/generated": -314.67822265625, "logps/real": -273.21246337890625, "loss": 0.0047, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.59378433227539, "rewards/margins": 16.634010314941406, "rewards/real": -4.959776878356934, "step": 2940 }, { "epoch": 1.89, "learning_rate": 2.06042654028436e-07, "logits/generated": -1.8931806087493896, "logits/real": -2.008685350418091, "logps/generated": -350.7391662597656, "logps/real": -279.31610107421875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -24.823436737060547, "rewards/margins": 19.396289825439453, "rewards/real": -5.427145957946777, "step": 2950 }, { "epoch": 1.89, "learning_rate": 2.0485781990521326e-07, "logits/generated": -1.8286612033843994, "logits/real": -2.0201098918914795, "logps/generated": -330.6366271972656, "logps/real": -285.68145751953125, "loss": 0.0077, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.9193172454834, "rewards/margins": 17.24250602722168, "rewards/real": -4.676810264587402, "step": 2960 }, { "epoch": 1.9, "learning_rate": 2.0367298578199053e-07, "logits/generated": -1.962898850440979, "logits/real": -2.057426929473877, "logps/generated": -323.2264404296875, "logps/real": -326.7906799316406, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/generated": -21.479419708251953, "rewards/margins": 17.808374404907227, "rewards/real": -3.6710457801818848, "step": 2970 }, { "epoch": 1.91, "learning_rate": 2.0248815165876775e-07, "logits/generated": -1.92549729347229, "logits/real": -2.0787177085876465, "logps/generated": -340.48388671875, "logps/real": -327.29638671875, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/generated": -22.590269088745117, "rewards/margins": 18.792264938354492, "rewards/real": -3.798003673553467, "step": 2980 }, { "epoch": 1.91, "learning_rate": 2.0130331753554502e-07, "logits/generated": -2.082226276397705, "logits/real": -2.060159683227539, "logps/generated": -309.9555358886719, "logps/real": -248.53634643554688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -20.31454849243164, "rewards/margins": 16.472434997558594, "rewards/real": -3.842111587524414, "step": 2990 }, { "epoch": 1.92, "learning_rate": 2.0011848341232227e-07, "logits/generated": -2.0672497749328613, "logits/real": -2.071643829345703, "logps/generated": -317.7587890625, "logps/real": -244.15213012695312, "loss": 0.0031, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.6685848236084, "rewards/margins": 16.24555778503418, "rewards/real": -4.423028945922852, "step": 3000 }, { "epoch": 1.93, "learning_rate": 1.9893364928909952e-07, "logits/generated": -2.0025432109832764, "logits/real": -2.051884412765503, "logps/generated": -323.74127197265625, "logps/real": -253.74807739257812, "loss": 0.0121, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.344167709350586, "rewards/margins": 17.927459716796875, "rewards/real": -3.41670560836792, "step": 3010 }, { "epoch": 1.93, "learning_rate": 1.9774881516587676e-07, "logits/generated": -2.0415196418762207, "logits/real": -1.9388816356658936, "logps/generated": -339.0467224121094, "logps/real": -225.0322265625, "loss": 0.007, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.312593460083008, "rewards/margins": 18.541973114013672, "rewards/real": -4.770620346069336, "step": 3020 }, { "epoch": 1.94, "learning_rate": 1.96563981042654e-07, "logits/generated": -2.0513784885406494, "logits/real": -2.1174235343933105, "logps/generated": -330.4163818359375, "logps/real": -336.45703125, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/generated": -21.52487564086914, "rewards/margins": 18.490808486938477, "rewards/real": -3.034066915512085, "step": 3030 }, { "epoch": 1.94, "learning_rate": 1.9537914691943128e-07, "logits/generated": -1.978915810585022, "logits/real": -2.065412759780884, "logps/generated": -305.82666015625, "logps/real": -273.7213439941406, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/generated": -20.667461395263672, "rewards/margins": 16.536968231201172, "rewards/real": -4.130496025085449, "step": 3040 }, { "epoch": 1.95, "learning_rate": 1.9419431279620853e-07, "logits/generated": -1.9168899059295654, "logits/real": -1.9833358526229858, "logps/generated": -328.87860107421875, "logps/real": -255.411865234375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/generated": -22.704120635986328, "rewards/margins": 18.399288177490234, "rewards/real": -4.30483341217041, "step": 3050 }, { "epoch": 1.96, "learning_rate": 1.9300947867298577e-07, "logits/generated": -2.037984609603882, "logits/real": -2.1186954975128174, "logps/generated": -327.5215148925781, "logps/real": -289.11798095703125, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.641704559326172, "rewards/margins": 17.83352279663086, "rewards/real": -3.8081812858581543, "step": 3060 }, { "epoch": 1.96, "learning_rate": 1.9182464454976302e-07, "logits/generated": -1.9606168270111084, "logits/real": -2.066399574279785, "logps/generated": -335.49310302734375, "logps/real": -285.63916015625, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -22.89196014404297, "rewards/margins": 18.424591064453125, "rewards/real": -4.467370510101318, "step": 3070 }, { "epoch": 1.97, "learning_rate": 1.906398104265403e-07, "logits/generated": -1.9231208562850952, "logits/real": -1.9928699731826782, "logps/generated": -349.9872131347656, "logps/real": -250.4529571533203, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/generated": -24.382661819458008, "rewards/margins": 19.963834762573242, "rewards/real": -4.418826580047607, "step": 3080 }, { "epoch": 1.98, "learning_rate": 1.8945497630331754e-07, "logits/generated": -2.0129315853118896, "logits/real": -2.0563480854034424, "logps/generated": -344.85418701171875, "logps/real": -267.9552917480469, "loss": 0.5252, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.324771881103516, "rewards/margins": 19.245107650756836, "rewards/real": -4.079663276672363, "step": 3090 }, { "epoch": 1.98, "learning_rate": 1.8827014218009476e-07, "logits/generated": -2.067603349685669, "logits/real": -2.162640333175659, "logps/generated": -311.3556823730469, "logps/real": -290.5445556640625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/generated": -21.031274795532227, "rewards/margins": 17.459707260131836, "rewards/real": -3.571566343307495, "step": 3100 }, { "epoch": 1.99, "learning_rate": 1.8708530805687203e-07, "logits/generated": -2.022533416748047, "logits/real": -2.082559585571289, "logps/generated": -357.4684143066406, "logps/real": -272.97613525390625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -25.341123580932617, "rewards/margins": 21.033334732055664, "rewards/real": -4.3077898025512695, "step": 3110 }, { "epoch": 2.0, "learning_rate": 1.8590047393364928e-07, "logits/generated": -2.0808520317077637, "logits/real": -2.121340036392212, "logps/generated": -372.4068603515625, "logps/real": -298.7972717285156, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/generated": -25.399755477905273, "rewards/margins": 20.64004135131836, "rewards/real": -4.759713172912598, "step": 3120 }, { "epoch": 2.0, "learning_rate": 1.8471563981042655e-07, "logits/generated": -2.081714630126953, "logits/real": -2.120727300643921, "logps/generated": -337.6578674316406, "logps/real": -305.0014953613281, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -22.75674057006836, "rewards/margins": 18.288433074951172, "rewards/real": -4.468310356140137, "step": 3130 }, { "epoch": 2.01, "learning_rate": 1.8353080568720377e-07, "logits/generated": -2.0968246459960938, "logits/real": -2.1070868968963623, "logps/generated": -362.4174499511719, "logps/real": -297.3021545410156, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.106538772583008, "rewards/margins": 20.243976593017578, "rewards/real": -4.862562656402588, "step": 3140 }, { "epoch": 2.02, "learning_rate": 1.8234597156398104e-07, "logits/generated": -2.073312997817993, "logits/real": -2.031247615814209, "logps/generated": -362.1297302246094, "logps/real": -264.1739807128906, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -25.403884887695312, "rewards/margins": 19.92275619506836, "rewards/real": -5.481128692626953, "step": 3150 }, { "epoch": 2.02, "learning_rate": 1.811611374407583e-07, "logits/generated": -2.0962705612182617, "logits/real": -2.139554262161255, "logps/generated": -340.9764404296875, "logps/real": -297.6211853027344, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -23.11123275756836, "rewards/margins": 17.79618263244629, "rewards/real": -5.315046787261963, "step": 3160 }, { "epoch": 2.03, "learning_rate": 1.7997630331753554e-07, "logits/generated": -2.072605609893799, "logits/real": -2.092684268951416, "logps/generated": -334.9135437011719, "logps/real": -268.64984130859375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/generated": -24.23314666748047, "rewards/margins": 18.820934295654297, "rewards/real": -5.412210464477539, "step": 3170 }, { "epoch": 2.03, "learning_rate": 1.7879146919431278e-07, "logits/generated": -2.103445529937744, "logits/real": -2.1847872734069824, "logps/generated": -348.1864929199219, "logps/real": -362.13543701171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -23.36882972717285, "rewards/margins": 19.663820266723633, "rewards/real": -3.7050089836120605, "step": 3180 }, { "epoch": 2.04, "learning_rate": 1.7760663507109003e-07, "logits/generated": -2.0266237258911133, "logits/real": -2.1335673332214355, "logps/generated": -337.2787170410156, "logps/real": -351.09234619140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -22.30938148498535, "rewards/margins": 18.489295959472656, "rewards/real": -3.820082426071167, "step": 3190 }, { "epoch": 2.05, "learning_rate": 1.764218009478673e-07, "logits/generated": -2.089790105819702, "logits/real": -2.045483112335205, "logps/generated": -348.64691162109375, "logps/real": -275.2323303222656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -24.00542640686035, "rewards/margins": 19.243324279785156, "rewards/real": -4.762101173400879, "step": 3200 }, { "epoch": 2.05, "learning_rate": 1.7523696682464452e-07, "logits/generated": -2.064568519592285, "logits/real": -2.1267263889312744, "logps/generated": -356.4788513183594, "logps/real": -309.7297668457031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -24.821239471435547, "rewards/margins": 20.07695198059082, "rewards/real": -4.744289875030518, "step": 3210 }, { "epoch": 2.06, "learning_rate": 1.740521327014218e-07, "logits/generated": -2.0550034046173096, "logits/real": -2.1228978633880615, "logps/generated": -342.8502197265625, "logps/real": -311.9898681640625, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/generated": -23.458721160888672, "rewards/margins": 19.106483459472656, "rewards/real": -4.352238178253174, "step": 3220 }, { "epoch": 2.07, "learning_rate": 1.7286729857819904e-07, "logits/generated": -2.0442018508911133, "logits/real": -2.037679672241211, "logps/generated": -368.05096435546875, "logps/real": -234.1935272216797, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -25.96514892578125, "rewards/margins": 21.14073371887207, "rewards/real": -4.824419975280762, "step": 3230 }, { "epoch": 2.07, "learning_rate": 1.7168246445497631e-07, "logits/generated": -2.0806503295898438, "logits/real": -2.138878583908081, "logps/generated": -350.0105285644531, "logps/real": -273.70556640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -24.906692504882812, "rewards/margins": 19.990764617919922, "rewards/real": -4.915929317474365, "step": 3240 }, { "epoch": 2.08, "learning_rate": 1.7049763033175353e-07, "logits/generated": -2.0372068881988525, "logits/real": -2.050089120864868, "logps/generated": -350.42095947265625, "logps/real": -272.7009582519531, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -24.547452926635742, "rewards/margins": 19.302318572998047, "rewards/real": -5.2451372146606445, "step": 3250 }, { "epoch": 2.09, "learning_rate": 1.693127962085308e-07, "logits/generated": -2.1300880908966064, "logits/real": -2.2184221744537354, "logps/generated": -359.78472900390625, "logps/real": -349.1603698730469, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -23.26993751525879, "rewards/margins": 19.06733512878418, "rewards/real": -4.202603340148926, "step": 3260 }, { "epoch": 2.09, "learning_rate": 1.6812796208530805e-07, "logits/generated": -2.011422872543335, "logits/real": -2.0868258476257324, "logps/generated": -361.9312744140625, "logps/real": -315.9602966308594, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -24.96506690979004, "rewards/margins": 20.63692283630371, "rewards/real": -4.328146457672119, "step": 3270 }, { "epoch": 2.1, "learning_rate": 1.669431279620853e-07, "logits/generated": -2.047356128692627, "logits/real": -2.114980697631836, "logps/generated": -342.90252685546875, "logps/real": -288.1419372558594, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -22.95301628112793, "rewards/margins": 17.99421501159668, "rewards/real": -4.95880126953125, "step": 3280 }, { "epoch": 2.1, "learning_rate": 1.6575829383886255e-07, "logits/generated": -2.0588772296905518, "logits/real": -2.0936694145202637, "logps/generated": -365.0455017089844, "logps/real": -304.8596496582031, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -25.46891212463379, "rewards/margins": 19.947111129760742, "rewards/real": -5.521799564361572, "step": 3290 }, { "epoch": 2.11, "learning_rate": 1.645734597156398e-07, "logits/generated": -2.035792827606201, "logits/real": -2.089658498764038, "logps/generated": -363.96466064453125, "logps/real": -310.9200134277344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -24.831939697265625, "rewards/margins": 20.766572952270508, "rewards/real": -4.065365791320801, "step": 3300 }, { "epoch": 2.12, "learning_rate": 1.6338862559241706e-07, "logits/generated": -2.032679319381714, "logits/real": -2.05472731590271, "logps/generated": -352.28948974609375, "logps/real": -283.97161865234375, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.409162521362305, "rewards/margins": 19.225811004638672, "rewards/real": -5.183350563049316, "step": 3310 }, { "epoch": 2.12, "learning_rate": 1.622037914691943e-07, "logits/generated": -2.0000383853912354, "logits/real": -2.0034682750701904, "logps/generated": -356.1659240722656, "logps/real": -244.78005981445312, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -25.83150863647461, "rewards/margins": 20.468902587890625, "rewards/real": -5.362606525421143, "step": 3320 }, { "epoch": 2.13, "learning_rate": 1.6101895734597156e-07, "logits/generated": -2.0317182540893555, "logits/real": -2.0870862007141113, "logps/generated": -363.60394287109375, "logps/real": -261.621826171875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -25.53971290588379, "rewards/margins": 20.445377349853516, "rewards/real": -5.094333171844482, "step": 3330 }, { "epoch": 2.14, "learning_rate": 1.598341232227488e-07, "logits/generated": -2.082185745239258, "logits/real": -2.1197800636291504, "logps/generated": -357.8829345703125, "logps/real": -276.6432189941406, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -24.641422271728516, "rewards/margins": 19.46223258972168, "rewards/real": -5.1791863441467285, "step": 3340 }, { "epoch": 2.14, "learning_rate": 1.5864928909952605e-07, "logits/generated": -2.0468974113464355, "logits/real": -2.0383057594299316, "logps/generated": -383.3557434082031, "logps/real": -272.8359680175781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -27.14200782775879, "rewards/margins": 21.54371452331543, "rewards/real": -5.598288536071777, "step": 3350 }, { "epoch": 2.15, "learning_rate": 1.5746445497630332e-07, "logits/generated": -1.9730154275894165, "logits/real": -2.0217251777648926, "logps/generated": -347.0853576660156, "logps/real": -291.0683898925781, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -25.273738861083984, "rewards/margins": 19.66875457763672, "rewards/real": -5.604984283447266, "step": 3360 }, { "epoch": 2.16, "learning_rate": 1.5627962085308054e-07, "logits/generated": -2.016352415084839, "logits/real": -2.0556960105895996, "logps/generated": -358.643310546875, "logps/real": -292.9967041015625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -25.528522491455078, "rewards/margins": 20.312397003173828, "rewards/real": -5.216123104095459, "step": 3370 }, { "epoch": 2.16, "learning_rate": 1.5509478672985782e-07, "logits/generated": -2.0128049850463867, "logits/real": -2.0695934295654297, "logps/generated": -361.88629150390625, "logps/real": -289.5730895996094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -25.175642013549805, "rewards/margins": 20.242481231689453, "rewards/real": -4.933161735534668, "step": 3380 }, { "epoch": 2.17, "learning_rate": 1.5390995260663506e-07, "logits/generated": -2.0474679470062256, "logits/real": -2.0958378314971924, "logps/generated": -371.52984619140625, "logps/real": -277.81854248046875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -26.174081802368164, "rewards/margins": 20.208127975463867, "rewards/real": -5.9659528732299805, "step": 3390 }, { "epoch": 2.18, "learning_rate": 1.5272511848341233e-07, "logits/generated": -1.981287956237793, "logits/real": -1.9887183904647827, "logps/generated": -371.8958740234375, "logps/real": -270.61627197265625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -27.01461410522461, "rewards/margins": 20.439838409423828, "rewards/real": -6.574775695800781, "step": 3400 }, { "epoch": 2.18, "learning_rate": 1.5154028436018955e-07, "logits/generated": -2.091592788696289, "logits/real": -2.0786731243133545, "logps/generated": -378.8370056152344, "logps/real": -287.45281982421875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -26.378387451171875, "rewards/margins": 21.110021591186523, "rewards/real": -5.268365859985352, "step": 3410 }, { "epoch": 2.19, "learning_rate": 1.5035545023696683e-07, "logits/generated": -2.031449794769287, "logits/real": -2.08339262008667, "logps/generated": -368.6145935058594, "logps/real": -299.513427734375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -25.480379104614258, "rewards/margins": 20.795780181884766, "rewards/real": -4.684597969055176, "step": 3420 }, { "epoch": 2.19, "learning_rate": 1.4917061611374407e-07, "logits/generated": -2.0079009532928467, "logits/real": -2.053812026977539, "logps/generated": -364.58734130859375, "logps/real": -280.71612548828125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/generated": -25.69207763671875, "rewards/margins": 20.46217918395996, "rewards/real": -5.229896545410156, "step": 3430 }, { "epoch": 2.2, "learning_rate": 1.4798578199052132e-07, "logits/generated": -2.052233934402466, "logits/real": -2.1017303466796875, "logps/generated": -376.9831237792969, "logps/real": -309.8131408691406, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.689865112304688, "rewards/margins": 19.916255950927734, "rewards/real": -5.7736077308654785, "step": 3440 }, { "epoch": 2.21, "learning_rate": 1.4680094786729857e-07, "logits/generated": -2.0397682189941406, "logits/real": -2.0936062335968018, "logps/generated": -353.9073791503906, "logps/real": -299.5115966796875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -24.632925033569336, "rewards/margins": 19.708118438720703, "rewards/real": -4.924810886383057, "step": 3450 }, { "epoch": 2.21, "learning_rate": 1.456161137440758e-07, "logits/generated": -1.9885776042938232, "logits/real": -2.034236192703247, "logps/generated": -360.8766784667969, "logps/real": -280.8948059082031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -26.076366424560547, "rewards/margins": 20.19205665588379, "rewards/real": -5.884313106536865, "step": 3460 }, { "epoch": 2.22, "learning_rate": 1.4443127962085309e-07, "logits/generated": -2.0221621990203857, "logits/real": -2.0913443565368652, "logps/generated": -377.02734375, "logps/real": -307.8632507324219, "loss": 0.0089, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.230060577392578, "rewards/margins": 20.80460548400879, "rewards/real": -5.425457000732422, "step": 3470 }, { "epoch": 2.23, "learning_rate": 1.4324644549763033e-07, "logits/generated": -1.970720648765564, "logits/real": -2.0222678184509277, "logps/generated": -364.70477294921875, "logps/real": -270.23492431640625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -25.826274871826172, "rewards/margins": 19.925310134887695, "rewards/real": -5.900964736938477, "step": 3480 }, { "epoch": 2.23, "learning_rate": 1.4206161137440758e-07, "logits/generated": -1.985548734664917, "logits/real": -2.059246301651001, "logps/generated": -399.61077880859375, "logps/real": -299.520263671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -28.505584716796875, "rewards/margins": 22.971120834350586, "rewards/real": -5.534466743469238, "step": 3490 }, { "epoch": 2.24, "learning_rate": 1.4087677725118482e-07, "logits/generated": -2.0236446857452393, "logits/real": -2.041933536529541, "logps/generated": -376.0531921386719, "logps/real": -258.92315673828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -26.45395851135254, "rewards/margins": 21.224918365478516, "rewards/real": -5.229036808013916, "step": 3500 }, { "epoch": 2.25, "learning_rate": 1.396919431279621e-07, "logits/generated": -1.960320234298706, "logits/real": -2.0525546073913574, "logps/generated": -351.20184326171875, "logps/real": -317.5999755859375, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.332490921020508, "rewards/margins": 19.579879760742188, "rewards/real": -4.752608299255371, "step": 3510 }, { "epoch": 2.25, "learning_rate": 1.3850710900473934e-07, "logits/generated": -1.9520498514175415, "logits/real": -2.0305888652801514, "logps/generated": -374.1609191894531, "logps/real": -324.40753173828125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -27.1029052734375, "rewards/margins": 20.919063568115234, "rewards/real": -6.183840751647949, "step": 3520 }, { "epoch": 2.26, "learning_rate": 1.3732227488151656e-07, "logits/generated": -1.9751561880111694, "logits/real": -1.9569429159164429, "logps/generated": -413.66143798828125, "logps/real": -266.12664794921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -31.187402725219727, "rewards/margins": 24.67436408996582, "rewards/real": -6.513040065765381, "step": 3530 }, { "epoch": 2.26, "learning_rate": 1.3613744075829384e-07, "logits/generated": -2.030137538909912, "logits/real": -2.0903306007385254, "logps/generated": -369.189453125, "logps/real": -314.4388122558594, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -25.56549072265625, "rewards/margins": 19.738645553588867, "rewards/real": -5.826841831207275, "step": 3540 }, { "epoch": 2.27, "learning_rate": 1.3495260663507108e-07, "logits/generated": -1.9893041849136353, "logits/real": -2.0538480281829834, "logps/generated": -384.92987060546875, "logps/real": -279.54071044921875, "loss": 0.0045, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.083690643310547, "rewards/margins": 22.457271575927734, "rewards/real": -5.626420021057129, "step": 3550 }, { "epoch": 2.28, "learning_rate": 1.3376777251184836e-07, "logits/generated": -1.9623091220855713, "logits/real": -2.057955265045166, "logps/generated": -360.659912109375, "logps/real": -299.2305603027344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -25.131025314331055, "rewards/margins": 20.248781204223633, "rewards/real": -4.882245063781738, "step": 3560 }, { "epoch": 2.28, "learning_rate": 1.3258293838862558e-07, "logits/generated": -1.9596290588378906, "logits/real": -2.014758348464966, "logps/generated": -369.7837829589844, "logps/real": -292.29656982421875, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.983783721923828, "rewards/margins": 20.629314422607422, "rewards/real": -5.354469299316406, "step": 3570 }, { "epoch": 2.29, "learning_rate": 1.3139810426540285e-07, "logits/generated": -1.9941390752792358, "logits/real": -2.0187554359436035, "logps/generated": -402.2820739746094, "logps/real": -298.3996887207031, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -29.100866317749023, "rewards/margins": 23.757795333862305, "rewards/real": -5.3430681228637695, "step": 3580 }, { "epoch": 2.3, "learning_rate": 1.302132701421801e-07, "logits/generated": -1.9865878820419312, "logits/real": -1.99555242061615, "logps/generated": -370.57489013671875, "logps/real": -270.86138916015625, "loss": 0.0066, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.78008460998535, "rewards/margins": 20.997785568237305, "rewards/real": -5.782297611236572, "step": 3590 }, { "epoch": 2.3, "learning_rate": 1.2902843601895734e-07, "logits/generated": -1.9569337368011475, "logits/real": -2.038245439529419, "logps/generated": -385.96734619140625, "logps/real": -300.8575744628906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -26.998550415039062, "rewards/margins": 21.897846221923828, "rewards/real": -5.100704669952393, "step": 3600 }, { "epoch": 2.31, "learning_rate": 1.278436018957346e-07, "logits/generated": -1.9720693826675415, "logits/real": -2.037219762802124, "logps/generated": -357.646240234375, "logps/real": -312.5133056640625, "loss": 0.0071, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.564912796020508, "rewards/margins": 20.172155380249023, "rewards/real": -5.392756462097168, "step": 3610 }, { "epoch": 2.32, "learning_rate": 1.2665876777251183e-07, "logits/generated": -1.9715849161148071, "logits/real": -2.0456924438476562, "logps/generated": -389.7020263671875, "logps/real": -263.62359619140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -27.894466400146484, "rewards/margins": 21.80486297607422, "rewards/real": -6.089602470397949, "step": 3620 }, { "epoch": 2.32, "learning_rate": 1.254739336492891e-07, "logits/generated": -1.9623218774795532, "logits/real": -1.9732621908187866, "logps/generated": -381.63861083984375, "logps/real": -290.93035888671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -27.41843032836914, "rewards/margins": 21.812026977539062, "rewards/real": -5.6064043045043945, "step": 3630 }, { "epoch": 2.33, "learning_rate": 1.2428909952606635e-07, "logits/generated": -1.9537547826766968, "logits/real": -2.0077974796295166, "logps/generated": -398.73822021484375, "logps/real": -308.29779052734375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -29.166988372802734, "rewards/margins": 23.160018920898438, "rewards/real": -6.0069708824157715, "step": 3640 }, { "epoch": 2.34, "learning_rate": 1.231042654028436e-07, "logits/generated": -1.9947917461395264, "logits/real": -1.9984674453735352, "logps/generated": -376.10589599609375, "logps/real": -315.67144775390625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/generated": -27.812427520751953, "rewards/margins": 21.052278518676758, "rewards/real": -6.760148525238037, "step": 3650 }, { "epoch": 2.34, "learning_rate": 1.2191943127962085e-07, "logits/generated": -2.017561197280884, "logits/real": -1.9898500442504883, "logps/generated": -381.71728515625, "logps/real": -296.3028564453125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -25.982290267944336, "rewards/margins": 19.864425659179688, "rewards/real": -6.117864608764648, "step": 3660 }, { "epoch": 2.35, "learning_rate": 1.207345971563981e-07, "logits/generated": -1.9187358617782593, "logits/real": -1.9610626697540283, "logps/generated": -382.46905517578125, "logps/real": -290.3877258300781, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -27.577932357788086, "rewards/margins": 21.187341690063477, "rewards/real": -6.390590667724609, "step": 3670 }, { "epoch": 2.35, "learning_rate": 1.1954976303317534e-07, "logits/generated": -1.9850883483886719, "logits/real": -2.047105073928833, "logps/generated": -375.0716552734375, "logps/real": -340.9063415527344, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -26.463632583618164, "rewards/margins": 21.219701766967773, "rewards/real": -5.243931293487549, "step": 3680 }, { "epoch": 2.36, "learning_rate": 1.183649289099526e-07, "logits/generated": -1.8932054042816162, "logits/real": -1.8967291116714478, "logps/generated": -359.06365966796875, "logps/real": -283.9896240234375, "loss": 0.0033, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.62455177307129, "rewards/margins": 19.897136688232422, "rewards/real": -5.727414131164551, "step": 3690 }, { "epoch": 2.37, "learning_rate": 1.1718009478672986e-07, "logits/generated": -1.8854577541351318, "logits/real": -1.8945916891098022, "logps/generated": -380.1351318359375, "logps/real": -284.0939025878906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -28.0650634765625, "rewards/margins": 23.04536247253418, "rewards/real": -5.019701957702637, "step": 3700 }, { "epoch": 2.37, "learning_rate": 1.159952606635071e-07, "logits/generated": -1.9407052993774414, "logits/real": -1.9002673625946045, "logps/generated": -395.69915771484375, "logps/real": -285.1513977050781, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -28.094594955444336, "rewards/margins": 22.06725311279297, "rewards/real": -6.027345657348633, "step": 3710 }, { "epoch": 2.38, "learning_rate": 1.1481042654028436e-07, "logits/generated": -1.8787548542022705, "logits/real": -1.8866329193115234, "logps/generated": -390.35162353515625, "logps/real": -278.56182861328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -28.97882080078125, "rewards/margins": 22.92947769165039, "rewards/real": -6.049341678619385, "step": 3720 }, { "epoch": 2.39, "learning_rate": 1.136255924170616e-07, "logits/generated": -1.9647785425186157, "logits/real": -1.893686294555664, "logps/generated": -373.9754943847656, "logps/real": -302.5462646484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -26.09256362915039, "rewards/margins": 20.720409393310547, "rewards/real": -5.372152805328369, "step": 3730 }, { "epoch": 2.39, "learning_rate": 1.1244075829383886e-07, "logits/generated": -1.9494024515151978, "logits/real": -1.8903011083602905, "logps/generated": -388.49896240234375, "logps/real": -317.46038818359375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -27.872272491455078, "rewards/margins": 22.287893295288086, "rewards/real": -5.584378719329834, "step": 3740 }, { "epoch": 2.4, "learning_rate": 1.112559241706161e-07, "logits/generated": -1.9262495040893555, "logits/real": -1.961554765701294, "logps/generated": -386.28167724609375, "logps/real": -334.42181396484375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -26.91708755493164, "rewards/margins": 21.78619384765625, "rewards/real": -5.130893707275391, "step": 3750 }, { "epoch": 2.41, "learning_rate": 1.1007109004739336e-07, "logits/generated": -1.9308741092681885, "logits/real": -1.8776057958602905, "logps/generated": -372.0367431640625, "logps/real": -292.9879150390625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -26.954111099243164, "rewards/margins": 21.254783630371094, "rewards/real": -5.699324607849121, "step": 3760 }, { "epoch": 2.41, "learning_rate": 1.0888625592417061e-07, "logits/generated": -1.9344863891601562, "logits/real": -1.8149305582046509, "logps/generated": -409.9720153808594, "logps/real": -289.69500732421875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -29.697546005249023, "rewards/margins": 23.537311553955078, "rewards/real": -6.1602349281311035, "step": 3770 }, { "epoch": 2.42, "learning_rate": 1.0770142180094787e-07, "logits/generated": -1.9099664688110352, "logits/real": -1.847806692123413, "logps/generated": -381.0286560058594, "logps/real": -284.8314208984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -27.63125228881836, "rewards/margins": 21.55826187133789, "rewards/real": -6.072990417480469, "step": 3780 }, { "epoch": 2.42, "learning_rate": 1.0651658767772511e-07, "logits/generated": -1.916006326675415, "logits/real": -1.90207040309906, "logps/generated": -396.6690368652344, "logps/real": -309.1549377441406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -29.109012603759766, "rewards/margins": 22.90389633178711, "rewards/real": -6.205114364624023, "step": 3790 }, { "epoch": 2.43, "learning_rate": 1.0533175355450237e-07, "logits/generated": -1.8688459396362305, "logits/real": -1.777832269668579, "logps/generated": -398.3630065917969, "logps/real": -297.55621337890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -29.57208251953125, "rewards/margins": 23.178564071655273, "rewards/real": -6.393516540527344, "step": 3800 }, { "epoch": 2.44, "learning_rate": 1.0414691943127962e-07, "logits/generated": -1.9176315069198608, "logits/real": -1.8100831508636475, "logps/generated": -409.38543701171875, "logps/real": -302.27642822265625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -30.042591094970703, "rewards/margins": 24.186843872070312, "rewards/real": -5.8557515144348145, "step": 3810 }, { "epoch": 2.44, "learning_rate": 1.0296208530805687e-07, "logits/generated": -1.8687940835952759, "logits/real": -1.8604339361190796, "logps/generated": -380.5201416015625, "logps/real": -306.03607177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -27.1380672454834, "rewards/margins": 21.264789581298828, "rewards/real": -5.87327766418457, "step": 3820 }, { "epoch": 2.45, "learning_rate": 1.0177725118483411e-07, "logits/generated": -1.9053184986114502, "logits/real": -1.9573678970336914, "logps/generated": -377.49066162109375, "logps/real": -322.23114013671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -26.4570369720459, "rewards/margins": 20.188465118408203, "rewards/real": -6.26857328414917, "step": 3830 }, { "epoch": 2.46, "learning_rate": 1.0059241706161137e-07, "logits/generated": -1.8795225620269775, "logits/real": -1.8401075601577759, "logps/generated": -415.0228576660156, "logps/real": -294.7200927734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -31.20663070678711, "rewards/margins": 25.226472854614258, "rewards/real": -5.980162143707275, "step": 3840 }, { "epoch": 2.46, "learning_rate": 9.940758293838862e-08, "logits/generated": -1.7763783931732178, "logits/real": -1.7465555667877197, "logps/generated": -414.9600524902344, "logps/real": -258.68865966796875, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.951168060302734, "rewards/margins": 24.258214950561523, "rewards/real": -6.692956447601318, "step": 3850 }, { "epoch": 2.47, "learning_rate": 9.822274881516588e-08, "logits/generated": -1.8327720165252686, "logits/real": -1.8621619939804077, "logps/generated": -400.9129943847656, "logps/real": -288.2205505371094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -29.8226318359375, "rewards/margins": 23.871675491333008, "rewards/real": -5.950957298278809, "step": 3860 }, { "epoch": 2.48, "learning_rate": 9.703791469194312e-08, "logits/generated": -1.8595101833343506, "logits/real": -1.9030145406723022, "logps/generated": -375.13726806640625, "logps/real": -311.8053283691406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -27.23568344116211, "rewards/margins": 19.974584579467773, "rewards/real": -7.261102199554443, "step": 3870 }, { "epoch": 2.48, "learning_rate": 9.585308056872038e-08, "logits/generated": -1.896691918373108, "logits/real": -1.899420976638794, "logps/generated": -395.8522644042969, "logps/real": -334.49566650390625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -28.630496978759766, "rewards/margins": 23.07138442993164, "rewards/real": -5.559113025665283, "step": 3880 }, { "epoch": 2.49, "learning_rate": 9.466824644549763e-08, "logits/generated": -1.9497692584991455, "logits/real": -1.9165337085723877, "logps/generated": -410.4734802246094, "logps/real": -317.7174987792969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -29.726776123046875, "rewards/margins": 22.598018646240234, "rewards/real": -7.128758907318115, "step": 3890 }, { "epoch": 2.5, "learning_rate": 9.348341232227488e-08, "logits/generated": -1.9241125583648682, "logits/real": -1.9029643535614014, "logps/generated": -407.60406494140625, "logps/real": -347.81939697265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -29.964529037475586, "rewards/margins": 23.82811164855957, "rewards/real": -6.136418342590332, "step": 3900 }, { "epoch": 2.5, "learning_rate": 9.229857819905212e-08, "logits/generated": -1.8210303783416748, "logits/real": -1.811648964881897, "logps/generated": -425.33349609375, "logps/real": -271.4889221191406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -32.074005126953125, "rewards/margins": 24.543872833251953, "rewards/real": -7.5301337242126465, "step": 3910 }, { "epoch": 2.51, "learning_rate": 9.111374407582938e-08, "logits/generated": -1.8708751201629639, "logits/real": -1.8068602085113525, "logps/generated": -458.919921875, "logps/real": -279.9232482910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.913780212402344, "rewards/margins": 26.395395278930664, "rewards/real": -7.518378257751465, "step": 3920 }, { "epoch": 2.51, "learning_rate": 8.992890995260663e-08, "logits/generated": -1.8618663549423218, "logits/real": -1.8846619129180908, "logps/generated": -391.3426208496094, "logps/real": -329.83099365234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -28.733356475830078, "rewards/margins": 21.27267074584961, "rewards/real": -7.46068811416626, "step": 3930 }, { "epoch": 2.52, "learning_rate": 8.874407582938389e-08, "logits/generated": -1.8135093450546265, "logits/real": -1.8050518035888672, "logps/generated": -409.73284912109375, "logps/real": -296.38873291015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -30.615734100341797, "rewards/margins": 23.74664306640625, "rewards/real": -6.8690900802612305, "step": 3940 }, { "epoch": 2.53, "learning_rate": 8.755924170616114e-08, "logits/generated": -1.868835210800171, "logits/real": -1.8635787963867188, "logps/generated": -397.95159912109375, "logps/real": -321.065185546875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -28.5548038482666, "rewards/margins": 21.278282165527344, "rewards/real": -7.276516914367676, "step": 3950 }, { "epoch": 2.53, "learning_rate": 8.63744075829384e-08, "logits/generated": -1.8097864389419556, "logits/real": -1.7577613592147827, "logps/generated": -443.16552734375, "logps/real": -305.9537353515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.76448059082031, "rewards/margins": 25.336135864257812, "rewards/real": -8.428342819213867, "step": 3960 }, { "epoch": 2.54, "learning_rate": 8.518957345971564e-08, "logits/generated": -1.8915891647338867, "logits/real": -1.8530080318450928, "logps/generated": -429.5049743652344, "logps/real": -289.32135009765625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -32.58146286010742, "rewards/margins": 25.115055084228516, "rewards/real": -7.466407775878906, "step": 3970 }, { "epoch": 2.55, "learning_rate": 8.40047393364929e-08, "logits/generated": -1.9106069803237915, "logits/real": -1.8629109859466553, "logps/generated": -409.6499938964844, "logps/real": -333.20416259765625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -29.9986629486084, "rewards/margins": 23.40935516357422, "rewards/real": -6.5893120765686035, "step": 3980 }, { "epoch": 2.55, "learning_rate": 8.281990521327013e-08, "logits/generated": -1.8551766872406006, "logits/real": -1.7436447143554688, "logps/generated": -455.6358337402344, "logps/real": -287.45489501953125, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.54535675048828, "rewards/margins": 26.85367774963379, "rewards/real": -7.691675662994385, "step": 3990 }, { "epoch": 2.56, "learning_rate": 8.163507109004738e-08, "logits/generated": -1.8847310543060303, "logits/real": -1.7997153997421265, "logps/generated": -436.38079833984375, "logps/real": -352.53582763671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -32.270843505859375, "rewards/margins": 25.198665618896484, "rewards/real": -7.072180271148682, "step": 4000 }, { "epoch": 2.57, "learning_rate": 8.045023696682464e-08, "logits/generated": -1.7876107692718506, "logits/real": -1.8028312921524048, "logps/generated": -394.3335876464844, "logps/real": -319.1776428222656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -29.803787231445312, "rewards/margins": 21.604991912841797, "rewards/real": -8.198799133300781, "step": 4010 }, { "epoch": 2.57, "learning_rate": 7.926540284360189e-08, "logits/generated": -1.8726141452789307, "logits/real": -1.8162380456924438, "logps/generated": -417.1497497558594, "logps/real": -305.0618896484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -31.330730438232422, "rewards/margins": 23.804101943969727, "rewards/real": -7.526628017425537, "step": 4020 }, { "epoch": 2.58, "learning_rate": 7.808056872037915e-08, "logits/generated": -1.7631124258041382, "logits/real": -1.7794008255004883, "logps/generated": -448.10430908203125, "logps/real": -308.09625244140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -33.852027893066406, "rewards/margins": 26.34817886352539, "rewards/real": -7.503846168518066, "step": 4030 }, { "epoch": 2.58, "learning_rate": 7.689573459715639e-08, "logits/generated": -1.805232048034668, "logits/real": -1.7675590515136719, "logps/generated": -404.6713562011719, "logps/real": -301.56793212890625, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.712646484375, "rewards/margins": 22.97926139831543, "rewards/real": -7.733384609222412, "step": 4040 }, { "epoch": 2.59, "learning_rate": 7.571090047393365e-08, "logits/generated": -1.7601591348648071, "logits/real": -1.749284029006958, "logps/generated": -433.79034423828125, "logps/real": -329.8743591308594, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.34691619873047, "rewards/margins": 24.43657112121582, "rewards/real": -8.910343170166016, "step": 4050 }, { "epoch": 2.6, "learning_rate": 7.45260663507109e-08, "logits/generated": -1.7626146078109741, "logits/real": -1.7438932657241821, "logps/generated": -427.18743896484375, "logps/real": -290.01751708984375, "loss": 0.0046, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.555919647216797, "rewards/margins": 23.180891036987305, "rewards/real": -8.375027656555176, "step": 4060 }, { "epoch": 2.6, "learning_rate": 7.334123222748814e-08, "logits/generated": -1.8292105197906494, "logits/real": -1.7760603427886963, "logps/generated": -439.42193603515625, "logps/real": -292.0125427246094, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -32.864219665527344, "rewards/margins": 24.91449737548828, "rewards/real": -7.949720859527588, "step": 4070 }, { "epoch": 2.61, "learning_rate": 7.215639810426539e-08, "logits/generated": -1.7653682231903076, "logits/real": -1.6930913925170898, "logps/generated": -437.002685546875, "logps/real": -267.6156311035156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.63715362548828, "rewards/margins": 25.4591064453125, "rewards/real": -8.178048133850098, "step": 4080 }, { "epoch": 2.62, "learning_rate": 7.097156398104265e-08, "logits/generated": -1.7562963962554932, "logits/real": -1.7341216802597046, "logps/generated": -434.3614807128906, "logps/real": -289.385498046875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -32.92387771606445, "rewards/margins": 24.789287567138672, "rewards/real": -8.134592056274414, "step": 4090 }, { "epoch": 2.62, "learning_rate": 6.97867298578199e-08, "logits/generated": -1.7824033498764038, "logits/real": -1.8202848434448242, "logps/generated": -422.67523193359375, "logps/real": -367.13006591796875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -31.36875343322754, "rewards/margins": 24.126026153564453, "rewards/real": -7.242722988128662, "step": 4100 }, { "epoch": 2.63, "learning_rate": 6.860189573459716e-08, "logits/generated": -1.7355695962905884, "logits/real": -1.7290878295898438, "logps/generated": -451.7454528808594, "logps/real": -326.6995544433594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.70394515991211, "rewards/margins": 25.241628646850586, "rewards/real": -8.462319374084473, "step": 4110 }, { "epoch": 2.64, "learning_rate": 6.74170616113744e-08, "logits/generated": -1.7768300771713257, "logits/real": -1.7098900079727173, "logps/generated": -452.030029296875, "logps/real": -327.0111083984375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -34.611488342285156, "rewards/margins": 26.06864356994629, "rewards/real": -8.542844772338867, "step": 4120 }, { "epoch": 2.64, "learning_rate": 6.623222748815166e-08, "logits/generated": -1.715787649154663, "logits/real": -1.6770191192626953, "logps/generated": -441.8128356933594, "logps/real": -296.57110595703125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -34.08544158935547, "rewards/margins": 26.49606704711914, "rewards/real": -7.5893754959106445, "step": 4130 }, { "epoch": 2.65, "learning_rate": 6.504739336492891e-08, "logits/generated": -1.7766027450561523, "logits/real": -1.6856123208999634, "logps/generated": -450.623779296875, "logps/real": -295.4322204589844, "loss": 0.0051, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.94755172729492, "rewards/margins": 26.268753051757812, "rewards/real": -8.678799629211426, "step": 4140 }, { "epoch": 2.66, "learning_rate": 6.386255924170615e-08, "logits/generated": -1.7369333505630493, "logits/real": -1.6449072360992432, "logps/generated": -429.2544860839844, "logps/real": -283.56549072265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.380615234375, "rewards/margins": 24.91189956665039, "rewards/real": -8.468714714050293, "step": 4150 }, { "epoch": 2.66, "learning_rate": 6.26777251184834e-08, "logits/generated": -1.6794822216033936, "logits/real": -1.662726640701294, "logps/generated": -423.33184814453125, "logps/real": -332.1803894042969, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -31.899770736694336, "rewards/margins": 23.491817474365234, "rewards/real": -8.407957077026367, "step": 4160 }, { "epoch": 2.67, "learning_rate": 6.149289099526066e-08, "logits/generated": -1.7565444707870483, "logits/real": -1.7407537698745728, "logps/generated": -402.0395202636719, "logps/real": -327.73248291015625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -29.680978775024414, "rewards/margins": 20.341283798217773, "rewards/real": -9.339695930480957, "step": 4170 }, { "epoch": 2.67, "learning_rate": 6.030805687203791e-08, "logits/generated": -1.6737483739852905, "logits/real": -1.7306264638900757, "logps/generated": -436.11260986328125, "logps/real": -338.9502868652344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -32.65018844604492, "rewards/margins": 25.60652732849121, "rewards/real": -7.043665409088135, "step": 4180 }, { "epoch": 2.68, "learning_rate": 5.912322274881516e-08, "logits/generated": -1.6893961429595947, "logits/real": -1.7002031803131104, "logps/generated": -438.9224548339844, "logps/real": -327.76141357421875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -33.08007049560547, "rewards/margins": 24.394916534423828, "rewards/real": -8.685155868530273, "step": 4190 }, { "epoch": 2.69, "learning_rate": 5.793838862559241e-08, "logits/generated": -1.7550160884857178, "logits/real": -1.7153337001800537, "logps/generated": -444.51824951171875, "logps/real": -278.1734619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.22731399536133, "rewards/margins": 24.608617782592773, "rewards/real": -8.618697166442871, "step": 4200 }, { "epoch": 2.69, "learning_rate": 5.6753554502369666e-08, "logits/generated": -1.7421271800994873, "logits/real": -1.6843255758285522, "logps/generated": -428.5711975097656, "logps/real": -331.9335632324219, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -32.22383499145508, "rewards/margins": 24.138261795043945, "rewards/real": -8.085573196411133, "step": 4210 }, { "epoch": 2.7, "learning_rate": 5.556872037914691e-08, "logits/generated": -1.7008402347564697, "logits/real": -1.634892225265503, "logps/generated": -433.476806640625, "logps/real": -287.6949768066406, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -32.910457611083984, "rewards/margins": 24.70734977722168, "rewards/real": -8.203106880187988, "step": 4220 }, { "epoch": 2.71, "learning_rate": 5.4383886255924165e-08, "logits/generated": -1.7923284769058228, "logits/real": -1.7142051458358765, "logps/generated": -453.1385803222656, "logps/real": -300.06781005859375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -34.70769119262695, "rewards/margins": 25.626541137695312, "rewards/real": -9.081144332885742, "step": 4230 }, { "epoch": 2.71, "learning_rate": 5.319905213270142e-08, "logits/generated": -1.7816097736358643, "logits/real": -1.9599437713623047, "logps/generated": -453.1598205566406, "logps/real": -349.44171142578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -34.06262969970703, "rewards/margins": 25.226978302001953, "rewards/real": -8.835651397705078, "step": 4240 }, { "epoch": 2.72, "learning_rate": 5.201421800947867e-08, "logits/generated": -1.7193883657455444, "logits/real": -1.9186795949935913, "logps/generated": -463.2687072753906, "logps/real": -305.72314453125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -35.47248458862305, "rewards/margins": 25.576107025146484, "rewards/real": -9.896378517150879, "step": 4250 }, { "epoch": 2.73, "learning_rate": 5.082938388625592e-08, "logits/generated": -1.802926778793335, "logits/real": -1.9906041622161865, "logps/generated": -439.27899169921875, "logps/real": -342.9306945800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.147518157958984, "rewards/margins": 23.982521057128906, "rewards/real": -9.164995193481445, "step": 4260 }, { "epoch": 2.73, "learning_rate": 4.964454976303317e-08, "logits/generated": -1.8892700672149658, "logits/real": -2.009641647338867, "logps/generated": -437.8634338378906, "logps/real": -341.58135986328125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -33.01290512084961, "rewards/margins": 23.391399383544922, "rewards/real": -9.621504783630371, "step": 4270 }, { "epoch": 2.74, "learning_rate": 4.845971563981042e-08, "logits/generated": -1.7956806421279907, "logits/real": -1.9637285470962524, "logps/generated": -460.10601806640625, "logps/real": -356.51959228515625, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -34.52501678466797, "rewards/margins": 25.541818618774414, "rewards/real": -8.983198165893555, "step": 4280 }, { "epoch": 2.74, "learning_rate": 4.7274881516587676e-08, "logits/generated": -1.7769763469696045, "logits/real": -1.9866300821304321, "logps/generated": -436.766357421875, "logps/real": -326.61431884765625, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.676849365234375, "rewards/margins": 24.444482803344727, "rewards/real": -8.232365608215332, "step": 4290 }, { "epoch": 2.75, "learning_rate": 4.609004739336492e-08, "logits/generated": -1.8134574890136719, "logits/real": -1.982287049293518, "logps/generated": -467.0006408691406, "logps/real": -319.8429260253906, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -35.988407135009766, "rewards/margins": 27.567617416381836, "rewards/real": -8.420794486999512, "step": 4300 }, { "epoch": 2.76, "learning_rate": 4.4905213270142176e-08, "logits/generated": -1.6407321691513062, "logits/real": -1.7915595769882202, "logps/generated": -438.76019287109375, "logps/real": -317.3435974121094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.72128677368164, "rewards/margins": 24.755414962768555, "rewards/real": -8.965871810913086, "step": 4310 }, { "epoch": 2.76, "learning_rate": 4.372037914691943e-08, "logits/generated": -1.745700478553772, "logits/real": -1.9136505126953125, "logps/generated": -435.6446228027344, "logps/real": -311.99542236328125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -33.17008590698242, "rewards/margins": 24.030542373657227, "rewards/real": -9.139547348022461, "step": 4320 }, { "epoch": 2.77, "learning_rate": 4.253554502369668e-08, "logits/generated": -1.6967980861663818, "logits/real": -1.8322668075561523, "logps/generated": -457.9407653808594, "logps/real": -311.10418701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -35.436546325683594, "rewards/margins": 27.187633514404297, "rewards/real": -8.24891185760498, "step": 4330 }, { "epoch": 2.78, "learning_rate": 4.135071090047393e-08, "logits/generated": -1.7738192081451416, "logits/real": -2.0109634399414062, "logps/generated": -443.57568359375, "logps/real": -337.6534729003906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.9232177734375, "rewards/margins": 23.900415420532227, "rewards/real": -10.022802352905273, "step": 4340 }, { "epoch": 2.78, "learning_rate": 4.016587677725118e-08, "logits/generated": -1.744932770729065, "logits/real": -1.8545424938201904, "logps/generated": -459.23614501953125, "logps/real": -283.38372802734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -35.45733642578125, "rewards/margins": 24.5408878326416, "rewards/real": -10.9164457321167, "step": 4350 }, { "epoch": 2.79, "learning_rate": 3.8981042654028434e-08, "logits/generated": -1.6494481563568115, "logits/real": -1.8807508945465088, "logps/generated": -449.21856689453125, "logps/real": -347.99090576171875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -34.048919677734375, "rewards/margins": 25.075237274169922, "rewards/real": -8.97368049621582, "step": 4360 }, { "epoch": 2.8, "learning_rate": 3.779620853080569e-08, "logits/generated": -1.7925529479980469, "logits/real": -1.838467001914978, "logps/generated": -463.3714294433594, "logps/real": -335.3411865234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -34.72087478637695, "rewards/margins": 26.26070213317871, "rewards/real": -8.46017074584961, "step": 4370 }, { "epoch": 2.8, "learning_rate": 3.661137440758294e-08, "logits/generated": -1.6993271112442017, "logits/real": -1.7849382162094116, "logps/generated": -451.7796936035156, "logps/real": -284.07977294921875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/generated": -35.063255310058594, "rewards/margins": 26.02164077758789, "rewards/real": -9.041617393493652, "step": 4380 }, { "epoch": 2.81, "learning_rate": 3.5426540284360186e-08, "logits/generated": -1.5902955532073975, "logits/real": -1.7994792461395264, "logps/generated": -469.3233947753906, "logps/real": -322.81182861328125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -36.78888702392578, "rewards/margins": 27.999755859375, "rewards/real": -8.789128303527832, "step": 4390 }, { "epoch": 2.82, "learning_rate": 3.424170616113744e-08, "logits/generated": -1.7078588008880615, "logits/real": -1.9263912439346313, "logps/generated": -462.34368896484375, "logps/real": -322.7901916503906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -35.061485290527344, "rewards/margins": 26.207286834716797, "rewards/real": -8.854198455810547, "step": 4400 }, { "epoch": 2.82, "learning_rate": 3.305687203791469e-08, "logits/generated": -1.7351045608520508, "logits/real": -1.7719390392303467, "logps/generated": -442.97735595703125, "logps/real": -315.83087158203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.271141052246094, "rewards/margins": 24.884248733520508, "rewards/real": -8.386889457702637, "step": 4410 }, { "epoch": 2.83, "learning_rate": 3.1872037914691945e-08, "logits/generated": -1.6406385898590088, "logits/real": -1.8298654556274414, "logps/generated": -455.2843322753906, "logps/real": -318.89337158203125, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -35.38450241088867, "rewards/margins": 26.2564640045166, "rewards/real": -9.12803840637207, "step": 4420 }, { "epoch": 2.83, "learning_rate": 3.068720379146919e-08, "logits/generated": -1.7697868347167969, "logits/real": -1.822705864906311, "logps/generated": -438.582275390625, "logps/real": -304.01617431640625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -33.55647277832031, "rewards/margins": 23.950986862182617, "rewards/real": -9.605484008789062, "step": 4430 }, { "epoch": 2.84, "learning_rate": 2.9502369668246444e-08, "logits/generated": -1.6771583557128906, "logits/real": -1.854857087135315, "logps/generated": -430.46282958984375, "logps/real": -316.00958251953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -32.684688568115234, "rewards/margins": 23.534297943115234, "rewards/real": -9.150388717651367, "step": 4440 }, { "epoch": 2.85, "learning_rate": 2.8317535545023697e-08, "logits/generated": -1.7022714614868164, "logits/real": -1.8785192966461182, "logps/generated": -466.81396484375, "logps/real": -324.5412292480469, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -36.14876174926758, "rewards/margins": 27.45867919921875, "rewards/real": -8.690082550048828, "step": 4450 }, { "epoch": 2.85, "learning_rate": 2.7132701421800947e-08, "logits/generated": -1.694084882736206, "logits/real": -1.75972580909729, "logps/generated": -468.11016845703125, "logps/real": -322.52679443359375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -36.64365768432617, "rewards/margins": 27.119192123413086, "rewards/real": -9.524468421936035, "step": 4460 }, { "epoch": 2.86, "learning_rate": 2.59478672985782e-08, "logits/generated": -1.61457097530365, "logits/real": -1.8486804962158203, "logps/generated": -432.85284423828125, "logps/real": -327.989990234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.129432678222656, "rewards/margins": 24.179370880126953, "rewards/real": -8.950057983398438, "step": 4470 }, { "epoch": 2.87, "learning_rate": 2.476303317535545e-08, "logits/generated": -1.6624841690063477, "logits/real": -1.8223241567611694, "logps/generated": -456.5455017089844, "logps/real": -319.22509765625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -35.40416717529297, "rewards/margins": 26.277795791625977, "rewards/real": -9.12637710571289, "step": 4480 }, { "epoch": 2.87, "learning_rate": 2.3578199052132702e-08, "logits/generated": -1.7079921960830688, "logits/real": -1.829472541809082, "logps/generated": -408.7738342285156, "logps/real": -334.1302795410156, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/generated": -30.858179092407227, "rewards/margins": 22.388179779052734, "rewards/real": -8.470001220703125, "step": 4490 }, { "epoch": 2.88, "learning_rate": 2.239336492890995e-08, "logits/generated": -1.759307861328125, "logits/real": -1.8515949249267578, "logps/generated": -441.416259765625, "logps/real": -318.9807434082031, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -33.875450134277344, "rewards/margins": 24.575389862060547, "rewards/real": -9.300056457519531, "step": 4500 }, { "epoch": 2.89, "learning_rate": 2.1208530805687202e-08, "logits/generated": -1.6304250955581665, "logits/real": -1.8564281463623047, "logps/generated": -444.32659912109375, "logps/real": -306.439697265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.86289978027344, "rewards/margins": 25.232988357543945, "rewards/real": -8.62990951538086, "step": 4510 }, { "epoch": 2.89, "learning_rate": 2.002369668246445e-08, "logits/generated": -1.7856998443603516, "logits/real": -1.8842220306396484, "logps/generated": -467.9981384277344, "logps/real": -315.4876403808594, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -35.54204559326172, "rewards/margins": 26.91533851623535, "rewards/real": -8.626705169677734, "step": 4520 }, { "epoch": 2.9, "learning_rate": 1.8838862559241704e-08, "logits/generated": -1.7326618432998657, "logits/real": -1.9145400524139404, "logps/generated": -440.7589416503906, "logps/real": -306.02862548828125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -33.05302429199219, "rewards/margins": 24.692596435546875, "rewards/real": -8.360427856445312, "step": 4530 }, { "epoch": 2.9, "learning_rate": 1.7654028436018954e-08, "logits/generated": -1.7552152872085571, "logits/real": -1.817386269569397, "logps/generated": -461.41070556640625, "logps/real": -327.1177978515625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -34.42705535888672, "rewards/margins": 25.052804946899414, "rewards/real": -9.374256134033203, "step": 4540 }, { "epoch": 2.91, "learning_rate": 1.6469194312796207e-08, "logits/generated": -1.7163751125335693, "logits/real": -1.8879632949829102, "logps/generated": -425.6315002441406, "logps/real": -320.1476135253906, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.34682273864746, "rewards/margins": 22.973106384277344, "rewards/real": -8.373712539672852, "step": 4550 }, { "epoch": 2.92, "learning_rate": 1.528436018957346e-08, "logits/generated": -1.7140891551971436, "logits/real": -1.8986284732818604, "logps/generated": -440.7607421875, "logps/real": -326.2283935546875, "loss": 0.0047, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -32.59244918823242, "rewards/margins": 24.93307876586914, "rewards/real": -7.659371852874756, "step": 4560 }, { "epoch": 2.92, "learning_rate": 1.409952606635071e-08, "logits/generated": -1.7714250087738037, "logits/real": -1.9002504348754883, "logps/generated": -453.63543701171875, "logps/real": -300.76885986328125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -35.10737228393555, "rewards/margins": 27.279804229736328, "rewards/real": -7.827570915222168, "step": 4570 }, { "epoch": 2.93, "learning_rate": 1.2914691943127961e-08, "logits/generated": -1.6990067958831787, "logits/real": -1.9161514043807983, "logps/generated": -414.04302978515625, "logps/real": -300.9434814453125, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.5983829498291, "rewards/margins": 22.450963973999023, "rewards/real": -9.147419929504395, "step": 4580 }, { "epoch": 2.94, "learning_rate": 1.1729857819905212e-08, "logits/generated": -1.7278430461883545, "logits/real": -1.7927424907684326, "logps/generated": -472.21429443359375, "logps/real": -341.255126953125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -35.60918426513672, "rewards/margins": 27.14546775817871, "rewards/real": -8.463715553283691, "step": 4590 }, { "epoch": 2.94, "learning_rate": 1.0545023696682464e-08, "logits/generated": -1.6661436557769775, "logits/real": -1.824730634689331, "logps/generated": -441.8641052246094, "logps/real": -310.7605895996094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.483917236328125, "rewards/margins": 25.23209571838379, "rewards/real": -8.251824378967285, "step": 4600 }, { "epoch": 2.95, "learning_rate": 9.360189573459715e-09, "logits/generated": -1.776623010635376, "logits/real": -1.8051350116729736, "logps/generated": -457.2853088378906, "logps/real": -318.8353271484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -35.16924285888672, "rewards/margins": 26.07401466369629, "rewards/real": -9.09522533416748, "step": 4610 }, { "epoch": 2.96, "learning_rate": 8.175355450236966e-09, "logits/generated": -1.771087646484375, "logits/real": -1.8877332210540771, "logps/generated": -436.79986572265625, "logps/real": -330.20916748046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -32.55842590332031, "rewards/margins": 24.00469970703125, "rewards/real": -8.553728103637695, "step": 4620 }, { "epoch": 2.96, "learning_rate": 6.990521327014218e-09, "logits/generated": -1.7417593002319336, "logits/real": -1.8479112386703491, "logps/generated": -403.1101989746094, "logps/real": -297.042724609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -30.246688842773438, "rewards/margins": 22.63168716430664, "rewards/real": -7.615001678466797, "step": 4630 }, { "epoch": 2.97, "learning_rate": 5.805687203791469e-09, "logits/generated": -1.7405914068222046, "logits/real": -1.8920139074325562, "logps/generated": -443.20306396484375, "logps/real": -322.84783935546875, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.4544792175293, "rewards/margins": 24.869140625, "rewards/real": -8.585339546203613, "step": 4640 }, { "epoch": 2.98, "learning_rate": 4.62085308056872e-09, "logits/generated": -1.7274150848388672, "logits/real": -1.89272141456604, "logps/generated": -461.22607421875, "logps/real": -324.2146301269531, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -36.303890228271484, "rewards/margins": 27.63534927368164, "rewards/real": -8.668540954589844, "step": 4650 }, { "epoch": 2.98, "learning_rate": 3.4360189573459714e-09, "logits/generated": -1.7416044473648071, "logits/real": -1.9509055614471436, "logps/generated": -453.283935546875, "logps/real": -366.43060302734375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -33.6645393371582, "rewards/margins": 25.695453643798828, "rewards/real": -7.96908712387085, "step": 4660 }, { "epoch": 2.99, "learning_rate": 2.2511848341232227e-09, "logits/generated": -1.8054618835449219, "logits/real": -1.8594818115234375, "logps/generated": -428.821044921875, "logps/real": -289.93951416015625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -32.35812759399414, "rewards/margins": 23.261104583740234, "rewards/real": -9.097023010253906, "step": 4670 }, { "epoch": 2.99, "learning_rate": 1.0663507109004738e-09, "logits/generated": -1.6955547332763672, "logits/real": -1.981899619102478, "logps/generated": -423.219482421875, "logps/real": -349.48504638671875, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.58346176147461, "rewards/margins": 23.281259536743164, "rewards/real": -8.302202224731445, "step": 4680 }, { "epoch": 3.0, "step": 4689, "total_flos": 0.0, "train_loss": 0.023988249511193074, "train_runtime": 36939.4965, "train_samples_per_second": 4.061, "train_steps_per_second": 0.127 } ], "logging_steps": 10, "max_steps": 4689, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }