AmberYifan's picture
Model save
108c0a4 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 3126,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.597444089456869e-09,
"logits/generated": -1.5697296857833862,
"logits/real": 0.02788793109357357,
"logps/generated": -161.09165954589844,
"logps/real": -142.571533203125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 1.597444089456869e-08,
"logits/generated": -1.633887767791748,
"logits/real": -0.1783123016357422,
"logps/generated": -138.75979614257812,
"logps/real": -156.88577270507812,
"loss": 0.6935,
"rewards/accuracies": 0.4166666567325592,
"rewards/generated": 0.004628816619515419,
"rewards/margins": -0.002158037619665265,
"rewards/real": 0.0024707792326807976,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 3.194888178913738e-08,
"logits/generated": -1.609938621520996,
"logits/real": -0.08152679353952408,
"logps/generated": -143.5062713623047,
"logps/real": -153.39932250976562,
"loss": 0.6919,
"rewards/accuracies": 0.5874999761581421,
"rewards/generated": -0.002054329263046384,
"rewards/margins": 0.006409396883100271,
"rewards/real": 0.004355068318545818,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 4.7923322683706064e-08,
"logits/generated": -1.5137046575546265,
"logits/real": -0.25311604142189026,
"logps/generated": -142.5679168701172,
"logps/real": -165.5823516845703,
"loss": 0.6937,
"rewards/accuracies": 0.48750001192092896,
"rewards/generated": 0.0021794841159135103,
"rewards/margins": -0.005685324314981699,
"rewards/real": -0.0035058397334069014,
"step": 30
},
{
"epoch": 0.03,
"learning_rate": 6.389776357827476e-08,
"logits/generated": -1.5637849569320679,
"logits/real": -0.13809606432914734,
"logps/generated": -131.64889526367188,
"logps/real": -160.1712646484375,
"loss": 0.6956,
"rewards/accuracies": 0.42500001192092896,
"rewards/generated": 0.00549626350402832,
"rewards/margins": -0.00184511614497751,
"rewards/real": 0.003651147009804845,
"step": 40
},
{
"epoch": 0.03,
"learning_rate": 7.987220447284344e-08,
"logits/generated": -1.5748565196990967,
"logits/real": -0.09178884327411652,
"logps/generated": -135.52053833007812,
"logps/real": -153.59854125976562,
"loss": 0.6936,
"rewards/accuracies": 0.5,
"rewards/generated": -0.0018657876644283533,
"rewards/margins": -0.0018160634208470583,
"rewards/real": -0.003681850153952837,
"step": 50
},
{
"epoch": 0.04,
"learning_rate": 9.584664536741213e-08,
"logits/generated": -1.6641861200332642,
"logits/real": -0.2794622778892517,
"logps/generated": -143.5887908935547,
"logps/real": -161.76043701171875,
"loss": 0.6933,
"rewards/accuracies": 0.4625000059604645,
"rewards/generated": 0.002687716158106923,
"rewards/margins": -0.0016607001889497042,
"rewards/real": 0.001027016551233828,
"step": 60
},
{
"epoch": 0.04,
"learning_rate": 1.1182108626198082e-07,
"logits/generated": -1.7215445041656494,
"logits/real": -0.27054786682128906,
"logps/generated": -135.14833068847656,
"logps/real": -166.22914123535156,
"loss": 0.6924,
"rewards/accuracies": 0.4375,
"rewards/generated": 0.0037495135329663754,
"rewards/margins": -0.0029804420191794634,
"rewards/real": 0.000769071455579251,
"step": 70
},
{
"epoch": 0.05,
"learning_rate": 1.2779552715654952e-07,
"logits/generated": -1.6150354146957397,
"logits/real": -0.09898678958415985,
"logps/generated": -136.48165893554688,
"logps/real": -153.1577911376953,
"loss": 0.6921,
"rewards/accuracies": 0.4749999940395355,
"rewards/generated": 0.0034830570220947266,
"rewards/margins": -0.0015155791770666838,
"rewards/real": 0.001967477845028043,
"step": 80
},
{
"epoch": 0.06,
"learning_rate": 1.437699680511182e-07,
"logits/generated": -1.7415987253189087,
"logits/real": -0.13428936898708344,
"logps/generated": -147.66615295410156,
"logps/real": -163.003173828125,
"loss": 0.6908,
"rewards/accuracies": 0.5249999761581421,
"rewards/generated": -0.0029185961466282606,
"rewards/margins": 0.0036810163874179125,
"rewards/real": 0.0007624196587130427,
"step": 90
},
{
"epoch": 0.06,
"learning_rate": 1.5974440894568688e-07,
"logits/generated": -1.655890703201294,
"logits/real": -0.1572108417749405,
"logps/generated": -142.35391235351562,
"logps/real": -153.11849975585938,
"loss": 0.6918,
"rewards/accuracies": 0.5249999761581421,
"rewards/generated": -0.004457283299416304,
"rewards/margins": 0.0026433137245476246,
"rewards/real": -0.0018139698076993227,
"step": 100
},
{
"epoch": 0.07,
"learning_rate": 1.757188498402556e-07,
"logits/generated": -1.5630085468292236,
"logits/real": -0.24260249733924866,
"logps/generated": -141.05294799804688,
"logps/real": -168.055908203125,
"loss": 0.6911,
"rewards/accuracies": 0.6499999761581421,
"rewards/generated": -0.01049154344946146,
"rewards/margins": 0.012994527816772461,
"rewards/real": 0.002502984832972288,
"step": 110
},
{
"epoch": 0.08,
"learning_rate": 1.9169329073482426e-07,
"logits/generated": -1.6308023929595947,
"logits/real": -0.1937154084444046,
"logps/generated": -147.5388946533203,
"logps/real": -156.50515747070312,
"loss": 0.6906,
"rewards/accuracies": 0.5874999761581421,
"rewards/generated": -0.004769793711602688,
"rewards/margins": 0.0057532163336873055,
"rewards/real": 0.0009834242518991232,
"step": 120
},
{
"epoch": 0.08,
"learning_rate": 2.0766773162939297e-07,
"logits/generated": -1.559140920639038,
"logits/real": -0.2510816156864166,
"logps/generated": -131.83071899414062,
"logps/real": -164.52407836914062,
"loss": 0.6905,
"rewards/accuracies": 0.574999988079071,
"rewards/generated": -0.007517705671489239,
"rewards/margins": 0.009749026037752628,
"rewards/real": 0.002231321996077895,
"step": 130
},
{
"epoch": 0.09,
"learning_rate": 2.2364217252396164e-07,
"logits/generated": -1.6431344747543335,
"logits/real": -0.08175196498632431,
"logps/generated": -142.07656860351562,
"logps/real": -150.12713623046875,
"loss": 0.6904,
"rewards/accuracies": 0.625,
"rewards/generated": -0.009607553482055664,
"rewards/margins": 0.00782174151390791,
"rewards/real": -0.0017858125502243638,
"step": 140
},
{
"epoch": 0.1,
"learning_rate": 2.3961661341853033e-07,
"logits/generated": -1.7087827920913696,
"logits/real": -0.3516523540019989,
"logps/generated": -146.2122039794922,
"logps/real": -160.49546813964844,
"loss": 0.6906,
"rewards/accuracies": 0.612500011920929,
"rewards/generated": -0.009243173524737358,
"rewards/margins": 0.008949460461735725,
"rewards/real": -0.00029371288837864995,
"step": 150
},
{
"epoch": 0.1,
"learning_rate": 2.5559105431309904e-07,
"logits/generated": -1.7223360538482666,
"logits/real": -0.2035084068775177,
"logps/generated": -140.39572143554688,
"logps/real": -157.5358428955078,
"loss": 0.6889,
"rewards/accuracies": 0.6625000238418579,
"rewards/generated": -0.009247403591871262,
"rewards/margins": 0.012728390283882618,
"rewards/real": 0.0034809871576726437,
"step": 160
},
{
"epoch": 0.11,
"learning_rate": 2.715654952076677e-07,
"logits/generated": -1.749093770980835,
"logits/real": -0.13887380063533783,
"logps/generated": -144.42491149902344,
"logps/real": -150.71852111816406,
"loss": 0.688,
"rewards/accuracies": 0.550000011920929,
"rewards/generated": -0.009006218984723091,
"rewards/margins": 0.010439801029860973,
"rewards/real": 0.0014335823943838477,
"step": 170
},
{
"epoch": 0.12,
"learning_rate": 2.875399361022364e-07,
"logits/generated": -1.533534049987793,
"logits/real": -0.2637261152267456,
"logps/generated": -143.3952178955078,
"logps/real": -171.69735717773438,
"loss": 0.6891,
"rewards/accuracies": 0.6625000238418579,
"rewards/generated": -0.01498076505959034,
"rewards/margins": 0.009685126133263111,
"rewards/real": -0.005295639391988516,
"step": 180
},
{
"epoch": 0.12,
"learning_rate": 3.035143769968051e-07,
"logits/generated": -1.6899821758270264,
"logits/real": -0.24950842559337616,
"logps/generated": -138.8522186279297,
"logps/real": -165.54269409179688,
"loss": 0.6865,
"rewards/accuracies": 0.5375000238418579,
"rewards/generated": -0.007621455006301403,
"rewards/margins": 0.0072743757627904415,
"rewards/real": -0.0003470802039373666,
"step": 190
},
{
"epoch": 0.13,
"learning_rate": 3.1948881789137375e-07,
"logits/generated": -1.6720901727676392,
"logits/real": -0.13108500838279724,
"logps/generated": -143.76254272460938,
"logps/real": -152.6102294921875,
"loss": 0.6866,
"rewards/accuracies": 0.6499999761581421,
"rewards/generated": -0.016768785193562508,
"rewards/margins": 0.013681398704648018,
"rewards/real": -0.0030873871874064207,
"step": 200
},
{
"epoch": 0.13,
"learning_rate": 3.354632587859425e-07,
"logits/generated": -1.668308973312378,
"logits/real": -0.07337333261966705,
"logps/generated": -139.4440460205078,
"logps/real": -145.42164611816406,
"loss": 0.6848,
"rewards/accuracies": 0.625,
"rewards/generated": -0.013003669679164886,
"rewards/margins": 0.013915425166487694,
"rewards/real": 0.0009117558365687728,
"step": 210
},
{
"epoch": 0.14,
"learning_rate": 3.514376996805112e-07,
"logits/generated": -1.6061630249023438,
"logits/real": -0.04177895188331604,
"logps/generated": -141.5182647705078,
"logps/real": -143.1357879638672,
"loss": 0.6839,
"rewards/accuracies": 0.699999988079071,
"rewards/generated": -0.022188017144799232,
"rewards/margins": 0.020234428346157074,
"rewards/real": -0.0019535874016582966,
"step": 220
},
{
"epoch": 0.15,
"learning_rate": 3.6741214057507985e-07,
"logits/generated": -1.6456787586212158,
"logits/real": -0.032159410417079926,
"logps/generated": -137.02691650390625,
"logps/real": -152.61387634277344,
"loss": 0.6837,
"rewards/accuracies": 0.75,
"rewards/generated": -0.0165521539747715,
"rewards/margins": 0.021729346364736557,
"rewards/real": 0.005177192389965057,
"step": 230
},
{
"epoch": 0.15,
"learning_rate": 3.833865814696485e-07,
"logits/generated": -1.6691703796386719,
"logits/real": -0.4035402834415436,
"logps/generated": -143.48324584960938,
"logps/real": -179.24752807617188,
"loss": 0.6803,
"rewards/accuracies": 0.75,
"rewards/generated": -0.02668914757668972,
"rewards/margins": 0.02352159470319748,
"rewards/real": -0.0031675524078309536,
"step": 240
},
{
"epoch": 0.16,
"learning_rate": 3.993610223642173e-07,
"logits/generated": -1.5687007904052734,
"logits/real": -0.09528298676013947,
"logps/generated": -141.37509155273438,
"logps/real": -149.46035766601562,
"loss": 0.6762,
"rewards/accuracies": 0.8374999761581421,
"rewards/generated": -0.03992265835404396,
"rewards/margins": 0.043977029621601105,
"rewards/real": 0.004054374061524868,
"step": 250
},
{
"epoch": 0.17,
"learning_rate": 4.1533546325878595e-07,
"logits/generated": -1.6911046504974365,
"logits/real": -0.007008680608123541,
"logps/generated": -144.0658416748047,
"logps/real": -150.27243041992188,
"loss": 0.6756,
"rewards/accuracies": 0.800000011920929,
"rewards/generated": -0.03158753365278244,
"rewards/margins": 0.03852955996990204,
"rewards/real": 0.006942029111087322,
"step": 260
},
{
"epoch": 0.17,
"learning_rate": 4.313099041533546e-07,
"logits/generated": -1.6412391662597656,
"logits/real": -0.16801941394805908,
"logps/generated": -136.202392578125,
"logps/real": -153.8549346923828,
"loss": 0.6721,
"rewards/accuracies": 0.862500011920929,
"rewards/generated": -0.04177216440439224,
"rewards/margins": 0.04363798722624779,
"rewards/real": 0.001865826197899878,
"step": 270
},
{
"epoch": 0.18,
"learning_rate": 4.472843450479233e-07,
"logits/generated": -1.577200174331665,
"logits/real": -0.18333522975444794,
"logps/generated": -135.14651489257812,
"logps/real": -162.71102905273438,
"loss": 0.67,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -0.04428236186504364,
"rewards/margins": 0.050743866711854935,
"rewards/real": 0.0064615062437951565,
"step": 280
},
{
"epoch": 0.19,
"learning_rate": 4.63258785942492e-07,
"logits/generated": -1.7181251049041748,
"logits/real": -0.14545153081417084,
"logps/generated": -141.39950561523438,
"logps/real": -159.49375915527344,
"loss": 0.6695,
"rewards/accuracies": 0.862500011920929,
"rewards/generated": -0.04990307241678238,
"rewards/margins": 0.05374089628458023,
"rewards/real": 0.003837819444015622,
"step": 290
},
{
"epoch": 0.19,
"learning_rate": 4.792332268370607e-07,
"logits/generated": -1.5909569263458252,
"logits/real": -0.11655733734369278,
"logps/generated": -136.44984436035156,
"logps/real": -149.45106506347656,
"loss": 0.6671,
"rewards/accuracies": 0.7749999761581421,
"rewards/generated": -0.04627872630953789,
"rewards/margins": 0.04471098631620407,
"rewards/real": -0.0015677406918257475,
"step": 300
},
{
"epoch": 0.2,
"learning_rate": 4.952076677316294e-07,
"logits/generated": -1.6312742233276367,
"logits/real": -0.2683960795402527,
"logps/generated": -139.54454040527344,
"logps/real": -171.73904418945312,
"loss": 0.66,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -0.06426986306905746,
"rewards/margins": 0.06892012804746628,
"rewards/real": 0.004650264047086239,
"step": 310
},
{
"epoch": 0.2,
"learning_rate": 4.987557767507998e-07,
"logits/generated": -1.7353553771972656,
"logits/real": -0.2647973895072937,
"logps/generated": -140.25978088378906,
"logps/real": -161.4581298828125,
"loss": 0.6604,
"rewards/accuracies": 0.9375,
"rewards/generated": -0.0646638348698616,
"rewards/margins": 0.06798725575208664,
"rewards/real": 0.003323426004499197,
"step": 320
},
{
"epoch": 0.21,
"learning_rate": 4.969783149662282e-07,
"logits/generated": -1.4210546016693115,
"logits/real": -0.2087656706571579,
"logps/generated": -132.431640625,
"logps/real": -164.90301513671875,
"loss": 0.6557,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -0.07163882255554199,
"rewards/margins": 0.07509996742010117,
"rewards/real": 0.0034611367154866457,
"step": 330
},
{
"epoch": 0.22,
"learning_rate": 4.952008531816565e-07,
"logits/generated": -1.5858566761016846,
"logits/real": -0.19087788462638855,
"logps/generated": -137.87277221679688,
"logps/real": -170.6455841064453,
"loss": 0.6461,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -0.08108492195606232,
"rewards/margins": 0.09145854413509369,
"rewards/real": 0.010373624972999096,
"step": 340
},
{
"epoch": 0.22,
"learning_rate": 4.93423391397085e-07,
"logits/generated": -1.5935401916503906,
"logits/real": -0.18511667847633362,
"logps/generated": -135.37991333007812,
"logps/real": -161.39453125,
"loss": 0.6448,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -0.09194954484701157,
"rewards/margins": 0.10509081184864044,
"rewards/real": 0.013141264207661152,
"step": 350
},
{
"epoch": 0.23,
"learning_rate": 4.916459296125133e-07,
"logits/generated": -1.6975730657577515,
"logits/real": -0.1985872983932495,
"logps/generated": -141.4312744140625,
"logps/real": -158.44564819335938,
"loss": 0.6358,
"rewards/accuracies": 0.9375,
"rewards/generated": -0.10384906828403473,
"rewards/margins": 0.12381346523761749,
"rewards/real": 0.01996440999209881,
"step": 360
},
{
"epoch": 0.24,
"learning_rate": 4.898684678279417e-07,
"logits/generated": -1.6519880294799805,
"logits/real": -0.35222429037094116,
"logps/generated": -142.85623168945312,
"logps/real": -178.80612182617188,
"loss": 0.6306,
"rewards/accuracies": 1.0,
"rewards/generated": -0.12715503573417664,
"rewards/margins": 0.13996237516403198,
"rewards/real": 0.01280735433101654,
"step": 370
},
{
"epoch": 0.24,
"learning_rate": 4.8809100604337e-07,
"logits/generated": -1.5434781312942505,
"logits/real": -0.13071385025978088,
"logps/generated": -144.16851806640625,
"logps/real": -166.1132049560547,
"loss": 0.6274,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -0.12086117267608643,
"rewards/margins": 0.14237499237060547,
"rewards/real": 0.02151382341980934,
"step": 380
},
{
"epoch": 0.25,
"learning_rate": 4.863135442587984e-07,
"logits/generated": -1.6076228618621826,
"logits/real": -0.207803413271904,
"logps/generated": -142.91873168945312,
"logps/real": -157.12567138671875,
"loss": 0.621,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -0.13728059828281403,
"rewards/margins": 0.1584828794002533,
"rewards/real": 0.021202292293310165,
"step": 390
},
{
"epoch": 0.26,
"learning_rate": 4.845360824742267e-07,
"logits/generated": -1.5034089088439941,
"logits/real": -0.20472228527069092,
"logps/generated": -132.5041046142578,
"logps/real": -161.69406127929688,
"loss": 0.6184,
"rewards/accuracies": 1.0,
"rewards/generated": -0.13708344101905823,
"rewards/margins": 0.159152552485466,
"rewards/real": 0.02206914685666561,
"step": 400
},
{
"epoch": 0.26,
"learning_rate": 4.827586206896552e-07,
"logits/generated": -1.6673437356948853,
"logits/real": -0.18107284605503082,
"logps/generated": -142.1982879638672,
"logps/real": -157.3358154296875,
"loss": 0.6092,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.14462080597877502,
"rewards/margins": 0.17726922035217285,
"rewards/real": 0.03264839947223663,
"step": 410
},
{
"epoch": 0.27,
"learning_rate": 4.809811589050835e-07,
"logits/generated": -1.5609720945358276,
"logits/real": -0.14302347600460052,
"logps/generated": -138.09237670898438,
"logps/real": -162.0977020263672,
"loss": 0.6008,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.1684601604938507,
"rewards/margins": 0.19570282101631165,
"rewards/real": 0.027242666110396385,
"step": 420
},
{
"epoch": 0.28,
"learning_rate": 4.792036971205119e-07,
"logits/generated": -1.5268409252166748,
"logits/real": -0.1176341325044632,
"logps/generated": -135.7368927001953,
"logps/real": -153.20358276367188,
"loss": 0.5896,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -0.1638469398021698,
"rewards/margins": 0.19122377038002014,
"rewards/real": 0.027376821264624596,
"step": 430
},
{
"epoch": 0.28,
"learning_rate": 4.774262353359402e-07,
"logits/generated": -1.6257530450820923,
"logits/real": -0.1526736319065094,
"logps/generated": -139.25418090820312,
"logps/real": -156.4899139404297,
"loss": 0.5873,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.1955229938030243,
"rewards/margins": 0.23565657436847687,
"rewards/real": 0.04013354331254959,
"step": 440
},
{
"epoch": 0.29,
"learning_rate": 4.7564877355136863e-07,
"logits/generated": -1.6272735595703125,
"logits/real": -0.15557542443275452,
"logps/generated": -151.3992462158203,
"logps/real": -153.5313720703125,
"loss": 0.57,
"rewards/accuracies": 1.0,
"rewards/generated": -0.22929322719573975,
"rewards/margins": 0.2820231318473816,
"rewards/real": 0.052729904651641846,
"step": 450
},
{
"epoch": 0.29,
"learning_rate": 4.73871311766797e-07,
"logits/generated": -1.7158994674682617,
"logits/real": -0.3903907239437103,
"logps/generated": -144.54037475585938,
"logps/real": -180.07406616210938,
"loss": 0.5647,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -0.2549547553062439,
"rewards/margins": 0.2814892828464508,
"rewards/real": 0.02653454802930355,
"step": 460
},
{
"epoch": 0.3,
"learning_rate": 4.7209384998222536e-07,
"logits/generated": -1.6353950500488281,
"logits/real": -0.19543974101543427,
"logps/generated": -143.96304321289062,
"logps/real": -158.52206420898438,
"loss": 0.5595,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -0.2641616463661194,
"rewards/margins": 0.3153914213180542,
"rewards/real": 0.05122975632548332,
"step": 470
},
{
"epoch": 0.31,
"learning_rate": 4.7031638819765373e-07,
"logits/generated": -1.6073200702667236,
"logits/real": -0.14624395966529846,
"logps/generated": -142.60208129882812,
"logps/real": -160.1145477294922,
"loss": 0.5447,
"rewards/accuracies": 1.0,
"rewards/generated": -0.27238526940345764,
"rewards/margins": 0.3274722695350647,
"rewards/real": 0.055087022483348846,
"step": 480
},
{
"epoch": 0.31,
"learning_rate": 4.6853892641308215e-07,
"logits/generated": -1.5465781688690186,
"logits/real": -0.15420304238796234,
"logps/generated": -141.56056213378906,
"logps/real": -148.23391723632812,
"loss": 0.5345,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.27804866433143616,
"rewards/margins": 0.3418120741844177,
"rewards/real": 0.06376341730356216,
"step": 490
},
{
"epoch": 0.32,
"learning_rate": 4.6676146462851046e-07,
"logits/generated": -1.8107671737670898,
"logits/real": -0.2396336793899536,
"logps/generated": -150.1639404296875,
"logps/real": -157.54586791992188,
"loss": 0.5338,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.2998965084552765,
"rewards/margins": 0.352867066860199,
"rewards/real": 0.05297055095434189,
"step": 500
},
{
"epoch": 0.33,
"learning_rate": 4.649840028439388e-07,
"logits/generated": -1.583640694618225,
"logits/real": -0.08962409198284149,
"logps/generated": -140.02252197265625,
"logps/real": -157.4398956298828,
"loss": 0.5111,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.3320668339729309,
"rewards/margins": 0.4081410765647888,
"rewards/real": 0.07607419788837433,
"step": 510
},
{
"epoch": 0.33,
"learning_rate": 4.632065410593672e-07,
"logits/generated": -1.6017663478851318,
"logits/real": -0.16641400754451752,
"logps/generated": -140.48094177246094,
"logps/real": -152.2816925048828,
"loss": 0.508,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.32677119970321655,
"rewards/margins": 0.4095242917537689,
"rewards/real": 0.08275306969881058,
"step": 520
},
{
"epoch": 0.34,
"learning_rate": 4.6142907927479556e-07,
"logits/generated": -1.6886812448501587,
"logits/real": -0.14055995643138885,
"logps/generated": -144.4681854248047,
"logps/real": -148.32789611816406,
"loss": 0.5008,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.3314792215824127,
"rewards/margins": 0.40919432044029236,
"rewards/real": 0.07771513611078262,
"step": 530
},
{
"epoch": 0.35,
"learning_rate": 4.59651617490224e-07,
"logits/generated": -1.5692707300186157,
"logits/real": -0.17518237233161926,
"logps/generated": -142.68887329101562,
"logps/real": -158.769775390625,
"loss": 0.4812,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.3942539095878601,
"rewards/margins": 0.47341471910476685,
"rewards/real": 0.07916079461574554,
"step": 540
},
{
"epoch": 0.35,
"learning_rate": 4.578741557056523e-07,
"logits/generated": -1.585056185722351,
"logits/real": -0.1907566785812378,
"logps/generated": -149.67239379882812,
"logps/real": -164.2400665283203,
"loss": 0.4752,
"rewards/accuracies": 1.0,
"rewards/generated": -0.4361751973628998,
"rewards/margins": 0.5167232751846313,
"rewards/real": 0.08054807037115097,
"step": 550
},
{
"epoch": 0.36,
"learning_rate": 4.560966939210807e-07,
"logits/generated": -1.5427873134613037,
"logits/real": -0.2252950370311737,
"logps/generated": -148.17556762695312,
"logps/real": -153.58592224121094,
"loss": 0.4725,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.44603148102760315,
"rewards/margins": 0.540245532989502,
"rewards/real": 0.09421406686306,
"step": 560
},
{
"epoch": 0.36,
"learning_rate": 4.54319232136509e-07,
"logits/generated": -1.6634706258773804,
"logits/real": -0.10494379699230194,
"logps/generated": -148.4973602294922,
"logps/real": -156.35145568847656,
"loss": 0.4558,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -0.4902486801147461,
"rewards/margins": 0.5618138313293457,
"rewards/real": 0.0715651884675026,
"step": 570
},
{
"epoch": 0.37,
"learning_rate": 4.525417703519374e-07,
"logits/generated": -1.534942865371704,
"logits/real": -0.11929289996623993,
"logps/generated": -142.94210815429688,
"logps/real": -155.97193908691406,
"loss": 0.4502,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -0.49913454055786133,
"rewards/margins": 0.599966824054718,
"rewards/real": 0.1008322685956955,
"step": 580
},
{
"epoch": 0.38,
"learning_rate": 4.507643085673658e-07,
"logits/generated": -1.7383668422698975,
"logits/real": -0.0883287638425827,
"logps/generated": -153.69606018066406,
"logps/real": -157.78956604003906,
"loss": 0.4302,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.5414460301399231,
"rewards/margins": 0.6505511999130249,
"rewards/real": 0.10910521447658539,
"step": 590
},
{
"epoch": 0.38,
"learning_rate": 4.489868467827941e-07,
"logits/generated": -1.8656871318817139,
"logits/real": -0.06075664609670639,
"logps/generated": -147.55734252929688,
"logps/real": -150.363037109375,
"loss": 0.4354,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -0.5383075475692749,
"rewards/margins": 0.6746756434440613,
"rewards/real": 0.1363680064678192,
"step": 600
},
{
"epoch": 0.39,
"learning_rate": 4.4720938499822254e-07,
"logits/generated": -1.582039713859558,
"logits/real": -0.18077224493026733,
"logps/generated": -143.44656372070312,
"logps/real": -155.9421844482422,
"loss": 0.4154,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -0.5968641638755798,
"rewards/margins": 0.700276255607605,
"rewards/real": 0.10341213643550873,
"step": 610
},
{
"epoch": 0.4,
"learning_rate": 4.4543192321365085e-07,
"logits/generated": -1.710147500038147,
"logits/real": -0.11019489914178848,
"logps/generated": -152.04287719726562,
"logps/real": -154.15586853027344,
"loss": 0.4099,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -0.6149613857269287,
"rewards/margins": 0.7358669638633728,
"rewards/real": 0.12090563774108887,
"step": 620
},
{
"epoch": 0.4,
"learning_rate": 4.4365446142907927e-07,
"logits/generated": -1.474244475364685,
"logits/real": -0.2550203204154968,
"logps/generated": -142.00582885742188,
"logps/real": -169.1204833984375,
"loss": 0.4092,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.5503032207489014,
"rewards/margins": 0.6812837719917297,
"rewards/real": 0.1309804916381836,
"step": 630
},
{
"epoch": 0.41,
"learning_rate": 4.4187699964450764e-07,
"logits/generated": -1.7478046417236328,
"logits/real": -0.21401798725128174,
"logps/generated": -157.27719116210938,
"logps/real": -158.44354248046875,
"loss": 0.3751,
"rewards/accuracies": 1.0,
"rewards/generated": -0.7286791205406189,
"rewards/margins": 0.8884221315383911,
"rewards/real": 0.1597430408000946,
"step": 640
},
{
"epoch": 0.42,
"learning_rate": 4.4009953785993595e-07,
"logits/generated": -1.672858476638794,
"logits/real": -0.18186981976032257,
"logps/generated": -150.75625610351562,
"logps/real": -170.07919311523438,
"loss": 0.3658,
"rewards/accuracies": 1.0,
"rewards/generated": -0.7189536094665527,
"rewards/margins": 0.8496885299682617,
"rewards/real": 0.13073506951332092,
"step": 650
},
{
"epoch": 0.42,
"learning_rate": 4.3832207607536437e-07,
"logits/generated": -1.6526811122894287,
"logits/real": -0.14324292540550232,
"logps/generated": -151.4724578857422,
"logps/real": -154.22683715820312,
"loss": 0.365,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -0.7191274166107178,
"rewards/margins": 0.8864700198173523,
"rewards/real": 0.16734261810779572,
"step": 660
},
{
"epoch": 0.43,
"learning_rate": 4.365446142907927e-07,
"logits/generated": -1.5031042098999023,
"logits/real": -0.058509550988674164,
"logps/generated": -142.10000610351562,
"logps/real": -148.38555908203125,
"loss": 0.3644,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.672211229801178,
"rewards/margins": 0.8271724581718445,
"rewards/real": 0.1549612581729889,
"step": 670
},
{
"epoch": 0.44,
"learning_rate": 4.347671525062211e-07,
"logits/generated": -1.637528419494629,
"logits/real": -0.14800499379634857,
"logps/generated": -150.9839630126953,
"logps/real": -157.72259521484375,
"loss": 0.3411,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -0.8599473237991333,
"rewards/margins": 1.0459052324295044,
"rewards/real": 0.1859578788280487,
"step": 680
},
{
"epoch": 0.44,
"learning_rate": 4.3298969072164947e-07,
"logits/generated": -1.6163980960845947,
"logits/real": -0.21588876843452454,
"logps/generated": -149.24557495117188,
"logps/real": -159.4187774658203,
"loss": 0.3166,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -0.8925792574882507,
"rewards/margins": 1.0570037364959717,
"rewards/real": 0.16442444920539856,
"step": 690
},
{
"epoch": 0.45,
"learning_rate": 4.3121222893707783e-07,
"logits/generated": -1.5266709327697754,
"logits/real": -0.14610102772712708,
"logps/generated": -143.5114288330078,
"logps/real": -144.98056030273438,
"loss": 0.3339,
"rewards/accuracies": 0.9375,
"rewards/generated": -0.7892045974731445,
"rewards/margins": 0.9511388540267944,
"rewards/real": 0.16193436086177826,
"step": 700
},
{
"epoch": 0.45,
"learning_rate": 4.294347671525062e-07,
"logits/generated": -1.6533136367797852,
"logits/real": -0.06393162161111832,
"logps/generated": -152.63758850097656,
"logps/real": -149.1494903564453,
"loss": 0.3034,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.8443581461906433,
"rewards/margins": 1.0624687671661377,
"rewards/real": 0.2181106060743332,
"step": 710
},
{
"epoch": 0.46,
"learning_rate": 4.276573053679346e-07,
"logits/generated": -1.6860072612762451,
"logits/real": -0.24717557430267334,
"logps/generated": -150.0583953857422,
"logps/real": -158.61351013183594,
"loss": 0.3092,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -0.9428604245185852,
"rewards/margins": 1.096800446510315,
"rewards/real": 0.1539398431777954,
"step": 720
},
{
"epoch": 0.47,
"learning_rate": 4.2587984358336293e-07,
"logits/generated": -1.5238220691680908,
"logits/real": -0.3366602063179016,
"logps/generated": -143.13980102539062,
"logps/real": -169.9083709716797,
"loss": 0.308,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.8742044568061829,
"rewards/margins": 1.0897338390350342,
"rewards/real": 0.2155293971300125,
"step": 730
},
{
"epoch": 0.47,
"learning_rate": 4.241023817987913e-07,
"logits/generated": -1.6218751668930054,
"logits/real": -0.18687456846237183,
"logps/generated": -152.43099975585938,
"logps/real": -156.63699340820312,
"loss": 0.2944,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -1.0684313774108887,
"rewards/margins": 1.297039270401001,
"rewards/real": 0.22860772907733917,
"step": 740
},
{
"epoch": 0.48,
"learning_rate": 4.2232492001421966e-07,
"logits/generated": -1.6673694849014282,
"logits/real": -0.17359259724617004,
"logps/generated": -153.6688995361328,
"logps/real": -171.0704803466797,
"loss": 0.2723,
"rewards/accuracies": 1.0,
"rewards/generated": -1.088458776473999,
"rewards/margins": 1.3285064697265625,
"rewards/real": 0.24004778265953064,
"step": 750
},
{
"epoch": 0.49,
"learning_rate": 4.2054745822964803e-07,
"logits/generated": -1.588289499282837,
"logits/real": -0.3443184196949005,
"logps/generated": -156.00241088867188,
"logps/real": -176.3258514404297,
"loss": 0.28,
"rewards/accuracies": 1.0,
"rewards/generated": -1.1722843647003174,
"rewards/margins": 1.3391616344451904,
"rewards/real": 0.16687722504138947,
"step": 760
},
{
"epoch": 0.49,
"learning_rate": 4.1876999644507645e-07,
"logits/generated": -1.5838396549224854,
"logits/real": -0.16283896565437317,
"logps/generated": -153.18551635742188,
"logps/real": -156.0181427001953,
"loss": 0.2632,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -1.1464365720748901,
"rewards/margins": 1.3280103206634521,
"rewards/real": 0.1815737932920456,
"step": 770
},
{
"epoch": 0.5,
"learning_rate": 4.1699253466050476e-07,
"logits/generated": -1.5133371353149414,
"logits/real": -0.09465299546718597,
"logps/generated": -150.0225830078125,
"logps/real": -146.9311981201172,
"loss": 0.2495,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.2300481796264648,
"rewards/margins": 1.5034099817276,
"rewards/real": 0.2733617424964905,
"step": 780
},
{
"epoch": 0.51,
"learning_rate": 4.152150728759332e-07,
"logits/generated": -1.6122316122055054,
"logits/real": -0.2318686991930008,
"logps/generated": -159.11160278320312,
"logps/real": -168.3660430908203,
"loss": 0.2474,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.27286696434021,
"rewards/margins": 1.4775692224502563,
"rewards/real": 0.20470228791236877,
"step": 790
},
{
"epoch": 0.51,
"learning_rate": 4.134376110913615e-07,
"logits/generated": -1.694283127784729,
"logits/real": -0.10422974824905396,
"logps/generated": -157.1562042236328,
"logps/real": -150.5861358642578,
"loss": 0.2281,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.3571908473968506,
"rewards/margins": 1.6250684261322021,
"rewards/real": 0.26787763833999634,
"step": 800
},
{
"epoch": 0.52,
"learning_rate": 4.1166014930678986e-07,
"logits/generated": -1.5623177289962769,
"logits/real": -0.1493486613035202,
"logps/generated": -151.51956176757812,
"logps/real": -148.74160766601562,
"loss": 0.2271,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.2717982530593872,
"rewards/margins": 1.5056483745574951,
"rewards/real": 0.23385019600391388,
"step": 810
},
{
"epoch": 0.52,
"learning_rate": 4.098826875222183e-07,
"logits/generated": -1.6703298091888428,
"logits/real": -0.10680408775806427,
"logps/generated": -153.7378387451172,
"logps/real": -155.27723693847656,
"loss": 0.2101,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.5243198871612549,
"rewards/margins": 1.7411978244781494,
"rewards/real": 0.2168780267238617,
"step": 820
},
{
"epoch": 0.53,
"learning_rate": 4.081052257376466e-07,
"logits/generated": -1.6254603862762451,
"logits/real": -0.12683448195457458,
"logps/generated": -152.67662048339844,
"logps/real": -159.438720703125,
"loss": 0.2012,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.4263641834259033,
"rewards/margins": 1.615281343460083,
"rewards/real": 0.18891693651676178,
"step": 830
},
{
"epoch": 0.54,
"learning_rate": 4.06327763953075e-07,
"logits/generated": -1.6256685256958008,
"logits/real": -0.2439662665128708,
"logps/generated": -168.36373901367188,
"logps/real": -169.80215454101562,
"loss": 0.2073,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.3876028060913086,
"rewards/margins": 1.599313735961914,
"rewards/real": 0.21171092987060547,
"step": 840
},
{
"epoch": 0.54,
"learning_rate": 4.045503021685033e-07,
"logits/generated": -1.4529263973236084,
"logits/real": -0.10771781206130981,
"logps/generated": -144.90695190429688,
"logps/real": -161.789306640625,
"loss": 0.2015,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.5157387256622314,
"rewards/margins": 1.7433464527130127,
"rewards/real": 0.22760768234729767,
"step": 850
},
{
"epoch": 0.55,
"learning_rate": 4.0277284038393174e-07,
"logits/generated": -1.5856255292892456,
"logits/real": -0.188165545463562,
"logps/generated": -155.0406494140625,
"logps/real": -153.3172607421875,
"loss": 0.2001,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.531455636024475,
"rewards/margins": 1.780940055847168,
"rewards/real": 0.24948418140411377,
"step": 860
},
{
"epoch": 0.56,
"learning_rate": 4.009953785993601e-07,
"logits/generated": -1.59652578830719,
"logits/real": -0.19590969383716583,
"logps/generated": -158.6524200439453,
"logps/real": -162.3628692626953,
"loss": 0.1732,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.6548792123794556,
"rewards/margins": 1.9046993255615234,
"rewards/real": 0.24982018768787384,
"step": 870
},
{
"epoch": 0.56,
"learning_rate": 3.992179168147884e-07,
"logits/generated": -1.622241735458374,
"logits/real": -0.22373679280281067,
"logps/generated": -151.82644653320312,
"logps/real": -163.46112060546875,
"loss": 0.1631,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.8496040105819702,
"rewards/margins": 2.1095592975616455,
"rewards/real": 0.25995510816574097,
"step": 880
},
{
"epoch": 0.57,
"learning_rate": 3.9744045503021684e-07,
"logits/generated": -1.6477899551391602,
"logits/real": -0.17104220390319824,
"logps/generated": -160.24609375,
"logps/real": -157.3255157470703,
"loss": 0.1826,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -1.8226982355117798,
"rewards/margins": 2.0370171070098877,
"rewards/real": 0.21431896090507507,
"step": 890
},
{
"epoch": 0.58,
"learning_rate": 3.956629932456452e-07,
"logits/generated": -1.5971219539642334,
"logits/real": -0.07671569287776947,
"logps/generated": -157.93313598632812,
"logps/real": -151.52809143066406,
"loss": 0.1566,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.9616973400115967,
"rewards/margins": 2.2585387229919434,
"rewards/real": 0.29684123396873474,
"step": 900
},
{
"epoch": 0.58,
"learning_rate": 3.938855314610736e-07,
"logits/generated": -1.4671550989151,
"logits/real": -0.33207738399505615,
"logps/generated": -153.27047729492188,
"logps/real": -168.5091552734375,
"loss": 0.1614,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.8668949604034424,
"rewards/margins": 2.094343662261963,
"rewards/real": 0.22744879126548767,
"step": 910
},
{
"epoch": 0.59,
"learning_rate": 3.9210806967650194e-07,
"logits/generated": -1.5912812948226929,
"logits/real": -0.12044389545917511,
"logps/generated": -165.49241638183594,
"logps/real": -161.30966186523438,
"loss": 0.1496,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -2.0533995628356934,
"rewards/margins": 2.2546448707580566,
"rewards/real": 0.20124495029449463,
"step": 920
},
{
"epoch": 0.6,
"learning_rate": 3.903306078919303e-07,
"logits/generated": -1.474174976348877,
"logits/real": -0.13984277844429016,
"logps/generated": -151.71311950683594,
"logps/real": -147.8576202392578,
"loss": 0.163,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.827612280845642,
"rewards/margins": 2.1235859394073486,
"rewards/real": 0.29597336053848267,
"step": 930
},
{
"epoch": 0.6,
"learning_rate": 3.8855314610735867e-07,
"logits/generated": -1.5910863876342773,
"logits/real": -0.2784571945667267,
"logps/generated": -161.6417999267578,
"logps/real": -166.55007934570312,
"loss": 0.1643,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -2.0047945976257324,
"rewards/margins": 2.273448944091797,
"rewards/real": 0.2686540186405182,
"step": 940
},
{
"epoch": 0.61,
"learning_rate": 3.867756843227871e-07,
"logits/generated": -1.5748822689056396,
"logits/real": -0.13812735676765442,
"logps/generated": -168.13827514648438,
"logps/real": -146.0213623046875,
"loss": 0.1434,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -2.1277716159820557,
"rewards/margins": 2.380551815032959,
"rewards/real": 0.25278064608573914,
"step": 950
},
{
"epoch": 0.61,
"learning_rate": 3.849982225382154e-07,
"logits/generated": -1.5670706033706665,
"logits/real": -0.2948365807533264,
"logps/generated": -157.73426818847656,
"logps/real": -163.10662841796875,
"loss": 0.1319,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -2.087007522583008,
"rewards/margins": 2.3846383094787598,
"rewards/real": 0.29763054847717285,
"step": 960
},
{
"epoch": 0.62,
"learning_rate": 3.8322076075364377e-07,
"logits/generated": -1.6536438465118408,
"logits/real": -0.20418302714824677,
"logps/generated": -164.49765014648438,
"logps/real": -156.45957946777344,
"loss": 0.1406,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.122313976287842,
"rewards/margins": 2.3531196117401123,
"rewards/real": 0.2308053970336914,
"step": 970
},
{
"epoch": 0.63,
"learning_rate": 3.8144329896907214e-07,
"logits/generated": -1.5101605653762817,
"logits/real": 0.031563229858875275,
"logps/generated": -160.07464599609375,
"logps/real": -141.91098022460938,
"loss": 0.1351,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.1049656867980957,
"rewards/margins": 2.405392646789551,
"rewards/real": 0.30042701959609985,
"step": 980
},
{
"epoch": 0.63,
"learning_rate": 3.796658371845005e-07,
"logits/generated": -1.5053074359893799,
"logits/real": -0.3015785813331604,
"logps/generated": -160.33193969726562,
"logps/real": -172.7531280517578,
"loss": 0.127,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -2.241503953933716,
"rewards/margins": 2.3939685821533203,
"rewards/real": 0.15246476233005524,
"step": 990
},
{
"epoch": 0.64,
"learning_rate": 3.778883753999289e-07,
"logits/generated": -1.6345714330673218,
"logits/real": -0.10730817168951035,
"logps/generated": -164.5872802734375,
"logps/real": -144.282958984375,
"loss": 0.134,
"rewards/accuracies": 1.0,
"rewards/generated": -2.2479634284973145,
"rewards/margins": 2.539016008377075,
"rewards/real": 0.29105255007743835,
"step": 1000
},
{
"epoch": 0.65,
"learning_rate": 3.7611091361535723e-07,
"logits/generated": -1.4203786849975586,
"logits/real": -0.08897562325000763,
"logps/generated": -153.96353149414062,
"logps/real": -148.22430419921875,
"loss": 0.1316,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -2.2757656574249268,
"rewards/margins": 2.5660629272460938,
"rewards/real": 0.2902970016002655,
"step": 1010
},
{
"epoch": 0.65,
"learning_rate": 3.7433345183078565e-07,
"logits/generated": -1.655291199684143,
"logits/real": -0.01987510919570923,
"logps/generated": -165.6507110595703,
"logps/real": -138.74452209472656,
"loss": 0.1208,
"rewards/accuracies": 1.0,
"rewards/generated": -2.6197855472564697,
"rewards/margins": 2.930170774459839,
"rewards/real": 0.3103852868080139,
"step": 1020
},
{
"epoch": 0.66,
"learning_rate": 3.7255599004621397e-07,
"logits/generated": -1.4382855892181396,
"logits/real": -0.18755117058753967,
"logps/generated": -164.83363342285156,
"logps/real": -162.2533416748047,
"loss": 0.1202,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.4666531085968018,
"rewards/margins": 2.706902265548706,
"rewards/real": 0.24024927616119385,
"step": 1030
},
{
"epoch": 0.67,
"learning_rate": 3.7077852826164233e-07,
"logits/generated": -1.6496721506118774,
"logits/real": -0.22290463745594025,
"logps/generated": -171.09011840820312,
"logps/real": -166.6168670654297,
"loss": 0.1093,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.7524375915527344,
"rewards/margins": 2.975261688232422,
"rewards/real": 0.22282417118549347,
"step": 1040
},
{
"epoch": 0.67,
"learning_rate": 3.6900106647707075e-07,
"logits/generated": -1.7099498510360718,
"logits/real": -0.09361065924167633,
"logps/generated": -173.44967651367188,
"logps/real": -159.67605590820312,
"loss": 0.1062,
"rewards/accuracies": 1.0,
"rewards/generated": -2.8430135250091553,
"rewards/margins": 3.0707504749298096,
"rewards/real": 0.22773659229278564,
"step": 1050
},
{
"epoch": 0.68,
"learning_rate": 3.6722360469249906e-07,
"logits/generated": -1.425708532333374,
"logits/real": -0.14051005244255066,
"logps/generated": -165.50164794921875,
"logps/real": -154.0505828857422,
"loss": 0.1099,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.7030253410339355,
"rewards/margins": 3.061180830001831,
"rewards/real": 0.3581555485725403,
"step": 1060
},
{
"epoch": 0.68,
"learning_rate": 3.654461429079275e-07,
"logits/generated": -1.6046979427337646,
"logits/real": -0.15294823050498962,
"logps/generated": -172.5651092529297,
"logps/real": -155.49411010742188,
"loss": 0.0994,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.900844097137451,
"rewards/margins": 3.12347149848938,
"rewards/real": 0.22262760996818542,
"step": 1070
},
{
"epoch": 0.69,
"learning_rate": 3.6366868112335585e-07,
"logits/generated": -1.4243303537368774,
"logits/real": -0.11334830522537231,
"logps/generated": -156.65003967285156,
"logps/real": -148.0675048828125,
"loss": 0.1142,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.6817686557769775,
"rewards/margins": 2.9299864768981934,
"rewards/real": 0.2482178956270218,
"step": 1080
},
{
"epoch": 0.7,
"learning_rate": 3.618912193387842e-07,
"logits/generated": -1.584937334060669,
"logits/real": -0.11413073539733887,
"logps/generated": -178.1724090576172,
"logps/real": -150.05291748046875,
"loss": 0.1007,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.004488468170166,
"rewards/margins": 3.3728480339050293,
"rewards/real": 0.3683595657348633,
"step": 1090
},
{
"epoch": 0.7,
"learning_rate": 3.601137575542126e-07,
"logits/generated": -1.45297372341156,
"logits/real": -0.279954195022583,
"logps/generated": -172.93008422851562,
"logps/real": -165.13259887695312,
"loss": 0.0938,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.9406588077545166,
"rewards/margins": 3.235691785812378,
"rewards/real": 0.29503294825553894,
"step": 1100
},
{
"epoch": 0.71,
"learning_rate": 3.583362957696409e-07,
"logits/generated": -1.4498844146728516,
"logits/real": -0.13191112875938416,
"logps/generated": -161.51988220214844,
"logps/real": -152.23013305664062,
"loss": 0.0928,
"rewards/accuracies": 1.0,
"rewards/generated": -2.7946717739105225,
"rewards/margins": 3.1111600399017334,
"rewards/real": 0.3164881467819214,
"step": 1110
},
{
"epoch": 0.72,
"learning_rate": 3.565588339850693e-07,
"logits/generated": -1.4884414672851562,
"logits/real": -0.017497604712843895,
"logps/generated": -172.79934692382812,
"logps/real": -154.42282104492188,
"loss": 0.0864,
"rewards/accuracies": 1.0,
"rewards/generated": -3.134819984436035,
"rewards/margins": 3.4709200859069824,
"rewards/real": 0.3361000418663025,
"step": 1120
},
{
"epoch": 0.72,
"learning_rate": 3.547813722004977e-07,
"logits/generated": -1.6363372802734375,
"logits/real": -0.22440211474895477,
"logps/generated": -170.09425354003906,
"logps/real": -162.13650512695312,
"loss": 0.0984,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.131570816040039,
"rewards/margins": 3.428776264190674,
"rewards/real": 0.29720538854599,
"step": 1130
},
{
"epoch": 0.73,
"learning_rate": 3.5300391041592605e-07,
"logits/generated": -1.484832763671875,
"logits/real": -0.10550673305988312,
"logps/generated": -168.85377502441406,
"logps/real": -150.67498779296875,
"loss": 0.0837,
"rewards/accuracies": 1.0,
"rewards/generated": -3.156721830368042,
"rewards/margins": 3.53771710395813,
"rewards/real": 0.3809953033924103,
"step": 1140
},
{
"epoch": 0.74,
"learning_rate": 3.512264486313544e-07,
"logits/generated": -1.59353768825531,
"logits/real": -0.1766626536846161,
"logps/generated": -176.32730102539062,
"logps/real": -161.31967163085938,
"loss": 0.0912,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.0488924980163574,
"rewards/margins": 3.374645709991455,
"rewards/real": 0.32575327157974243,
"step": 1150
},
{
"epoch": 0.74,
"learning_rate": 3.494489868467828e-07,
"logits/generated": -1.556908369064331,
"logits/real": -0.23450179398059845,
"logps/generated": -183.6482696533203,
"logps/real": -159.4730987548828,
"loss": 0.0868,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.4513683319091797,
"rewards/margins": 3.7840518951416016,
"rewards/real": 0.33268359303474426,
"step": 1160
},
{
"epoch": 0.75,
"learning_rate": 3.4767152506221114e-07,
"logits/generated": -1.6676868200302124,
"logits/real": -0.32121220231056213,
"logps/generated": -172.13592529296875,
"logps/real": -174.69143676757812,
"loss": 0.0945,
"rewards/accuracies": 1.0,
"rewards/generated": -3.157599449157715,
"rewards/margins": 3.516270875930786,
"rewards/real": 0.35867148637771606,
"step": 1170
},
{
"epoch": 0.75,
"learning_rate": 3.458940632776395e-07,
"logits/generated": -1.4721453189849854,
"logits/real": -0.13291652500629425,
"logps/generated": -169.03042602539062,
"logps/real": -140.19325256347656,
"loss": 0.0937,
"rewards/accuracies": 1.0,
"rewards/generated": -3.0053653717041016,
"rewards/margins": 3.348203659057617,
"rewards/real": 0.3428387939929962,
"step": 1180
},
{
"epoch": 0.76,
"learning_rate": 3.441166014930679e-07,
"logits/generated": -1.5415700674057007,
"logits/real": -0.20805084705352783,
"logps/generated": -173.4481658935547,
"logps/real": -152.56295776367188,
"loss": 0.0859,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.1931285858154297,
"rewards/margins": 3.5694432258605957,
"rewards/real": 0.3763141930103302,
"step": 1190
},
{
"epoch": 0.77,
"learning_rate": 3.4233913970849624e-07,
"logits/generated": -1.5936585664749146,
"logits/real": -0.13701248168945312,
"logps/generated": -179.35784912109375,
"logps/real": -149.34103393554688,
"loss": 0.0821,
"rewards/accuracies": 1.0,
"rewards/generated": -3.652930498123169,
"rewards/margins": 4.033869743347168,
"rewards/real": 0.3809398114681244,
"step": 1200
},
{
"epoch": 0.77,
"learning_rate": 3.405616779239246e-07,
"logits/generated": -1.418588399887085,
"logits/real": -0.20335432887077332,
"logps/generated": -173.30572509765625,
"logps/real": -161.86990356445312,
"loss": 0.077,
"rewards/accuracies": 1.0,
"rewards/generated": -3.459143877029419,
"rewards/margins": 3.7515323162078857,
"rewards/real": 0.29238831996917725,
"step": 1210
},
{
"epoch": 0.78,
"learning_rate": 3.38784216139353e-07,
"logits/generated": -1.5245991945266724,
"logits/real": -0.04596313461661339,
"logps/generated": -177.37301635742188,
"logps/real": -146.84376525878906,
"loss": 0.0816,
"rewards/accuracies": 1.0,
"rewards/generated": -3.607050657272339,
"rewards/margins": 3.9615414142608643,
"rewards/real": 0.35449081659317017,
"step": 1220
},
{
"epoch": 0.79,
"learning_rate": 3.370067543547814e-07,
"logits/generated": -1.5164806842803955,
"logits/real": -0.10607216507196426,
"logps/generated": -174.8607177734375,
"logps/real": -140.32608032226562,
"loss": 0.0887,
"rewards/accuracies": 1.0,
"rewards/generated": -3.272444486618042,
"rewards/margins": 3.617063045501709,
"rewards/real": 0.34461817145347595,
"step": 1230
},
{
"epoch": 0.79,
"learning_rate": 3.352292925702097e-07,
"logits/generated": -1.4171545505523682,
"logits/real": -0.10191817581653595,
"logps/generated": -176.97470092773438,
"logps/real": -158.6232147216797,
"loss": 0.0788,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.494141101837158,
"rewards/margins": 3.8596279621124268,
"rewards/real": 0.36548665165901184,
"step": 1240
},
{
"epoch": 0.8,
"learning_rate": 3.334518307856381e-07,
"logits/generated": -1.532880187034607,
"logits/real": -0.10620009899139404,
"logps/generated": -175.44747924804688,
"logps/real": -155.5845489501953,
"loss": 0.076,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.504039764404297,
"rewards/margins": 3.8370566368103027,
"rewards/real": 0.33301740884780884,
"step": 1250
},
{
"epoch": 0.81,
"learning_rate": 3.316743690010665e-07,
"logits/generated": -1.5539209842681885,
"logits/real": 0.018734993413090706,
"logps/generated": -178.60914611816406,
"logps/real": -141.38162231445312,
"loss": 0.0602,
"rewards/accuracies": 1.0,
"rewards/generated": -3.7286376953125,
"rewards/margins": 4.203549385070801,
"rewards/real": 0.474911630153656,
"step": 1260
},
{
"epoch": 0.81,
"learning_rate": 3.298969072164948e-07,
"logits/generated": -1.4938045740127563,
"logits/real": -0.2437242567539215,
"logps/generated": -173.7627716064453,
"logps/real": -169.41697692871094,
"loss": 0.0852,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.6397221088409424,
"rewards/margins": 3.9213058948516846,
"rewards/real": 0.28158318996429443,
"step": 1270
},
{
"epoch": 0.82,
"learning_rate": 3.281194454319232e-07,
"logits/generated": -1.4745817184448242,
"logits/real": -0.3218688666820526,
"logps/generated": -178.52267456054688,
"logps/real": -164.70156860351562,
"loss": 0.0814,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.73933744430542,
"rewards/margins": 4.0132269859313965,
"rewards/real": 0.2738892138004303,
"step": 1280
},
{
"epoch": 0.83,
"learning_rate": 3.2634198364735154e-07,
"logits/generated": -1.3811590671539307,
"logits/real": -0.2854043245315552,
"logps/generated": -169.4147186279297,
"logps/real": -154.60781860351562,
"loss": 0.0859,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.4979610443115234,
"rewards/margins": 3.756443738937378,
"rewards/real": 0.258482426404953,
"step": 1290
},
{
"epoch": 0.83,
"learning_rate": 3.2456452186277996e-07,
"logits/generated": -1.6183143854141235,
"logits/real": -0.16637897491455078,
"logps/generated": -185.69973754882812,
"logps/real": -156.8994598388672,
"loss": 0.0636,
"rewards/accuracies": 1.0,
"rewards/generated": -3.972916841506958,
"rewards/margins": 4.307384490966797,
"rewards/real": 0.3344675302505493,
"step": 1300
},
{
"epoch": 0.84,
"learning_rate": 3.227870600782083e-07,
"logits/generated": -1.466426134109497,
"logits/real": -0.013457128778100014,
"logps/generated": -183.57412719726562,
"logps/real": -144.6224822998047,
"loss": 0.0681,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.7342000007629395,
"rewards/margins": 4.176131248474121,
"rewards/real": 0.44193094968795776,
"step": 1310
},
{
"epoch": 0.84,
"learning_rate": 3.210095982936367e-07,
"logits/generated": -1.6522903442382812,
"logits/real": -0.17722484469413757,
"logps/generated": -176.93878173828125,
"logps/real": -152.43270874023438,
"loss": 0.0732,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.882312774658203,
"rewards/margins": 4.290909767150879,
"rewards/real": 0.40859729051589966,
"step": 1320
},
{
"epoch": 0.85,
"learning_rate": 3.1923213650906505e-07,
"logits/generated": -1.5739877223968506,
"logits/real": -0.13895940780639648,
"logps/generated": -183.1358184814453,
"logps/real": -158.11346435546875,
"loss": 0.0595,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.044293403625488,
"rewards/margins": 4.440293312072754,
"rewards/real": 0.39599940180778503,
"step": 1330
},
{
"epoch": 0.86,
"learning_rate": 3.1745467472449337e-07,
"logits/generated": -1.4868980646133423,
"logits/real": -0.21168294548988342,
"logps/generated": -174.8867645263672,
"logps/real": -164.8509063720703,
"loss": 0.0788,
"rewards/accuracies": 1.0,
"rewards/generated": -3.7173595428466797,
"rewards/margins": 4.043525218963623,
"rewards/real": 0.3261655867099762,
"step": 1340
},
{
"epoch": 0.86,
"learning_rate": 3.156772129399218e-07,
"logits/generated": -1.4937036037445068,
"logits/real": -0.08979051560163498,
"logps/generated": -172.25582885742188,
"logps/real": -138.26126098632812,
"loss": 0.0632,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.6981003284454346,
"rewards/margins": 4.175982475280762,
"rewards/real": 0.47788214683532715,
"step": 1350
},
{
"epoch": 0.87,
"learning_rate": 3.1389975115535015e-07,
"logits/generated": -1.4787460565567017,
"logits/real": -0.13007709383964539,
"logps/generated": -172.90484619140625,
"logps/real": -150.52801513671875,
"loss": 0.0638,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.8822848796844482,
"rewards/margins": 4.298541069030762,
"rewards/real": 0.41625672578811646,
"step": 1360
},
{
"epoch": 0.88,
"learning_rate": 3.121222893707785e-07,
"logits/generated": -1.5493468046188354,
"logits/real": -0.2214834988117218,
"logps/generated": -184.1126251220703,
"logps/real": -168.06861877441406,
"loss": 0.0797,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -4.093251705169678,
"rewards/margins": 4.350040435791016,
"rewards/real": 0.2567889094352722,
"step": 1370
},
{
"epoch": 0.88,
"learning_rate": 3.103448275862069e-07,
"logits/generated": -1.63626229763031,
"logits/real": -0.19701644778251648,
"logps/generated": -187.59336853027344,
"logps/real": -165.13125610351562,
"loss": 0.0734,
"rewards/accuracies": 1.0,
"rewards/generated": -4.259321212768555,
"rewards/margins": 4.569417476654053,
"rewards/real": 0.3100959062576294,
"step": 1380
},
{
"epoch": 0.89,
"learning_rate": 3.0856736580163525e-07,
"logits/generated": -1.472497820854187,
"logits/real": -0.20631149411201477,
"logps/generated": -176.0330352783203,
"logps/real": -159.4289093017578,
"loss": 0.0574,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.0790910720825195,
"rewards/margins": 4.325627326965332,
"rewards/real": 0.24653713405132294,
"step": 1390
},
{
"epoch": 0.9,
"learning_rate": 3.067899040170636e-07,
"logits/generated": -1.4534804821014404,
"logits/real": -0.1081136092543602,
"logps/generated": -184.11058044433594,
"logps/real": -157.69619750976562,
"loss": 0.0579,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -4.127554893493652,
"rewards/margins": 4.575366973876953,
"rewards/real": 0.4478122591972351,
"step": 1400
},
{
"epoch": 0.9,
"learning_rate": 3.05012442232492e-07,
"logits/generated": -1.5773451328277588,
"logits/real": -0.11705253273248672,
"logps/generated": -192.18496704101562,
"logps/real": -155.13021850585938,
"loss": 0.0499,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.557686805725098,
"rewards/margins": 4.943387031555176,
"rewards/real": 0.38570016622543335,
"step": 1410
},
{
"epoch": 0.91,
"learning_rate": 3.0323498044792035e-07,
"logits/generated": -1.486706018447876,
"logits/real": -0.019088217988610268,
"logps/generated": -180.96835327148438,
"logps/real": -144.02244567871094,
"loss": 0.0602,
"rewards/accuracies": 1.0,
"rewards/generated": -4.163193225860596,
"rewards/margins": 4.452478885650635,
"rewards/real": 0.2892855107784271,
"step": 1420
},
{
"epoch": 0.91,
"learning_rate": 3.014575186633487e-07,
"logits/generated": -1.518604040145874,
"logits/real": -0.1295831948518753,
"logps/generated": -178.1422882080078,
"logps/real": -154.9306640625,
"loss": 0.0632,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -4.255062103271484,
"rewards/margins": 4.6544036865234375,
"rewards/real": 0.39934203028678894,
"step": 1430
},
{
"epoch": 0.92,
"learning_rate": 2.996800568787771e-07,
"logits/generated": -1.4814355373382568,
"logits/real": -0.1749541461467743,
"logps/generated": -173.97134399414062,
"logps/real": -152.14175415039062,
"loss": 0.071,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.158992290496826,
"rewards/margins": 4.453802108764648,
"rewards/real": 0.29480981826782227,
"step": 1440
},
{
"epoch": 0.93,
"learning_rate": 2.9790259509420545e-07,
"logits/generated": -1.534990668296814,
"logits/real": -0.15847769379615784,
"logps/generated": -187.8436279296875,
"logps/real": -155.74073791503906,
"loss": 0.0552,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -4.2658538818359375,
"rewards/margins": 4.6822991371154785,
"rewards/real": 0.41644495725631714,
"step": 1450
},
{
"epoch": 0.93,
"learning_rate": 2.9612513330963387e-07,
"logits/generated": -1.5072695016860962,
"logits/real": 0.0005754769081249833,
"logps/generated": -182.82794189453125,
"logps/real": -132.48300170898438,
"loss": 0.0646,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.370685577392578,
"rewards/margins": 4.957348346710205,
"rewards/real": 0.5866621732711792,
"step": 1460
},
{
"epoch": 0.94,
"learning_rate": 2.943476715250622e-07,
"logits/generated": -1.4925824403762817,
"logits/real": -0.10418804734945297,
"logps/generated": -186.21078491210938,
"logps/real": -149.9343719482422,
"loss": 0.0518,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.665707588195801,
"rewards/margins": 5.203892707824707,
"rewards/real": 0.5381842851638794,
"step": 1470
},
{
"epoch": 0.95,
"learning_rate": 2.9257020974049054e-07,
"logits/generated": -1.5403038263320923,
"logits/real": -0.24279017746448517,
"logps/generated": -187.37631225585938,
"logps/real": -162.3560791015625,
"loss": 0.0608,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.447120189666748,
"rewards/margins": 4.881729602813721,
"rewards/real": 0.4346093535423279,
"step": 1480
},
{
"epoch": 0.95,
"learning_rate": 2.9079274795591896e-07,
"logits/generated": -1.3779773712158203,
"logits/real": -0.14756569266319275,
"logps/generated": -178.95181274414062,
"logps/real": -165.2563018798828,
"loss": 0.0656,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -4.4514360427856445,
"rewards/margins": 4.870530605316162,
"rewards/real": 0.41909438371658325,
"step": 1490
},
{
"epoch": 0.96,
"learning_rate": 2.890152861713473e-07,
"logits/generated": -1.3544880151748657,
"logits/real": -0.11259318888187408,
"logps/generated": -182.8341827392578,
"logps/real": -151.156494140625,
"loss": 0.0573,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.413695812225342,
"rewards/margins": 4.731995582580566,
"rewards/real": 0.31829962134361267,
"step": 1500
},
{
"epoch": 0.97,
"learning_rate": 2.872378243867757e-07,
"logits/generated": -1.5561285018920898,
"logits/real": -0.14099498093128204,
"logps/generated": -184.40415954589844,
"logps/real": -154.1420135498047,
"loss": 0.0734,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.361074924468994,
"rewards/margins": 4.7188801765441895,
"rewards/real": 0.35780465602874756,
"step": 1510
},
{
"epoch": 0.97,
"learning_rate": 2.85460362602204e-07,
"logits/generated": -1.4556692838668823,
"logits/real": -0.15908826887607574,
"logps/generated": -179.03842163085938,
"logps/real": -151.68093872070312,
"loss": 0.0625,
"rewards/accuracies": 1.0,
"rewards/generated": -4.344630241394043,
"rewards/margins": 4.7134857177734375,
"rewards/real": 0.36885565519332886,
"step": 1520
},
{
"epoch": 0.98,
"learning_rate": 2.8368290081763243e-07,
"logits/generated": -1.4455296993255615,
"logits/real": -0.10524747520685196,
"logps/generated": -178.05532836914062,
"logps/real": -151.86215209960938,
"loss": 0.0725,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -4.490212917327881,
"rewards/margins": 4.926787376403809,
"rewards/real": 0.4365745484828949,
"step": 1530
},
{
"epoch": 0.99,
"learning_rate": 2.819054390330608e-07,
"logits/generated": -1.5643322467803955,
"logits/real": -0.22240431606769562,
"logps/generated": -187.14395141601562,
"logps/real": -157.94808959960938,
"loss": 0.0598,
"rewards/accuracies": 1.0,
"rewards/generated": -4.599512577056885,
"rewards/margins": 5.061091899871826,
"rewards/real": 0.46157994866371155,
"step": 1540
},
{
"epoch": 0.99,
"learning_rate": 2.801279772484891e-07,
"logits/generated": -1.4671770334243774,
"logits/real": -0.19106905162334442,
"logps/generated": -180.06546020507812,
"logps/real": -160.6680145263672,
"loss": 0.0703,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -4.547145843505859,
"rewards/margins": 4.966696739196777,
"rewards/real": 0.4195513129234314,
"step": 1550
},
{
"epoch": 1.0,
"learning_rate": 2.783505154639175e-07,
"logits/generated": -1.4373109340667725,
"logits/real": -0.14462901651859283,
"logps/generated": -187.5804901123047,
"logps/real": -151.3854217529297,
"loss": 0.0692,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -4.61647891998291,
"rewards/margins": 5.218323707580566,
"rewards/real": 0.6018449068069458,
"step": 1560
},
{
"epoch": 1.0,
"learning_rate": 2.7657305367934584e-07,
"logits/generated": -1.5236389636993408,
"logits/real": -0.18100953102111816,
"logps/generated": -183.6896514892578,
"logps/real": -155.19091796875,
"loss": 0.0668,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.461154460906982,
"rewards/margins": 5.004509925842285,
"rewards/real": 0.5433556437492371,
"step": 1570
},
{
"epoch": 1.01,
"learning_rate": 2.7479559189477426e-07,
"logits/generated": -1.4411672353744507,
"logits/real": -0.09060608595609665,
"logps/generated": -182.39309692382812,
"logps/real": -149.1797637939453,
"loss": 0.0545,
"rewards/accuracies": 1.0,
"rewards/generated": -4.790371894836426,
"rewards/margins": 5.318713665008545,
"rewards/real": 0.52834153175354,
"step": 1580
},
{
"epoch": 1.02,
"learning_rate": 2.730181301102026e-07,
"logits/generated": -1.4803905487060547,
"logits/real": -0.14638617634773254,
"logps/generated": -190.68942260742188,
"logps/real": -158.1866912841797,
"loss": 0.0516,
"rewards/accuracies": 1.0,
"rewards/generated": -4.740262031555176,
"rewards/margins": 5.252830505371094,
"rewards/real": 0.5125688910484314,
"step": 1590
},
{
"epoch": 1.02,
"learning_rate": 2.71240668325631e-07,
"logits/generated": -1.4773404598236084,
"logits/real": -0.28140923380851746,
"logps/generated": -185.19883728027344,
"logps/real": -168.70620727539062,
"loss": 0.0616,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -4.644852638244629,
"rewards/margins": 5.096782684326172,
"rewards/real": 0.45193013548851013,
"step": 1600
},
{
"epoch": 1.03,
"learning_rate": 2.6946320654105936e-07,
"logits/generated": -1.497534155845642,
"logits/real": -0.36784881353378296,
"logps/generated": -178.10247802734375,
"logps/real": -165.84774780273438,
"loss": 0.0613,
"rewards/accuracies": 1.0,
"rewards/generated": -4.512256145477295,
"rewards/margins": 4.852838039398193,
"rewards/real": 0.3405814468860626,
"step": 1610
},
{
"epoch": 1.04,
"learning_rate": 2.676857447564877e-07,
"logits/generated": -1.4658567905426025,
"logits/real": -0.12854836881160736,
"logps/generated": -188.9214324951172,
"logps/real": -150.326416015625,
"loss": 0.0622,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.890933036804199,
"rewards/margins": 5.391529083251953,
"rewards/real": 0.5005959272384644,
"step": 1620
},
{
"epoch": 1.04,
"learning_rate": 2.659082829719161e-07,
"logits/generated": -1.3702716827392578,
"logits/real": -0.18641087412834167,
"logps/generated": -184.32167053222656,
"logps/real": -166.14932250976562,
"loss": 0.0576,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -4.7197346687316895,
"rewards/margins": 5.14710807800293,
"rewards/real": 0.42737287282943726,
"step": 1630
},
{
"epoch": 1.05,
"learning_rate": 2.6413082118734445e-07,
"logits/generated": -1.5208382606506348,
"logits/real": -0.10880865156650543,
"logps/generated": -189.2379150390625,
"logps/real": -157.01681518554688,
"loss": 0.0565,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -5.028722286224365,
"rewards/margins": 5.426579475402832,
"rewards/real": 0.3978571891784668,
"step": 1640
},
{
"epoch": 1.06,
"learning_rate": 2.623533594027728e-07,
"logits/generated": -1.476301908493042,
"logits/real": -0.19111457467079163,
"logps/generated": -186.6832275390625,
"logps/real": -155.54295349121094,
"loss": 0.0473,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -4.851848125457764,
"rewards/margins": 5.253470420837402,
"rewards/real": 0.40162190794944763,
"step": 1650
},
{
"epoch": 1.06,
"learning_rate": 2.605758976182012e-07,
"logits/generated": -1.5290987491607666,
"logits/real": -0.1336485594511032,
"logps/generated": -206.06509399414062,
"logps/real": -151.84494018554688,
"loss": 0.0509,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -5.372347831726074,
"rewards/margins": 5.8217620849609375,
"rewards/real": 0.4494136869907379,
"step": 1660
},
{
"epoch": 1.07,
"learning_rate": 2.587984358336296e-07,
"logits/generated": -1.5546083450317383,
"logits/real": -0.23617632687091827,
"logps/generated": -203.20187377929688,
"logps/real": -173.64894104003906,
"loss": 0.04,
"rewards/accuracies": 1.0,
"rewards/generated": -5.4583282470703125,
"rewards/margins": 5.816749095916748,
"rewards/real": 0.35842153429985046,
"step": 1670
},
{
"epoch": 1.07,
"learning_rate": 2.570209740490579e-07,
"logits/generated": -1.4508366584777832,
"logits/real": -0.04887630045413971,
"logps/generated": -186.7039031982422,
"logps/real": -153.81202697753906,
"loss": 0.0566,
"rewards/accuracies": 0.9375,
"rewards/generated": -4.691603660583496,
"rewards/margins": 5.064540386199951,
"rewards/real": 0.3729364275932312,
"step": 1680
},
{
"epoch": 1.08,
"learning_rate": 2.5524351226448634e-07,
"logits/generated": -1.3482738733291626,
"logits/real": -0.2794376015663147,
"logps/generated": -198.63124084472656,
"logps/real": -172.26560974121094,
"loss": 0.0505,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -5.2879533767700195,
"rewards/margins": 5.73902702331543,
"rewards/real": 0.45107364654541016,
"step": 1690
},
{
"epoch": 1.09,
"learning_rate": 2.5346605047991465e-07,
"logits/generated": -1.5188000202178955,
"logits/real": -0.07588844746351242,
"logps/generated": -190.33047485351562,
"logps/real": -149.24380493164062,
"loss": 0.0473,
"rewards/accuracies": 1.0,
"rewards/generated": -4.852927207946777,
"rewards/margins": 5.346456527709961,
"rewards/real": 0.4935285151004791,
"step": 1700
},
{
"epoch": 1.09,
"learning_rate": 2.51688588695343e-07,
"logits/generated": -1.4112365245819092,
"logits/real": -0.037810131907463074,
"logps/generated": -181.25367736816406,
"logps/real": -135.86203002929688,
"loss": 0.045,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.706342697143555,
"rewards/margins": 5.279058456420898,
"rewards/real": 0.5727157592773438,
"step": 1710
},
{
"epoch": 1.1,
"learning_rate": 2.499111269107714e-07,
"logits/generated": -1.4743269681930542,
"logits/real": -0.3346148133277893,
"logps/generated": -201.05142211914062,
"logps/real": -184.9207305908203,
"loss": 0.0392,
"rewards/accuracies": 1.0,
"rewards/generated": -5.311040878295898,
"rewards/margins": 5.642301559448242,
"rewards/real": 0.3312605321407318,
"step": 1720
},
{
"epoch": 1.11,
"learning_rate": 2.4813366512619975e-07,
"logits/generated": -1.530133843421936,
"logits/real": -0.06566596776247025,
"logps/generated": -189.20339965820312,
"logps/real": -152.02157592773438,
"loss": 0.0492,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -5.331114768981934,
"rewards/margins": 5.823639869689941,
"rewards/real": 0.4925246238708496,
"step": 1730
},
{
"epoch": 1.11,
"learning_rate": 2.4635620334162817e-07,
"logits/generated": -1.4688317775726318,
"logits/real": -0.11931806802749634,
"logps/generated": -190.11228942871094,
"logps/real": -157.91522216796875,
"loss": 0.0519,
"rewards/accuracies": 1.0,
"rewards/generated": -5.377558708190918,
"rewards/margins": 5.723202705383301,
"rewards/real": 0.3456438183784485,
"step": 1740
},
{
"epoch": 1.12,
"learning_rate": 2.4457874155705653e-07,
"logits/generated": -1.5590466260910034,
"logits/real": -0.058088403195142746,
"logps/generated": -196.60153198242188,
"logps/real": -145.74993896484375,
"loss": 0.0481,
"rewards/accuracies": 1.0,
"rewards/generated": -5.340886116027832,
"rewards/margins": 5.873441219329834,
"rewards/real": 0.532554030418396,
"step": 1750
},
{
"epoch": 1.13,
"learning_rate": 2.428012797724849e-07,
"logits/generated": -1.46759831905365,
"logits/real": -0.08282347023487091,
"logps/generated": -195.71517944335938,
"logps/real": -151.4615478515625,
"loss": 0.0455,
"rewards/accuracies": 1.0,
"rewards/generated": -5.418810844421387,
"rewards/margins": 5.871450424194336,
"rewards/real": 0.452639639377594,
"step": 1760
},
{
"epoch": 1.13,
"learning_rate": 2.4102381798791327e-07,
"logits/generated": -1.6086094379425049,
"logits/real": -0.11050989478826523,
"logps/generated": -208.8617401123047,
"logps/real": -159.17808532714844,
"loss": 0.0382,
"rewards/accuracies": 1.0,
"rewards/generated": -6.02325439453125,
"rewards/margins": 6.450479030609131,
"rewards/real": 0.427224338054657,
"step": 1770
},
{
"epoch": 1.14,
"learning_rate": 2.392463562033416e-07,
"logits/generated": -1.5008881092071533,
"logits/real": -0.15384702384471893,
"logps/generated": -188.76950073242188,
"logps/real": -159.75119018554688,
"loss": 0.0412,
"rewards/accuracies": 1.0,
"rewards/generated": -5.168057918548584,
"rewards/margins": 5.656711101531982,
"rewards/real": 0.48865312337875366,
"step": 1780
},
{
"epoch": 1.15,
"learning_rate": 2.3746889441877e-07,
"logits/generated": -1.4526275396347046,
"logits/real": -0.3002064824104309,
"logps/generated": -199.5317840576172,
"logps/real": -170.03663635253906,
"loss": 0.0607,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -5.6653547286987305,
"rewards/margins": 6.049663066864014,
"rewards/real": 0.3843079209327698,
"step": 1790
},
{
"epoch": 1.15,
"learning_rate": 2.3569143263419836e-07,
"logits/generated": -1.6317417621612549,
"logits/real": -0.10097722709178925,
"logps/generated": -197.29315185546875,
"logps/real": -150.00180053710938,
"loss": 0.0414,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -5.416452407836914,
"rewards/margins": 5.868271827697754,
"rewards/real": 0.45181870460510254,
"step": 1800
},
{
"epoch": 1.16,
"learning_rate": 2.3391397084962673e-07,
"logits/generated": -1.4566489458084106,
"logits/real": -0.1597038060426712,
"logps/generated": -182.79869079589844,
"logps/real": -150.32553100585938,
"loss": 0.0467,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -5.3501973152160645,
"rewards/margins": 5.728513240814209,
"rewards/real": 0.37831613421440125,
"step": 1810
},
{
"epoch": 1.16,
"learning_rate": 2.3213650906505507e-07,
"logits/generated": -1.4283339977264404,
"logits/real": -0.01142942439764738,
"logps/generated": -187.8427734375,
"logps/real": -133.63656616210938,
"loss": 0.0486,
"rewards/accuracies": 1.0,
"rewards/generated": -5.248843193054199,
"rewards/margins": 5.7334394454956055,
"rewards/real": 0.4845956861972809,
"step": 1820
},
{
"epoch": 1.17,
"learning_rate": 2.3035904728048346e-07,
"logits/generated": -1.47895085811615,
"logits/real": -0.06862284243106842,
"logps/generated": -202.21640014648438,
"logps/real": -152.25730895996094,
"loss": 0.0335,
"rewards/accuracies": 1.0,
"rewards/generated": -5.783873558044434,
"rewards/margins": 6.278851509094238,
"rewards/real": 0.49497756361961365,
"step": 1830
},
{
"epoch": 1.18,
"learning_rate": 2.2858158549591183e-07,
"logits/generated": -1.5177428722381592,
"logits/real": -0.22056560218334198,
"logps/generated": -202.6049041748047,
"logps/real": -165.22787475585938,
"loss": 0.0576,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -5.6912360191345215,
"rewards/margins": 6.0315961837768555,
"rewards/real": 0.3403596580028534,
"step": 1840
},
{
"epoch": 1.18,
"learning_rate": 2.268041237113402e-07,
"logits/generated": -1.371692419052124,
"logits/real": -0.06362702697515488,
"logps/generated": -191.365234375,
"logps/real": -143.11497497558594,
"loss": 0.0449,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -5.428120136260986,
"rewards/margins": 6.007481098175049,
"rewards/real": 0.5793606042861938,
"step": 1850
},
{
"epoch": 1.19,
"learning_rate": 2.2502666192676856e-07,
"logits/generated": -1.279131293296814,
"logits/real": -0.09258531033992767,
"logps/generated": -190.04415893554688,
"logps/real": -146.318115234375,
"loss": 0.0404,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -5.40255069732666,
"rewards/margins": 5.839011192321777,
"rewards/real": 0.4364606440067291,
"step": 1860
},
{
"epoch": 1.2,
"learning_rate": 2.2324920014219693e-07,
"logits/generated": -1.4574114084243774,
"logits/real": -0.11219004541635513,
"logps/generated": -194.90048217773438,
"logps/real": -162.45753479003906,
"loss": 0.0458,
"rewards/accuracies": 1.0,
"rewards/generated": -5.726960182189941,
"rewards/margins": 6.180015563964844,
"rewards/real": 0.4530550539493561,
"step": 1870
},
{
"epoch": 1.2,
"learning_rate": 2.2147173835762532e-07,
"logits/generated": -1.5723118782043457,
"logits/real": -0.04347873479127884,
"logps/generated": -209.44775390625,
"logps/real": -149.29434204101562,
"loss": 0.0388,
"rewards/accuracies": 1.0,
"rewards/generated": -5.9704813957214355,
"rewards/margins": 6.489261627197266,
"rewards/real": 0.5187799334526062,
"step": 1880
},
{
"epoch": 1.21,
"learning_rate": 2.1969427657305366e-07,
"logits/generated": -1.4479305744171143,
"logits/real": -0.0961630642414093,
"logps/generated": -197.63656616210938,
"logps/real": -152.1067352294922,
"loss": 0.0491,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -5.782441139221191,
"rewards/margins": 6.282473087310791,
"rewards/real": 0.5000313520431519,
"step": 1890
},
{
"epoch": 1.22,
"learning_rate": 2.1791681478848203e-07,
"logits/generated": -1.435664415359497,
"logits/real": -0.1993381530046463,
"logps/generated": -190.99139404296875,
"logps/real": -152.6283416748047,
"loss": 0.0426,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -5.520625114440918,
"rewards/margins": 5.948267936706543,
"rewards/real": 0.42764243483543396,
"step": 1900
},
{
"epoch": 1.22,
"learning_rate": 2.161393530039104e-07,
"logits/generated": -1.458038568496704,
"logits/real": -0.12041410058736801,
"logps/generated": -192.7799530029297,
"logps/real": -155.81527709960938,
"loss": 0.0388,
"rewards/accuracies": 1.0,
"rewards/generated": -5.656401634216309,
"rewards/margins": 6.175935745239258,
"rewards/real": 0.519533097743988,
"step": 1910
},
{
"epoch": 1.23,
"learning_rate": 2.1436189121933878e-07,
"logits/generated": -1.3725850582122803,
"logits/real": -0.1823103129863739,
"logps/generated": -192.62945556640625,
"logps/real": -168.57968139648438,
"loss": 0.0402,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -5.725317001342773,
"rewards/margins": 6.0909833908081055,
"rewards/real": 0.36566513776779175,
"step": 1920
},
{
"epoch": 1.23,
"learning_rate": 2.1258442943476715e-07,
"logits/generated": -1.4110640287399292,
"logits/real": 0.0704520046710968,
"logps/generated": -194.18740844726562,
"logps/real": -138.5281219482422,
"loss": 0.0424,
"rewards/accuracies": 1.0,
"rewards/generated": -5.5607404708862305,
"rewards/margins": 6.144190788269043,
"rewards/real": 0.5834503769874573,
"step": 1930
},
{
"epoch": 1.24,
"learning_rate": 2.1080696765019552e-07,
"logits/generated": -1.479405164718628,
"logits/real": -0.22825618088245392,
"logps/generated": -200.736083984375,
"logps/real": -163.63735961914062,
"loss": 0.0318,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -5.762534141540527,
"rewards/margins": 6.215503692626953,
"rewards/real": 0.4529697000980377,
"step": 1940
},
{
"epoch": 1.25,
"learning_rate": 2.0902950586562388e-07,
"logits/generated": -1.4002245664596558,
"logits/real": -0.14121662080287933,
"logps/generated": -198.7134552001953,
"logps/real": -152.26492309570312,
"loss": 0.0427,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -5.689166069030762,
"rewards/margins": 6.122091770172119,
"rewards/real": 0.43292540311813354,
"step": 1950
},
{
"epoch": 1.25,
"learning_rate": 2.0725204408105225e-07,
"logits/generated": -1.4297267198562622,
"logits/real": -0.19367796182632446,
"logps/generated": -185.13780212402344,
"logps/real": -156.70587158203125,
"loss": 0.0436,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -5.418356895446777,
"rewards/margins": 5.945398330688477,
"rewards/real": 0.5270417928695679,
"step": 1960
},
{
"epoch": 1.26,
"learning_rate": 2.0547458229648061e-07,
"logits/generated": -1.5275622606277466,
"logits/real": -0.3052076995372772,
"logps/generated": -208.7298583984375,
"logps/real": -178.06521606445312,
"loss": 0.0376,
"rewards/accuracies": 1.0,
"rewards/generated": -6.276623249053955,
"rewards/margins": 6.54474401473999,
"rewards/real": 0.2681209444999695,
"step": 1970
},
{
"epoch": 1.27,
"learning_rate": 2.0369712051190898e-07,
"logits/generated": -1.4920480251312256,
"logits/real": -0.11144111305475235,
"logps/generated": -205.11953735351562,
"logps/real": -154.30245971679688,
"loss": 0.0376,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -5.8552117347717285,
"rewards/margins": 6.331951141357422,
"rewards/real": 0.4767402112483978,
"step": 1980
},
{
"epoch": 1.27,
"learning_rate": 2.0191965872733735e-07,
"logits/generated": -1.4427194595336914,
"logits/real": -0.16335290670394897,
"logps/generated": -204.39944458007812,
"logps/real": -155.73521423339844,
"loss": 0.0372,
"rewards/accuracies": 1.0,
"rewards/generated": -6.107733726501465,
"rewards/margins": 6.5864152908325195,
"rewards/real": 0.47868162393569946,
"step": 1990
},
{
"epoch": 1.28,
"learning_rate": 2.001421969427657e-07,
"logits/generated": -1.4014308452606201,
"logits/real": -0.10531187057495117,
"logps/generated": -205.13485717773438,
"logps/real": -151.3143768310547,
"loss": 0.0324,
"rewards/accuracies": 1.0,
"rewards/generated": -6.123740196228027,
"rewards/margins": 6.699014186859131,
"rewards/real": 0.5752742886543274,
"step": 2000
},
{
"epoch": 1.29,
"learning_rate": 1.983647351581941e-07,
"logits/generated": -1.3716758489608765,
"logits/real": -0.017807159572839737,
"logps/generated": -191.89364624023438,
"logps/real": -142.2311553955078,
"loss": 0.0314,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -5.589520454406738,
"rewards/margins": 6.077818870544434,
"rewards/real": 0.4882989823818207,
"step": 2010
},
{
"epoch": 1.29,
"learning_rate": 1.9658727337362247e-07,
"logits/generated": -1.2323060035705566,
"logits/real": -0.11805130541324615,
"logps/generated": -193.5916290283203,
"logps/real": -163.78118896484375,
"loss": 0.0438,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -5.801546096801758,
"rewards/margins": 6.197666168212891,
"rewards/real": 0.39611995220184326,
"step": 2020
},
{
"epoch": 1.3,
"learning_rate": 1.9480981158905084e-07,
"logits/generated": -1.4392839670181274,
"logits/real": -0.19422808289527893,
"logps/generated": -199.3451385498047,
"logps/real": -163.566162109375,
"loss": 0.0555,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -5.833590507507324,
"rewards/margins": 6.286513328552246,
"rewards/real": 0.45292234420776367,
"step": 2030
},
{
"epoch": 1.31,
"learning_rate": 1.9303234980447918e-07,
"logits/generated": -1.344807505607605,
"logits/real": 0.05807851627469063,
"logps/generated": -188.47280883789062,
"logps/real": -131.35252380371094,
"loss": 0.0404,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -5.383631706237793,
"rewards/margins": 5.914809226989746,
"rewards/real": 0.5311776995658875,
"step": 2040
},
{
"epoch": 1.31,
"learning_rate": 1.9125488801990754e-07,
"logits/generated": -1.4456682205200195,
"logits/real": -0.1890706866979599,
"logps/generated": -205.602783203125,
"logps/real": -156.99949645996094,
"loss": 0.0343,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -6.107948303222656,
"rewards/margins": 6.506932258605957,
"rewards/real": 0.3989841341972351,
"step": 2050
},
{
"epoch": 1.32,
"learning_rate": 1.8947742623533593e-07,
"logits/generated": -1.3190276622772217,
"logits/real": -0.09516116231679916,
"logps/generated": -193.70095825195312,
"logps/real": -160.75755310058594,
"loss": 0.0344,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -5.828548431396484,
"rewards/margins": 6.300034523010254,
"rewards/real": 0.4714859426021576,
"step": 2060
},
{
"epoch": 1.32,
"learning_rate": 1.876999644507643e-07,
"logits/generated": -1.348646879196167,
"logits/real": -0.10354932397603989,
"logps/generated": -194.30975341796875,
"logps/real": -153.31259155273438,
"loss": 0.0508,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -5.860109806060791,
"rewards/margins": 6.231678009033203,
"rewards/real": 0.3715675473213196,
"step": 2070
},
{
"epoch": 1.33,
"learning_rate": 1.8592250266619267e-07,
"logits/generated": -1.4050867557525635,
"logits/real": -0.2533566951751709,
"logps/generated": -195.93313598632812,
"logps/real": -169.0387420654297,
"loss": 0.0278,
"rewards/accuracies": 1.0,
"rewards/generated": -6.157801628112793,
"rewards/margins": 6.619472503662109,
"rewards/real": 0.46167105436325073,
"step": 2080
},
{
"epoch": 1.34,
"learning_rate": 1.8414504088162103e-07,
"logits/generated": -1.447456955909729,
"logits/real": -0.07161243259906769,
"logps/generated": -205.7145538330078,
"logps/real": -151.47238159179688,
"loss": 0.033,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -6.1400346755981445,
"rewards/margins": 6.463052272796631,
"rewards/real": 0.3230181634426117,
"step": 2090
},
{
"epoch": 1.34,
"learning_rate": 1.8236757909704943e-07,
"logits/generated": -1.3999097347259521,
"logits/real": -0.06229109689593315,
"logps/generated": -198.9770965576172,
"logps/real": -147.52398681640625,
"loss": 0.0359,
"rewards/accuracies": 1.0,
"rewards/generated": -6.335967540740967,
"rewards/margins": 6.788211822509766,
"rewards/real": 0.45224493741989136,
"step": 2100
},
{
"epoch": 1.35,
"learning_rate": 1.805901173124778e-07,
"logits/generated": -1.4580378532409668,
"logits/real": -0.21836349368095398,
"logps/generated": -199.5890655517578,
"logps/real": -159.8959503173828,
"loss": 0.0345,
"rewards/accuracies": 1.0,
"rewards/generated": -6.041956901550293,
"rewards/margins": 6.469993591308594,
"rewards/real": 0.4280371069908142,
"step": 2110
},
{
"epoch": 1.36,
"learning_rate": 1.7881265552790613e-07,
"logits/generated": -1.4729552268981934,
"logits/real": -0.08541516959667206,
"logps/generated": -209.23837280273438,
"logps/real": -150.85592651367188,
"loss": 0.0523,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.274621963500977,
"rewards/margins": 6.670513153076172,
"rewards/real": 0.3958915174007416,
"step": 2120
},
{
"epoch": 1.36,
"learning_rate": 1.770351937433345e-07,
"logits/generated": -1.460939645767212,
"logits/real": -0.06804711371660233,
"logps/generated": -195.3641815185547,
"logps/real": -149.21224975585938,
"loss": 0.0457,
"rewards/accuracies": 1.0,
"rewards/generated": -6.111988067626953,
"rewards/margins": 6.648555755615234,
"rewards/real": 0.5365672707557678,
"step": 2130
},
{
"epoch": 1.37,
"learning_rate": 1.7525773195876286e-07,
"logits/generated": -1.3989557027816772,
"logits/real": -0.141148641705513,
"logps/generated": -208.6423797607422,
"logps/real": -164.75941467285156,
"loss": 0.0468,
"rewards/accuracies": 1.0,
"rewards/generated": -6.388513565063477,
"rewards/margins": 6.783452033996582,
"rewards/real": 0.3949388563632965,
"step": 2140
},
{
"epoch": 1.38,
"learning_rate": 1.7348027017419126e-07,
"logits/generated": -1.3841809034347534,
"logits/real": -0.14616461098194122,
"logps/generated": -201.27157592773438,
"logps/real": -156.52682495117188,
"loss": 0.0468,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -5.992762565612793,
"rewards/margins": 6.373073577880859,
"rewards/real": 0.38031044602394104,
"step": 2150
},
{
"epoch": 1.38,
"learning_rate": 1.7170280838961962e-07,
"logits/generated": -1.4497352838516235,
"logits/real": -0.1744016855955124,
"logps/generated": -204.82797241210938,
"logps/real": -154.94497680664062,
"loss": 0.0389,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -6.267435550689697,
"rewards/margins": 6.67733907699585,
"rewards/real": 0.4099041521549225,
"step": 2160
},
{
"epoch": 1.39,
"learning_rate": 1.69925346605048e-07,
"logits/generated": -1.4557878971099854,
"logits/real": 0.009424996562302113,
"logps/generated": -198.59573364257812,
"logps/real": -144.44679260253906,
"loss": 0.0407,
"rewards/accuracies": 1.0,
"rewards/generated": -6.197464942932129,
"rewards/margins": 6.6801652908325195,
"rewards/real": 0.4827001094818115,
"step": 2170
},
{
"epoch": 1.39,
"learning_rate": 1.6814788482047635e-07,
"logits/generated": -1.4021763801574707,
"logits/real": -0.14931827783584595,
"logps/generated": -200.401123046875,
"logps/real": -173.49856567382812,
"loss": 0.0351,
"rewards/accuracies": 1.0,
"rewards/generated": -6.364272594451904,
"rewards/margins": 6.713151454925537,
"rewards/real": 0.34887903928756714,
"step": 2180
},
{
"epoch": 1.4,
"learning_rate": 1.6637042303590475e-07,
"logits/generated": -1.438078761100769,
"logits/real": -0.23877568542957306,
"logps/generated": -200.45779418945312,
"logps/real": -152.62110900878906,
"loss": 0.0334,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -5.951826095581055,
"rewards/margins": 6.3755693435668945,
"rewards/real": 0.42374467849731445,
"step": 2190
},
{
"epoch": 1.41,
"learning_rate": 1.6459296125133309e-07,
"logits/generated": -1.4022048711776733,
"logits/real": -0.20034177601337433,
"logps/generated": -200.08987426757812,
"logps/real": -159.6756591796875,
"loss": 0.0351,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.297463417053223,
"rewards/margins": 6.7816481590271,
"rewards/real": 0.4841853976249695,
"step": 2200
},
{
"epoch": 1.41,
"learning_rate": 1.6281549946676145e-07,
"logits/generated": -1.4501913785934448,
"logits/real": -0.13076047599315643,
"logps/generated": -203.63800048828125,
"logps/real": -148.5162353515625,
"loss": 0.0288,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.285429000854492,
"rewards/margins": 6.7836737632751465,
"rewards/real": 0.49824443459510803,
"step": 2210
},
{
"epoch": 1.42,
"learning_rate": 1.6103803768218982e-07,
"logits/generated": -1.515801191329956,
"logits/real": -0.08876947313547134,
"logps/generated": -214.5893096923828,
"logps/real": -150.77139282226562,
"loss": 0.0348,
"rewards/accuracies": 1.0,
"rewards/generated": -6.610386848449707,
"rewards/margins": 7.061656951904297,
"rewards/real": 0.45126986503601074,
"step": 2220
},
{
"epoch": 1.43,
"learning_rate": 1.5926057589761818e-07,
"logits/generated": -1.408809781074524,
"logits/real": -0.14982599020004272,
"logps/generated": -208.8214874267578,
"logps/real": -160.70396423339844,
"loss": 0.0466,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.5734405517578125,
"rewards/margins": 7.01800537109375,
"rewards/real": 0.4445651173591614,
"step": 2230
},
{
"epoch": 1.43,
"learning_rate": 1.5748311411304658e-07,
"logits/generated": -1.268897533416748,
"logits/real": -0.1435459554195404,
"logps/generated": -208.14059448242188,
"logps/real": -162.9286346435547,
"loss": 0.0424,
"rewards/accuracies": 1.0,
"rewards/generated": -6.466616630554199,
"rewards/margins": 6.879193305969238,
"rewards/real": 0.412576287984848,
"step": 2240
},
{
"epoch": 1.44,
"learning_rate": 1.5570565232847494e-07,
"logits/generated": -1.3088537454605103,
"logits/real": -0.055105965584516525,
"logps/generated": -202.38327026367188,
"logps/real": -150.609130859375,
"loss": 0.0231,
"rewards/accuracies": 1.0,
"rewards/generated": -6.501986503601074,
"rewards/margins": 6.831887245178223,
"rewards/real": 0.32990118861198425,
"step": 2250
},
{
"epoch": 1.45,
"learning_rate": 1.539281905439033e-07,
"logits/generated": -1.4072834253311157,
"logits/real": -0.13368968665599823,
"logps/generated": -196.32894897460938,
"logps/real": -156.68746948242188,
"loss": 0.034,
"rewards/accuracies": 1.0,
"rewards/generated": -6.223599433898926,
"rewards/margins": 6.634499549865723,
"rewards/real": 0.4108997881412506,
"step": 2260
},
{
"epoch": 1.45,
"learning_rate": 1.5215072875933165e-07,
"logits/generated": -1.4075140953063965,
"logits/real": -0.3013412356376648,
"logps/generated": -203.13999938964844,
"logps/real": -163.4533233642578,
"loss": 0.0479,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.3233642578125,
"rewards/margins": 6.650763034820557,
"rewards/real": 0.3273986279964447,
"step": 2270
},
{
"epoch": 1.46,
"learning_rate": 1.5037326697476004e-07,
"logits/generated": -1.4396686553955078,
"logits/real": -0.002043303567916155,
"logps/generated": -204.3917999267578,
"logps/real": -139.45896911621094,
"loss": 0.0308,
"rewards/accuracies": 1.0,
"rewards/generated": -6.184319496154785,
"rewards/margins": 6.667932987213135,
"rewards/real": 0.48361387848854065,
"step": 2280
},
{
"epoch": 1.47,
"learning_rate": 1.485958051901884e-07,
"logits/generated": -1.4441577196121216,
"logits/real": -0.20746362209320068,
"logps/generated": -203.9723358154297,
"logps/real": -166.56283569335938,
"loss": 0.0237,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.595439910888672,
"rewards/margins": 6.902490139007568,
"rewards/real": 0.3070511221885681,
"step": 2290
},
{
"epoch": 1.47,
"learning_rate": 1.4681834340561677e-07,
"logits/generated": -1.345245122909546,
"logits/real": -0.15873827040195465,
"logps/generated": -205.891357421875,
"logps/real": -160.63975524902344,
"loss": 0.0378,
"rewards/accuracies": 1.0,
"rewards/generated": -6.545729160308838,
"rewards/margins": 6.943263053894043,
"rewards/real": 0.3975338339805603,
"step": 2300
},
{
"epoch": 1.48,
"learning_rate": 1.4504088162104514e-07,
"logits/generated": -1.4238418340682983,
"logits/real": -0.1873057782649994,
"logps/generated": -206.091796875,
"logps/real": -164.68458557128906,
"loss": 0.0464,
"rewards/accuracies": 1.0,
"rewards/generated": -6.2456512451171875,
"rewards/margins": 6.481778621673584,
"rewards/real": 0.23612765967845917,
"step": 2310
},
{
"epoch": 1.48,
"learning_rate": 1.432634198364735e-07,
"logits/generated": -1.4479985237121582,
"logits/real": -0.07325728237628937,
"logps/generated": -201.4796600341797,
"logps/real": -156.04754638671875,
"loss": 0.0274,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.325311183929443,
"rewards/margins": 6.745765686035156,
"rewards/real": 0.42045480012893677,
"step": 2320
},
{
"epoch": 1.49,
"learning_rate": 1.414859580519019e-07,
"logits/generated": -1.4120184183120728,
"logits/real": -0.08143848925828934,
"logps/generated": -201.7879638671875,
"logps/real": -144.10708618164062,
"loss": 0.0317,
"rewards/accuracies": 1.0,
"rewards/generated": -6.235722541809082,
"rewards/margins": 6.648829460144043,
"rewards/real": 0.41310709714889526,
"step": 2330
},
{
"epoch": 1.5,
"learning_rate": 1.3970849626733024e-07,
"logits/generated": -1.3906993865966797,
"logits/real": -0.18309733271598816,
"logps/generated": -193.51211547851562,
"logps/real": -168.23666381835938,
"loss": 0.0396,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.0833845138549805,
"rewards/margins": 6.361334800720215,
"rewards/real": 0.2779490053653717,
"step": 2340
},
{
"epoch": 1.5,
"learning_rate": 1.379310344827586e-07,
"logits/generated": -1.269242763519287,
"logits/real": -0.1333479881286621,
"logps/generated": -199.2947540283203,
"logps/real": -156.45643615722656,
"loss": 0.0419,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.298374652862549,
"rewards/margins": 6.739838600158691,
"rewards/real": 0.44146427512168884,
"step": 2350
},
{
"epoch": 1.51,
"learning_rate": 1.3615357269818697e-07,
"logits/generated": -1.397362470626831,
"logits/real": -0.3238930106163025,
"logps/generated": -193.085205078125,
"logps/real": -161.82781982421875,
"loss": 0.0431,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.486599922180176,
"rewards/margins": 6.664914608001709,
"rewards/real": 0.17831535637378693,
"step": 2360
},
{
"epoch": 1.52,
"learning_rate": 1.3437611091361536e-07,
"logits/generated": -1.2911992073059082,
"logits/real": -0.21506524085998535,
"logps/generated": -193.64813232421875,
"logps/real": -163.50411987304688,
"loss": 0.0413,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -6.337356090545654,
"rewards/margins": 6.605440616607666,
"rewards/real": 0.26808419823646545,
"step": 2370
},
{
"epoch": 1.52,
"learning_rate": 1.3259864912904373e-07,
"logits/generated": -1.419874906539917,
"logits/real": 0.030509447678923607,
"logps/generated": -209.01718139648438,
"logps/real": -138.38870239257812,
"loss": 0.0304,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.7356061935424805,
"rewards/margins": 7.161271572113037,
"rewards/real": 0.4256650507450104,
"step": 2380
},
{
"epoch": 1.53,
"learning_rate": 1.308211873444721e-07,
"logits/generated": -1.4404840469360352,
"logits/real": -0.01684228517115116,
"logps/generated": -199.88868713378906,
"logps/real": -144.11949157714844,
"loss": 0.0398,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.491084098815918,
"rewards/margins": 6.836607933044434,
"rewards/real": 0.3455238938331604,
"step": 2390
},
{
"epoch": 1.54,
"learning_rate": 1.2904372555990046e-07,
"logits/generated": -1.4147288799285889,
"logits/real": -0.16526241600513458,
"logps/generated": -200.0225372314453,
"logps/real": -156.98641967773438,
"loss": 0.0344,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -6.307469844818115,
"rewards/margins": 6.676236629486084,
"rewards/real": 0.36876624822616577,
"step": 2400
},
{
"epoch": 1.54,
"learning_rate": 1.2726626377532883e-07,
"logits/generated": -1.414432168006897,
"logits/real": -0.0065914043225348,
"logps/generated": -200.77865600585938,
"logps/real": -146.02127075195312,
"loss": 0.0392,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.215537071228027,
"rewards/margins": 6.686474800109863,
"rewards/real": 0.47093725204467773,
"step": 2410
},
{
"epoch": 1.55,
"learning_rate": 1.254888019907572e-07,
"logits/generated": -1.260357141494751,
"logits/real": -0.2691400349140167,
"logps/generated": -192.88284301757812,
"logps/real": -168.7711639404297,
"loss": 0.0531,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -6.229541301727295,
"rewards/margins": 6.6325273513793945,
"rewards/real": 0.40298670530319214,
"step": 2420
},
{
"epoch": 1.55,
"learning_rate": 1.2371134020618556e-07,
"logits/generated": -1.354569435119629,
"logits/real": -0.11174142360687256,
"logps/generated": -199.36160278320312,
"logps/real": -148.06524658203125,
"loss": 0.0344,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.473987579345703,
"rewards/margins": 6.997427940368652,
"rewards/real": 0.5234400629997253,
"step": 2430
},
{
"epoch": 1.56,
"learning_rate": 1.2193387842161392e-07,
"logits/generated": -1.383927583694458,
"logits/real": -0.0022398829460144043,
"logps/generated": -199.3593292236328,
"logps/real": -146.00210571289062,
"loss": 0.0346,
"rewards/accuracies": 1.0,
"rewards/generated": -6.404942989349365,
"rewards/margins": 6.934741020202637,
"rewards/real": 0.5297980308532715,
"step": 2440
},
{
"epoch": 1.57,
"learning_rate": 1.2015641663704232e-07,
"logits/generated": -1.4926010370254517,
"logits/real": -0.18301942944526672,
"logps/generated": -210.3939208984375,
"logps/real": -158.6860809326172,
"loss": 0.0443,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -6.754066467285156,
"rewards/margins": 7.018126487731934,
"rewards/real": 0.2640603184700012,
"step": 2450
},
{
"epoch": 1.57,
"learning_rate": 1.1837895485247067e-07,
"logits/generated": -1.470483660697937,
"logits/real": -0.2634314298629761,
"logps/generated": -202.2202606201172,
"logps/real": -157.89248657226562,
"loss": 0.0362,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.489276885986328,
"rewards/margins": 6.953598976135254,
"rewards/real": 0.4643225073814392,
"step": 2460
},
{
"epoch": 1.58,
"learning_rate": 1.1660149306789902e-07,
"logits/generated": -1.4293309450149536,
"logits/real": -0.23708269000053406,
"logps/generated": -203.2031707763672,
"logps/real": -169.27066040039062,
"loss": 0.0356,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -6.314140319824219,
"rewards/margins": 6.681276798248291,
"rewards/real": 0.3671364486217499,
"step": 2470
},
{
"epoch": 1.59,
"learning_rate": 1.148240312833274e-07,
"logits/generated": -1.4235140085220337,
"logits/real": -0.10966993868350983,
"logps/generated": -210.45736694335938,
"logps/real": -150.0312957763672,
"loss": 0.0316,
"rewards/accuracies": 1.0,
"rewards/generated": -6.821863651275635,
"rewards/margins": 7.2514801025390625,
"rewards/real": 0.42961588501930237,
"step": 2480
},
{
"epoch": 1.59,
"learning_rate": 1.1304656949875577e-07,
"logits/generated": -1.354546070098877,
"logits/real": -0.022744635120034218,
"logps/generated": -192.60122680664062,
"logps/real": -140.7333526611328,
"loss": 0.036,
"rewards/accuracies": 1.0,
"rewards/generated": -5.868205547332764,
"rewards/margins": 6.2482709884643555,
"rewards/real": 0.3800655007362366,
"step": 2490
},
{
"epoch": 1.6,
"learning_rate": 1.1126910771418415e-07,
"logits/generated": -1.4601426124572754,
"logits/real": -0.15174929797649384,
"logps/generated": -199.8236846923828,
"logps/real": -161.28665161132812,
"loss": 0.0287,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.322902202606201,
"rewards/margins": 6.639632225036621,
"rewards/real": 0.3167303204536438,
"step": 2500
},
{
"epoch": 1.61,
"learning_rate": 1.094916459296125e-07,
"logits/generated": -1.5440027713775635,
"logits/real": -0.26040124893188477,
"logps/generated": -221.20291137695312,
"logps/real": -167.95321655273438,
"loss": 0.0395,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.880659580230713,
"rewards/margins": 7.220567226409912,
"rewards/real": 0.3399079144001007,
"step": 2510
},
{
"epoch": 1.61,
"learning_rate": 1.0771418414504088e-07,
"logits/generated": -1.340903878211975,
"logits/real": -0.2935740351676941,
"logps/generated": -202.96920776367188,
"logps/real": -176.96902465820312,
"loss": 0.025,
"rewards/accuracies": 1.0,
"rewards/generated": -6.716177463531494,
"rewards/margins": 7.051765441894531,
"rewards/real": 0.33558765053749084,
"step": 2520
},
{
"epoch": 1.62,
"learning_rate": 1.0593672236046925e-07,
"logits/generated": -1.4308208227157593,
"logits/real": -0.14327159523963928,
"logps/generated": -202.39431762695312,
"logps/real": -149.82705688476562,
"loss": 0.0313,
"rewards/accuracies": 1.0,
"rewards/generated": -6.53844690322876,
"rewards/margins": 6.978585243225098,
"rewards/real": 0.440138578414917,
"step": 2530
},
{
"epoch": 1.63,
"learning_rate": 1.0415926057589762e-07,
"logits/generated": -1.4518420696258545,
"logits/real": -0.10385274887084961,
"logps/generated": -213.8254852294922,
"logps/real": -147.71575927734375,
"loss": 0.0377,
"rewards/accuracies": 1.0,
"rewards/generated": -6.9299116134643555,
"rewards/margins": 7.326942443847656,
"rewards/real": 0.3970298767089844,
"step": 2540
},
{
"epoch": 1.63,
"learning_rate": 1.0238179879132598e-07,
"logits/generated": -1.4512475728988647,
"logits/real": -0.1038379818201065,
"logps/generated": -199.56454467773438,
"logps/real": -145.60775756835938,
"loss": 0.0445,
"rewards/accuracies": 1.0,
"rewards/generated": -6.219581604003906,
"rewards/margins": 6.6343889236450195,
"rewards/real": 0.4148074686527252,
"step": 2550
},
{
"epoch": 1.64,
"learning_rate": 1.0060433700675434e-07,
"logits/generated": -1.5067126750946045,
"logits/real": -0.24580475687980652,
"logps/generated": -216.79171752929688,
"logps/real": -168.87303161621094,
"loss": 0.0415,
"rewards/accuracies": 1.0,
"rewards/generated": -6.6367292404174805,
"rewards/margins": 7.050206184387207,
"rewards/real": 0.41347736120224,
"step": 2560
},
{
"epoch": 1.64,
"learning_rate": 9.882687522218272e-08,
"logits/generated": -1.4396215677261353,
"logits/real": -0.028474459424614906,
"logps/generated": -206.5319366455078,
"logps/real": -149.35960388183594,
"loss": 0.0301,
"rewards/accuracies": 1.0,
"rewards/generated": -6.7693681716918945,
"rewards/margins": 7.073111534118652,
"rewards/real": 0.30374377965927124,
"step": 2570
},
{
"epoch": 1.65,
"learning_rate": 9.704941343761109e-08,
"logits/generated": -1.3953574895858765,
"logits/real": -0.08167268335819244,
"logps/generated": -201.78231811523438,
"logps/real": -156.30703735351562,
"loss": 0.0361,
"rewards/accuracies": 1.0,
"rewards/generated": -6.351473808288574,
"rewards/margins": 6.825016975402832,
"rewards/real": 0.47354307770729065,
"step": 2580
},
{
"epoch": 1.66,
"learning_rate": 9.527195165303946e-08,
"logits/generated": -1.4204782247543335,
"logits/real": -0.261289119720459,
"logps/generated": -214.96920776367188,
"logps/real": -163.7252655029297,
"loss": 0.0438,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -6.997825622558594,
"rewards/margins": 7.31102991104126,
"rewards/real": 0.31320399045944214,
"step": 2590
},
{
"epoch": 1.66,
"learning_rate": 9.349448986846782e-08,
"logits/generated": -1.3477799892425537,
"logits/real": -0.22864647209644318,
"logps/generated": -206.8589630126953,
"logps/real": -162.54312133789062,
"loss": 0.0374,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.9989471435546875,
"rewards/margins": 7.419368743896484,
"rewards/real": 0.42042192816734314,
"step": 2600
},
{
"epoch": 1.67,
"learning_rate": 9.17170280838962e-08,
"logits/generated": -1.4512816667556763,
"logits/real": -0.2580679953098297,
"logps/generated": -214.8953857421875,
"logps/real": -162.36611938476562,
"loss": 0.0303,
"rewards/accuracies": 1.0,
"rewards/generated": -7.176792144775391,
"rewards/margins": 7.437612056732178,
"rewards/real": 0.26082050800323486,
"step": 2610
},
{
"epoch": 1.68,
"learning_rate": 8.993956629932455e-08,
"logits/generated": -1.3164294958114624,
"logits/real": -0.13470031321048737,
"logps/generated": -206.2738037109375,
"logps/real": -154.78414916992188,
"loss": 0.0313,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.593330383300781,
"rewards/margins": 6.940329074859619,
"rewards/real": 0.34699925780296326,
"step": 2620
},
{
"epoch": 1.68,
"learning_rate": 8.816210451475293e-08,
"logits/generated": -1.4106817245483398,
"logits/real": -0.15953537821769714,
"logps/generated": -208.6619415283203,
"logps/real": -160.1926727294922,
"loss": 0.0279,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.805656433105469,
"rewards/margins": 7.248448371887207,
"rewards/real": 0.44279175996780396,
"step": 2630
},
{
"epoch": 1.69,
"learning_rate": 8.63846427301813e-08,
"logits/generated": -1.3242509365081787,
"logits/real": -0.10427751392126083,
"logps/generated": -199.90911865234375,
"logps/real": -149.58468627929688,
"loss": 0.0391,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -6.570542812347412,
"rewards/margins": 6.95410680770874,
"rewards/real": 0.3835631310939789,
"step": 2640
},
{
"epoch": 1.7,
"learning_rate": 8.460718094560966e-08,
"logits/generated": -1.3325514793395996,
"logits/real": -0.12340172380208969,
"logps/generated": -205.9846954345703,
"logps/real": -154.2774658203125,
"loss": 0.0248,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.54462194442749,
"rewards/margins": 6.925265312194824,
"rewards/real": 0.380642831325531,
"step": 2650
},
{
"epoch": 1.7,
"learning_rate": 8.282971916103803e-08,
"logits/generated": -1.266965627670288,
"logits/real": -0.21162667870521545,
"logps/generated": -199.00389099121094,
"logps/real": -171.83628845214844,
"loss": 0.0329,
"rewards/accuracies": 1.0,
"rewards/generated": -6.594735622406006,
"rewards/margins": 6.811354160308838,
"rewards/real": 0.2166186273097992,
"step": 2660
},
{
"epoch": 1.71,
"learning_rate": 8.10522573764664e-08,
"logits/generated": -1.5509871244430542,
"logits/real": -0.21420331299304962,
"logps/generated": -228.1226348876953,
"logps/real": -163.10928344726562,
"loss": 0.0373,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -7.169036865234375,
"rewards/margins": 7.495211601257324,
"rewards/real": 0.3261755704879761,
"step": 2670
},
{
"epoch": 1.71,
"learning_rate": 7.927479559189478e-08,
"logits/generated": -1.387099266052246,
"logits/real": -0.06811436265707016,
"logps/generated": -203.49497985839844,
"logps/real": -153.1364288330078,
"loss": 0.0383,
"rewards/accuracies": 1.0,
"rewards/generated": -6.673671722412109,
"rewards/margins": 7.097157955169678,
"rewards/real": 0.42348676919937134,
"step": 2680
},
{
"epoch": 1.72,
"learning_rate": 7.749733380732314e-08,
"logits/generated": -1.4274492263793945,
"logits/real": -0.0971667617559433,
"logps/generated": -200.7941436767578,
"logps/real": -154.57699584960938,
"loss": 0.0454,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.580294609069824,
"rewards/margins": 6.978941917419434,
"rewards/real": 0.39864683151245117,
"step": 2690
},
{
"epoch": 1.73,
"learning_rate": 7.571987202275151e-08,
"logits/generated": -1.4912302494049072,
"logits/real": -0.08408372104167938,
"logps/generated": -204.36544799804688,
"logps/real": -154.26144409179688,
"loss": 0.0341,
"rewards/accuracies": 1.0,
"rewards/generated": -6.523919105529785,
"rewards/margins": 6.955370903015137,
"rewards/real": 0.43145233392715454,
"step": 2700
},
{
"epoch": 1.73,
"learning_rate": 7.394241023817987e-08,
"logits/generated": -1.4152591228485107,
"logits/real": -0.11495399475097656,
"logps/generated": -201.34291076660156,
"logps/real": -165.26194763183594,
"loss": 0.0272,
"rewards/accuracies": 1.0,
"rewards/generated": -6.8861589431762695,
"rewards/margins": 7.37485408782959,
"rewards/real": 0.48869651556015015,
"step": 2710
},
{
"epoch": 1.74,
"learning_rate": 7.216494845360824e-08,
"logits/generated": -1.3510379791259766,
"logits/real": -0.21854618191719055,
"logps/generated": -217.4750213623047,
"logps/real": -169.8649139404297,
"loss": 0.0342,
"rewards/accuracies": 1.0,
"rewards/generated": -7.587937355041504,
"rewards/margins": 7.85870361328125,
"rewards/real": 0.27076593041419983,
"step": 2720
},
{
"epoch": 1.75,
"learning_rate": 7.038748666903662e-08,
"logits/generated": -1.50370192527771,
"logits/real": -0.2702942490577698,
"logps/generated": -212.11178588867188,
"logps/real": -167.09085083007812,
"loss": 0.0319,
"rewards/accuracies": 1.0,
"rewards/generated": -7.054226875305176,
"rewards/margins": 7.354388236999512,
"rewards/real": 0.3001619875431061,
"step": 2730
},
{
"epoch": 1.75,
"learning_rate": 6.861002488446497e-08,
"logits/generated": -1.3323113918304443,
"logits/real": 0.04159053415060043,
"logps/generated": -205.6215057373047,
"logps/real": -143.99964904785156,
"loss": 0.0292,
"rewards/accuracies": 1.0,
"rewards/generated": -6.66431188583374,
"rewards/margins": 6.992480278015137,
"rewards/real": 0.3281685709953308,
"step": 2740
},
{
"epoch": 1.76,
"learning_rate": 6.683256309989335e-08,
"logits/generated": -1.4925063848495483,
"logits/real": -0.21390242874622345,
"logps/generated": -212.22586059570312,
"logps/real": -167.41766357421875,
"loss": 0.0257,
"rewards/accuracies": 1.0,
"rewards/generated": -7.017984867095947,
"rewards/margins": 7.473367214202881,
"rewards/real": 0.455382764339447,
"step": 2750
},
{
"epoch": 1.77,
"learning_rate": 6.505510131532172e-08,
"logits/generated": -1.393650770187378,
"logits/real": -0.0947226956486702,
"logps/generated": -210.9330596923828,
"logps/real": -147.23170471191406,
"loss": 0.0367,
"rewards/accuracies": 1.0,
"rewards/generated": -6.760880470275879,
"rewards/margins": 7.202750205993652,
"rewards/real": 0.4418713450431824,
"step": 2760
},
{
"epoch": 1.77,
"learning_rate": 6.32776395307501e-08,
"logits/generated": -1.341686487197876,
"logits/real": -0.18603375554084778,
"logps/generated": -201.18238830566406,
"logps/real": -153.87188720703125,
"loss": 0.0293,
"rewards/accuracies": 1.0,
"rewards/generated": -6.745263576507568,
"rewards/margins": 7.202706813812256,
"rewards/real": 0.45744413137435913,
"step": 2770
},
{
"epoch": 1.78,
"learning_rate": 6.150017774617845e-08,
"logits/generated": -1.3316876888275146,
"logits/real": -0.1949067860841751,
"logps/generated": -211.9070587158203,
"logps/real": -177.78701782226562,
"loss": 0.034,
"rewards/accuracies": 1.0,
"rewards/generated": -6.849858283996582,
"rewards/margins": 7.174862861633301,
"rewards/real": 0.3250047564506531,
"step": 2780
},
{
"epoch": 1.79,
"learning_rate": 5.972271596160682e-08,
"logits/generated": -1.3474787473678589,
"logits/real": -0.21125014126300812,
"logps/generated": -203.97470092773438,
"logps/real": -161.0288848876953,
"loss": 0.0355,
"rewards/accuracies": 1.0,
"rewards/generated": -6.6565728187561035,
"rewards/margins": 6.895970821380615,
"rewards/real": 0.23939922451972961,
"step": 2790
},
{
"epoch": 1.79,
"learning_rate": 5.794525417703519e-08,
"logits/generated": -1.4344779253005981,
"logits/real": -0.17681774497032166,
"logps/generated": -214.01559448242188,
"logps/real": -165.4995880126953,
"loss": 0.0359,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -7.138472557067871,
"rewards/margins": 7.418495178222656,
"rewards/real": 0.2800225615501404,
"step": 2800
},
{
"epoch": 1.8,
"learning_rate": 5.616779239246356e-08,
"logits/generated": -1.3456244468688965,
"logits/real": -0.23542913794517517,
"logps/generated": -211.3947296142578,
"logps/real": -163.82652282714844,
"loss": 0.0427,
"rewards/accuracies": 1.0,
"rewards/generated": -7.151065826416016,
"rewards/margins": 7.601234436035156,
"rewards/real": 0.45016852021217346,
"step": 2810
},
{
"epoch": 1.8,
"learning_rate": 5.439033060789193e-08,
"logits/generated": -1.5365709066390991,
"logits/real": -0.08734168857336044,
"logps/generated": -218.9232635498047,
"logps/real": -155.0095977783203,
"loss": 0.03,
"rewards/accuracies": 1.0,
"rewards/generated": -7.604518890380859,
"rewards/margins": 7.9174652099609375,
"rewards/real": 0.3129454255104065,
"step": 2820
},
{
"epoch": 1.81,
"learning_rate": 5.26128688233203e-08,
"logits/generated": -1.4437042474746704,
"logits/real": -0.055155061185359955,
"logps/generated": -205.85806274414062,
"logps/real": -149.55331420898438,
"loss": 0.0352,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.7587127685546875,
"rewards/margins": 7.158652305603027,
"rewards/real": 0.39993923902511597,
"step": 2830
},
{
"epoch": 1.82,
"learning_rate": 5.0835407038748666e-08,
"logits/generated": -1.4804272651672363,
"logits/real": -0.2698648273944855,
"logps/generated": -217.31039428710938,
"logps/real": -168.95205688476562,
"loss": 0.0321,
"rewards/accuracies": 1.0,
"rewards/generated": -7.3498854637146,
"rewards/margins": 7.651535987854004,
"rewards/real": 0.30164986848831177,
"step": 2840
},
{
"epoch": 1.82,
"learning_rate": 4.905794525417703e-08,
"logits/generated": -1.5234874486923218,
"logits/real": -0.20794352889060974,
"logps/generated": -211.02670288085938,
"logps/real": -158.8268280029297,
"loss": 0.0316,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.525379180908203,
"rewards/margins": 6.8619537353515625,
"rewards/real": 0.33657413721084595,
"step": 2850
},
{
"epoch": 1.83,
"learning_rate": 4.72804834696054e-08,
"logits/generated": -1.3400804996490479,
"logits/real": -0.2256588637828827,
"logps/generated": -208.4210205078125,
"logps/real": -167.0167999267578,
"loss": 0.04,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.771445274353027,
"rewards/margins": 6.9308977127075195,
"rewards/real": 0.1594521552324295,
"step": 2860
},
{
"epoch": 1.84,
"learning_rate": 4.550302168503377e-08,
"logits/generated": -1.3907737731933594,
"logits/real": -0.08319269865751266,
"logps/generated": -206.05990600585938,
"logps/real": -149.99700927734375,
"loss": 0.0316,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -6.864798545837402,
"rewards/margins": 7.238038539886475,
"rewards/real": 0.3732399344444275,
"step": 2870
},
{
"epoch": 1.84,
"learning_rate": 4.372555990046214e-08,
"logits/generated": -1.3409656286239624,
"logits/real": -0.26008692383766174,
"logps/generated": -204.02059936523438,
"logps/real": -168.06771850585938,
"loss": 0.0361,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.502602577209473,
"rewards/margins": 6.684649467468262,
"rewards/real": 0.1820472925901413,
"step": 2880
},
{
"epoch": 1.85,
"learning_rate": 4.194809811589051e-08,
"logits/generated": -1.5320767164230347,
"logits/real": -0.30421024560928345,
"logps/generated": -224.9248504638672,
"logps/real": -178.38499450683594,
"loss": 0.0244,
"rewards/accuracies": 1.0,
"rewards/generated": -7.527239799499512,
"rewards/margins": 7.814047813415527,
"rewards/real": 0.2868082523345947,
"step": 2890
},
{
"epoch": 1.86,
"learning_rate": 4.0170636331318876e-08,
"logits/generated": -1.3013502359390259,
"logits/real": -0.1398877501487732,
"logps/generated": -201.2360076904297,
"logps/real": -163.42172241210938,
"loss": 0.0342,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.56103515625,
"rewards/margins": 6.8316826820373535,
"rewards/real": 0.2706476151943207,
"step": 2900
},
{
"epoch": 1.86,
"learning_rate": 3.839317454674725e-08,
"logits/generated": -1.3678123950958252,
"logits/real": -0.2855227291584015,
"logps/generated": -197.8090362548828,
"logps/real": -164.1342315673828,
"loss": 0.0431,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.455258369445801,
"rewards/margins": 6.888462066650391,
"rewards/real": 0.4332040250301361,
"step": 2910
},
{
"epoch": 1.87,
"learning_rate": 3.6615712762175614e-08,
"logits/generated": -1.3677704334259033,
"logits/real": 0.010342784225940704,
"logps/generated": -209.58816528320312,
"logps/real": -145.00323486328125,
"loss": 0.0232,
"rewards/accuracies": 1.0,
"rewards/generated": -7.397311210632324,
"rewards/margins": 7.7184648513793945,
"rewards/real": 0.32115358114242554,
"step": 2920
},
{
"epoch": 1.87,
"learning_rate": 3.4838250977603974e-08,
"logits/generated": -1.524156928062439,
"logits/real": -0.12270829826593399,
"logps/generated": -218.85122680664062,
"logps/real": -161.03256225585938,
"loss": 0.0274,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -7.344332695007324,
"rewards/margins": 7.675253391265869,
"rewards/real": 0.3309203088283539,
"step": 2930
},
{
"epoch": 1.88,
"learning_rate": 3.3060789193032346e-08,
"logits/generated": -1.4847233295440674,
"logits/real": -0.09207119047641754,
"logps/generated": -210.13827514648438,
"logps/real": -152.16636657714844,
"loss": 0.0348,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -7.028836250305176,
"rewards/margins": 7.411356449127197,
"rewards/real": 0.38252073526382446,
"step": 2940
},
{
"epoch": 1.89,
"learning_rate": 3.128332740846071e-08,
"logits/generated": -1.3793671131134033,
"logits/real": -0.09992913901805878,
"logps/generated": -205.37014770507812,
"logps/real": -151.1650848388672,
"loss": 0.0286,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.737936973571777,
"rewards/margins": 7.170534610748291,
"rewards/real": 0.432596355676651,
"step": 2950
},
{
"epoch": 1.89,
"learning_rate": 2.9505865623889085e-08,
"logits/generated": -1.4762852191925049,
"logits/real": -0.06375066190958023,
"logps/generated": -212.5565185546875,
"logps/real": -149.23876953125,
"loss": 0.0268,
"rewards/accuracies": 1.0,
"rewards/generated": -7.110816955566406,
"rewards/margins": 7.316006660461426,
"rewards/real": 0.20518915355205536,
"step": 2960
},
{
"epoch": 1.9,
"learning_rate": 2.7728403839317454e-08,
"logits/generated": -1.3773549795150757,
"logits/real": -0.1580524891614914,
"logps/generated": -210.57321166992188,
"logps/real": -156.308837890625,
"loss": 0.0339,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -7.0069990158081055,
"rewards/margins": 7.312821388244629,
"rewards/real": 0.3058224320411682,
"step": 2970
},
{
"epoch": 1.91,
"learning_rate": 2.5950942054745824e-08,
"logits/generated": -1.3675754070281982,
"logits/real": -0.2727252244949341,
"logps/generated": -219.14755249023438,
"logps/real": -166.90438842773438,
"loss": 0.0296,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -7.234642028808594,
"rewards/margins": 7.47792911529541,
"rewards/real": 0.243287593126297,
"step": 2980
},
{
"epoch": 1.91,
"learning_rate": 2.4173480270174193e-08,
"logits/generated": -1.3553121089935303,
"logits/real": -0.29976850748062134,
"logps/generated": -209.1672821044922,
"logps/real": -172.01132202148438,
"loss": 0.0371,
"rewards/accuracies": 1.0,
"rewards/generated": -6.818951606750488,
"rewards/margins": 7.143617153167725,
"rewards/real": 0.32466596364974976,
"step": 2990
},
{
"epoch": 1.92,
"learning_rate": 2.2396018485602556e-08,
"logits/generated": -1.4340794086456299,
"logits/real": -0.17459583282470703,
"logps/generated": -211.97738647460938,
"logps/real": -155.25192260742188,
"loss": 0.0357,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -7.195143222808838,
"rewards/margins": 7.472241401672363,
"rewards/real": 0.27709802985191345,
"step": 3000
},
{
"epoch": 1.93,
"learning_rate": 2.0618556701030925e-08,
"logits/generated": -1.2854077816009521,
"logits/real": -0.05134361982345581,
"logps/generated": -192.98057556152344,
"logps/real": -141.09915161132812,
"loss": 0.0428,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -6.195518493652344,
"rewards/margins": 6.4610795974731445,
"rewards/real": 0.26556122303009033,
"step": 3010
},
{
"epoch": 1.93,
"learning_rate": 1.8841094916459295e-08,
"logits/generated": -1.3392913341522217,
"logits/real": -0.09781539440155029,
"logps/generated": -208.2666473388672,
"logps/real": -156.20436096191406,
"loss": 0.0446,
"rewards/accuracies": 1.0,
"rewards/generated": -6.725115776062012,
"rewards/margins": 6.97289514541626,
"rewards/real": 0.2477794587612152,
"step": 3020
},
{
"epoch": 1.94,
"learning_rate": 1.7063633131887664e-08,
"logits/generated": -1.3316737413406372,
"logits/real": -0.0454082116484642,
"logps/generated": -206.0417938232422,
"logps/real": -141.0169219970703,
"loss": 0.0333,
"rewards/accuracies": 1.0,
"rewards/generated": -6.749381065368652,
"rewards/margins": 7.064549922943115,
"rewards/real": 0.3151686489582062,
"step": 3030
},
{
"epoch": 1.94,
"learning_rate": 1.5286171347316033e-08,
"logits/generated": -1.534264087677002,
"logits/real": -0.13667024672031403,
"logps/generated": -226.2362518310547,
"logps/real": -160.57518005371094,
"loss": 0.0234,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -7.485151767730713,
"rewards/margins": 7.653719902038574,
"rewards/real": 0.16856878995895386,
"step": 3040
},
{
"epoch": 1.95,
"learning_rate": 1.35087095627444e-08,
"logits/generated": -1.3561798334121704,
"logits/real": -0.18901768326759338,
"logps/generated": -204.1754913330078,
"logps/real": -150.84564208984375,
"loss": 0.0257,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.298308849334717,
"rewards/margins": 6.538763523101807,
"rewards/real": 0.24045482277870178,
"step": 3050
},
{
"epoch": 1.96,
"learning_rate": 1.1731247778172769e-08,
"logits/generated": -1.2899856567382812,
"logits/real": -0.1477215737104416,
"logps/generated": -201.3439178466797,
"logps/real": -148.21932983398438,
"loss": 0.0327,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.692836761474609,
"rewards/margins": 6.993072509765625,
"rewards/real": 0.3002353608608246,
"step": 3060
},
{
"epoch": 1.96,
"learning_rate": 9.953785993601137e-09,
"logits/generated": -1.358318567276001,
"logits/real": -0.06824080646038055,
"logps/generated": -206.3799285888672,
"logps/real": -151.30718994140625,
"loss": 0.0316,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -7.049784183502197,
"rewards/margins": 7.375840663909912,
"rewards/real": 0.3260560631752014,
"step": 3070
},
{
"epoch": 1.97,
"learning_rate": 8.176324209029506e-09,
"logits/generated": -1.4635486602783203,
"logits/real": -0.02628200687468052,
"logps/generated": -222.9819793701172,
"logps/real": -150.2086181640625,
"loss": 0.0358,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -7.298110008239746,
"rewards/margins": 7.585814476013184,
"rewards/real": 0.2877052426338196,
"step": 3080
},
{
"epoch": 1.98,
"learning_rate": 6.398862424457874e-09,
"logits/generated": -1.2963229417800903,
"logits/real": -0.04144474118947983,
"logps/generated": -208.9534454345703,
"logps/real": -141.96522521972656,
"loss": 0.029,
"rewards/accuracies": 1.0,
"rewards/generated": -7.3213348388671875,
"rewards/margins": 7.746777534484863,
"rewards/real": 0.425443172454834,
"step": 3090
},
{
"epoch": 1.98,
"learning_rate": 4.621400639886242e-09,
"logits/generated": -1.4178071022033691,
"logits/real": -0.21926836669445038,
"logps/generated": -208.5327606201172,
"logps/real": -177.9146728515625,
"loss": 0.0355,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -7.14594030380249,
"rewards/margins": 7.450923919677734,
"rewards/real": 0.3049830198287964,
"step": 3100
},
{
"epoch": 1.99,
"learning_rate": 2.8439388553146107e-09,
"logits/generated": -1.3670203685760498,
"logits/real": -0.1273925006389618,
"logps/generated": -208.1735382080078,
"logps/real": -154.05970764160156,
"loss": 0.0372,
"rewards/accuracies": 1.0,
"rewards/generated": -6.859274387359619,
"rewards/margins": 7.159803867340088,
"rewards/real": 0.3005295395851135,
"step": 3110
},
{
"epoch": 2.0,
"learning_rate": 1.066477070742979e-09,
"logits/generated": -1.3436192274093628,
"logits/real": -0.02946655824780464,
"logps/generated": -204.7676239013672,
"logps/real": -143.30450439453125,
"loss": 0.0369,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -6.587100982666016,
"rewards/margins": 7.098004341125488,
"rewards/real": 0.5109024047851562,
"step": 3120
},
{
"epoch": 2.0,
"step": 3126,
"total_flos": 0.0,
"train_loss": 0.18478660625349003,
"train_runtime": 27852.0483,
"train_samples_per_second": 3.59,
"train_steps_per_second": 0.112
}
],
"logging_steps": 10,
"max_steps": 3126,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}