|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 3126, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.597444089456869e-09, |
|
"logits/generated": -1.5697296857833862, |
|
"logits/real": 0.02788793109357357, |
|
"logps/generated": -161.09165954589844, |
|
"logps/real": -142.571533203125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.597444089456869e-08, |
|
"logits/generated": -1.633887767791748, |
|
"logits/real": -0.1783123016357422, |
|
"logps/generated": -138.75979614257812, |
|
"logps/real": -156.88577270507812, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.4166666567325592, |
|
"rewards/generated": 0.004628816619515419, |
|
"rewards/margins": -0.002158037619665265, |
|
"rewards/real": 0.0024707792326807976, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.194888178913738e-08, |
|
"logits/generated": -1.609938621520996, |
|
"logits/real": -0.08152679353952408, |
|
"logps/generated": -143.5062713623047, |
|
"logps/real": -153.39932250976562, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/generated": -0.002054329263046384, |
|
"rewards/margins": 0.006409396883100271, |
|
"rewards/real": 0.004355068318545818, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.7923322683706064e-08, |
|
"logits/generated": -1.5137046575546265, |
|
"logits/real": -0.25311604142189026, |
|
"logps/generated": -142.5679168701172, |
|
"logps/real": -165.5823516845703, |
|
"loss": 0.6937, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/generated": 0.0021794841159135103, |
|
"rewards/margins": -0.005685324314981699, |
|
"rewards/real": -0.0035058397334069014, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.389776357827476e-08, |
|
"logits/generated": -1.5637849569320679, |
|
"logits/real": -0.13809606432914734, |
|
"logps/generated": -131.64889526367188, |
|
"logps/real": -160.1712646484375, |
|
"loss": 0.6956, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/generated": 0.00549626350402832, |
|
"rewards/margins": -0.00184511614497751, |
|
"rewards/real": 0.003651147009804845, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.987220447284344e-08, |
|
"logits/generated": -1.5748565196990967, |
|
"logits/real": -0.09178884327411652, |
|
"logps/generated": -135.52053833007812, |
|
"logps/real": -153.59854125976562, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.5, |
|
"rewards/generated": -0.0018657876644283533, |
|
"rewards/margins": -0.0018160634208470583, |
|
"rewards/real": -0.003681850153952837, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.584664536741213e-08, |
|
"logits/generated": -1.6641861200332642, |
|
"logits/real": -0.2794622778892517, |
|
"logps/generated": -143.5887908935547, |
|
"logps/real": -161.76043701171875, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/generated": 0.002687716158106923, |
|
"rewards/margins": -0.0016607001889497042, |
|
"rewards/real": 0.001027016551233828, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.1182108626198082e-07, |
|
"logits/generated": -1.7215445041656494, |
|
"logits/real": -0.27054786682128906, |
|
"logps/generated": -135.14833068847656, |
|
"logps/real": -166.22914123535156, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/generated": 0.0037495135329663754, |
|
"rewards/margins": -0.0029804420191794634, |
|
"rewards/real": 0.000769071455579251, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.2779552715654952e-07, |
|
"logits/generated": -1.6150354146957397, |
|
"logits/real": -0.09898678958415985, |
|
"logps/generated": -136.48165893554688, |
|
"logps/real": -153.1577911376953, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/generated": 0.0034830570220947266, |
|
"rewards/margins": -0.0015155791770666838, |
|
"rewards/real": 0.001967477845028043, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.437699680511182e-07, |
|
"logits/generated": -1.7415987253189087, |
|
"logits/real": -0.13428936898708344, |
|
"logps/generated": -147.66615295410156, |
|
"logps/real": -163.003173828125, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/generated": -0.0029185961466282606, |
|
"rewards/margins": 0.0036810163874179125, |
|
"rewards/real": 0.0007624196587130427, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.5974440894568688e-07, |
|
"logits/generated": -1.655890703201294, |
|
"logits/real": -0.1572108417749405, |
|
"logps/generated": -142.35391235351562, |
|
"logps/real": -153.11849975585938, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/generated": -0.004457283299416304, |
|
"rewards/margins": 0.0026433137245476246, |
|
"rewards/real": -0.0018139698076993227, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.757188498402556e-07, |
|
"logits/generated": -1.5630085468292236, |
|
"logits/real": -0.24260249733924866, |
|
"logps/generated": -141.05294799804688, |
|
"logps/real": -168.055908203125, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/generated": -0.01049154344946146, |
|
"rewards/margins": 0.012994527816772461, |
|
"rewards/real": 0.002502984832972288, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9169329073482426e-07, |
|
"logits/generated": -1.6308023929595947, |
|
"logits/real": -0.1937154084444046, |
|
"logps/generated": -147.5388946533203, |
|
"logps/real": -156.50515747070312, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/generated": -0.004769793711602688, |
|
"rewards/margins": 0.0057532163336873055, |
|
"rewards/real": 0.0009834242518991232, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.0766773162939297e-07, |
|
"logits/generated": -1.559140920639038, |
|
"logits/real": -0.2510816156864166, |
|
"logps/generated": -131.83071899414062, |
|
"logps/real": -164.52407836914062, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/generated": -0.007517705671489239, |
|
"rewards/margins": 0.009749026037752628, |
|
"rewards/real": 0.002231321996077895, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.2364217252396164e-07, |
|
"logits/generated": -1.6431344747543335, |
|
"logits/real": -0.08175196498632431, |
|
"logps/generated": -142.07656860351562, |
|
"logps/real": -150.12713623046875, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.625, |
|
"rewards/generated": -0.009607553482055664, |
|
"rewards/margins": 0.00782174151390791, |
|
"rewards/real": -0.0017858125502243638, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.3961661341853033e-07, |
|
"logits/generated": -1.7087827920913696, |
|
"logits/real": -0.3516523540019989, |
|
"logps/generated": -146.2122039794922, |
|
"logps/real": -160.49546813964844, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/generated": -0.009243173524737358, |
|
"rewards/margins": 0.008949460461735725, |
|
"rewards/real": -0.00029371288837864995, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.5559105431309904e-07, |
|
"logits/generated": -1.7223360538482666, |
|
"logits/real": -0.2035084068775177, |
|
"logps/generated": -140.39572143554688, |
|
"logps/real": -157.5358428955078, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/generated": -0.009247403591871262, |
|
"rewards/margins": 0.012728390283882618, |
|
"rewards/real": 0.0034809871576726437, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.715654952076677e-07, |
|
"logits/generated": -1.749093770980835, |
|
"logits/real": -0.13887380063533783, |
|
"logps/generated": -144.42491149902344, |
|
"logps/real": -150.71852111816406, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/generated": -0.009006218984723091, |
|
"rewards/margins": 0.010439801029860973, |
|
"rewards/real": 0.0014335823943838477, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.875399361022364e-07, |
|
"logits/generated": -1.533534049987793, |
|
"logits/real": -0.2637261152267456, |
|
"logps/generated": -143.3952178955078, |
|
"logps/real": -171.69735717773438, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/generated": -0.01498076505959034, |
|
"rewards/margins": 0.009685126133263111, |
|
"rewards/real": -0.005295639391988516, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.035143769968051e-07, |
|
"logits/generated": -1.6899821758270264, |
|
"logits/real": -0.24950842559337616, |
|
"logps/generated": -138.8522186279297, |
|
"logps/real": -165.54269409179688, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/generated": -0.007621455006301403, |
|
"rewards/margins": 0.0072743757627904415, |
|
"rewards/real": -0.0003470802039373666, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.1948881789137375e-07, |
|
"logits/generated": -1.6720901727676392, |
|
"logits/real": -0.13108500838279724, |
|
"logps/generated": -143.76254272460938, |
|
"logps/real": -152.6102294921875, |
|
"loss": 0.6866, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/generated": -0.016768785193562508, |
|
"rewards/margins": 0.013681398704648018, |
|
"rewards/real": -0.0030873871874064207, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.354632587859425e-07, |
|
"logits/generated": -1.668308973312378, |
|
"logits/real": -0.07337333261966705, |
|
"logps/generated": -139.4440460205078, |
|
"logps/real": -145.42164611816406, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.625, |
|
"rewards/generated": -0.013003669679164886, |
|
"rewards/margins": 0.013915425166487694, |
|
"rewards/real": 0.0009117558365687728, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.514376996805112e-07, |
|
"logits/generated": -1.6061630249023438, |
|
"logits/real": -0.04177895188331604, |
|
"logps/generated": -141.5182647705078, |
|
"logps/real": -143.1357879638672, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/generated": -0.022188017144799232, |
|
"rewards/margins": 0.020234428346157074, |
|
"rewards/real": -0.0019535874016582966, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.6741214057507985e-07, |
|
"logits/generated": -1.6456787586212158, |
|
"logits/real": -0.032159410417079926, |
|
"logps/generated": -137.02691650390625, |
|
"logps/real": -152.61387634277344, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -0.0165521539747715, |
|
"rewards/margins": 0.021729346364736557, |
|
"rewards/real": 0.005177192389965057, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.833865814696485e-07, |
|
"logits/generated": -1.6691703796386719, |
|
"logits/real": -0.4035402834415436, |
|
"logps/generated": -143.48324584960938, |
|
"logps/real": -179.24752807617188, |
|
"loss": 0.6803, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -0.02668914757668972, |
|
"rewards/margins": 0.02352159470319748, |
|
"rewards/real": -0.0031675524078309536, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.993610223642173e-07, |
|
"logits/generated": -1.5687007904052734, |
|
"logits/real": -0.09528298676013947, |
|
"logps/generated": -141.37509155273438, |
|
"logps/real": -149.46035766601562, |
|
"loss": 0.6762, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/generated": -0.03992265835404396, |
|
"rewards/margins": 0.043977029621601105, |
|
"rewards/real": 0.004054374061524868, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.1533546325878595e-07, |
|
"logits/generated": -1.6911046504974365, |
|
"logits/real": -0.007008680608123541, |
|
"logps/generated": -144.0658416748047, |
|
"logps/real": -150.27243041992188, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/generated": -0.03158753365278244, |
|
"rewards/margins": 0.03852955996990204, |
|
"rewards/real": 0.006942029111087322, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.313099041533546e-07, |
|
"logits/generated": -1.6412391662597656, |
|
"logits/real": -0.16801941394805908, |
|
"logps/generated": -136.202392578125, |
|
"logps/real": -153.8549346923828, |
|
"loss": 0.6721, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/generated": -0.04177216440439224, |
|
"rewards/margins": 0.04363798722624779, |
|
"rewards/real": 0.001865826197899878, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.472843450479233e-07, |
|
"logits/generated": -1.577200174331665, |
|
"logits/real": -0.18333522975444794, |
|
"logps/generated": -135.14651489257812, |
|
"logps/real": -162.71102905273438, |
|
"loss": 0.67, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -0.04428236186504364, |
|
"rewards/margins": 0.050743866711854935, |
|
"rewards/real": 0.0064615062437951565, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.63258785942492e-07, |
|
"logits/generated": -1.7181251049041748, |
|
"logits/real": -0.14545153081417084, |
|
"logps/generated": -141.39950561523438, |
|
"logps/real": -159.49375915527344, |
|
"loss": 0.6695, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/generated": -0.04990307241678238, |
|
"rewards/margins": 0.05374089628458023, |
|
"rewards/real": 0.003837819444015622, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.792332268370607e-07, |
|
"logits/generated": -1.5909569263458252, |
|
"logits/real": -0.11655733734369278, |
|
"logps/generated": -136.44984436035156, |
|
"logps/real": -149.45106506347656, |
|
"loss": 0.6671, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/generated": -0.04627872630953789, |
|
"rewards/margins": 0.04471098631620407, |
|
"rewards/real": -0.0015677406918257475, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.952076677316294e-07, |
|
"logits/generated": -1.6312742233276367, |
|
"logits/real": -0.2683960795402527, |
|
"logps/generated": -139.54454040527344, |
|
"logps/real": -171.73904418945312, |
|
"loss": 0.66, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -0.06426986306905746, |
|
"rewards/margins": 0.06892012804746628, |
|
"rewards/real": 0.004650264047086239, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.987557767507998e-07, |
|
"logits/generated": -1.7353553771972656, |
|
"logits/real": -0.2647973895072937, |
|
"logps/generated": -140.25978088378906, |
|
"logps/real": -161.4581298828125, |
|
"loss": 0.6604, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.0646638348698616, |
|
"rewards/margins": 0.06798725575208664, |
|
"rewards/real": 0.003323426004499197, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.969783149662282e-07, |
|
"logits/generated": -1.4210546016693115, |
|
"logits/real": -0.2087656706571579, |
|
"logps/generated": -132.431640625, |
|
"logps/real": -164.90301513671875, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -0.07163882255554199, |
|
"rewards/margins": 0.07509996742010117, |
|
"rewards/real": 0.0034611367154866457, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.952008531816565e-07, |
|
"logits/generated": -1.5858566761016846, |
|
"logits/real": -0.19087788462638855, |
|
"logps/generated": -137.87277221679688, |
|
"logps/real": -170.6455841064453, |
|
"loss": 0.6461, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.08108492195606232, |
|
"rewards/margins": 0.09145854413509369, |
|
"rewards/real": 0.010373624972999096, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.93423391397085e-07, |
|
"logits/generated": -1.5935401916503906, |
|
"logits/real": -0.18511667847633362, |
|
"logps/generated": -135.37991333007812, |
|
"logps/real": -161.39453125, |
|
"loss": 0.6448, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.09194954484701157, |
|
"rewards/margins": 0.10509081184864044, |
|
"rewards/real": 0.013141264207661152, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.916459296125133e-07, |
|
"logits/generated": -1.6975730657577515, |
|
"logits/real": -0.1985872983932495, |
|
"logps/generated": -141.4312744140625, |
|
"logps/real": -158.44564819335938, |
|
"loss": 0.6358, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.10384906828403473, |
|
"rewards/margins": 0.12381346523761749, |
|
"rewards/real": 0.01996440999209881, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.898684678279417e-07, |
|
"logits/generated": -1.6519880294799805, |
|
"logits/real": -0.35222429037094116, |
|
"logps/generated": -142.85623168945312, |
|
"logps/real": -178.80612182617188, |
|
"loss": 0.6306, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.12715503573417664, |
|
"rewards/margins": 0.13996237516403198, |
|
"rewards/real": 0.01280735433101654, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.8809100604337e-07, |
|
"logits/generated": -1.5434781312942505, |
|
"logits/real": -0.13071385025978088, |
|
"logps/generated": -144.16851806640625, |
|
"logps/real": -166.1132049560547, |
|
"loss": 0.6274, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.12086117267608643, |
|
"rewards/margins": 0.14237499237060547, |
|
"rewards/real": 0.02151382341980934, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.863135442587984e-07, |
|
"logits/generated": -1.6076228618621826, |
|
"logits/real": -0.207803413271904, |
|
"logps/generated": -142.91873168945312, |
|
"logps/real": -157.12567138671875, |
|
"loss": 0.621, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.13728059828281403, |
|
"rewards/margins": 0.1584828794002533, |
|
"rewards/real": 0.021202292293310165, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.845360824742267e-07, |
|
"logits/generated": -1.5034089088439941, |
|
"logits/real": -0.20472228527069092, |
|
"logps/generated": -132.5041046142578, |
|
"logps/real": -161.69406127929688, |
|
"loss": 0.6184, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.13708344101905823, |
|
"rewards/margins": 0.159152552485466, |
|
"rewards/real": 0.02206914685666561, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.827586206896552e-07, |
|
"logits/generated": -1.6673437356948853, |
|
"logits/real": -0.18107284605503082, |
|
"logps/generated": -142.1982879638672, |
|
"logps/real": -157.3358154296875, |
|
"loss": 0.6092, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.14462080597877502, |
|
"rewards/margins": 0.17726922035217285, |
|
"rewards/real": 0.03264839947223663, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.809811589050835e-07, |
|
"logits/generated": -1.5609720945358276, |
|
"logits/real": -0.14302347600460052, |
|
"logps/generated": -138.09237670898438, |
|
"logps/real": -162.0977020263672, |
|
"loss": 0.6008, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.1684601604938507, |
|
"rewards/margins": 0.19570282101631165, |
|
"rewards/real": 0.027242666110396385, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.792036971205119e-07, |
|
"logits/generated": -1.5268409252166748, |
|
"logits/real": -0.1176341325044632, |
|
"logps/generated": -135.7368927001953, |
|
"logps/real": -153.20358276367188, |
|
"loss": 0.5896, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -0.1638469398021698, |
|
"rewards/margins": 0.19122377038002014, |
|
"rewards/real": 0.027376821264624596, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.774262353359402e-07, |
|
"logits/generated": -1.6257530450820923, |
|
"logits/real": -0.1526736319065094, |
|
"logps/generated": -139.25418090820312, |
|
"logps/real": -156.4899139404297, |
|
"loss": 0.5873, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.1955229938030243, |
|
"rewards/margins": 0.23565657436847687, |
|
"rewards/real": 0.04013354331254959, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.7564877355136863e-07, |
|
"logits/generated": -1.6272735595703125, |
|
"logits/real": -0.15557542443275452, |
|
"logps/generated": -151.3992462158203, |
|
"logps/real": -153.5313720703125, |
|
"loss": 0.57, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.22929322719573975, |
|
"rewards/margins": 0.2820231318473816, |
|
"rewards/real": 0.052729904651641846, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.73871311766797e-07, |
|
"logits/generated": -1.7158994674682617, |
|
"logits/real": -0.3903907239437103, |
|
"logps/generated": -144.54037475585938, |
|
"logps/real": -180.07406616210938, |
|
"loss": 0.5647, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.2549547553062439, |
|
"rewards/margins": 0.2814892828464508, |
|
"rewards/real": 0.02653454802930355, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.7209384998222536e-07, |
|
"logits/generated": -1.6353950500488281, |
|
"logits/real": -0.19543974101543427, |
|
"logps/generated": -143.96304321289062, |
|
"logps/real": -158.52206420898438, |
|
"loss": 0.5595, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.2641616463661194, |
|
"rewards/margins": 0.3153914213180542, |
|
"rewards/real": 0.05122975632548332, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.7031638819765373e-07, |
|
"logits/generated": -1.6073200702667236, |
|
"logits/real": -0.14624395966529846, |
|
"logps/generated": -142.60208129882812, |
|
"logps/real": -160.1145477294922, |
|
"loss": 0.5447, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.27238526940345764, |
|
"rewards/margins": 0.3274722695350647, |
|
"rewards/real": 0.055087022483348846, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.6853892641308215e-07, |
|
"logits/generated": -1.5465781688690186, |
|
"logits/real": -0.15420304238796234, |
|
"logps/generated": -141.56056213378906, |
|
"logps/real": -148.23391723632812, |
|
"loss": 0.5345, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.27804866433143616, |
|
"rewards/margins": 0.3418120741844177, |
|
"rewards/real": 0.06376341730356216, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.6676146462851046e-07, |
|
"logits/generated": -1.8107671737670898, |
|
"logits/real": -0.2396336793899536, |
|
"logps/generated": -150.1639404296875, |
|
"logps/real": -157.54586791992188, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.2998965084552765, |
|
"rewards/margins": 0.352867066860199, |
|
"rewards/real": 0.05297055095434189, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.649840028439388e-07, |
|
"logits/generated": -1.583640694618225, |
|
"logits/real": -0.08962409198284149, |
|
"logps/generated": -140.02252197265625, |
|
"logps/real": -157.4398956298828, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.3320668339729309, |
|
"rewards/margins": 0.4081410765647888, |
|
"rewards/real": 0.07607419788837433, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.632065410593672e-07, |
|
"logits/generated": -1.6017663478851318, |
|
"logits/real": -0.16641400754451752, |
|
"logps/generated": -140.48094177246094, |
|
"logps/real": -152.2816925048828, |
|
"loss": 0.508, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.32677119970321655, |
|
"rewards/margins": 0.4095242917537689, |
|
"rewards/real": 0.08275306969881058, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.6142907927479556e-07, |
|
"logits/generated": -1.6886812448501587, |
|
"logits/real": -0.14055995643138885, |
|
"logps/generated": -144.4681854248047, |
|
"logps/real": -148.32789611816406, |
|
"loss": 0.5008, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.3314792215824127, |
|
"rewards/margins": 0.40919432044029236, |
|
"rewards/real": 0.07771513611078262, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.59651617490224e-07, |
|
"logits/generated": -1.5692707300186157, |
|
"logits/real": -0.17518237233161926, |
|
"logps/generated": -142.68887329101562, |
|
"logps/real": -158.769775390625, |
|
"loss": 0.4812, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.3942539095878601, |
|
"rewards/margins": 0.47341471910476685, |
|
"rewards/real": 0.07916079461574554, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.578741557056523e-07, |
|
"logits/generated": -1.585056185722351, |
|
"logits/real": -0.1907566785812378, |
|
"logps/generated": -149.67239379882812, |
|
"logps/real": -164.2400665283203, |
|
"loss": 0.4752, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.4361751973628998, |
|
"rewards/margins": 0.5167232751846313, |
|
"rewards/real": 0.08054807037115097, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.560966939210807e-07, |
|
"logits/generated": -1.5427873134613037, |
|
"logits/real": -0.2252950370311737, |
|
"logps/generated": -148.17556762695312, |
|
"logps/real": -153.58592224121094, |
|
"loss": 0.4725, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.44603148102760315, |
|
"rewards/margins": 0.540245532989502, |
|
"rewards/real": 0.09421406686306, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.54319232136509e-07, |
|
"logits/generated": -1.6634706258773804, |
|
"logits/real": -0.10494379699230194, |
|
"logps/generated": -148.4973602294922, |
|
"logps/real": -156.35145568847656, |
|
"loss": 0.4558, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.4902486801147461, |
|
"rewards/margins": 0.5618138313293457, |
|
"rewards/real": 0.0715651884675026, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.525417703519374e-07, |
|
"logits/generated": -1.534942865371704, |
|
"logits/real": -0.11929289996623993, |
|
"logps/generated": -142.94210815429688, |
|
"logps/real": -155.97193908691406, |
|
"loss": 0.4502, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.49913454055786133, |
|
"rewards/margins": 0.599966824054718, |
|
"rewards/real": 0.1008322685956955, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.507643085673658e-07, |
|
"logits/generated": -1.7383668422698975, |
|
"logits/real": -0.0883287638425827, |
|
"logps/generated": -153.69606018066406, |
|
"logps/real": -157.78956604003906, |
|
"loss": 0.4302, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.5414460301399231, |
|
"rewards/margins": 0.6505511999130249, |
|
"rewards/real": 0.10910521447658539, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.489868467827941e-07, |
|
"logits/generated": -1.8656871318817139, |
|
"logits/real": -0.06075664609670639, |
|
"logps/generated": -147.55734252929688, |
|
"logps/real": -150.363037109375, |
|
"loss": 0.4354, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.5383075475692749, |
|
"rewards/margins": 0.6746756434440613, |
|
"rewards/real": 0.1363680064678192, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.4720938499822254e-07, |
|
"logits/generated": -1.582039713859558, |
|
"logits/real": -0.18077224493026733, |
|
"logps/generated": -143.44656372070312, |
|
"logps/real": -155.9421844482422, |
|
"loss": 0.4154, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.5968641638755798, |
|
"rewards/margins": 0.700276255607605, |
|
"rewards/real": 0.10341213643550873, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.4543192321365085e-07, |
|
"logits/generated": -1.710147500038147, |
|
"logits/real": -0.11019489914178848, |
|
"logps/generated": -152.04287719726562, |
|
"logps/real": -154.15586853027344, |
|
"loss": 0.4099, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.6149613857269287, |
|
"rewards/margins": 0.7358669638633728, |
|
"rewards/real": 0.12090563774108887, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.4365446142907927e-07, |
|
"logits/generated": -1.474244475364685, |
|
"logits/real": -0.2550203204154968, |
|
"logps/generated": -142.00582885742188, |
|
"logps/real": -169.1204833984375, |
|
"loss": 0.4092, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.5503032207489014, |
|
"rewards/margins": 0.6812837719917297, |
|
"rewards/real": 0.1309804916381836, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.4187699964450764e-07, |
|
"logits/generated": -1.7478046417236328, |
|
"logits/real": -0.21401798725128174, |
|
"logps/generated": -157.27719116210938, |
|
"logps/real": -158.44354248046875, |
|
"loss": 0.3751, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.7286791205406189, |
|
"rewards/margins": 0.8884221315383911, |
|
"rewards/real": 0.1597430408000946, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.4009953785993595e-07, |
|
"logits/generated": -1.672858476638794, |
|
"logits/real": -0.18186981976032257, |
|
"logps/generated": -150.75625610351562, |
|
"logps/real": -170.07919311523438, |
|
"loss": 0.3658, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.7189536094665527, |
|
"rewards/margins": 0.8496885299682617, |
|
"rewards/real": 0.13073506951332092, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.3832207607536437e-07, |
|
"logits/generated": -1.6526811122894287, |
|
"logits/real": -0.14324292540550232, |
|
"logps/generated": -151.4724578857422, |
|
"logps/real": -154.22683715820312, |
|
"loss": 0.365, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.7191274166107178, |
|
"rewards/margins": 0.8864700198173523, |
|
"rewards/real": 0.16734261810779572, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.365446142907927e-07, |
|
"logits/generated": -1.5031042098999023, |
|
"logits/real": -0.058509550988674164, |
|
"logps/generated": -142.10000610351562, |
|
"logps/real": -148.38555908203125, |
|
"loss": 0.3644, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.672211229801178, |
|
"rewards/margins": 0.8271724581718445, |
|
"rewards/real": 0.1549612581729889, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.347671525062211e-07, |
|
"logits/generated": -1.637528419494629, |
|
"logits/real": -0.14800499379634857, |
|
"logps/generated": -150.9839630126953, |
|
"logps/real": -157.72259521484375, |
|
"loss": 0.3411, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.8599473237991333, |
|
"rewards/margins": 1.0459052324295044, |
|
"rewards/real": 0.1859578788280487, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.3298969072164947e-07, |
|
"logits/generated": -1.6163980960845947, |
|
"logits/real": -0.21588876843452454, |
|
"logps/generated": -149.24557495117188, |
|
"logps/real": -159.4187774658203, |
|
"loss": 0.3166, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.8925792574882507, |
|
"rewards/margins": 1.0570037364959717, |
|
"rewards/real": 0.16442444920539856, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.3121222893707783e-07, |
|
"logits/generated": -1.5266709327697754, |
|
"logits/real": -0.14610102772712708, |
|
"logps/generated": -143.5114288330078, |
|
"logps/real": -144.98056030273438, |
|
"loss": 0.3339, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.7892045974731445, |
|
"rewards/margins": 0.9511388540267944, |
|
"rewards/real": 0.16193436086177826, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.294347671525062e-07, |
|
"logits/generated": -1.6533136367797852, |
|
"logits/real": -0.06393162161111832, |
|
"logps/generated": -152.63758850097656, |
|
"logps/real": -149.1494903564453, |
|
"loss": 0.3034, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.8443581461906433, |
|
"rewards/margins": 1.0624687671661377, |
|
"rewards/real": 0.2181106060743332, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.276573053679346e-07, |
|
"logits/generated": -1.6860072612762451, |
|
"logits/real": -0.24717557430267334, |
|
"logps/generated": -150.0583953857422, |
|
"logps/real": -158.61351013183594, |
|
"loss": 0.3092, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.9428604245185852, |
|
"rewards/margins": 1.096800446510315, |
|
"rewards/real": 0.1539398431777954, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2587984358336293e-07, |
|
"logits/generated": -1.5238220691680908, |
|
"logits/real": -0.3366602063179016, |
|
"logps/generated": -143.13980102539062, |
|
"logps/real": -169.9083709716797, |
|
"loss": 0.308, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.8742044568061829, |
|
"rewards/margins": 1.0897338390350342, |
|
"rewards/real": 0.2155293971300125, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.241023817987913e-07, |
|
"logits/generated": -1.6218751668930054, |
|
"logits/real": -0.18687456846237183, |
|
"logps/generated": -152.43099975585938, |
|
"logps/real": -156.63699340820312, |
|
"loss": 0.2944, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.0684313774108887, |
|
"rewards/margins": 1.297039270401001, |
|
"rewards/real": 0.22860772907733917, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.2232492001421966e-07, |
|
"logits/generated": -1.6673694849014282, |
|
"logits/real": -0.17359259724617004, |
|
"logps/generated": -153.6688995361328, |
|
"logps/real": -171.0704803466797, |
|
"loss": 0.2723, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.088458776473999, |
|
"rewards/margins": 1.3285064697265625, |
|
"rewards/real": 0.24004778265953064, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.2054745822964803e-07, |
|
"logits/generated": -1.588289499282837, |
|
"logits/real": -0.3443184196949005, |
|
"logps/generated": -156.00241088867188, |
|
"logps/real": -176.3258514404297, |
|
"loss": 0.28, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.1722843647003174, |
|
"rewards/margins": 1.3391616344451904, |
|
"rewards/real": 0.16687722504138947, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1876999644507645e-07, |
|
"logits/generated": -1.5838396549224854, |
|
"logits/real": -0.16283896565437317, |
|
"logps/generated": -153.18551635742188, |
|
"logps/real": -156.0181427001953, |
|
"loss": 0.2632, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -1.1464365720748901, |
|
"rewards/margins": 1.3280103206634521, |
|
"rewards/real": 0.1815737932920456, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1699253466050476e-07, |
|
"logits/generated": -1.5133371353149414, |
|
"logits/real": -0.09465299546718597, |
|
"logps/generated": -150.0225830078125, |
|
"logps/real": -146.9311981201172, |
|
"loss": 0.2495, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.2300481796264648, |
|
"rewards/margins": 1.5034099817276, |
|
"rewards/real": 0.2733617424964905, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.152150728759332e-07, |
|
"logits/generated": -1.6122316122055054, |
|
"logits/real": -0.2318686991930008, |
|
"logps/generated": -159.11160278320312, |
|
"logps/real": -168.3660430908203, |
|
"loss": 0.2474, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.27286696434021, |
|
"rewards/margins": 1.4775692224502563, |
|
"rewards/real": 0.20470228791236877, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.134376110913615e-07, |
|
"logits/generated": -1.694283127784729, |
|
"logits/real": -0.10422974824905396, |
|
"logps/generated": -157.1562042236328, |
|
"logps/real": -150.5861358642578, |
|
"loss": 0.2281, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.3571908473968506, |
|
"rewards/margins": 1.6250684261322021, |
|
"rewards/real": 0.26787763833999634, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1166014930678986e-07, |
|
"logits/generated": -1.5623177289962769, |
|
"logits/real": -0.1493486613035202, |
|
"logps/generated": -151.51956176757812, |
|
"logps/real": -148.74160766601562, |
|
"loss": 0.2271, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.2717982530593872, |
|
"rewards/margins": 1.5056483745574951, |
|
"rewards/real": 0.23385019600391388, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.098826875222183e-07, |
|
"logits/generated": -1.6703298091888428, |
|
"logits/real": -0.10680408775806427, |
|
"logps/generated": -153.7378387451172, |
|
"logps/real": -155.27723693847656, |
|
"loss": 0.2101, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.5243198871612549, |
|
"rewards/margins": 1.7411978244781494, |
|
"rewards/real": 0.2168780267238617, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.081052257376466e-07, |
|
"logits/generated": -1.6254603862762451, |
|
"logits/real": -0.12683448195457458, |
|
"logps/generated": -152.67662048339844, |
|
"logps/real": -159.438720703125, |
|
"loss": 0.2012, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.4263641834259033, |
|
"rewards/margins": 1.615281343460083, |
|
"rewards/real": 0.18891693651676178, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.06327763953075e-07, |
|
"logits/generated": -1.6256685256958008, |
|
"logits/real": -0.2439662665128708, |
|
"logps/generated": -168.36373901367188, |
|
"logps/real": -169.80215454101562, |
|
"loss": 0.2073, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.3876028060913086, |
|
"rewards/margins": 1.599313735961914, |
|
"rewards/real": 0.21171092987060547, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.045503021685033e-07, |
|
"logits/generated": -1.4529263973236084, |
|
"logits/real": -0.10771781206130981, |
|
"logps/generated": -144.90695190429688, |
|
"logps/real": -161.789306640625, |
|
"loss": 0.2015, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.5157387256622314, |
|
"rewards/margins": 1.7433464527130127, |
|
"rewards/real": 0.22760768234729767, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0277284038393174e-07, |
|
"logits/generated": -1.5856255292892456, |
|
"logits/real": -0.188165545463562, |
|
"logps/generated": -155.0406494140625, |
|
"logps/real": -153.3172607421875, |
|
"loss": 0.2001, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.531455636024475, |
|
"rewards/margins": 1.780940055847168, |
|
"rewards/real": 0.24948418140411377, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.009953785993601e-07, |
|
"logits/generated": -1.59652578830719, |
|
"logits/real": -0.19590969383716583, |
|
"logps/generated": -158.6524200439453, |
|
"logps/real": -162.3628692626953, |
|
"loss": 0.1732, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.6548792123794556, |
|
"rewards/margins": 1.9046993255615234, |
|
"rewards/real": 0.24982018768787384, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.992179168147884e-07, |
|
"logits/generated": -1.622241735458374, |
|
"logits/real": -0.22373679280281067, |
|
"logps/generated": -151.82644653320312, |
|
"logps/real": -163.46112060546875, |
|
"loss": 0.1631, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.8496040105819702, |
|
"rewards/margins": 2.1095592975616455, |
|
"rewards/real": 0.25995510816574097, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.9744045503021684e-07, |
|
"logits/generated": -1.6477899551391602, |
|
"logits/real": -0.17104220390319824, |
|
"logps/generated": -160.24609375, |
|
"logps/real": -157.3255157470703, |
|
"loss": 0.1826, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.8226982355117798, |
|
"rewards/margins": 2.0370171070098877, |
|
"rewards/real": 0.21431896090507507, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.956629932456452e-07, |
|
"logits/generated": -1.5971219539642334, |
|
"logits/real": -0.07671569287776947, |
|
"logps/generated": -157.93313598632812, |
|
"logps/real": -151.52809143066406, |
|
"loss": 0.1566, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.9616973400115967, |
|
"rewards/margins": 2.2585387229919434, |
|
"rewards/real": 0.29684123396873474, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.938855314610736e-07, |
|
"logits/generated": -1.4671550989151, |
|
"logits/real": -0.33207738399505615, |
|
"logps/generated": -153.27047729492188, |
|
"logps/real": -168.5091552734375, |
|
"loss": 0.1614, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.8668949604034424, |
|
"rewards/margins": 2.094343662261963, |
|
"rewards/real": 0.22744879126548767, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.9210806967650194e-07, |
|
"logits/generated": -1.5912812948226929, |
|
"logits/real": -0.12044389545917511, |
|
"logps/generated": -165.49241638183594, |
|
"logps/real": -161.30966186523438, |
|
"loss": 0.1496, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -2.0533995628356934, |
|
"rewards/margins": 2.2546448707580566, |
|
"rewards/real": 0.20124495029449463, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.903306078919303e-07, |
|
"logits/generated": -1.474174976348877, |
|
"logits/real": -0.13984277844429016, |
|
"logps/generated": -151.71311950683594, |
|
"logps/real": -147.8576202392578, |
|
"loss": 0.163, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.827612280845642, |
|
"rewards/margins": 2.1235859394073486, |
|
"rewards/real": 0.29597336053848267, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.8855314610735867e-07, |
|
"logits/generated": -1.5910863876342773, |
|
"logits/real": -0.2784571945667267, |
|
"logps/generated": -161.6417999267578, |
|
"logps/real": -166.55007934570312, |
|
"loss": 0.1643, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -2.0047945976257324, |
|
"rewards/margins": 2.273448944091797, |
|
"rewards/real": 0.2686540186405182, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.867756843227871e-07, |
|
"logits/generated": -1.5748822689056396, |
|
"logits/real": -0.13812735676765442, |
|
"logps/generated": -168.13827514648438, |
|
"logps/real": -146.0213623046875, |
|
"loss": 0.1434, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -2.1277716159820557, |
|
"rewards/margins": 2.380551815032959, |
|
"rewards/real": 0.25278064608573914, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.849982225382154e-07, |
|
"logits/generated": -1.5670706033706665, |
|
"logits/real": -0.2948365807533264, |
|
"logps/generated": -157.73426818847656, |
|
"logps/real": -163.10662841796875, |
|
"loss": 0.1319, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -2.087007522583008, |
|
"rewards/margins": 2.3846383094787598, |
|
"rewards/real": 0.29763054847717285, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.8322076075364377e-07, |
|
"logits/generated": -1.6536438465118408, |
|
"logits/real": -0.20418302714824677, |
|
"logps/generated": -164.49765014648438, |
|
"logps/real": -156.45957946777344, |
|
"loss": 0.1406, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.122313976287842, |
|
"rewards/margins": 2.3531196117401123, |
|
"rewards/real": 0.2308053970336914, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.8144329896907214e-07, |
|
"logits/generated": -1.5101605653762817, |
|
"logits/real": 0.031563229858875275, |
|
"logps/generated": -160.07464599609375, |
|
"logps/real": -141.91098022460938, |
|
"loss": 0.1351, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.1049656867980957, |
|
"rewards/margins": 2.405392646789551, |
|
"rewards/real": 0.30042701959609985, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.796658371845005e-07, |
|
"logits/generated": -1.5053074359893799, |
|
"logits/real": -0.3015785813331604, |
|
"logps/generated": -160.33193969726562, |
|
"logps/real": -172.7531280517578, |
|
"loss": 0.127, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -2.241503953933716, |
|
"rewards/margins": 2.3939685821533203, |
|
"rewards/real": 0.15246476233005524, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.778883753999289e-07, |
|
"logits/generated": -1.6345714330673218, |
|
"logits/real": -0.10730817168951035, |
|
"logps/generated": -164.5872802734375, |
|
"logps/real": -144.282958984375, |
|
"loss": 0.134, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.2479634284973145, |
|
"rewards/margins": 2.539016008377075, |
|
"rewards/real": 0.29105255007743835, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.7611091361535723e-07, |
|
"logits/generated": -1.4203786849975586, |
|
"logits/real": -0.08897562325000763, |
|
"logps/generated": -153.96353149414062, |
|
"logps/real": -148.22430419921875, |
|
"loss": 0.1316, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -2.2757656574249268, |
|
"rewards/margins": 2.5660629272460938, |
|
"rewards/real": 0.2902970016002655, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.7433345183078565e-07, |
|
"logits/generated": -1.655291199684143, |
|
"logits/real": -0.01987510919570923, |
|
"logps/generated": -165.6507110595703, |
|
"logps/real": -138.74452209472656, |
|
"loss": 0.1208, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.6197855472564697, |
|
"rewards/margins": 2.930170774459839, |
|
"rewards/real": 0.3103852868080139, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.7255599004621397e-07, |
|
"logits/generated": -1.4382855892181396, |
|
"logits/real": -0.18755117058753967, |
|
"logps/generated": -164.83363342285156, |
|
"logps/real": -162.2533416748047, |
|
"loss": 0.1202, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.4666531085968018, |
|
"rewards/margins": 2.706902265548706, |
|
"rewards/real": 0.24024927616119385, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.7077852826164233e-07, |
|
"logits/generated": -1.6496721506118774, |
|
"logits/real": -0.22290463745594025, |
|
"logps/generated": -171.09011840820312, |
|
"logps/real": -166.6168670654297, |
|
"loss": 0.1093, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.7524375915527344, |
|
"rewards/margins": 2.975261688232422, |
|
"rewards/real": 0.22282417118549347, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.6900106647707075e-07, |
|
"logits/generated": -1.7099498510360718, |
|
"logits/real": -0.09361065924167633, |
|
"logps/generated": -173.44967651367188, |
|
"logps/real": -159.67605590820312, |
|
"loss": 0.1062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.8430135250091553, |
|
"rewards/margins": 3.0707504749298096, |
|
"rewards/real": 0.22773659229278564, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.6722360469249906e-07, |
|
"logits/generated": -1.425708532333374, |
|
"logits/real": -0.14051005244255066, |
|
"logps/generated": -165.50164794921875, |
|
"logps/real": -154.0505828857422, |
|
"loss": 0.1099, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.7030253410339355, |
|
"rewards/margins": 3.061180830001831, |
|
"rewards/real": 0.3581555485725403, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.654461429079275e-07, |
|
"logits/generated": -1.6046979427337646, |
|
"logits/real": -0.15294823050498962, |
|
"logps/generated": -172.5651092529297, |
|
"logps/real": -155.49411010742188, |
|
"loss": 0.0994, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.900844097137451, |
|
"rewards/margins": 3.12347149848938, |
|
"rewards/real": 0.22262760996818542, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.6366868112335585e-07, |
|
"logits/generated": -1.4243303537368774, |
|
"logits/real": -0.11334830522537231, |
|
"logps/generated": -156.65003967285156, |
|
"logps/real": -148.0675048828125, |
|
"loss": 0.1142, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.6817686557769775, |
|
"rewards/margins": 2.9299864768981934, |
|
"rewards/real": 0.2482178956270218, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.618912193387842e-07, |
|
"logits/generated": -1.584937334060669, |
|
"logits/real": -0.11413073539733887, |
|
"logps/generated": -178.1724090576172, |
|
"logps/real": -150.05291748046875, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.004488468170166, |
|
"rewards/margins": 3.3728480339050293, |
|
"rewards/real": 0.3683595657348633, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.601137575542126e-07, |
|
"logits/generated": -1.45297372341156, |
|
"logits/real": -0.279954195022583, |
|
"logps/generated": -172.93008422851562, |
|
"logps/real": -165.13259887695312, |
|
"loss": 0.0938, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.9406588077545166, |
|
"rewards/margins": 3.235691785812378, |
|
"rewards/real": 0.29503294825553894, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.583362957696409e-07, |
|
"logits/generated": -1.4498844146728516, |
|
"logits/real": -0.13191112875938416, |
|
"logps/generated": -161.51988220214844, |
|
"logps/real": -152.23013305664062, |
|
"loss": 0.0928, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.7946717739105225, |
|
"rewards/margins": 3.1111600399017334, |
|
"rewards/real": 0.3164881467819214, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.565588339850693e-07, |
|
"logits/generated": -1.4884414672851562, |
|
"logits/real": -0.017497604712843895, |
|
"logps/generated": -172.79934692382812, |
|
"logps/real": -154.42282104492188, |
|
"loss": 0.0864, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.134819984436035, |
|
"rewards/margins": 3.4709200859069824, |
|
"rewards/real": 0.3361000418663025, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.547813722004977e-07, |
|
"logits/generated": -1.6363372802734375, |
|
"logits/real": -0.22440211474895477, |
|
"logps/generated": -170.09425354003906, |
|
"logps/real": -162.13650512695312, |
|
"loss": 0.0984, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.131570816040039, |
|
"rewards/margins": 3.428776264190674, |
|
"rewards/real": 0.29720538854599, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.5300391041592605e-07, |
|
"logits/generated": -1.484832763671875, |
|
"logits/real": -0.10550673305988312, |
|
"logps/generated": -168.85377502441406, |
|
"logps/real": -150.67498779296875, |
|
"loss": 0.0837, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.156721830368042, |
|
"rewards/margins": 3.53771710395813, |
|
"rewards/real": 0.3809953033924103, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.512264486313544e-07, |
|
"logits/generated": -1.59353768825531, |
|
"logits/real": -0.1766626536846161, |
|
"logps/generated": -176.32730102539062, |
|
"logps/real": -161.31967163085938, |
|
"loss": 0.0912, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.0488924980163574, |
|
"rewards/margins": 3.374645709991455, |
|
"rewards/real": 0.32575327157974243, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.494489868467828e-07, |
|
"logits/generated": -1.556908369064331, |
|
"logits/real": -0.23450179398059845, |
|
"logps/generated": -183.6482696533203, |
|
"logps/real": -159.4730987548828, |
|
"loss": 0.0868, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.4513683319091797, |
|
"rewards/margins": 3.7840518951416016, |
|
"rewards/real": 0.33268359303474426, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.4767152506221114e-07, |
|
"logits/generated": -1.6676868200302124, |
|
"logits/real": -0.32121220231056213, |
|
"logps/generated": -172.13592529296875, |
|
"logps/real": -174.69143676757812, |
|
"loss": 0.0945, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.157599449157715, |
|
"rewards/margins": 3.516270875930786, |
|
"rewards/real": 0.35867148637771606, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.458940632776395e-07, |
|
"logits/generated": -1.4721453189849854, |
|
"logits/real": -0.13291652500629425, |
|
"logps/generated": -169.03042602539062, |
|
"logps/real": -140.19325256347656, |
|
"loss": 0.0937, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.0053653717041016, |
|
"rewards/margins": 3.348203659057617, |
|
"rewards/real": 0.3428387939929962, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.441166014930679e-07, |
|
"logits/generated": -1.5415700674057007, |
|
"logits/real": -0.20805084705352783, |
|
"logps/generated": -173.4481658935547, |
|
"logps/real": -152.56295776367188, |
|
"loss": 0.0859, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.1931285858154297, |
|
"rewards/margins": 3.5694432258605957, |
|
"rewards/real": 0.3763141930103302, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.4233913970849624e-07, |
|
"logits/generated": -1.5936585664749146, |
|
"logits/real": -0.13701248168945312, |
|
"logps/generated": -179.35784912109375, |
|
"logps/real": -149.34103393554688, |
|
"loss": 0.0821, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.652930498123169, |
|
"rewards/margins": 4.033869743347168, |
|
"rewards/real": 0.3809398114681244, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.405616779239246e-07, |
|
"logits/generated": -1.418588399887085, |
|
"logits/real": -0.20335432887077332, |
|
"logps/generated": -173.30572509765625, |
|
"logps/real": -161.86990356445312, |
|
"loss": 0.077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.459143877029419, |
|
"rewards/margins": 3.7515323162078857, |
|
"rewards/real": 0.29238831996917725, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.38784216139353e-07, |
|
"logits/generated": -1.5245991945266724, |
|
"logits/real": -0.04596313461661339, |
|
"logps/generated": -177.37301635742188, |
|
"logps/real": -146.84376525878906, |
|
"loss": 0.0816, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.607050657272339, |
|
"rewards/margins": 3.9615414142608643, |
|
"rewards/real": 0.35449081659317017, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.370067543547814e-07, |
|
"logits/generated": -1.5164806842803955, |
|
"logits/real": -0.10607216507196426, |
|
"logps/generated": -174.8607177734375, |
|
"logps/real": -140.32608032226562, |
|
"loss": 0.0887, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.272444486618042, |
|
"rewards/margins": 3.617063045501709, |
|
"rewards/real": 0.34461817145347595, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.352292925702097e-07, |
|
"logits/generated": -1.4171545505523682, |
|
"logits/real": -0.10191817581653595, |
|
"logps/generated": -176.97470092773438, |
|
"logps/real": -158.6232147216797, |
|
"loss": 0.0788, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.494141101837158, |
|
"rewards/margins": 3.8596279621124268, |
|
"rewards/real": 0.36548665165901184, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.334518307856381e-07, |
|
"logits/generated": -1.532880187034607, |
|
"logits/real": -0.10620009899139404, |
|
"logps/generated": -175.44747924804688, |
|
"logps/real": -155.5845489501953, |
|
"loss": 0.076, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.504039764404297, |
|
"rewards/margins": 3.8370566368103027, |
|
"rewards/real": 0.33301740884780884, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.316743690010665e-07, |
|
"logits/generated": -1.5539209842681885, |
|
"logits/real": 0.018734993413090706, |
|
"logps/generated": -178.60914611816406, |
|
"logps/real": -141.38162231445312, |
|
"loss": 0.0602, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.7286376953125, |
|
"rewards/margins": 4.203549385070801, |
|
"rewards/real": 0.474911630153656, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.298969072164948e-07, |
|
"logits/generated": -1.4938045740127563, |
|
"logits/real": -0.2437242567539215, |
|
"logps/generated": -173.7627716064453, |
|
"logps/real": -169.41697692871094, |
|
"loss": 0.0852, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.6397221088409424, |
|
"rewards/margins": 3.9213058948516846, |
|
"rewards/real": 0.28158318996429443, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.281194454319232e-07, |
|
"logits/generated": -1.4745817184448242, |
|
"logits/real": -0.3218688666820526, |
|
"logps/generated": -178.52267456054688, |
|
"logps/real": -164.70156860351562, |
|
"loss": 0.0814, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.73933744430542, |
|
"rewards/margins": 4.0132269859313965, |
|
"rewards/real": 0.2738892138004303, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.2634198364735154e-07, |
|
"logits/generated": -1.3811590671539307, |
|
"logits/real": -0.2854043245315552, |
|
"logps/generated": -169.4147186279297, |
|
"logps/real": -154.60781860351562, |
|
"loss": 0.0859, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.4979610443115234, |
|
"rewards/margins": 3.756443738937378, |
|
"rewards/real": 0.258482426404953, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.2456452186277996e-07, |
|
"logits/generated": -1.6183143854141235, |
|
"logits/real": -0.16637897491455078, |
|
"logps/generated": -185.69973754882812, |
|
"logps/real": -156.8994598388672, |
|
"loss": 0.0636, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.972916841506958, |
|
"rewards/margins": 4.307384490966797, |
|
"rewards/real": 0.3344675302505493, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.227870600782083e-07, |
|
"logits/generated": -1.466426134109497, |
|
"logits/real": -0.013457128778100014, |
|
"logps/generated": -183.57412719726562, |
|
"logps/real": -144.6224822998047, |
|
"loss": 0.0681, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.7342000007629395, |
|
"rewards/margins": 4.176131248474121, |
|
"rewards/real": 0.44193094968795776, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.210095982936367e-07, |
|
"logits/generated": -1.6522903442382812, |
|
"logits/real": -0.17722484469413757, |
|
"logps/generated": -176.93878173828125, |
|
"logps/real": -152.43270874023438, |
|
"loss": 0.0732, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.882312774658203, |
|
"rewards/margins": 4.290909767150879, |
|
"rewards/real": 0.40859729051589966, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.1923213650906505e-07, |
|
"logits/generated": -1.5739877223968506, |
|
"logits/real": -0.13895940780639648, |
|
"logps/generated": -183.1358184814453, |
|
"logps/real": -158.11346435546875, |
|
"loss": 0.0595, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.044293403625488, |
|
"rewards/margins": 4.440293312072754, |
|
"rewards/real": 0.39599940180778503, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.1745467472449337e-07, |
|
"logits/generated": -1.4868980646133423, |
|
"logits/real": -0.21168294548988342, |
|
"logps/generated": -174.8867645263672, |
|
"logps/real": -164.8509063720703, |
|
"loss": 0.0788, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.7173595428466797, |
|
"rewards/margins": 4.043525218963623, |
|
"rewards/real": 0.3261655867099762, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.156772129399218e-07, |
|
"logits/generated": -1.4937036037445068, |
|
"logits/real": -0.08979051560163498, |
|
"logps/generated": -172.25582885742188, |
|
"logps/real": -138.26126098632812, |
|
"loss": 0.0632, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.6981003284454346, |
|
"rewards/margins": 4.175982475280762, |
|
"rewards/real": 0.47788214683532715, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.1389975115535015e-07, |
|
"logits/generated": -1.4787460565567017, |
|
"logits/real": -0.13007709383964539, |
|
"logps/generated": -172.90484619140625, |
|
"logps/real": -150.52801513671875, |
|
"loss": 0.0638, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.8822848796844482, |
|
"rewards/margins": 4.298541069030762, |
|
"rewards/real": 0.41625672578811646, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.121222893707785e-07, |
|
"logits/generated": -1.5493468046188354, |
|
"logits/real": -0.2214834988117218, |
|
"logps/generated": -184.1126251220703, |
|
"logps/real": -168.06861877441406, |
|
"loss": 0.0797, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -4.093251705169678, |
|
"rewards/margins": 4.350040435791016, |
|
"rewards/real": 0.2567889094352722, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.103448275862069e-07, |
|
"logits/generated": -1.63626229763031, |
|
"logits/real": -0.19701644778251648, |
|
"logps/generated": -187.59336853027344, |
|
"logps/real": -165.13125610351562, |
|
"loss": 0.0734, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.259321212768555, |
|
"rewards/margins": 4.569417476654053, |
|
"rewards/real": 0.3100959062576294, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.0856736580163525e-07, |
|
"logits/generated": -1.472497820854187, |
|
"logits/real": -0.20631149411201477, |
|
"logps/generated": -176.0330352783203, |
|
"logps/real": -159.4289093017578, |
|
"loss": 0.0574, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.0790910720825195, |
|
"rewards/margins": 4.325627326965332, |
|
"rewards/real": 0.24653713405132294, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.067899040170636e-07, |
|
"logits/generated": -1.4534804821014404, |
|
"logits/real": -0.1081136092543602, |
|
"logps/generated": -184.11058044433594, |
|
"logps/real": -157.69619750976562, |
|
"loss": 0.0579, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -4.127554893493652, |
|
"rewards/margins": 4.575366973876953, |
|
"rewards/real": 0.4478122591972351, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.05012442232492e-07, |
|
"logits/generated": -1.5773451328277588, |
|
"logits/real": -0.11705253273248672, |
|
"logps/generated": -192.18496704101562, |
|
"logps/real": -155.13021850585938, |
|
"loss": 0.0499, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.557686805725098, |
|
"rewards/margins": 4.943387031555176, |
|
"rewards/real": 0.38570016622543335, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.0323498044792035e-07, |
|
"logits/generated": -1.486706018447876, |
|
"logits/real": -0.019088217988610268, |
|
"logps/generated": -180.96835327148438, |
|
"logps/real": -144.02244567871094, |
|
"loss": 0.0602, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.163193225860596, |
|
"rewards/margins": 4.452478885650635, |
|
"rewards/real": 0.2892855107784271, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.014575186633487e-07, |
|
"logits/generated": -1.518604040145874, |
|
"logits/real": -0.1295831948518753, |
|
"logps/generated": -178.1422882080078, |
|
"logps/real": -154.9306640625, |
|
"loss": 0.0632, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -4.255062103271484, |
|
"rewards/margins": 4.6544036865234375, |
|
"rewards/real": 0.39934203028678894, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.996800568787771e-07, |
|
"logits/generated": -1.4814355373382568, |
|
"logits/real": -0.1749541461467743, |
|
"logps/generated": -173.97134399414062, |
|
"logps/real": -152.14175415039062, |
|
"loss": 0.071, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.158992290496826, |
|
"rewards/margins": 4.453802108764648, |
|
"rewards/real": 0.29480981826782227, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.9790259509420545e-07, |
|
"logits/generated": -1.534990668296814, |
|
"logits/real": -0.15847769379615784, |
|
"logps/generated": -187.8436279296875, |
|
"logps/real": -155.74073791503906, |
|
"loss": 0.0552, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -4.2658538818359375, |
|
"rewards/margins": 4.6822991371154785, |
|
"rewards/real": 0.41644495725631714, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.9612513330963387e-07, |
|
"logits/generated": -1.5072695016860962, |
|
"logits/real": 0.0005754769081249833, |
|
"logps/generated": -182.82794189453125, |
|
"logps/real": -132.48300170898438, |
|
"loss": 0.0646, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.370685577392578, |
|
"rewards/margins": 4.957348346710205, |
|
"rewards/real": 0.5866621732711792, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.943476715250622e-07, |
|
"logits/generated": -1.4925824403762817, |
|
"logits/real": -0.10418804734945297, |
|
"logps/generated": -186.21078491210938, |
|
"logps/real": -149.9343719482422, |
|
"loss": 0.0518, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.665707588195801, |
|
"rewards/margins": 5.203892707824707, |
|
"rewards/real": 0.5381842851638794, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.9257020974049054e-07, |
|
"logits/generated": -1.5403038263320923, |
|
"logits/real": -0.24279017746448517, |
|
"logps/generated": -187.37631225585938, |
|
"logps/real": -162.3560791015625, |
|
"loss": 0.0608, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.447120189666748, |
|
"rewards/margins": 4.881729602813721, |
|
"rewards/real": 0.4346093535423279, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.9079274795591896e-07, |
|
"logits/generated": -1.3779773712158203, |
|
"logits/real": -0.14756569266319275, |
|
"logps/generated": -178.95181274414062, |
|
"logps/real": -165.2563018798828, |
|
"loss": 0.0656, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -4.4514360427856445, |
|
"rewards/margins": 4.870530605316162, |
|
"rewards/real": 0.41909438371658325, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.890152861713473e-07, |
|
"logits/generated": -1.3544880151748657, |
|
"logits/real": -0.11259318888187408, |
|
"logps/generated": -182.8341827392578, |
|
"logps/real": -151.156494140625, |
|
"loss": 0.0573, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.413695812225342, |
|
"rewards/margins": 4.731995582580566, |
|
"rewards/real": 0.31829962134361267, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.872378243867757e-07, |
|
"logits/generated": -1.5561285018920898, |
|
"logits/real": -0.14099498093128204, |
|
"logps/generated": -184.40415954589844, |
|
"logps/real": -154.1420135498047, |
|
"loss": 0.0734, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.361074924468994, |
|
"rewards/margins": 4.7188801765441895, |
|
"rewards/real": 0.35780465602874756, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.85460362602204e-07, |
|
"logits/generated": -1.4556692838668823, |
|
"logits/real": -0.15908826887607574, |
|
"logps/generated": -179.03842163085938, |
|
"logps/real": -151.68093872070312, |
|
"loss": 0.0625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.344630241394043, |
|
"rewards/margins": 4.7134857177734375, |
|
"rewards/real": 0.36885565519332886, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.8368290081763243e-07, |
|
"logits/generated": -1.4455296993255615, |
|
"logits/real": -0.10524747520685196, |
|
"logps/generated": -178.05532836914062, |
|
"logps/real": -151.86215209960938, |
|
"loss": 0.0725, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -4.490212917327881, |
|
"rewards/margins": 4.926787376403809, |
|
"rewards/real": 0.4365745484828949, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.819054390330608e-07, |
|
"logits/generated": -1.5643322467803955, |
|
"logits/real": -0.22240431606769562, |
|
"logps/generated": -187.14395141601562, |
|
"logps/real": -157.94808959960938, |
|
"loss": 0.0598, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.599512577056885, |
|
"rewards/margins": 5.061091899871826, |
|
"rewards/real": 0.46157994866371155, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.801279772484891e-07, |
|
"logits/generated": -1.4671770334243774, |
|
"logits/real": -0.19106905162334442, |
|
"logps/generated": -180.06546020507812, |
|
"logps/real": -160.6680145263672, |
|
"loss": 0.0703, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -4.547145843505859, |
|
"rewards/margins": 4.966696739196777, |
|
"rewards/real": 0.4195513129234314, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.783505154639175e-07, |
|
"logits/generated": -1.4373109340667725, |
|
"logits/real": -0.14462901651859283, |
|
"logps/generated": -187.5804901123047, |
|
"logps/real": -151.3854217529297, |
|
"loss": 0.0692, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -4.61647891998291, |
|
"rewards/margins": 5.218323707580566, |
|
"rewards/real": 0.6018449068069458, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.7657305367934584e-07, |
|
"logits/generated": -1.5236389636993408, |
|
"logits/real": -0.18100953102111816, |
|
"logps/generated": -183.6896514892578, |
|
"logps/real": -155.19091796875, |
|
"loss": 0.0668, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.461154460906982, |
|
"rewards/margins": 5.004509925842285, |
|
"rewards/real": 0.5433556437492371, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.7479559189477426e-07, |
|
"logits/generated": -1.4411672353744507, |
|
"logits/real": -0.09060608595609665, |
|
"logps/generated": -182.39309692382812, |
|
"logps/real": -149.1797637939453, |
|
"loss": 0.0545, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.790371894836426, |
|
"rewards/margins": 5.318713665008545, |
|
"rewards/real": 0.52834153175354, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.730181301102026e-07, |
|
"logits/generated": -1.4803905487060547, |
|
"logits/real": -0.14638617634773254, |
|
"logps/generated": -190.68942260742188, |
|
"logps/real": -158.1866912841797, |
|
"loss": 0.0516, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.740262031555176, |
|
"rewards/margins": 5.252830505371094, |
|
"rewards/real": 0.5125688910484314, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.71240668325631e-07, |
|
"logits/generated": -1.4773404598236084, |
|
"logits/real": -0.28140923380851746, |
|
"logps/generated": -185.19883728027344, |
|
"logps/real": -168.70620727539062, |
|
"loss": 0.0616, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -4.644852638244629, |
|
"rewards/margins": 5.096782684326172, |
|
"rewards/real": 0.45193013548851013, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.6946320654105936e-07, |
|
"logits/generated": -1.497534155845642, |
|
"logits/real": -0.36784881353378296, |
|
"logps/generated": -178.10247802734375, |
|
"logps/real": -165.84774780273438, |
|
"loss": 0.0613, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.512256145477295, |
|
"rewards/margins": 4.852838039398193, |
|
"rewards/real": 0.3405814468860626, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.676857447564877e-07, |
|
"logits/generated": -1.4658567905426025, |
|
"logits/real": -0.12854836881160736, |
|
"logps/generated": -188.9214324951172, |
|
"logps/real": -150.326416015625, |
|
"loss": 0.0622, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.890933036804199, |
|
"rewards/margins": 5.391529083251953, |
|
"rewards/real": 0.5005959272384644, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.659082829719161e-07, |
|
"logits/generated": -1.3702716827392578, |
|
"logits/real": -0.18641087412834167, |
|
"logps/generated": -184.32167053222656, |
|
"logps/real": -166.14932250976562, |
|
"loss": 0.0576, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -4.7197346687316895, |
|
"rewards/margins": 5.14710807800293, |
|
"rewards/real": 0.42737287282943726, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.6413082118734445e-07, |
|
"logits/generated": -1.5208382606506348, |
|
"logits/real": -0.10880865156650543, |
|
"logps/generated": -189.2379150390625, |
|
"logps/real": -157.01681518554688, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.028722286224365, |
|
"rewards/margins": 5.426579475402832, |
|
"rewards/real": 0.3978571891784668, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.623533594027728e-07, |
|
"logits/generated": -1.476301908493042, |
|
"logits/real": -0.19111457467079163, |
|
"logps/generated": -186.6832275390625, |
|
"logps/real": -155.54295349121094, |
|
"loss": 0.0473, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -4.851848125457764, |
|
"rewards/margins": 5.253470420837402, |
|
"rewards/real": 0.40162190794944763, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.605758976182012e-07, |
|
"logits/generated": -1.5290987491607666, |
|
"logits/real": -0.1336485594511032, |
|
"logps/generated": -206.06509399414062, |
|
"logps/real": -151.84494018554688, |
|
"loss": 0.0509, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.372347831726074, |
|
"rewards/margins": 5.8217620849609375, |
|
"rewards/real": 0.4494136869907379, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.587984358336296e-07, |
|
"logits/generated": -1.5546083450317383, |
|
"logits/real": -0.23617632687091827, |
|
"logps/generated": -203.20187377929688, |
|
"logps/real": -173.64894104003906, |
|
"loss": 0.04, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.4583282470703125, |
|
"rewards/margins": 5.816749095916748, |
|
"rewards/real": 0.35842153429985046, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.570209740490579e-07, |
|
"logits/generated": -1.4508366584777832, |
|
"logits/real": -0.04887630045413971, |
|
"logps/generated": -186.7039031982422, |
|
"logps/real": -153.81202697753906, |
|
"loss": 0.0566, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -4.691603660583496, |
|
"rewards/margins": 5.064540386199951, |
|
"rewards/real": 0.3729364275932312, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.5524351226448634e-07, |
|
"logits/generated": -1.3482738733291626, |
|
"logits/real": -0.2794376015663147, |
|
"logps/generated": -198.63124084472656, |
|
"logps/real": -172.26560974121094, |
|
"loss": 0.0505, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.2879533767700195, |
|
"rewards/margins": 5.73902702331543, |
|
"rewards/real": 0.45107364654541016, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.5346605047991465e-07, |
|
"logits/generated": -1.5188000202178955, |
|
"logits/real": -0.07588844746351242, |
|
"logps/generated": -190.33047485351562, |
|
"logps/real": -149.24380493164062, |
|
"loss": 0.0473, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.852927207946777, |
|
"rewards/margins": 5.346456527709961, |
|
"rewards/real": 0.4935285151004791, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.51688588695343e-07, |
|
"logits/generated": -1.4112365245819092, |
|
"logits/real": -0.037810131907463074, |
|
"logps/generated": -181.25367736816406, |
|
"logps/real": -135.86203002929688, |
|
"loss": 0.045, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.706342697143555, |
|
"rewards/margins": 5.279058456420898, |
|
"rewards/real": 0.5727157592773438, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.499111269107714e-07, |
|
"logits/generated": -1.4743269681930542, |
|
"logits/real": -0.3346148133277893, |
|
"logps/generated": -201.05142211914062, |
|
"logps/real": -184.9207305908203, |
|
"loss": 0.0392, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.311040878295898, |
|
"rewards/margins": 5.642301559448242, |
|
"rewards/real": 0.3312605321407318, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.4813366512619975e-07, |
|
"logits/generated": -1.530133843421936, |
|
"logits/real": -0.06566596776247025, |
|
"logps/generated": -189.20339965820312, |
|
"logps/real": -152.02157592773438, |
|
"loss": 0.0492, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -5.331114768981934, |
|
"rewards/margins": 5.823639869689941, |
|
"rewards/real": 0.4925246238708496, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.4635620334162817e-07, |
|
"logits/generated": -1.4688317775726318, |
|
"logits/real": -0.11931806802749634, |
|
"logps/generated": -190.11228942871094, |
|
"logps/real": -157.91522216796875, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.377558708190918, |
|
"rewards/margins": 5.723202705383301, |
|
"rewards/real": 0.3456438183784485, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.4457874155705653e-07, |
|
"logits/generated": -1.5590466260910034, |
|
"logits/real": -0.058088403195142746, |
|
"logps/generated": -196.60153198242188, |
|
"logps/real": -145.74993896484375, |
|
"loss": 0.0481, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.340886116027832, |
|
"rewards/margins": 5.873441219329834, |
|
"rewards/real": 0.532554030418396, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.428012797724849e-07, |
|
"logits/generated": -1.46759831905365, |
|
"logits/real": -0.08282347023487091, |
|
"logps/generated": -195.71517944335938, |
|
"logps/real": -151.4615478515625, |
|
"loss": 0.0455, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.418810844421387, |
|
"rewards/margins": 5.871450424194336, |
|
"rewards/real": 0.452639639377594, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.4102381798791327e-07, |
|
"logits/generated": -1.6086094379425049, |
|
"logits/real": -0.11050989478826523, |
|
"logps/generated": -208.8617401123047, |
|
"logps/real": -159.17808532714844, |
|
"loss": 0.0382, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.02325439453125, |
|
"rewards/margins": 6.450479030609131, |
|
"rewards/real": 0.427224338054657, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.392463562033416e-07, |
|
"logits/generated": -1.5008881092071533, |
|
"logits/real": -0.15384702384471893, |
|
"logps/generated": -188.76950073242188, |
|
"logps/real": -159.75119018554688, |
|
"loss": 0.0412, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.168057918548584, |
|
"rewards/margins": 5.656711101531982, |
|
"rewards/real": 0.48865312337875366, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.3746889441877e-07, |
|
"logits/generated": -1.4526275396347046, |
|
"logits/real": -0.3002064824104309, |
|
"logps/generated": -199.5317840576172, |
|
"logps/real": -170.03663635253906, |
|
"loss": 0.0607, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -5.6653547286987305, |
|
"rewards/margins": 6.049663066864014, |
|
"rewards/real": 0.3843079209327698, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.3569143263419836e-07, |
|
"logits/generated": -1.6317417621612549, |
|
"logits/real": -0.10097722709178925, |
|
"logps/generated": -197.29315185546875, |
|
"logps/real": -150.00180053710938, |
|
"loss": 0.0414, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.416452407836914, |
|
"rewards/margins": 5.868271827697754, |
|
"rewards/real": 0.45181870460510254, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.3391397084962673e-07, |
|
"logits/generated": -1.4566489458084106, |
|
"logits/real": -0.1597038060426712, |
|
"logps/generated": -182.79869079589844, |
|
"logps/real": -150.32553100585938, |
|
"loss": 0.0467, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -5.3501973152160645, |
|
"rewards/margins": 5.728513240814209, |
|
"rewards/real": 0.37831613421440125, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.3213650906505507e-07, |
|
"logits/generated": -1.4283339977264404, |
|
"logits/real": -0.01142942439764738, |
|
"logps/generated": -187.8427734375, |
|
"logps/real": -133.63656616210938, |
|
"loss": 0.0486, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.248843193054199, |
|
"rewards/margins": 5.7334394454956055, |
|
"rewards/real": 0.4845956861972809, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.3035904728048346e-07, |
|
"logits/generated": -1.47895085811615, |
|
"logits/real": -0.06862284243106842, |
|
"logps/generated": -202.21640014648438, |
|
"logps/real": -152.25730895996094, |
|
"loss": 0.0335, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.783873558044434, |
|
"rewards/margins": 6.278851509094238, |
|
"rewards/real": 0.49497756361961365, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.2858158549591183e-07, |
|
"logits/generated": -1.5177428722381592, |
|
"logits/real": -0.22056560218334198, |
|
"logps/generated": -202.6049041748047, |
|
"logps/real": -165.22787475585938, |
|
"loss": 0.0576, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -5.6912360191345215, |
|
"rewards/margins": 6.0315961837768555, |
|
"rewards/real": 0.3403596580028534, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.268041237113402e-07, |
|
"logits/generated": -1.371692419052124, |
|
"logits/real": -0.06362702697515488, |
|
"logps/generated": -191.365234375, |
|
"logps/real": -143.11497497558594, |
|
"loss": 0.0449, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.428120136260986, |
|
"rewards/margins": 6.007481098175049, |
|
"rewards/real": 0.5793606042861938, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.2502666192676856e-07, |
|
"logits/generated": -1.279131293296814, |
|
"logits/real": -0.09258531033992767, |
|
"logps/generated": -190.04415893554688, |
|
"logps/real": -146.318115234375, |
|
"loss": 0.0404, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -5.40255069732666, |
|
"rewards/margins": 5.839011192321777, |
|
"rewards/real": 0.4364606440067291, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.2324920014219693e-07, |
|
"logits/generated": -1.4574114084243774, |
|
"logits/real": -0.11219004541635513, |
|
"logps/generated": -194.90048217773438, |
|
"logps/real": -162.45753479003906, |
|
"loss": 0.0458, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.726960182189941, |
|
"rewards/margins": 6.180015563964844, |
|
"rewards/real": 0.4530550539493561, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.2147173835762532e-07, |
|
"logits/generated": -1.5723118782043457, |
|
"logits/real": -0.04347873479127884, |
|
"logps/generated": -209.44775390625, |
|
"logps/real": -149.29434204101562, |
|
"loss": 0.0388, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.9704813957214355, |
|
"rewards/margins": 6.489261627197266, |
|
"rewards/real": 0.5187799334526062, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.1969427657305366e-07, |
|
"logits/generated": -1.4479305744171143, |
|
"logits/real": -0.0961630642414093, |
|
"logps/generated": -197.63656616210938, |
|
"logps/real": -152.1067352294922, |
|
"loss": 0.0491, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.782441139221191, |
|
"rewards/margins": 6.282473087310791, |
|
"rewards/real": 0.5000313520431519, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.1791681478848203e-07, |
|
"logits/generated": -1.435664415359497, |
|
"logits/real": -0.1993381530046463, |
|
"logps/generated": -190.99139404296875, |
|
"logps/real": -152.6283416748047, |
|
"loss": 0.0426, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -5.520625114440918, |
|
"rewards/margins": 5.948267936706543, |
|
"rewards/real": 0.42764243483543396, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.161393530039104e-07, |
|
"logits/generated": -1.458038568496704, |
|
"logits/real": -0.12041410058736801, |
|
"logps/generated": -192.7799530029297, |
|
"logps/real": -155.81527709960938, |
|
"loss": 0.0388, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.656401634216309, |
|
"rewards/margins": 6.175935745239258, |
|
"rewards/real": 0.519533097743988, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.1436189121933878e-07, |
|
"logits/generated": -1.3725850582122803, |
|
"logits/real": -0.1823103129863739, |
|
"logps/generated": -192.62945556640625, |
|
"logps/real": -168.57968139648438, |
|
"loss": 0.0402, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -5.725317001342773, |
|
"rewards/margins": 6.0909833908081055, |
|
"rewards/real": 0.36566513776779175, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.1258442943476715e-07, |
|
"logits/generated": -1.4110640287399292, |
|
"logits/real": 0.0704520046710968, |
|
"logps/generated": -194.18740844726562, |
|
"logps/real": -138.5281219482422, |
|
"loss": 0.0424, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.5607404708862305, |
|
"rewards/margins": 6.144190788269043, |
|
"rewards/real": 0.5834503769874573, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.1080696765019552e-07, |
|
"logits/generated": -1.479405164718628, |
|
"logits/real": -0.22825618088245392, |
|
"logps/generated": -200.736083984375, |
|
"logps/real": -163.63735961914062, |
|
"loss": 0.0318, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.762534141540527, |
|
"rewards/margins": 6.215503692626953, |
|
"rewards/real": 0.4529697000980377, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.0902950586562388e-07, |
|
"logits/generated": -1.4002245664596558, |
|
"logits/real": -0.14121662080287933, |
|
"logps/generated": -198.7134552001953, |
|
"logps/real": -152.26492309570312, |
|
"loss": 0.0427, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.689166069030762, |
|
"rewards/margins": 6.122091770172119, |
|
"rewards/real": 0.43292540311813354, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.0725204408105225e-07, |
|
"logits/generated": -1.4297267198562622, |
|
"logits/real": -0.19367796182632446, |
|
"logps/generated": -185.13780212402344, |
|
"logps/real": -156.70587158203125, |
|
"loss": 0.0436, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -5.418356895446777, |
|
"rewards/margins": 5.945398330688477, |
|
"rewards/real": 0.5270417928695679, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.0547458229648061e-07, |
|
"logits/generated": -1.5275622606277466, |
|
"logits/real": -0.3052076995372772, |
|
"logps/generated": -208.7298583984375, |
|
"logps/real": -178.06521606445312, |
|
"loss": 0.0376, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.276623249053955, |
|
"rewards/margins": 6.54474401473999, |
|
"rewards/real": 0.2681209444999695, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.0369712051190898e-07, |
|
"logits/generated": -1.4920480251312256, |
|
"logits/real": -0.11144111305475235, |
|
"logps/generated": -205.11953735351562, |
|
"logps/real": -154.30245971679688, |
|
"loss": 0.0376, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -5.8552117347717285, |
|
"rewards/margins": 6.331951141357422, |
|
"rewards/real": 0.4767402112483978, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.0191965872733735e-07, |
|
"logits/generated": -1.4427194595336914, |
|
"logits/real": -0.16335290670394897, |
|
"logps/generated": -204.39944458007812, |
|
"logps/real": -155.73521423339844, |
|
"loss": 0.0372, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.107733726501465, |
|
"rewards/margins": 6.5864152908325195, |
|
"rewards/real": 0.47868162393569946, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.001421969427657e-07, |
|
"logits/generated": -1.4014308452606201, |
|
"logits/real": -0.10531187057495117, |
|
"logps/generated": -205.13485717773438, |
|
"logps/real": -151.3143768310547, |
|
"loss": 0.0324, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.123740196228027, |
|
"rewards/margins": 6.699014186859131, |
|
"rewards/real": 0.5752742886543274, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.983647351581941e-07, |
|
"logits/generated": -1.3716758489608765, |
|
"logits/real": -0.017807159572839737, |
|
"logps/generated": -191.89364624023438, |
|
"logps/real": -142.2311553955078, |
|
"loss": 0.0314, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.589520454406738, |
|
"rewards/margins": 6.077818870544434, |
|
"rewards/real": 0.4882989823818207, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.9658727337362247e-07, |
|
"logits/generated": -1.2323060035705566, |
|
"logits/real": -0.11805130541324615, |
|
"logps/generated": -193.5916290283203, |
|
"logps/real": -163.78118896484375, |
|
"loss": 0.0438, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -5.801546096801758, |
|
"rewards/margins": 6.197666168212891, |
|
"rewards/real": 0.39611995220184326, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.9480981158905084e-07, |
|
"logits/generated": -1.4392839670181274, |
|
"logits/real": -0.19422808289527893, |
|
"logps/generated": -199.3451385498047, |
|
"logps/real": -163.566162109375, |
|
"loss": 0.0555, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -5.833590507507324, |
|
"rewards/margins": 6.286513328552246, |
|
"rewards/real": 0.45292234420776367, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.9303234980447918e-07, |
|
"logits/generated": -1.344807505607605, |
|
"logits/real": 0.05807851627469063, |
|
"logps/generated": -188.47280883789062, |
|
"logps/real": -131.35252380371094, |
|
"loss": 0.0404, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -5.383631706237793, |
|
"rewards/margins": 5.914809226989746, |
|
"rewards/real": 0.5311776995658875, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.9125488801990754e-07, |
|
"logits/generated": -1.4456682205200195, |
|
"logits/real": -0.1890706866979599, |
|
"logps/generated": -205.602783203125, |
|
"logps/real": -156.99949645996094, |
|
"loss": 0.0343, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -6.107948303222656, |
|
"rewards/margins": 6.506932258605957, |
|
"rewards/real": 0.3989841341972351, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.8947742623533593e-07, |
|
"logits/generated": -1.3190276622772217, |
|
"logits/real": -0.09516116231679916, |
|
"logps/generated": -193.70095825195312, |
|
"logps/real": -160.75755310058594, |
|
"loss": 0.0344, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.828548431396484, |
|
"rewards/margins": 6.300034523010254, |
|
"rewards/real": 0.4714859426021576, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.876999644507643e-07, |
|
"logits/generated": -1.348646879196167, |
|
"logits/real": -0.10354932397603989, |
|
"logps/generated": -194.30975341796875, |
|
"logps/real": -153.31259155273438, |
|
"loss": 0.0508, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.860109806060791, |
|
"rewards/margins": 6.231678009033203, |
|
"rewards/real": 0.3715675473213196, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.8592250266619267e-07, |
|
"logits/generated": -1.4050867557525635, |
|
"logits/real": -0.2533566951751709, |
|
"logps/generated": -195.93313598632812, |
|
"logps/real": -169.0387420654297, |
|
"loss": 0.0278, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.157801628112793, |
|
"rewards/margins": 6.619472503662109, |
|
"rewards/real": 0.46167105436325073, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.8414504088162103e-07, |
|
"logits/generated": -1.447456955909729, |
|
"logits/real": -0.07161243259906769, |
|
"logps/generated": -205.7145538330078, |
|
"logps/real": -151.47238159179688, |
|
"loss": 0.033, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -6.1400346755981445, |
|
"rewards/margins": 6.463052272796631, |
|
"rewards/real": 0.3230181634426117, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.8236757909704943e-07, |
|
"logits/generated": -1.3999097347259521, |
|
"logits/real": -0.06229109689593315, |
|
"logps/generated": -198.9770965576172, |
|
"logps/real": -147.52398681640625, |
|
"loss": 0.0359, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.335967540740967, |
|
"rewards/margins": 6.788211822509766, |
|
"rewards/real": 0.45224493741989136, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.805901173124778e-07, |
|
"logits/generated": -1.4580378532409668, |
|
"logits/real": -0.21836349368095398, |
|
"logps/generated": -199.5890655517578, |
|
"logps/real": -159.8959503173828, |
|
"loss": 0.0345, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.041956901550293, |
|
"rewards/margins": 6.469993591308594, |
|
"rewards/real": 0.4280371069908142, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.7881265552790613e-07, |
|
"logits/generated": -1.4729552268981934, |
|
"logits/real": -0.08541516959667206, |
|
"logps/generated": -209.23837280273438, |
|
"logps/real": -150.85592651367188, |
|
"loss": 0.0523, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.274621963500977, |
|
"rewards/margins": 6.670513153076172, |
|
"rewards/real": 0.3958915174007416, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.770351937433345e-07, |
|
"logits/generated": -1.460939645767212, |
|
"logits/real": -0.06804711371660233, |
|
"logps/generated": -195.3641815185547, |
|
"logps/real": -149.21224975585938, |
|
"loss": 0.0457, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.111988067626953, |
|
"rewards/margins": 6.648555755615234, |
|
"rewards/real": 0.5365672707557678, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.7525773195876286e-07, |
|
"logits/generated": -1.3989557027816772, |
|
"logits/real": -0.141148641705513, |
|
"logps/generated": -208.6423797607422, |
|
"logps/real": -164.75941467285156, |
|
"loss": 0.0468, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.388513565063477, |
|
"rewards/margins": 6.783452033996582, |
|
"rewards/real": 0.3949388563632965, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.7348027017419126e-07, |
|
"logits/generated": -1.3841809034347534, |
|
"logits/real": -0.14616461098194122, |
|
"logps/generated": -201.27157592773438, |
|
"logps/real": -156.52682495117188, |
|
"loss": 0.0468, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -5.992762565612793, |
|
"rewards/margins": 6.373073577880859, |
|
"rewards/real": 0.38031044602394104, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.7170280838961962e-07, |
|
"logits/generated": -1.4497352838516235, |
|
"logits/real": -0.1744016855955124, |
|
"logps/generated": -204.82797241210938, |
|
"logps/real": -154.94497680664062, |
|
"loss": 0.0389, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -6.267435550689697, |
|
"rewards/margins": 6.67733907699585, |
|
"rewards/real": 0.4099041521549225, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.69925346605048e-07, |
|
"logits/generated": -1.4557878971099854, |
|
"logits/real": 0.009424996562302113, |
|
"logps/generated": -198.59573364257812, |
|
"logps/real": -144.44679260253906, |
|
"loss": 0.0407, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.197464942932129, |
|
"rewards/margins": 6.6801652908325195, |
|
"rewards/real": 0.4827001094818115, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.6814788482047635e-07, |
|
"logits/generated": -1.4021763801574707, |
|
"logits/real": -0.14931827783584595, |
|
"logps/generated": -200.401123046875, |
|
"logps/real": -173.49856567382812, |
|
"loss": 0.0351, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.364272594451904, |
|
"rewards/margins": 6.713151454925537, |
|
"rewards/real": 0.34887903928756714, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.6637042303590475e-07, |
|
"logits/generated": -1.438078761100769, |
|
"logits/real": -0.23877568542957306, |
|
"logps/generated": -200.45779418945312, |
|
"logps/real": -152.62110900878906, |
|
"loss": 0.0334, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.951826095581055, |
|
"rewards/margins": 6.3755693435668945, |
|
"rewards/real": 0.42374467849731445, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.6459296125133309e-07, |
|
"logits/generated": -1.4022048711776733, |
|
"logits/real": -0.20034177601337433, |
|
"logps/generated": -200.08987426757812, |
|
"logps/real": -159.6756591796875, |
|
"loss": 0.0351, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.297463417053223, |
|
"rewards/margins": 6.7816481590271, |
|
"rewards/real": 0.4841853976249695, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.6281549946676145e-07, |
|
"logits/generated": -1.4501913785934448, |
|
"logits/real": -0.13076047599315643, |
|
"logps/generated": -203.63800048828125, |
|
"logps/real": -148.5162353515625, |
|
"loss": 0.0288, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.285429000854492, |
|
"rewards/margins": 6.7836737632751465, |
|
"rewards/real": 0.49824443459510803, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.6103803768218982e-07, |
|
"logits/generated": -1.515801191329956, |
|
"logits/real": -0.08876947313547134, |
|
"logps/generated": -214.5893096923828, |
|
"logps/real": -150.77139282226562, |
|
"loss": 0.0348, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.610386848449707, |
|
"rewards/margins": 7.061656951904297, |
|
"rewards/real": 0.45126986503601074, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.5926057589761818e-07, |
|
"logits/generated": -1.408809781074524, |
|
"logits/real": -0.14982599020004272, |
|
"logps/generated": -208.8214874267578, |
|
"logps/real": -160.70396423339844, |
|
"loss": 0.0466, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.5734405517578125, |
|
"rewards/margins": 7.01800537109375, |
|
"rewards/real": 0.4445651173591614, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.5748311411304658e-07, |
|
"logits/generated": -1.268897533416748, |
|
"logits/real": -0.1435459554195404, |
|
"logps/generated": -208.14059448242188, |
|
"logps/real": -162.9286346435547, |
|
"loss": 0.0424, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.466616630554199, |
|
"rewards/margins": 6.879193305969238, |
|
"rewards/real": 0.412576287984848, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.5570565232847494e-07, |
|
"logits/generated": -1.3088537454605103, |
|
"logits/real": -0.055105965584516525, |
|
"logps/generated": -202.38327026367188, |
|
"logps/real": -150.609130859375, |
|
"loss": 0.0231, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.501986503601074, |
|
"rewards/margins": 6.831887245178223, |
|
"rewards/real": 0.32990118861198425, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.539281905439033e-07, |
|
"logits/generated": -1.4072834253311157, |
|
"logits/real": -0.13368968665599823, |
|
"logps/generated": -196.32894897460938, |
|
"logps/real": -156.68746948242188, |
|
"loss": 0.034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.223599433898926, |
|
"rewards/margins": 6.634499549865723, |
|
"rewards/real": 0.4108997881412506, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.5215072875933165e-07, |
|
"logits/generated": -1.4075140953063965, |
|
"logits/real": -0.3013412356376648, |
|
"logps/generated": -203.13999938964844, |
|
"logps/real": -163.4533233642578, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.3233642578125, |
|
"rewards/margins": 6.650763034820557, |
|
"rewards/real": 0.3273986279964447, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.5037326697476004e-07, |
|
"logits/generated": -1.4396686553955078, |
|
"logits/real": -0.002043303567916155, |
|
"logps/generated": -204.3917999267578, |
|
"logps/real": -139.45896911621094, |
|
"loss": 0.0308, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.184319496154785, |
|
"rewards/margins": 6.667932987213135, |
|
"rewards/real": 0.48361387848854065, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.485958051901884e-07, |
|
"logits/generated": -1.4441577196121216, |
|
"logits/real": -0.20746362209320068, |
|
"logps/generated": -203.9723358154297, |
|
"logps/real": -166.56283569335938, |
|
"loss": 0.0237, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.595439910888672, |
|
"rewards/margins": 6.902490139007568, |
|
"rewards/real": 0.3070511221885681, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.4681834340561677e-07, |
|
"logits/generated": -1.345245122909546, |
|
"logits/real": -0.15873827040195465, |
|
"logps/generated": -205.891357421875, |
|
"logps/real": -160.63975524902344, |
|
"loss": 0.0378, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.545729160308838, |
|
"rewards/margins": 6.943263053894043, |
|
"rewards/real": 0.3975338339805603, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.4504088162104514e-07, |
|
"logits/generated": -1.4238418340682983, |
|
"logits/real": -0.1873057782649994, |
|
"logps/generated": -206.091796875, |
|
"logps/real": -164.68458557128906, |
|
"loss": 0.0464, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.2456512451171875, |
|
"rewards/margins": 6.481778621673584, |
|
"rewards/real": 0.23612765967845917, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.432634198364735e-07, |
|
"logits/generated": -1.4479985237121582, |
|
"logits/real": -0.07325728237628937, |
|
"logps/generated": -201.4796600341797, |
|
"logps/real": -156.04754638671875, |
|
"loss": 0.0274, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.325311183929443, |
|
"rewards/margins": 6.745765686035156, |
|
"rewards/real": 0.42045480012893677, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.414859580519019e-07, |
|
"logits/generated": -1.4120184183120728, |
|
"logits/real": -0.08143848925828934, |
|
"logps/generated": -201.7879638671875, |
|
"logps/real": -144.10708618164062, |
|
"loss": 0.0317, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.235722541809082, |
|
"rewards/margins": 6.648829460144043, |
|
"rewards/real": 0.41310709714889526, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.3970849626733024e-07, |
|
"logits/generated": -1.3906993865966797, |
|
"logits/real": -0.18309733271598816, |
|
"logps/generated": -193.51211547851562, |
|
"logps/real": -168.23666381835938, |
|
"loss": 0.0396, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.0833845138549805, |
|
"rewards/margins": 6.361334800720215, |
|
"rewards/real": 0.2779490053653717, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.379310344827586e-07, |
|
"logits/generated": -1.269242763519287, |
|
"logits/real": -0.1333479881286621, |
|
"logps/generated": -199.2947540283203, |
|
"logps/real": -156.45643615722656, |
|
"loss": 0.0419, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.298374652862549, |
|
"rewards/margins": 6.739838600158691, |
|
"rewards/real": 0.44146427512168884, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.3615357269818697e-07, |
|
"logits/generated": -1.397362470626831, |
|
"logits/real": -0.3238930106163025, |
|
"logps/generated": -193.085205078125, |
|
"logps/real": -161.82781982421875, |
|
"loss": 0.0431, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.486599922180176, |
|
"rewards/margins": 6.664914608001709, |
|
"rewards/real": 0.17831535637378693, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.3437611091361536e-07, |
|
"logits/generated": -1.2911992073059082, |
|
"logits/real": -0.21506524085998535, |
|
"logps/generated": -193.64813232421875, |
|
"logps/real": -163.50411987304688, |
|
"loss": 0.0413, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -6.337356090545654, |
|
"rewards/margins": 6.605440616607666, |
|
"rewards/real": 0.26808419823646545, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.3259864912904373e-07, |
|
"logits/generated": -1.419874906539917, |
|
"logits/real": 0.030509447678923607, |
|
"logps/generated": -209.01718139648438, |
|
"logps/real": -138.38870239257812, |
|
"loss": 0.0304, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.7356061935424805, |
|
"rewards/margins": 7.161271572113037, |
|
"rewards/real": 0.4256650507450104, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.308211873444721e-07, |
|
"logits/generated": -1.4404840469360352, |
|
"logits/real": -0.01684228517115116, |
|
"logps/generated": -199.88868713378906, |
|
"logps/real": -144.11949157714844, |
|
"loss": 0.0398, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.491084098815918, |
|
"rewards/margins": 6.836607933044434, |
|
"rewards/real": 0.3455238938331604, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.2904372555990046e-07, |
|
"logits/generated": -1.4147288799285889, |
|
"logits/real": -0.16526241600513458, |
|
"logps/generated": -200.0225372314453, |
|
"logps/real": -156.98641967773438, |
|
"loss": 0.0344, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -6.307469844818115, |
|
"rewards/margins": 6.676236629486084, |
|
"rewards/real": 0.36876624822616577, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.2726626377532883e-07, |
|
"logits/generated": -1.414432168006897, |
|
"logits/real": -0.0065914043225348, |
|
"logps/generated": -200.77865600585938, |
|
"logps/real": -146.02127075195312, |
|
"loss": 0.0392, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.215537071228027, |
|
"rewards/margins": 6.686474800109863, |
|
"rewards/real": 0.47093725204467773, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.254888019907572e-07, |
|
"logits/generated": -1.260357141494751, |
|
"logits/real": -0.2691400349140167, |
|
"logps/generated": -192.88284301757812, |
|
"logps/real": -168.7711639404297, |
|
"loss": 0.0531, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -6.229541301727295, |
|
"rewards/margins": 6.6325273513793945, |
|
"rewards/real": 0.40298670530319214, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.2371134020618556e-07, |
|
"logits/generated": -1.354569435119629, |
|
"logits/real": -0.11174142360687256, |
|
"logps/generated": -199.36160278320312, |
|
"logps/real": -148.06524658203125, |
|
"loss": 0.0344, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.473987579345703, |
|
"rewards/margins": 6.997427940368652, |
|
"rewards/real": 0.5234400629997253, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.2193387842161392e-07, |
|
"logits/generated": -1.383927583694458, |
|
"logits/real": -0.0022398829460144043, |
|
"logps/generated": -199.3593292236328, |
|
"logps/real": -146.00210571289062, |
|
"loss": 0.0346, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.404942989349365, |
|
"rewards/margins": 6.934741020202637, |
|
"rewards/real": 0.5297980308532715, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.2015641663704232e-07, |
|
"logits/generated": -1.4926010370254517, |
|
"logits/real": -0.18301942944526672, |
|
"logps/generated": -210.3939208984375, |
|
"logps/real": -158.6860809326172, |
|
"loss": 0.0443, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -6.754066467285156, |
|
"rewards/margins": 7.018126487731934, |
|
"rewards/real": 0.2640603184700012, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.1837895485247067e-07, |
|
"logits/generated": -1.470483660697937, |
|
"logits/real": -0.2634314298629761, |
|
"logps/generated": -202.2202606201172, |
|
"logps/real": -157.89248657226562, |
|
"loss": 0.0362, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.489276885986328, |
|
"rewards/margins": 6.953598976135254, |
|
"rewards/real": 0.4643225073814392, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.1660149306789902e-07, |
|
"logits/generated": -1.4293309450149536, |
|
"logits/real": -0.23708269000053406, |
|
"logps/generated": -203.2031707763672, |
|
"logps/real": -169.27066040039062, |
|
"loss": 0.0356, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -6.314140319824219, |
|
"rewards/margins": 6.681276798248291, |
|
"rewards/real": 0.3671364486217499, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.148240312833274e-07, |
|
"logits/generated": -1.4235140085220337, |
|
"logits/real": -0.10966993868350983, |
|
"logps/generated": -210.45736694335938, |
|
"logps/real": -150.0312957763672, |
|
"loss": 0.0316, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.821863651275635, |
|
"rewards/margins": 7.2514801025390625, |
|
"rewards/real": 0.42961588501930237, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.1304656949875577e-07, |
|
"logits/generated": -1.354546070098877, |
|
"logits/real": -0.022744635120034218, |
|
"logps/generated": -192.60122680664062, |
|
"logps/real": -140.7333526611328, |
|
"loss": 0.036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.868205547332764, |
|
"rewards/margins": 6.2482709884643555, |
|
"rewards/real": 0.3800655007362366, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.1126910771418415e-07, |
|
"logits/generated": -1.4601426124572754, |
|
"logits/real": -0.15174929797649384, |
|
"logps/generated": -199.8236846923828, |
|
"logps/real": -161.28665161132812, |
|
"loss": 0.0287, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.322902202606201, |
|
"rewards/margins": 6.639632225036621, |
|
"rewards/real": 0.3167303204536438, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.094916459296125e-07, |
|
"logits/generated": -1.5440027713775635, |
|
"logits/real": -0.26040124893188477, |
|
"logps/generated": -221.20291137695312, |
|
"logps/real": -167.95321655273438, |
|
"loss": 0.0395, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.880659580230713, |
|
"rewards/margins": 7.220567226409912, |
|
"rewards/real": 0.3399079144001007, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.0771418414504088e-07, |
|
"logits/generated": -1.340903878211975, |
|
"logits/real": -0.2935740351676941, |
|
"logps/generated": -202.96920776367188, |
|
"logps/real": -176.96902465820312, |
|
"loss": 0.025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.716177463531494, |
|
"rewards/margins": 7.051765441894531, |
|
"rewards/real": 0.33558765053749084, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.0593672236046925e-07, |
|
"logits/generated": -1.4308208227157593, |
|
"logits/real": -0.14327159523963928, |
|
"logps/generated": -202.39431762695312, |
|
"logps/real": -149.82705688476562, |
|
"loss": 0.0313, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.53844690322876, |
|
"rewards/margins": 6.978585243225098, |
|
"rewards/real": 0.440138578414917, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.0415926057589762e-07, |
|
"logits/generated": -1.4518420696258545, |
|
"logits/real": -0.10385274887084961, |
|
"logps/generated": -213.8254852294922, |
|
"logps/real": -147.71575927734375, |
|
"loss": 0.0377, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.9299116134643555, |
|
"rewards/margins": 7.326942443847656, |
|
"rewards/real": 0.3970298767089844, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.0238179879132598e-07, |
|
"logits/generated": -1.4512475728988647, |
|
"logits/real": -0.1038379818201065, |
|
"logps/generated": -199.56454467773438, |
|
"logps/real": -145.60775756835938, |
|
"loss": 0.0445, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.219581604003906, |
|
"rewards/margins": 6.6343889236450195, |
|
"rewards/real": 0.4148074686527252, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.0060433700675434e-07, |
|
"logits/generated": -1.5067126750946045, |
|
"logits/real": -0.24580475687980652, |
|
"logps/generated": -216.79171752929688, |
|
"logps/real": -168.87303161621094, |
|
"loss": 0.0415, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.6367292404174805, |
|
"rewards/margins": 7.050206184387207, |
|
"rewards/real": 0.41347736120224, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.882687522218272e-08, |
|
"logits/generated": -1.4396215677261353, |
|
"logits/real": -0.028474459424614906, |
|
"logps/generated": -206.5319366455078, |
|
"logps/real": -149.35960388183594, |
|
"loss": 0.0301, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.7693681716918945, |
|
"rewards/margins": 7.073111534118652, |
|
"rewards/real": 0.30374377965927124, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.704941343761109e-08, |
|
"logits/generated": -1.3953574895858765, |
|
"logits/real": -0.08167268335819244, |
|
"logps/generated": -201.78231811523438, |
|
"logps/real": -156.30703735351562, |
|
"loss": 0.0361, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.351473808288574, |
|
"rewards/margins": 6.825016975402832, |
|
"rewards/real": 0.47354307770729065, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.527195165303946e-08, |
|
"logits/generated": -1.4204782247543335, |
|
"logits/real": -0.261289119720459, |
|
"logps/generated": -214.96920776367188, |
|
"logps/real": -163.7252655029297, |
|
"loss": 0.0438, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -6.997825622558594, |
|
"rewards/margins": 7.31102991104126, |
|
"rewards/real": 0.31320399045944214, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.349448986846782e-08, |
|
"logits/generated": -1.3477799892425537, |
|
"logits/real": -0.22864647209644318, |
|
"logps/generated": -206.8589630126953, |
|
"logps/real": -162.54312133789062, |
|
"loss": 0.0374, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.9989471435546875, |
|
"rewards/margins": 7.419368743896484, |
|
"rewards/real": 0.42042192816734314, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 9.17170280838962e-08, |
|
"logits/generated": -1.4512816667556763, |
|
"logits/real": -0.2580679953098297, |
|
"logps/generated": -214.8953857421875, |
|
"logps/real": -162.36611938476562, |
|
"loss": 0.0303, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.176792144775391, |
|
"rewards/margins": 7.437612056732178, |
|
"rewards/real": 0.26082050800323486, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.993956629932455e-08, |
|
"logits/generated": -1.3164294958114624, |
|
"logits/real": -0.13470031321048737, |
|
"logps/generated": -206.2738037109375, |
|
"logps/real": -154.78414916992188, |
|
"loss": 0.0313, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.593330383300781, |
|
"rewards/margins": 6.940329074859619, |
|
"rewards/real": 0.34699925780296326, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.816210451475293e-08, |
|
"logits/generated": -1.4106817245483398, |
|
"logits/real": -0.15953537821769714, |
|
"logps/generated": -208.6619415283203, |
|
"logps/real": -160.1926727294922, |
|
"loss": 0.0279, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.805656433105469, |
|
"rewards/margins": 7.248448371887207, |
|
"rewards/real": 0.44279175996780396, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.63846427301813e-08, |
|
"logits/generated": -1.3242509365081787, |
|
"logits/real": -0.10427751392126083, |
|
"logps/generated": -199.90911865234375, |
|
"logps/real": -149.58468627929688, |
|
"loss": 0.0391, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -6.570542812347412, |
|
"rewards/margins": 6.95410680770874, |
|
"rewards/real": 0.3835631310939789, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.460718094560966e-08, |
|
"logits/generated": -1.3325514793395996, |
|
"logits/real": -0.12340172380208969, |
|
"logps/generated": -205.9846954345703, |
|
"logps/real": -154.2774658203125, |
|
"loss": 0.0248, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.54462194442749, |
|
"rewards/margins": 6.925265312194824, |
|
"rewards/real": 0.380642831325531, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.282971916103803e-08, |
|
"logits/generated": -1.266965627670288, |
|
"logits/real": -0.21162667870521545, |
|
"logps/generated": -199.00389099121094, |
|
"logps/real": -171.83628845214844, |
|
"loss": 0.0329, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.594735622406006, |
|
"rewards/margins": 6.811354160308838, |
|
"rewards/real": 0.2166186273097992, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.10522573764664e-08, |
|
"logits/generated": -1.5509871244430542, |
|
"logits/real": -0.21420331299304962, |
|
"logps/generated": -228.1226348876953, |
|
"logps/real": -163.10928344726562, |
|
"loss": 0.0373, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -7.169036865234375, |
|
"rewards/margins": 7.495211601257324, |
|
"rewards/real": 0.3261755704879761, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 7.927479559189478e-08, |
|
"logits/generated": -1.387099266052246, |
|
"logits/real": -0.06811436265707016, |
|
"logps/generated": -203.49497985839844, |
|
"logps/real": -153.1364288330078, |
|
"loss": 0.0383, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.673671722412109, |
|
"rewards/margins": 7.097157955169678, |
|
"rewards/real": 0.42348676919937134, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 7.749733380732314e-08, |
|
"logits/generated": -1.4274492263793945, |
|
"logits/real": -0.0971667617559433, |
|
"logps/generated": -200.7941436767578, |
|
"logps/real": -154.57699584960938, |
|
"loss": 0.0454, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.580294609069824, |
|
"rewards/margins": 6.978941917419434, |
|
"rewards/real": 0.39864683151245117, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 7.571987202275151e-08, |
|
"logits/generated": -1.4912302494049072, |
|
"logits/real": -0.08408372104167938, |
|
"logps/generated": -204.36544799804688, |
|
"logps/real": -154.26144409179688, |
|
"loss": 0.0341, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.523919105529785, |
|
"rewards/margins": 6.955370903015137, |
|
"rewards/real": 0.43145233392715454, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 7.394241023817987e-08, |
|
"logits/generated": -1.4152591228485107, |
|
"logits/real": -0.11495399475097656, |
|
"logps/generated": -201.34291076660156, |
|
"logps/real": -165.26194763183594, |
|
"loss": 0.0272, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.8861589431762695, |
|
"rewards/margins": 7.37485408782959, |
|
"rewards/real": 0.48869651556015015, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 7.216494845360824e-08, |
|
"logits/generated": -1.3510379791259766, |
|
"logits/real": -0.21854618191719055, |
|
"logps/generated": -217.4750213623047, |
|
"logps/real": -169.8649139404297, |
|
"loss": 0.0342, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.587937355041504, |
|
"rewards/margins": 7.85870361328125, |
|
"rewards/real": 0.27076593041419983, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 7.038748666903662e-08, |
|
"logits/generated": -1.50370192527771, |
|
"logits/real": -0.2702942490577698, |
|
"logps/generated": -212.11178588867188, |
|
"logps/real": -167.09085083007812, |
|
"loss": 0.0319, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.054226875305176, |
|
"rewards/margins": 7.354388236999512, |
|
"rewards/real": 0.3001619875431061, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 6.861002488446497e-08, |
|
"logits/generated": -1.3323113918304443, |
|
"logits/real": 0.04159053415060043, |
|
"logps/generated": -205.6215057373047, |
|
"logps/real": -143.99964904785156, |
|
"loss": 0.0292, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.66431188583374, |
|
"rewards/margins": 6.992480278015137, |
|
"rewards/real": 0.3281685709953308, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 6.683256309989335e-08, |
|
"logits/generated": -1.4925063848495483, |
|
"logits/real": -0.21390242874622345, |
|
"logps/generated": -212.22586059570312, |
|
"logps/real": -167.41766357421875, |
|
"loss": 0.0257, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.017984867095947, |
|
"rewards/margins": 7.473367214202881, |
|
"rewards/real": 0.455382764339447, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 6.505510131532172e-08, |
|
"logits/generated": -1.393650770187378, |
|
"logits/real": -0.0947226956486702, |
|
"logps/generated": -210.9330596923828, |
|
"logps/real": -147.23170471191406, |
|
"loss": 0.0367, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.760880470275879, |
|
"rewards/margins": 7.202750205993652, |
|
"rewards/real": 0.4418713450431824, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 6.32776395307501e-08, |
|
"logits/generated": -1.341686487197876, |
|
"logits/real": -0.18603375554084778, |
|
"logps/generated": -201.18238830566406, |
|
"logps/real": -153.87188720703125, |
|
"loss": 0.0293, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.745263576507568, |
|
"rewards/margins": 7.202706813812256, |
|
"rewards/real": 0.45744413137435913, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.150017774617845e-08, |
|
"logits/generated": -1.3316876888275146, |
|
"logits/real": -0.1949067860841751, |
|
"logps/generated": -211.9070587158203, |
|
"logps/real": -177.78701782226562, |
|
"loss": 0.034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.849858283996582, |
|
"rewards/margins": 7.174862861633301, |
|
"rewards/real": 0.3250047564506531, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.972271596160682e-08, |
|
"logits/generated": -1.3474787473678589, |
|
"logits/real": -0.21125014126300812, |
|
"logps/generated": -203.97470092773438, |
|
"logps/real": -161.0288848876953, |
|
"loss": 0.0355, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.6565728187561035, |
|
"rewards/margins": 6.895970821380615, |
|
"rewards/real": 0.23939922451972961, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.794525417703519e-08, |
|
"logits/generated": -1.4344779253005981, |
|
"logits/real": -0.17681774497032166, |
|
"logps/generated": -214.01559448242188, |
|
"logps/real": -165.4995880126953, |
|
"loss": 0.0359, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -7.138472557067871, |
|
"rewards/margins": 7.418495178222656, |
|
"rewards/real": 0.2800225615501404, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5.616779239246356e-08, |
|
"logits/generated": -1.3456244468688965, |
|
"logits/real": -0.23542913794517517, |
|
"logps/generated": -211.3947296142578, |
|
"logps/real": -163.82652282714844, |
|
"loss": 0.0427, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.151065826416016, |
|
"rewards/margins": 7.601234436035156, |
|
"rewards/real": 0.45016852021217346, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5.439033060789193e-08, |
|
"logits/generated": -1.5365709066390991, |
|
"logits/real": -0.08734168857336044, |
|
"logps/generated": -218.9232635498047, |
|
"logps/real": -155.0095977783203, |
|
"loss": 0.03, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.604518890380859, |
|
"rewards/margins": 7.9174652099609375, |
|
"rewards/real": 0.3129454255104065, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5.26128688233203e-08, |
|
"logits/generated": -1.4437042474746704, |
|
"logits/real": -0.055155061185359955, |
|
"logps/generated": -205.85806274414062, |
|
"logps/real": -149.55331420898438, |
|
"loss": 0.0352, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.7587127685546875, |
|
"rewards/margins": 7.158652305603027, |
|
"rewards/real": 0.39993923902511597, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5.0835407038748666e-08, |
|
"logits/generated": -1.4804272651672363, |
|
"logits/real": -0.2698648273944855, |
|
"logps/generated": -217.31039428710938, |
|
"logps/real": -168.95205688476562, |
|
"loss": 0.0321, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.3498854637146, |
|
"rewards/margins": 7.651535987854004, |
|
"rewards/real": 0.30164986848831177, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.905794525417703e-08, |
|
"logits/generated": -1.5234874486923218, |
|
"logits/real": -0.20794352889060974, |
|
"logps/generated": -211.02670288085938, |
|
"logps/real": -158.8268280029297, |
|
"loss": 0.0316, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.525379180908203, |
|
"rewards/margins": 6.8619537353515625, |
|
"rewards/real": 0.33657413721084595, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.72804834696054e-08, |
|
"logits/generated": -1.3400804996490479, |
|
"logits/real": -0.2256588637828827, |
|
"logps/generated": -208.4210205078125, |
|
"logps/real": -167.0167999267578, |
|
"loss": 0.04, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.771445274353027, |
|
"rewards/margins": 6.9308977127075195, |
|
"rewards/real": 0.1594521552324295, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 4.550302168503377e-08, |
|
"logits/generated": -1.3907737731933594, |
|
"logits/real": -0.08319269865751266, |
|
"logps/generated": -206.05990600585938, |
|
"logps/real": -149.99700927734375, |
|
"loss": 0.0316, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -6.864798545837402, |
|
"rewards/margins": 7.238038539886475, |
|
"rewards/real": 0.3732399344444275, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 4.372555990046214e-08, |
|
"logits/generated": -1.3409656286239624, |
|
"logits/real": -0.26008692383766174, |
|
"logps/generated": -204.02059936523438, |
|
"logps/real": -168.06771850585938, |
|
"loss": 0.0361, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.502602577209473, |
|
"rewards/margins": 6.684649467468262, |
|
"rewards/real": 0.1820472925901413, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 4.194809811589051e-08, |
|
"logits/generated": -1.5320767164230347, |
|
"logits/real": -0.30421024560928345, |
|
"logps/generated": -224.9248504638672, |
|
"logps/real": -178.38499450683594, |
|
"loss": 0.0244, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.527239799499512, |
|
"rewards/margins": 7.814047813415527, |
|
"rewards/real": 0.2868082523345947, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 4.0170636331318876e-08, |
|
"logits/generated": -1.3013502359390259, |
|
"logits/real": -0.1398877501487732, |
|
"logps/generated": -201.2360076904297, |
|
"logps/real": -163.42172241210938, |
|
"loss": 0.0342, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.56103515625, |
|
"rewards/margins": 6.8316826820373535, |
|
"rewards/real": 0.2706476151943207, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.839317454674725e-08, |
|
"logits/generated": -1.3678123950958252, |
|
"logits/real": -0.2855227291584015, |
|
"logps/generated": -197.8090362548828, |
|
"logps/real": -164.1342315673828, |
|
"loss": 0.0431, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.455258369445801, |
|
"rewards/margins": 6.888462066650391, |
|
"rewards/real": 0.4332040250301361, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.6615712762175614e-08, |
|
"logits/generated": -1.3677704334259033, |
|
"logits/real": 0.010342784225940704, |
|
"logps/generated": -209.58816528320312, |
|
"logps/real": -145.00323486328125, |
|
"loss": 0.0232, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.397311210632324, |
|
"rewards/margins": 7.7184648513793945, |
|
"rewards/real": 0.32115358114242554, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.4838250977603974e-08, |
|
"logits/generated": -1.524156928062439, |
|
"logits/real": -0.12270829826593399, |
|
"logps/generated": -218.85122680664062, |
|
"logps/real": -161.03256225585938, |
|
"loss": 0.0274, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -7.344332695007324, |
|
"rewards/margins": 7.675253391265869, |
|
"rewards/real": 0.3309203088283539, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.3060789193032346e-08, |
|
"logits/generated": -1.4847233295440674, |
|
"logits/real": -0.09207119047641754, |
|
"logps/generated": -210.13827514648438, |
|
"logps/real": -152.16636657714844, |
|
"loss": 0.0348, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -7.028836250305176, |
|
"rewards/margins": 7.411356449127197, |
|
"rewards/real": 0.38252073526382446, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.128332740846071e-08, |
|
"logits/generated": -1.3793671131134033, |
|
"logits/real": -0.09992913901805878, |
|
"logps/generated": -205.37014770507812, |
|
"logps/real": -151.1650848388672, |
|
"loss": 0.0286, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.737936973571777, |
|
"rewards/margins": 7.170534610748291, |
|
"rewards/real": 0.432596355676651, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.9505865623889085e-08, |
|
"logits/generated": -1.4762852191925049, |
|
"logits/real": -0.06375066190958023, |
|
"logps/generated": -212.5565185546875, |
|
"logps/real": -149.23876953125, |
|
"loss": 0.0268, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.110816955566406, |
|
"rewards/margins": 7.316006660461426, |
|
"rewards/real": 0.20518915355205536, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.7728403839317454e-08, |
|
"logits/generated": -1.3773549795150757, |
|
"logits/real": -0.1580524891614914, |
|
"logps/generated": -210.57321166992188, |
|
"logps/real": -156.308837890625, |
|
"loss": 0.0339, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -7.0069990158081055, |
|
"rewards/margins": 7.312821388244629, |
|
"rewards/real": 0.3058224320411682, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.5950942054745824e-08, |
|
"logits/generated": -1.3675754070281982, |
|
"logits/real": -0.2727252244949341, |
|
"logps/generated": -219.14755249023438, |
|
"logps/real": -166.90438842773438, |
|
"loss": 0.0296, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -7.234642028808594, |
|
"rewards/margins": 7.47792911529541, |
|
"rewards/real": 0.243287593126297, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.4173480270174193e-08, |
|
"logits/generated": -1.3553121089935303, |
|
"logits/real": -0.29976850748062134, |
|
"logps/generated": -209.1672821044922, |
|
"logps/real": -172.01132202148438, |
|
"loss": 0.0371, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.818951606750488, |
|
"rewards/margins": 7.143617153167725, |
|
"rewards/real": 0.32466596364974976, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.2396018485602556e-08, |
|
"logits/generated": -1.4340794086456299, |
|
"logits/real": -0.17459583282470703, |
|
"logps/generated": -211.97738647460938, |
|
"logps/real": -155.25192260742188, |
|
"loss": 0.0357, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -7.195143222808838, |
|
"rewards/margins": 7.472241401672363, |
|
"rewards/real": 0.27709802985191345, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 2.0618556701030925e-08, |
|
"logits/generated": -1.2854077816009521, |
|
"logits/real": -0.05134361982345581, |
|
"logps/generated": -192.98057556152344, |
|
"logps/real": -141.09915161132812, |
|
"loss": 0.0428, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -6.195518493652344, |
|
"rewards/margins": 6.4610795974731445, |
|
"rewards/real": 0.26556122303009033, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.8841094916459295e-08, |
|
"logits/generated": -1.3392913341522217, |
|
"logits/real": -0.09781539440155029, |
|
"logps/generated": -208.2666473388672, |
|
"logps/real": -156.20436096191406, |
|
"loss": 0.0446, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.725115776062012, |
|
"rewards/margins": 6.97289514541626, |
|
"rewards/real": 0.2477794587612152, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.7063633131887664e-08, |
|
"logits/generated": -1.3316737413406372, |
|
"logits/real": -0.0454082116484642, |
|
"logps/generated": -206.0417938232422, |
|
"logps/real": -141.0169219970703, |
|
"loss": 0.0333, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.749381065368652, |
|
"rewards/margins": 7.064549922943115, |
|
"rewards/real": 0.3151686489582062, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.5286171347316033e-08, |
|
"logits/generated": -1.534264087677002, |
|
"logits/real": -0.13667024672031403, |
|
"logps/generated": -226.2362518310547, |
|
"logps/real": -160.57518005371094, |
|
"loss": 0.0234, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -7.485151767730713, |
|
"rewards/margins": 7.653719902038574, |
|
"rewards/real": 0.16856878995895386, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.35087095627444e-08, |
|
"logits/generated": -1.3561798334121704, |
|
"logits/real": -0.18901768326759338, |
|
"logps/generated": -204.1754913330078, |
|
"logps/real": -150.84564208984375, |
|
"loss": 0.0257, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.298308849334717, |
|
"rewards/margins": 6.538763523101807, |
|
"rewards/real": 0.24045482277870178, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.1731247778172769e-08, |
|
"logits/generated": -1.2899856567382812, |
|
"logits/real": -0.1477215737104416, |
|
"logps/generated": -201.3439178466797, |
|
"logps/real": -148.21932983398438, |
|
"loss": 0.0327, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.692836761474609, |
|
"rewards/margins": 6.993072509765625, |
|
"rewards/real": 0.3002353608608246, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 9.953785993601137e-09, |
|
"logits/generated": -1.358318567276001, |
|
"logits/real": -0.06824080646038055, |
|
"logps/generated": -206.3799285888672, |
|
"logps/real": -151.30718994140625, |
|
"loss": 0.0316, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -7.049784183502197, |
|
"rewards/margins": 7.375840663909912, |
|
"rewards/real": 0.3260560631752014, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 8.176324209029506e-09, |
|
"logits/generated": -1.4635486602783203, |
|
"logits/real": -0.02628200687468052, |
|
"logps/generated": -222.9819793701172, |
|
"logps/real": -150.2086181640625, |
|
"loss": 0.0358, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -7.298110008239746, |
|
"rewards/margins": 7.585814476013184, |
|
"rewards/real": 0.2877052426338196, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.398862424457874e-09, |
|
"logits/generated": -1.2963229417800903, |
|
"logits/real": -0.04144474118947983, |
|
"logps/generated": -208.9534454345703, |
|
"logps/real": -141.96522521972656, |
|
"loss": 0.029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.3213348388671875, |
|
"rewards/margins": 7.746777534484863, |
|
"rewards/real": 0.425443172454834, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 4.621400639886242e-09, |
|
"logits/generated": -1.4178071022033691, |
|
"logits/real": -0.21926836669445038, |
|
"logps/generated": -208.5327606201172, |
|
"logps/real": -177.9146728515625, |
|
"loss": 0.0355, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -7.14594030380249, |
|
"rewards/margins": 7.450923919677734, |
|
"rewards/real": 0.3049830198287964, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.8439388553146107e-09, |
|
"logits/generated": -1.3670203685760498, |
|
"logits/real": -0.1273925006389618, |
|
"logps/generated": -208.1735382080078, |
|
"logps/real": -154.05970764160156, |
|
"loss": 0.0372, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.859274387359619, |
|
"rewards/margins": 7.159803867340088, |
|
"rewards/real": 0.3005295395851135, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.066477070742979e-09, |
|
"logits/generated": -1.3436192274093628, |
|
"logits/real": -0.02946655824780464, |
|
"logps/generated": -204.7676239013672, |
|
"logps/real": -143.30450439453125, |
|
"loss": 0.0369, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -6.587100982666016, |
|
"rewards/margins": 7.098004341125488, |
|
"rewards/real": 0.5109024047851562, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 3126, |
|
"total_flos": 0.0, |
|
"train_loss": 0.18478660625349003, |
|
"train_runtime": 27852.0483, |
|
"train_samples_per_second": 3.59, |
|
"train_steps_per_second": 0.112 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3126, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|