|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9993998799759952, |
|
"eval_steps": 100, |
|
"global_step": 1249, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4e-08, |
|
"logits/chosen": -2.682399272918701, |
|
"logits/rejected": -2.7047135829925537, |
|
"logps/chosen": -275.10638427734375, |
|
"logps/rejected": -271.8466491699219, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"logits/chosen": -2.606243848800659, |
|
"logits/rejected": -2.633491277694702, |
|
"logps/chosen": -301.7389831542969, |
|
"logps/rejected": -324.2469787597656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4166666567325592, |
|
"rewards/chosen": 0.00018933300452772528, |
|
"rewards/margins": 9.413135558133945e-06, |
|
"rewards/rejected": 0.00017991992353927344, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.000000000000001e-07, |
|
"logits/chosen": -2.5866377353668213, |
|
"logits/rejected": -2.5900259017944336, |
|
"logps/chosen": -269.0643615722656, |
|
"logps/rejected": -289.1509094238281, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0011652575340121984, |
|
"rewards/margins": 6.114605639595538e-05, |
|
"rewards/rejected": -0.0012264035176485777, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"logits/chosen": -2.57534122467041, |
|
"logits/rejected": -2.5880730152130127, |
|
"logps/chosen": -291.5533752441406, |
|
"logps/rejected": -311.4080505371094, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.0030827566515654325, |
|
"rewards/margins": 0.0002471129409968853, |
|
"rewards/rejected": -0.0033298698253929615, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"logits/chosen": -2.612988233566284, |
|
"logits/rejected": -2.619412899017334, |
|
"logps/chosen": -264.4569396972656, |
|
"logps/rejected": -273.54107666015625, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.005209661088883877, |
|
"rewards/margins": 0.0008221397292800248, |
|
"rewards/rejected": -0.006031800992786884, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -2.563633680343628, |
|
"logits/rejected": -2.5437724590301514, |
|
"logps/chosen": -264.8982849121094, |
|
"logps/rejected": -269.68804931640625, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.008133028633892536, |
|
"rewards/margins": -1.2002186849713326e-05, |
|
"rewards/rejected": -0.008121026679873466, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"logits/chosen": -2.6302762031555176, |
|
"logits/rejected": -2.633596420288086, |
|
"logps/chosen": -277.2352600097656, |
|
"logps/rejected": -296.19476318359375, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.013791452161967754, |
|
"rewards/margins": 0.0020633486565202475, |
|
"rewards/rejected": -0.01585480198264122, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"logits/chosen": -2.6230902671813965, |
|
"logits/rejected": -2.6135356426239014, |
|
"logps/chosen": -280.782958984375, |
|
"logps/rejected": -286.6590881347656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.01885022595524788, |
|
"rewards/margins": 0.00015832395001780242, |
|
"rewards/rejected": -0.01900855079293251, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"logits/chosen": -2.6521055698394775, |
|
"logits/rejected": -2.650635004043579, |
|
"logps/chosen": -276.92156982421875, |
|
"logps/rejected": -297.58477783203125, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.017211003229022026, |
|
"rewards/margins": 0.0015935760457068682, |
|
"rewards/rejected": -0.018804579973220825, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"logits/chosen": -2.650332450866699, |
|
"logits/rejected": -2.639291286468506, |
|
"logps/chosen": -311.17218017578125, |
|
"logps/rejected": -316.7694091796875, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.017588406801223755, |
|
"rewards/margins": 0.008481341414153576, |
|
"rewards/rejected": -0.026069749146699905, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -2.6498260498046875, |
|
"logits/rejected": -2.6482200622558594, |
|
"logps/chosen": -276.51043701171875, |
|
"logps/rejected": -290.5121154785156, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.02430056408047676, |
|
"rewards/margins": 0.004733243025839329, |
|
"rewards/rejected": -0.029033806174993515, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_logits/chosen": -2.6246910095214844, |
|
"eval_logits/rejected": -2.6219358444213867, |
|
"eval_logps/chosen": -222.40379333496094, |
|
"eval_logps/rejected": -228.25218200683594, |
|
"eval_loss": 0.6920604109764099, |
|
"eval_rewards/accuracies": 0.5426666736602783, |
|
"eval_rewards/chosen": -0.01691427268087864, |
|
"eval_rewards/margins": 0.0023173687513917685, |
|
"eval_rewards/rejected": -0.01923164166510105, |
|
"eval_runtime": 1621.9667, |
|
"eval_samples_per_second": 1.846, |
|
"eval_steps_per_second": 0.231, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4e-06, |
|
"logits/chosen": -2.6501355171203613, |
|
"logits/rejected": -2.652731418609619, |
|
"logps/chosen": -298.9729919433594, |
|
"logps/rejected": -309.8643493652344, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.025512468069791794, |
|
"rewards/margins": 0.008652618154883385, |
|
"rewards/rejected": -0.03416508436203003, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.800000000000001e-06, |
|
"logits/chosen": -2.676758289337158, |
|
"logits/rejected": -2.6891021728515625, |
|
"logps/chosen": -278.479736328125, |
|
"logps/rejected": -296.43212890625, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.031918395310640335, |
|
"rewards/margins": 0.006463131867349148, |
|
"rewards/rejected": -0.03838152438402176, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999755876225375e-06, |
|
"logits/chosen": -2.645005702972412, |
|
"logits/rejected": -2.62728214263916, |
|
"logps/chosen": -294.44366455078125, |
|
"logps/rejected": -315.0718994140625, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.035983841866254807, |
|
"rewards/margins": 0.011375428177416325, |
|
"rewards/rejected": -0.047359269112348557, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997803172081864e-06, |
|
"logits/chosen": -2.6803853511810303, |
|
"logits/rejected": -2.680997371673584, |
|
"logps/chosen": -289.1062927246094, |
|
"logps/rejected": -302.7191467285156, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.041079964488744736, |
|
"rewards/margins": 0.016574053093791008, |
|
"rewards/rejected": -0.057654011994600296, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9938992891651825e-06, |
|
"logits/chosen": -2.6616640090942383, |
|
"logits/rejected": -2.6513876914978027, |
|
"logps/chosen": -277.707763671875, |
|
"logps/rejected": -300.9044494628906, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.05293840169906616, |
|
"rewards/margins": 0.0216156505048275, |
|
"rewards/rejected": -0.07455406337976456, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988047277024456e-06, |
|
"logits/chosen": -2.7210304737091064, |
|
"logits/rejected": -2.7316393852233887, |
|
"logps/chosen": -288.5920715332031, |
|
"logps/rejected": -304.19964599609375, |
|
"loss": 0.6804, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.07934443652629852, |
|
"rewards/margins": 0.026776760816574097, |
|
"rewards/rejected": -0.10612119734287262, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980251707005417e-06, |
|
"logits/chosen": -2.71783185005188, |
|
"logits/rejected": -2.690868854522705, |
|
"logps/chosen": -307.91888427734375, |
|
"logps/rejected": -315.943359375, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.10906956344842911, |
|
"rewards/margins": 0.009946177713572979, |
|
"rewards/rejected": -0.11901573836803436, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970518668679459e-06, |
|
"logits/chosen": -2.729719638824463, |
|
"logits/rejected": -2.714111804962158, |
|
"logps/chosen": -304.322998046875, |
|
"logps/rejected": -311.78167724609375, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.09336166828870773, |
|
"rewards/margins": 0.0258515365421772, |
|
"rewards/rejected": -0.11921320110559464, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.958855765086722e-06, |
|
"logits/chosen": -2.757159471511841, |
|
"logits/rejected": -2.7543435096740723, |
|
"logps/chosen": -284.23687744140625, |
|
"logps/rejected": -293.60595703125, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.06431926041841507, |
|
"rewards/margins": 0.019170444458723068, |
|
"rewards/rejected": -0.08348970115184784, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.945272106796919e-06, |
|
"logits/chosen": -2.770078420639038, |
|
"logits/rejected": -2.7745845317840576, |
|
"logps/chosen": -285.0936584472656, |
|
"logps/rejected": -300.80975341796875, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0751299113035202, |
|
"rewards/margins": 0.02216259017586708, |
|
"rewards/rejected": -0.09729250520467758, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": -2.762911558151245, |
|
"eval_logits/rejected": -2.759880542755127, |
|
"eval_logps/chosen": -228.746826171875, |
|
"eval_logps/rejected": -235.77212524414062, |
|
"eval_loss": 0.6872997283935547, |
|
"eval_rewards/accuracies": 0.5566666722297668, |
|
"eval_rewards/chosen": -0.08034466207027435, |
|
"eval_rewards/margins": 0.014086335897445679, |
|
"eval_rewards/rejected": -0.09443099796772003, |
|
"eval_runtime": 1664.3062, |
|
"eval_samples_per_second": 1.799, |
|
"eval_steps_per_second": 0.225, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.929778304792537e-06, |
|
"logits/chosen": -2.7531464099884033, |
|
"logits/rejected": -2.7568936347961426, |
|
"logps/chosen": -310.95513916015625, |
|
"logps/rejected": -315.43353271484375, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.08977816253900528, |
|
"rewards/margins": 0.044371671974658966, |
|
"rewards/rejected": -0.13414981961250305, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.912386462179987e-06, |
|
"logits/chosen": -2.7818262577056885, |
|
"logits/rejected": -2.777902126312256, |
|
"logps/chosen": -298.80780029296875, |
|
"logps/rejected": -325.99853515625, |
|
"loss": 0.6741, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.10158131271600723, |
|
"rewards/margins": 0.044485487043857574, |
|
"rewards/rejected": -0.1460667848587036, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.893110164735167e-06, |
|
"logits/chosen": -2.8827590942382812, |
|
"logits/rejected": -2.8796629905700684, |
|
"logps/chosen": -305.14068603515625, |
|
"logps/rejected": -319.39471435546875, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1311652809381485, |
|
"rewards/margins": 0.0319681391119957, |
|
"rewards/rejected": -0.1631334125995636, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.871964470290823e-06, |
|
"logits/chosen": -2.90864634513855, |
|
"logits/rejected": -2.9260551929473877, |
|
"logps/chosen": -309.19970703125, |
|
"logps/rejected": -332.04119873046875, |
|
"loss": 0.6654, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.1595773994922638, |
|
"rewards/margins": 0.06538807600736618, |
|
"rewards/rejected": -0.22496548295021057, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.848965896974006e-06, |
|
"logits/chosen": -2.947906494140625, |
|
"logits/rejected": -2.940717935562134, |
|
"logps/chosen": -302.98651123046875, |
|
"logps/rejected": -325.380859375, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.19427716732025146, |
|
"rewards/margins": 0.043551910668611526, |
|
"rewards/rejected": -0.2378290891647339, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8241324103028055e-06, |
|
"logits/chosen": -3.116504430770874, |
|
"logits/rejected": -3.088792324066162, |
|
"logps/chosen": -312.82159423828125, |
|
"logps/rejected": -328.6378479003906, |
|
"loss": 0.6614, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.24597156047821045, |
|
"rewards/margins": 0.07650710642337799, |
|
"rewards/rejected": -0.322478711605072, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.797483409152438e-06, |
|
"logits/chosen": -3.2219741344451904, |
|
"logits/rejected": -3.211695432662964, |
|
"logps/chosen": -308.4544372558594, |
|
"logps/rejected": -333.4085388183594, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.3319942355155945, |
|
"rewards/margins": 0.08054188638925552, |
|
"rewards/rejected": -0.412536084651947, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.769039710601669e-06, |
|
"logits/chosen": -3.368110179901123, |
|
"logits/rejected": -3.3736987113952637, |
|
"logps/chosen": -316.3270263671875, |
|
"logps/rejected": -338.3800048828125, |
|
"loss": 0.6608, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.42326441407203674, |
|
"rewards/margins": 0.08332939445972443, |
|
"rewards/rejected": -0.5065938234329224, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.738823533671383e-06, |
|
"logits/chosen": -3.503385543823242, |
|
"logits/rejected": -3.490826368331909, |
|
"logps/chosen": -351.4194030761719, |
|
"logps/rejected": -368.7189025878906, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.5290869474411011, |
|
"rewards/margins": 0.048080917447805405, |
|
"rewards/rejected": -0.5771678686141968, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.706858481968017e-06, |
|
"logits/chosen": -3.464003801345825, |
|
"logits/rejected": -3.469198226928711, |
|
"logps/chosen": -340.75830078125, |
|
"logps/rejected": -352.869384765625, |
|
"loss": 0.6795, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.6002467274665833, |
|
"rewards/margins": 0.04778647795319557, |
|
"rewards/rejected": -0.6480332612991333, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_logits/chosen": -3.5198659896850586, |
|
"eval_logits/rejected": -3.5140511989593506, |
|
"eval_logps/chosen": -272.0929260253906, |
|
"eval_logps/rejected": -281.4856262207031, |
|
"eval_loss": 0.683901846408844, |
|
"eval_rewards/accuracies": 0.5460000038146973, |
|
"eval_rewards/chosen": -0.5138051509857178, |
|
"eval_rewards/margins": 0.037760715931653976, |
|
"eval_rewards/rejected": -0.551565945148468, |
|
"eval_runtime": 1688.0776, |
|
"eval_samples_per_second": 1.774, |
|
"eval_steps_per_second": 0.222, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.673169525245416e-06, |
|
"logits/chosen": -3.4468257427215576, |
|
"logits/rejected": -3.422842502593994, |
|
"logps/chosen": -337.1869812011719, |
|
"logps/rejected": -369.9530029296875, |
|
"loss": 0.6606, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5638504028320312, |
|
"rewards/margins": 0.09270543605089188, |
|
"rewards/rejected": -0.6565557718276978, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.63778297989952e-06, |
|
"logits/chosen": -3.598461866378784, |
|
"logits/rejected": -3.5812110900878906, |
|
"logps/chosen": -344.33819580078125, |
|
"logps/rejected": -364.0965270996094, |
|
"loss": 0.6658, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.5991551280021667, |
|
"rewards/margins": 0.08249818533658981, |
|
"rewards/rejected": -0.6816532015800476, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.60072648841109e-06, |
|
"logits/chosen": -3.7547969818115234, |
|
"logits/rejected": -3.74609637260437, |
|
"logps/chosen": -364.9189758300781, |
|
"logps/rejected": -394.4504089355469, |
|
"loss": 0.6271, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7271077632904053, |
|
"rewards/margins": 0.17919641733169556, |
|
"rewards/rejected": -0.9063041806221008, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.562028997752574e-06, |
|
"logits/chosen": -3.9404075145721436, |
|
"logits/rejected": -3.9303627014160156, |
|
"logps/chosen": -380.1604919433594, |
|
"logps/rejected": -408.8302307128906, |
|
"loss": 0.676, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.8844378590583801, |
|
"rewards/margins": 0.08313676714897156, |
|
"rewards/rejected": -0.9675747156143188, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.521720736775947e-06, |
|
"logits/chosen": -3.974989414215088, |
|
"logits/rejected": -3.998753786087036, |
|
"logps/chosen": -397.6668701171875, |
|
"logps/rejected": -411.4002990722656, |
|
"loss": 0.6559, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0197855234146118, |
|
"rewards/margins": 0.12257959693670273, |
|
"rewards/rejected": -1.1423652172088623, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.479833192599198e-06, |
|
"logits/chosen": -3.942905902862549, |
|
"logits/rejected": -3.9193332195281982, |
|
"logps/chosen": -387.99420166015625, |
|
"logps/rejected": -410.8706970214844, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.8740938305854797, |
|
"rewards/margins": 0.10989212989807129, |
|
"rewards/rejected": -0.9839859008789062, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.436399086009928e-06, |
|
"logits/chosen": -3.781745195388794, |
|
"logits/rejected": -3.746504306793213, |
|
"logps/chosen": -363.17657470703125, |
|
"logps/rejected": -384.13323974609375, |
|
"loss": 0.6487, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7560319304466248, |
|
"rewards/margins": 0.12066509574651718, |
|
"rewards/rejected": -0.8766969442367554, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.391452345905239e-06, |
|
"logits/chosen": -3.672318696975708, |
|
"logits/rejected": -3.6834559440612793, |
|
"logps/chosen": -373.22344970703125, |
|
"logps/rejected": -389.52777099609375, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8263392448425293, |
|
"rewards/margins": 0.11394073814153671, |
|
"rewards/rejected": -0.9402799606323242, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3450280827879125e-06, |
|
"logits/chosen": -3.7310726642608643, |
|
"logits/rejected": -3.7564334869384766, |
|
"logps/chosen": -374.36260986328125, |
|
"logps/rejected": -394.80010986328125, |
|
"loss": 0.6607, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.8552727699279785, |
|
"rewards/margins": 0.11295287311077118, |
|
"rewards/rejected": -0.9682257771492004, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.297162561339554e-06, |
|
"logits/chosen": -3.6382896900177, |
|
"logits/rejected": -3.6042380332946777, |
|
"logps/chosen": -380.48590087890625, |
|
"logps/rejected": -407.5166015625, |
|
"loss": 0.6561, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.8749423027038574, |
|
"rewards/margins": 0.1411461979150772, |
|
"rewards/rejected": -1.016088604927063, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_logits/chosen": -3.7580296993255615, |
|
"eval_logits/rejected": -3.7484097480773926, |
|
"eval_logps/chosen": -302.29541015625, |
|
"eval_logps/rejected": -314.21051025390625, |
|
"eval_loss": 0.6812021136283875, |
|
"eval_rewards/accuracies": 0.5573333501815796, |
|
"eval_rewards/chosen": -0.815830409526825, |
|
"eval_rewards/margins": 0.06298430263996124, |
|
"eval_rewards/rejected": -0.8788146376609802, |
|
"eval_runtime": 1618.6439, |
|
"eval_samples_per_second": 1.85, |
|
"eval_steps_per_second": 0.232, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.247893172092157e-06, |
|
"logits/chosen": -3.615405559539795, |
|
"logits/rejected": -3.612015962600708, |
|
"logps/chosen": -370.3289794921875, |
|
"logps/rejected": -405.1226806640625, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9003459215164185, |
|
"rewards/margins": 0.13107234239578247, |
|
"rewards/rejected": -1.0314182043075562, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.197258402220187e-06, |
|
"logits/chosen": -3.65478515625, |
|
"logits/rejected": -3.6666762828826904, |
|
"logps/chosen": -379.541748046875, |
|
"logps/rejected": -418.70880126953125, |
|
"loss": 0.657, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9110058546066284, |
|
"rewards/margins": 0.3442539870738983, |
|
"rewards/rejected": -1.2552598714828491, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.145297805476023e-06, |
|
"logits/chosen": -3.6817328929901123, |
|
"logits/rejected": -3.688814640045166, |
|
"logps/chosen": -376.09783935546875, |
|
"logps/rejected": -406.4654846191406, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8991987109184265, |
|
"rewards/margins": 0.1862173080444336, |
|
"rewards/rejected": -1.0854160785675049, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.092051971292228e-06, |
|
"logits/chosen": -3.716754198074341, |
|
"logits/rejected": -3.7106194496154785, |
|
"logps/chosen": -376.1932067871094, |
|
"logps/rejected": -401.6561584472656, |
|
"loss": 0.6554, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.954433798789978, |
|
"rewards/margins": 0.13505356013774872, |
|
"rewards/rejected": -1.0894873142242432, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.037562493074792e-06, |
|
"logits/chosen": -3.8129425048828125, |
|
"logits/rejected": -3.833683729171753, |
|
"logps/chosen": -403.4329528808594, |
|
"logps/rejected": -423.4547424316406, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0463995933532715, |
|
"rewards/margins": 0.19165393710136414, |
|
"rewards/rejected": -1.2380534410476685, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.981871935712112e-06, |
|
"logits/chosen": -3.983973741531372, |
|
"logits/rejected": -3.940070629119873, |
|
"logps/chosen": -385.3892517089844, |
|
"logps/rejected": -416.1813049316406, |
|
"loss": 0.6533, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.0447807312011719, |
|
"rewards/margins": 0.14543746411800385, |
|
"rewards/rejected": -1.190218210220337, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.925023802325094e-06, |
|
"logits/chosen": -4.042995929718018, |
|
"logits/rejected": -4.0136399269104, |
|
"logps/chosen": -401.97381591796875, |
|
"logps/rejected": -437.3184509277344, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.0883817672729492, |
|
"rewards/margins": 0.2549799978733063, |
|
"rewards/rejected": -1.343361735343933, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.867062500284342e-06, |
|
"logits/chosen": -4.073556423187256, |
|
"logits/rejected": -4.043025493621826, |
|
"logps/chosen": -388.99090576171875, |
|
"logps/rejected": -425.56787109375, |
|
"loss": 0.6433, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.1277819871902466, |
|
"rewards/margins": 0.1785212755203247, |
|
"rewards/rejected": -1.3063032627105713, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8080333065209885e-06, |
|
"logits/chosen": -4.076624393463135, |
|
"logits/rejected": -4.084932327270508, |
|
"logps/chosen": -391.6429138183594, |
|
"logps/rejected": -392.52362060546875, |
|
"loss": 0.7064, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.1167463064193726, |
|
"rewards/margins": 0.06357622146606445, |
|
"rewards/rejected": -1.180322527885437, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7479823321582624e-06, |
|
"logits/chosen": -3.93993878364563, |
|
"logits/rejected": -3.9026870727539062, |
|
"logps/chosen": -378.84918212890625, |
|
"logps/rejected": -416.9828186035156, |
|
"loss": 0.633, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.9214572906494141, |
|
"rewards/margins": 0.19020086526870728, |
|
"rewards/rejected": -1.1116580963134766, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_logits/chosen": -4.107741355895996, |
|
"eval_logits/rejected": -4.097755432128906, |
|
"eval_logps/chosen": -310.98577880859375, |
|
"eval_logps/rejected": -324.42694091796875, |
|
"eval_loss": 0.678744912147522, |
|
"eval_rewards/accuracies": 0.5596666932106018, |
|
"eval_rewards/chosen": -0.9027342796325684, |
|
"eval_rewards/margins": 0.0782446414232254, |
|
"eval_rewards/rejected": -0.9809789061546326, |
|
"eval_runtime": 1619.4632, |
|
"eval_samples_per_second": 1.849, |
|
"eval_steps_per_second": 0.232, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.686956486491419e-06, |
|
"logits/chosen": -3.9462807178497314, |
|
"logits/rejected": -3.941249132156372, |
|
"logps/chosen": -386.48590087890625, |
|
"logps/rejected": -424.64508056640625, |
|
"loss": 0.6313, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.9227803349494934, |
|
"rewards/margins": 0.29515519738197327, |
|
"rewards/rejected": -1.217935562133789, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.625003440344166e-06, |
|
"logits/chosen": -4.044493198394775, |
|
"logits/rejected": -4.073317527770996, |
|
"logps/chosen": -369.5896301269531, |
|
"logps/rejected": -382.105224609375, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9153301119804382, |
|
"rewards/margins": 0.08651997148990631, |
|
"rewards/rejected": -1.0018501281738281, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.562171588830231e-06, |
|
"logits/chosen": -4.016448497772217, |
|
"logits/rejected": -3.9958884716033936, |
|
"logps/chosen": -377.91912841796875, |
|
"logps/rejected": -404.9534606933594, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8825477361679077, |
|
"rewards/margins": 0.0769728347659111, |
|
"rewards/rejected": -0.9595205187797546, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4985100135491245e-06, |
|
"logits/chosen": -4.008540630340576, |
|
"logits/rejected": -3.9679737091064453, |
|
"logps/chosen": -382.65924072265625, |
|
"logps/rejected": -425.71954345703125, |
|
"loss": 0.629, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8818261027336121, |
|
"rewards/margins": 0.2214728146791458, |
|
"rewards/rejected": -1.1032989025115967, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4340684442456673e-06, |
|
"logits/chosen": -4.049837589263916, |
|
"logits/rejected": -4.043179035186768, |
|
"logps/chosen": -384.42938232421875, |
|
"logps/rejected": -410.5345153808594, |
|
"loss": 0.6434, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.9648186564445496, |
|
"rewards/margins": 0.1496874988079071, |
|
"rewards/rejected": -1.1145063638687134, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3688972199631974e-06, |
|
"logits/chosen": -4.042217254638672, |
|
"logits/rejected": -4.064842224121094, |
|
"logps/chosen": -387.6910705566406, |
|
"logps/rejected": -413.78662109375, |
|
"loss": 0.6329, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9935464859008789, |
|
"rewards/margins": 0.24879872798919678, |
|
"rewards/rejected": -1.2423454523086548, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3030472497208354e-06, |
|
"logits/chosen": -4.0646257400512695, |
|
"logits/rejected": -4.018919944763184, |
|
"logps/chosen": -385.509765625, |
|
"logps/rejected": -450.21429443359375, |
|
"loss": 0.6219, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0814400911331177, |
|
"rewards/margins": 0.26284489035606384, |
|
"rewards/rejected": -1.3442847728729248, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.236569972745492e-06, |
|
"logits/chosen": -4.145129203796387, |
|
"logits/rejected": -4.1337690353393555, |
|
"logps/chosen": -380.5148620605469, |
|
"logps/rejected": -406.1288146972656, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0649586915969849, |
|
"rewards/margins": 0.15909543633460999, |
|
"rewards/rejected": -1.2240540981292725, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1695173182897126e-06, |
|
"logits/chosen": -4.133418083190918, |
|
"logits/rejected": -4.11216402053833, |
|
"logps/chosen": -398.23626708984375, |
|
"logps/rejected": -435.56646728515625, |
|
"loss": 0.6554, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.1820625066757202, |
|
"rewards/margins": 0.16709741950035095, |
|
"rewards/rejected": -1.349160075187683, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.10194166506673e-06, |
|
"logits/chosen": -4.175902366638184, |
|
"logits/rejected": -4.1359357833862305, |
|
"logps/chosen": -390.8631286621094, |
|
"logps/rejected": -439.28436279296875, |
|
"loss": 0.6302, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.061226487159729, |
|
"rewards/margins": 0.2546694874763489, |
|
"rewards/rejected": -1.3158957958221436, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_logits/chosen": -4.443523406982422, |
|
"eval_logits/rejected": -4.431849002838135, |
|
"eval_logps/chosen": -337.6354675292969, |
|
"eval_logps/rejected": -353.24932861328125, |
|
"eval_loss": 0.6784851551055908, |
|
"eval_rewards/accuracies": 0.5596666932106018, |
|
"eval_rewards/chosen": -1.1692306995391846, |
|
"eval_rewards/margins": 0.09997232258319855, |
|
"eval_rewards/rejected": -1.2692030668258667, |
|
"eval_runtime": 1619.3984, |
|
"eval_samples_per_second": 1.849, |
|
"eval_steps_per_second": 0.232, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0338958003344115e-06, |
|
"logits/chosen": -4.3245649337768555, |
|
"logits/rejected": -4.272718906402588, |
|
"logps/chosen": -396.521240234375, |
|
"logps/rejected": -432.1895446777344, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2242915630340576, |
|
"rewards/margins": 0.24001073837280273, |
|
"rewards/rejected": -1.4643023014068604, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9654328786600823e-06, |
|
"logits/chosen": -4.306203365325928, |
|
"logits/rejected": -4.252989768981934, |
|
"logps/chosen": -397.3540344238281, |
|
"logps/rejected": -442.0118103027344, |
|
"loss": 0.6197, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.2108550071716309, |
|
"rewards/margins": 0.21409356594085693, |
|
"rewards/rejected": -1.4249485731124878, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.896606380398402e-06, |
|
"logits/chosen": -4.365767478942871, |
|
"logits/rejected": -4.406495094299316, |
|
"logps/chosen": -417.7538146972656, |
|
"logps/rejected": -443.05548095703125, |
|
"loss": 0.6623, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.3476839065551758, |
|
"rewards/margins": 0.18296115100383759, |
|
"rewards/rejected": -1.5306451320648193, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.827470069914772e-06, |
|
"logits/chosen": -4.2744035720825195, |
|
"logits/rejected": -4.236593723297119, |
|
"logps/chosen": -425.37908935546875, |
|
"logps/rejected": -453.02862548828125, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.3940837383270264, |
|
"rewards/margins": 0.14192768931388855, |
|
"rewards/rejected": -1.5360115766525269, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7580779535868675e-06, |
|
"logits/chosen": -4.252664089202881, |
|
"logits/rejected": -4.254392147064209, |
|
"logps/chosen": -409.19378662109375, |
|
"logps/rejected": -438.70556640625, |
|
"loss": 0.6575, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.230445384979248, |
|
"rewards/margins": 0.18368306756019592, |
|
"rewards/rejected": -1.4141284227371216, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.688484237617129e-06, |
|
"logits/chosen": -4.151357650756836, |
|
"logits/rejected": -4.122767925262451, |
|
"logps/chosen": -400.3338928222656, |
|
"logps/rejected": -436.16571044921875, |
|
"loss": 0.6222, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.108682632446289, |
|
"rewards/margins": 0.24583642184734344, |
|
"rewards/rejected": -1.3545191287994385, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6187432856891585e-06, |
|
"logits/chosen": -4.1051225662231445, |
|
"logits/rejected": -4.0978288650512695, |
|
"logps/chosen": -407.8655700683594, |
|
"logps/rejected": -446.1006774902344, |
|
"loss": 0.6432, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.1575034856796265, |
|
"rewards/margins": 0.18804897367954254, |
|
"rewards/rejected": -1.3455523252487183, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.548909576501096e-06, |
|
"logits/chosen": -4.150703430175781, |
|
"logits/rejected": -4.143389701843262, |
|
"logps/chosen": -413.3582458496094, |
|
"logps/rejected": -443.1758728027344, |
|
"loss": 0.6353, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.2182948589324951, |
|
"rewards/margins": 0.19429844617843628, |
|
"rewards/rejected": -1.4125932455062866, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4790376612091503e-06, |
|
"logits/chosen": -4.271695613861084, |
|
"logits/rejected": -4.229399681091309, |
|
"logps/chosen": -443.7752990722656, |
|
"logps/rejected": -475.551025390625, |
|
"loss": 0.6236, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.456383228302002, |
|
"rewards/margins": 0.23586714267730713, |
|
"rewards/rejected": -1.6922504901885986, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.40918212081453e-06, |
|
"logits/chosen": -4.358768939971924, |
|
"logits/rejected": -4.3066534996032715, |
|
"logps/chosen": -407.3147888183594, |
|
"logps/rejected": -466.919921875, |
|
"loss": 0.5743, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.3695669174194336, |
|
"rewards/margins": 0.4060749411582947, |
|
"rewards/rejected": -1.7756417989730835, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_logits/chosen": -4.518208026885986, |
|
"eval_logits/rejected": -4.504719257354736, |
|
"eval_logps/chosen": -375.0574645996094, |
|
"eval_logps/rejected": -392.7273254394531, |
|
"eval_loss": 0.6835331916809082, |
|
"eval_rewards/accuracies": 0.5630000233650208, |
|
"eval_rewards/chosen": -1.5434508323669434, |
|
"eval_rewards/margins": 0.12053229659795761, |
|
"eval_rewards/rejected": -1.6639831066131592, |
|
"eval_runtime": 1619.3262, |
|
"eval_samples_per_second": 1.849, |
|
"eval_steps_per_second": 0.232, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3393975235270654e-06, |
|
"logits/chosen": -4.303341865539551, |
|
"logits/rejected": -4.286491394042969, |
|
"logps/chosen": -452.05718994140625, |
|
"logps/rejected": -493.17144775390625, |
|
"loss": 0.6602, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.5600488185882568, |
|
"rewards/margins": 0.23919770121574402, |
|
"rewards/rejected": -1.7992465496063232, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2697383821388153e-06, |
|
"logits/chosen": -4.293368816375732, |
|
"logits/rejected": -4.3109025955200195, |
|
"logps/chosen": -435.1492614746094, |
|
"logps/rejected": -460.28033447265625, |
|
"loss": 0.6504, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.4816340208053589, |
|
"rewards/margins": 0.2186344861984253, |
|
"rewards/rejected": -1.7002685070037842, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2002591114409657e-06, |
|
"logits/chosen": -4.212637424468994, |
|
"logits/rejected": -4.208783149719238, |
|
"logps/chosen": -432.06805419921875, |
|
"logps/rejected": -468.85601806640625, |
|
"loss": 0.6325, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.403395652770996, |
|
"rewards/margins": 0.24927303194999695, |
|
"rewards/rejected": -1.6526685953140259, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.131013985717285e-06, |
|
"logits/chosen": -4.271391868591309, |
|
"logits/rejected": -4.220091819763184, |
|
"logps/chosen": -442.1729431152344, |
|
"logps/rejected": -491.44384765625, |
|
"loss": 0.6362, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4394041299819946, |
|
"rewards/margins": 0.2614460587501526, |
|
"rewards/rejected": -1.7008501291275024, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.062057096347338e-06, |
|
"logits/chosen": -4.25800895690918, |
|
"logits/rejected": -4.223499774932861, |
|
"logps/chosen": -419.89495849609375, |
|
"logps/rejected": -435.30389404296875, |
|
"loss": 0.6593, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.2841438055038452, |
|
"rewards/margins": 0.15097984671592712, |
|
"rewards/rejected": -1.4351234436035156, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9934423095525733e-06, |
|
"logits/chosen": -4.121432304382324, |
|
"logits/rejected": -4.1321306228637695, |
|
"logps/chosen": -416.6729431152344, |
|
"logps/rejected": -442.0414123535156, |
|
"loss": 0.6534, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1921896934509277, |
|
"rewards/margins": 0.2546849846839905, |
|
"rewards/rejected": -1.4468748569488525, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9252232243182986e-06, |
|
"logits/chosen": -4.221813678741455, |
|
"logits/rejected": -4.169572830200195, |
|
"logps/chosen": -377.6047668457031, |
|
"logps/rejected": -425.6900939941406, |
|
"loss": 0.6026, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.071094036102295, |
|
"rewards/margins": 0.307754784822464, |
|
"rewards/rejected": -1.3788487911224365, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8574531305244043e-06, |
|
"logits/chosen": -4.010577201843262, |
|
"logits/rejected": -3.9968719482421875, |
|
"logps/chosen": -418.9974060058594, |
|
"logps/rejected": -465.41650390625, |
|
"loss": 0.5956, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2276278734207153, |
|
"rewards/margins": 0.3166094124317169, |
|
"rewards/rejected": -1.5442373752593994, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7901849673175559e-06, |
|
"logits/chosen": -4.077489376068115, |
|
"logits/rejected": -4.033568382263184, |
|
"logps/chosen": -420.52984619140625, |
|
"logps/rejected": -455.3899841308594, |
|
"loss": 0.6434, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.2396165132522583, |
|
"rewards/margins": 0.20500020682811737, |
|
"rewards/rejected": -1.4446165561676025, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7234712817573555e-06, |
|
"logits/chosen": -4.063477516174316, |
|
"logits/rejected": -4.062304496765137, |
|
"logps/chosen": -456.8814392089844, |
|
"logps/rejected": -480.8173828125, |
|
"loss": 0.6443, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.4422929286956787, |
|
"rewards/margins": 0.22365431487560272, |
|
"rewards/rejected": -1.6659473180770874, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/chosen": -4.257167816162109, |
|
"eval_logits/rejected": -4.245348930358887, |
|
"eval_logps/chosen": -359.3107604980469, |
|
"eval_logps/rejected": -377.020751953125, |
|
"eval_loss": 0.6778839230537415, |
|
"eval_rewards/accuracies": 0.5666666626930237, |
|
"eval_rewards/chosen": -1.385983943939209, |
|
"eval_rewards/margins": 0.12093351036310196, |
|
"eval_rewards/rejected": -1.5069174766540527, |
|
"eval_runtime": 1620.4754, |
|
"eval_samples_per_second": 1.848, |
|
"eval_steps_per_second": 0.231, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6573641877687936e-06, |
|
"logits/chosen": -4.076521873474121, |
|
"logits/rejected": -4.057218074798584, |
|
"logps/chosen": -422.85699462890625, |
|
"logps/rejected": -470.95465087890625, |
|
"loss": 0.6161, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3746501207351685, |
|
"rewards/margins": 0.2610599100589752, |
|
"rewards/rejected": -1.6357100009918213, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.591915325433034e-06, |
|
"logits/chosen": -4.133788108825684, |
|
"logits/rejected": -4.142486572265625, |
|
"logps/chosen": -414.0416564941406, |
|
"logps/rejected": -449.82373046875, |
|
"loss": 0.6194, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.315294861793518, |
|
"rewards/margins": 0.30571961402893066, |
|
"rewards/rejected": -1.6210145950317383, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5271758206483664e-06, |
|
"logits/chosen": -4.143270015716553, |
|
"logits/rejected": -4.132315158843994, |
|
"logps/chosen": -438.13037109375, |
|
"logps/rejected": -471.50030517578125, |
|
"loss": 0.6481, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.4561512470245361, |
|
"rewards/margins": 0.22672787308692932, |
|
"rewards/rejected": -1.682879090309143, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4631962451927966e-06, |
|
"logits/chosen": -4.0487775802612305, |
|
"logits/rejected": -4.032698631286621, |
|
"logps/chosen": -431.3470153808594, |
|
"logps/rejected": -473.2076110839844, |
|
"loss": 0.6076, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.3846697807312012, |
|
"rewards/margins": 0.27571359276771545, |
|
"rewards/rejected": -1.6603834629058838, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4000265772195032e-06, |
|
"logits/chosen": -4.225982666015625, |
|
"logits/rejected": -4.171608924865723, |
|
"logps/chosen": -430.31201171875, |
|
"logps/rejected": -475.3255920410156, |
|
"loss": 0.6196, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.3736594915390015, |
|
"rewards/margins": 0.2828761339187622, |
|
"rewards/rejected": -1.6565355062484741, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3377161622160077e-06, |
|
"logits/chosen": -4.169137001037598, |
|
"logits/rejected": -4.160987854003906, |
|
"logps/chosen": -430.1625061035156, |
|
"logps/rejected": -470.7685546875, |
|
"loss": 0.6046, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3943125009536743, |
|
"rewards/margins": 0.2797546982765198, |
|
"rewards/rejected": -1.6740672588348389, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.276313674457553e-06, |
|
"logits/chosen": -4.306554794311523, |
|
"logits/rejected": -4.296151161193848, |
|
"logps/chosen": -415.35101318359375, |
|
"logps/rejected": -470.18145751953125, |
|
"loss": 0.5782, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.413987636566162, |
|
"rewards/margins": 0.35374554991722107, |
|
"rewards/rejected": -1.7677332162857056, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2158670789848095e-06, |
|
"logits/chosen": -4.3886284828186035, |
|
"logits/rejected": -4.385241508483887, |
|
"logps/chosen": -460.49005126953125, |
|
"logps/rejected": -502.52130126953125, |
|
"loss": 0.608, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6725581884384155, |
|
"rewards/margins": 0.34351325035095215, |
|
"rewards/rejected": -2.016071319580078, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1564235941356016e-06, |
|
"logits/chosen": -4.544154167175293, |
|
"logits/rejected": -4.480313301086426, |
|
"logps/chosen": -452.08245849609375, |
|
"logps/rejected": -509.763427734375, |
|
"loss": 0.6428, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.7576968669891357, |
|
"rewards/margins": 0.3635411858558655, |
|
"rewards/rejected": -2.1212382316589355, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0980296546599254e-06, |
|
"logits/chosen": -4.359221935272217, |
|
"logits/rejected": -4.357415199279785, |
|
"logps/chosen": -476.0204162597656, |
|
"logps/rejected": -513.69287109375, |
|
"loss": 0.6651, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.769547462463379, |
|
"rewards/margins": 0.4242987036705017, |
|
"rewards/rejected": -2.1938462257385254, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_logits/chosen": -4.617808818817139, |
|
"eval_logits/rejected": -4.603901386260986, |
|
"eval_logps/chosen": -387.0414123535156, |
|
"eval_logps/rejected": -406.733154296875, |
|
"eval_loss": 0.6818779706954956, |
|
"eval_rewards/accuracies": 0.5693333148956299, |
|
"eval_rewards/chosen": -1.6632905006408691, |
|
"eval_rewards/margins": 0.14075076580047607, |
|
"eval_rewards/rejected": -1.8040413856506348, |
|
"eval_runtime": 1619.8038, |
|
"eval_samples_per_second": 1.848, |
|
"eval_steps_per_second": 0.232, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.040730875447083e-06, |
|
"logits/chosen": -4.3218793869018555, |
|
"logits/rejected": -4.3270487785339355, |
|
"logps/chosen": -451.809814453125, |
|
"logps/rejected": -478.60491943359375, |
|
"loss": 0.6503, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.565606713294983, |
|
"rewards/margins": 0.20372018218040466, |
|
"rewards/rejected": -1.7693268060684204, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.845720158932414e-07, |
|
"logits/chosen": -4.289103031158447, |
|
"logits/rejected": -4.304252624511719, |
|
"logps/chosen": -420.05926513671875, |
|
"logps/rejected": -442.1366271972656, |
|
"loss": 0.6505, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4516870975494385, |
|
"rewards/margins": 0.20668797194957733, |
|
"rewards/rejected": -1.6583747863769531, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.295969449372796e-07, |
|
"logits/chosen": -4.317067623138428, |
|
"logits/rejected": -4.289021968841553, |
|
"logps/chosen": -417.4593811035156, |
|
"logps/rejected": -454.826904296875, |
|
"loss": 0.6219, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.3394687175750732, |
|
"rewards/margins": 0.25516074895858765, |
|
"rewards/rejected": -1.5946292877197266, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.758486067922176e-07, |
|
"logits/chosen": -4.306538105010986, |
|
"logits/rejected": -4.262487411499023, |
|
"logps/chosen": -414.806884765625, |
|
"logps/rejected": -465.29931640625, |
|
"loss": 0.5998, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3654025793075562, |
|
"rewards/margins": 0.3171504735946655, |
|
"rewards/rejected": -1.6825529336929321, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.233689873990006e-07, |
|
"logits/chosen": -4.317531585693359, |
|
"logits/rejected": -4.282795429229736, |
|
"logps/chosen": -420.382568359375, |
|
"logps/rejected": -493.355712890625, |
|
"loss": 0.5953, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.419424295425415, |
|
"rewards/margins": 0.5507811903953552, |
|
"rewards/rejected": -1.970205545425415, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.721990816288555e-07, |
|
"logits/chosen": -4.283775329589844, |
|
"logits/rejected": -4.2459330558776855, |
|
"logps/chosen": -395.4678955078125, |
|
"logps/rejected": -434.7349548339844, |
|
"loss": 0.6193, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3240183591842651, |
|
"rewards/margins": 0.2636045813560486, |
|
"rewards/rejected": -1.587622880935669, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.223788612598148e-07, |
|
"logits/chosen": -4.310162544250488, |
|
"logits/rejected": -4.287927627563477, |
|
"logps/chosen": -436.2978515625, |
|
"logps/rejected": -470.63079833984375, |
|
"loss": 0.6316, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.4355452060699463, |
|
"rewards/margins": 0.18634586036205292, |
|
"rewards/rejected": -1.6218910217285156, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.73947243752448e-07, |
|
"logits/chosen": -4.377969264984131, |
|
"logits/rejected": -4.389736652374268, |
|
"logps/chosen": -458.318359375, |
|
"logps/rejected": -487.77874755859375, |
|
"loss": 0.6632, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.6293509006500244, |
|
"rewards/margins": 0.36782675981521606, |
|
"rewards/rejected": -1.9971777200698853, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.269420618491759e-07, |
|
"logits/chosen": -4.321467876434326, |
|
"logits/rejected": -4.286978721618652, |
|
"logps/chosen": -420.07220458984375, |
|
"logps/rejected": -458.8563537597656, |
|
"loss": 0.6383, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4905959367752075, |
|
"rewards/margins": 0.22982291877269745, |
|
"rewards/rejected": -1.720418930053711, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.814000340209267e-07, |
|
"logits/chosen": -4.299461364746094, |
|
"logits/rejected": -4.237879276275635, |
|
"logps/chosen": -435.0042419433594, |
|
"logps/rejected": -491.1070861816406, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4882529973983765, |
|
"rewards/margins": 0.3519567549228668, |
|
"rewards/rejected": -1.8402099609375, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_logits/chosen": -4.549124717712402, |
|
"eval_logits/rejected": -4.535641193389893, |
|
"eval_logps/chosen": -378.4713439941406, |
|
"eval_logps/rejected": -398.2364196777344, |
|
"eval_loss": 0.6785325407981873, |
|
"eval_rewards/accuracies": 0.5683333277702332, |
|
"eval_rewards/chosen": -1.577589511871338, |
|
"eval_rewards/margins": 0.1414840668439865, |
|
"eval_rewards/rejected": -1.7190735340118408, |
|
"eval_runtime": 1620.6034, |
|
"eval_samples_per_second": 1.847, |
|
"eval_steps_per_second": 0.231, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.373567357842111e-07, |
|
"logits/chosen": -4.278590202331543, |
|
"logits/rejected": -4.246352195739746, |
|
"logps/chosen": -429.3436584472656, |
|
"logps/rejected": -471.84722900390625, |
|
"loss": 0.5957, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.422374963760376, |
|
"rewards/margins": 0.33030739426612854, |
|
"rewards/rejected": -1.7526824474334717, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.948465719110226e-07, |
|
"logits/chosen": -4.373248100280762, |
|
"logits/rejected": -4.3430304527282715, |
|
"logps/chosen": -415.3047790527344, |
|
"logps/rejected": -445.019287109375, |
|
"loss": 0.6443, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.40928053855896, |
|
"rewards/margins": 0.228702574968338, |
|
"rewards/rejected": -1.6379830837249756, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.539027495532766e-07, |
|
"logits/chosen": -4.33120059967041, |
|
"logits/rejected": -4.347836494445801, |
|
"logps/chosen": -415.25555419921875, |
|
"logps/rejected": -453.518798828125, |
|
"loss": 0.6177, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.4035775661468506, |
|
"rewards/margins": 0.28108319640159607, |
|
"rewards/rejected": -1.6846606731414795, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.14557252302783e-07, |
|
"logits/chosen": -4.341530799865723, |
|
"logits/rejected": -4.305572032928467, |
|
"logps/chosen": -433.0341796875, |
|
"logps/rejected": -472.1036682128906, |
|
"loss": 0.6713, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.5271722078323364, |
|
"rewards/margins": 0.23339995741844177, |
|
"rewards/rejected": -1.760572075843811, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.7684081520700884e-07, |
|
"logits/chosen": -4.2357988357543945, |
|
"logits/rejected": -4.237625598907471, |
|
"logps/chosen": -450.54827880859375, |
|
"logps/rejected": -471.02423095703125, |
|
"loss": 0.6429, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.5025393962860107, |
|
"rewards/margins": 0.2623196542263031, |
|
"rewards/rejected": -1.7648589611053467, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.407829007601507e-07, |
|
"logits/chosen": -4.270508766174316, |
|
"logits/rejected": -4.225382328033447, |
|
"logps/chosen": -428.3861389160156, |
|
"logps/rejected": -479.0707092285156, |
|
"loss": 0.6119, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.4203639030456543, |
|
"rewards/margins": 0.3360464870929718, |
|
"rewards/rejected": -1.7564103603363037, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.064116758882724e-07, |
|
"logits/chosen": -4.24053955078125, |
|
"logits/rejected": -4.1828999519348145, |
|
"logps/chosen": -443.78485107421875, |
|
"logps/rejected": -504.28070068359375, |
|
"loss": 0.6037, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5132863521575928, |
|
"rewards/margins": 0.4130435883998871, |
|
"rewards/rejected": -1.9263302087783813, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.737539899464908e-07, |
|
"logits/chosen": -4.2971696853637695, |
|
"logits/rejected": -4.288466453552246, |
|
"logps/chosen": -403.1819763183594, |
|
"logps/rejected": -454.8402404785156, |
|
"loss": 0.6141, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.3817451000213623, |
|
"rewards/margins": 0.379900187253952, |
|
"rewards/rejected": -1.7616455554962158, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.4283535374538645e-07, |
|
"logits/chosen": -4.242150783538818, |
|
"logits/rejected": -4.240577220916748, |
|
"logps/chosen": -434.7925720214844, |
|
"logps/rejected": -473.67803955078125, |
|
"loss": 0.6122, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.4003090858459473, |
|
"rewards/margins": 0.31833842396736145, |
|
"rewards/rejected": -1.7186473608016968, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1367991962303298e-07, |
|
"logits/chosen": -4.232297420501709, |
|
"logits/rejected": -4.2173943519592285, |
|
"logps/chosen": -418.9490661621094, |
|
"logps/rejected": -446.4789123535156, |
|
"loss": 0.6759, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.5112884044647217, |
|
"rewards/margins": 0.1443391889333725, |
|
"rewards/rejected": -1.6556276082992554, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_logits/chosen": -4.485485553741455, |
|
"eval_logits/rejected": -4.472198009490967, |
|
"eval_logps/chosen": -375.86541748046875, |
|
"eval_logps/rejected": -395.56036376953125, |
|
"eval_loss": 0.6777821779251099, |
|
"eval_rewards/accuracies": 0.5686666369438171, |
|
"eval_rewards/chosen": -1.5515305995941162, |
|
"eval_rewards/margins": 0.14078289270401, |
|
"eval_rewards/rejected": -1.6923134326934814, |
|
"eval_runtime": 1618.2899, |
|
"eval_samples_per_second": 1.85, |
|
"eval_steps_per_second": 0.232, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8631046257820278e-07, |
|
"logits/chosen": -4.233702659606934, |
|
"logits/rejected": -4.249814033508301, |
|
"logps/chosen": -432.20458984375, |
|
"logps/rejected": -461.7394104003906, |
|
"loss": 0.6275, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.41855788230896, |
|
"rewards/margins": 0.2600322365760803, |
|
"rewards/rejected": -1.678590178489685, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6074836247950143e-07, |
|
"logits/chosen": -4.206725597381592, |
|
"logits/rejected": -4.209759712219238, |
|
"logps/chosen": -443.52459716796875, |
|
"logps/rejected": -470.96588134765625, |
|
"loss": 0.6415, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.4318974018096924, |
|
"rewards/margins": 0.21563585102558136, |
|
"rewards/rejected": -1.6475334167480469, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.370135873643097e-07, |
|
"logits/chosen": -4.226916313171387, |
|
"logits/rejected": -4.266509532928467, |
|
"logps/chosen": -436.6336975097656, |
|
"logps/rejected": -456.3214416503906, |
|
"loss": 0.6262, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.4590485095977783, |
|
"rewards/margins": 0.2844700217247009, |
|
"rewards/rejected": -1.7435184717178345, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1512467784059372e-07, |
|
"logits/chosen": -4.302299499511719, |
|
"logits/rejected": -4.261002540588379, |
|
"logps/chosen": -398.5301208496094, |
|
"logps/rejected": -437.83673095703125, |
|
"loss": 0.6221, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.3698493242263794, |
|
"rewards/margins": 0.2959776520729065, |
|
"rewards/rejected": -1.6658270359039307, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.509873260376251e-08, |
|
"logits/chosen": -4.228875160217285, |
|
"logits/rejected": -4.17572021484375, |
|
"logps/chosen": -429.0230407714844, |
|
"logps/rejected": -501.6388244628906, |
|
"loss": 0.5814, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4344505071640015, |
|
"rewards/margins": 0.4450379014015198, |
|
"rewards/rejected": -1.8794885873794556, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.695139507988559e-08, |
|
"logits/chosen": -4.261081218719482, |
|
"logits/rejected": -4.282492160797119, |
|
"logps/chosen": -444.05096435546875, |
|
"logps/rejected": -479.0032653808594, |
|
"loss": 0.6376, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.457777500152588, |
|
"rewards/margins": 0.2464013397693634, |
|
"rewards/rejected": -1.704178810119629, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.069684120570684e-08, |
|
"logits/chosen": -4.286696434020996, |
|
"logits/rejected": -4.1916022300720215, |
|
"logps/chosen": -431.74053955078125, |
|
"logps/rejected": -482.0506896972656, |
|
"loss": 0.6355, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5193744897842407, |
|
"rewards/margins": 0.3015151619911194, |
|
"rewards/rejected": -1.8208894729614258, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.634776835499871e-08, |
|
"logits/chosen": -4.216092109680176, |
|
"logits/rejected": -4.185781955718994, |
|
"logps/chosen": -413.5948791503906, |
|
"logps/rejected": -455.0482482910156, |
|
"loss": 0.6371, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.4376914501190186, |
|
"rewards/margins": 0.27367502450942993, |
|
"rewards/rejected": -1.7113662958145142, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.3915385419908964e-08, |
|
"logits/chosen": -4.179436206817627, |
|
"logits/rejected": -4.222240447998047, |
|
"logps/chosen": -430.4244079589844, |
|
"logps/rejected": -464.51629638671875, |
|
"loss": 0.6297, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.4367833137512207, |
|
"rewards/margins": 0.29136672616004944, |
|
"rewards/rejected": -1.7281500101089478, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3409404055043938e-08, |
|
"logits/chosen": -4.306519508361816, |
|
"logits/rejected": -4.272242546081543, |
|
"logps/chosen": -437.791259765625, |
|
"logps/rejected": -471.58575439453125, |
|
"loss": 0.6402, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.5211617946624756, |
|
"rewards/margins": 0.2325367033481598, |
|
"rewards/rejected": -1.7536985874176025, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_logits/chosen": -4.457742691040039, |
|
"eval_logits/rejected": -4.444427967071533, |
|
"eval_logps/chosen": -375.10284423828125, |
|
"eval_logps/rejected": -394.7001647949219, |
|
"eval_loss": 0.6772644519805908, |
|
"eval_rewards/accuracies": 0.5690000057220459, |
|
"eval_rewards/chosen": -1.5439047813415527, |
|
"eval_rewards/margins": 0.13980673253536224, |
|
"eval_rewards/rejected": -1.6837116479873657, |
|
"eval_runtime": 1624.6813, |
|
"eval_samples_per_second": 1.843, |
|
"eval_steps_per_second": 0.231, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.4838031091134186e-08, |
|
"logits/chosen": -4.243393898010254, |
|
"logits/rejected": -4.175347805023193, |
|
"logps/chosen": -404.5989990234375, |
|
"logps/rejected": -469.9576110839844, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.3969910144805908, |
|
"rewards/margins": 0.3869909644126892, |
|
"rewards/rejected": -1.7839819192886353, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.207962124201774e-09, |
|
"logits/chosen": -4.263760566711426, |
|
"logits/rejected": -4.225130081176758, |
|
"logps/chosen": -435.9114685058594, |
|
"logps/rejected": -472.66839599609375, |
|
"loss": 0.6017, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.37319016456604, |
|
"rewards/margins": 0.3209651708602905, |
|
"rewards/rejected": -1.6941554546356201, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.5243762852441023e-09, |
|
"logits/chosen": -4.210858345031738, |
|
"logits/rejected": -4.163503170013428, |
|
"logps/chosen": -427.44342041015625, |
|
"logps/rejected": -469.5209045410156, |
|
"loss": 0.6376, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.4386152029037476, |
|
"rewards/margins": 0.2547362446784973, |
|
"rewards/rejected": -1.6933513879776, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.909321945129278e-10, |
|
"logits/chosen": -4.18636417388916, |
|
"logits/rejected": -4.141125679016113, |
|
"logps/chosen": -443.1625061035156, |
|
"logps/rejected": -493.43975830078125, |
|
"loss": 0.6016, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.4015429019927979, |
|
"rewards/margins": 0.35573890805244446, |
|
"rewards/rejected": -1.7572818994522095, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1249, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6475976420174225, |
|
"train_runtime": 42677.4758, |
|
"train_samples_per_second": 0.469, |
|
"train_steps_per_second": 0.029 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1249, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|