|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 1563, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 43.373348236083984, |
|
"kl": 0.018903231248259544, |
|
"learning_rate": 1.592356687898089e-08, |
|
"logps/chosen": -299.6726379394531, |
|
"logps/rejected": -346.7806396484375, |
|
"loss": 0.5001, |
|
"rewards/chosen": -0.0007079701172187924, |
|
"rewards/margins": -0.001962849870324135, |
|
"rewards/rejected": 0.0012548796366900206, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 45.312686920166016, |
|
"kl": 0.05140366405248642, |
|
"learning_rate": 3.184713375796178e-08, |
|
"logps/chosen": -310.0472717285156, |
|
"logps/rejected": -311.03424072265625, |
|
"loss": 0.4979, |
|
"rewards/chosen": 0.019934307783842087, |
|
"rewards/margins": 0.010924299247562885, |
|
"rewards/rejected": 0.009010007604956627, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 44.28553771972656, |
|
"kl": 0.01275167427957058, |
|
"learning_rate": 4.777070063694268e-08, |
|
"logps/chosen": -258.36212158203125, |
|
"logps/rejected": -348.38824462890625, |
|
"loss": 0.4943, |
|
"rewards/chosen": 0.04069889709353447, |
|
"rewards/margins": 0.04853983223438263, |
|
"rewards/rejected": -0.007840934209525585, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 45.054351806640625, |
|
"kl": 0.0, |
|
"learning_rate": 6.369426751592356e-08, |
|
"logps/chosen": -245.97470092773438, |
|
"logps/rejected": -350.9342041015625, |
|
"loss": 0.4837, |
|
"rewards/chosen": 0.07507207244634628, |
|
"rewards/margins": 0.13315154612064362, |
|
"rewards/rejected": -0.05807947367429733, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 43.90499496459961, |
|
"kl": 0.0, |
|
"learning_rate": 7.961783439490445e-08, |
|
"logps/chosen": -240.2781524658203, |
|
"logps/rejected": -334.9874267578125, |
|
"loss": 0.465, |
|
"rewards/chosen": 0.16415421664714813, |
|
"rewards/margins": 0.27640852332115173, |
|
"rewards/rejected": -0.11225433647632599, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 42.99668884277344, |
|
"kl": 0.0, |
|
"learning_rate": 9.554140127388536e-08, |
|
"logps/chosen": -310.78314208984375, |
|
"logps/rejected": -325.97113037109375, |
|
"loss": 0.4387, |
|
"rewards/chosen": 0.21505948901176453, |
|
"rewards/margins": 0.4358912408351898, |
|
"rewards/rejected": -0.22083178162574768, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 42.578617095947266, |
|
"kl": 0.0, |
|
"learning_rate": 1.1146496815286624e-07, |
|
"logps/chosen": -263.4063415527344, |
|
"logps/rejected": -312.7994689941406, |
|
"loss": 0.4157, |
|
"rewards/chosen": 0.25127312541007996, |
|
"rewards/margins": 0.7119064331054688, |
|
"rewards/rejected": -0.46063321828842163, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 38.7848014831543, |
|
"kl": 0.0, |
|
"learning_rate": 1.2738853503184713e-07, |
|
"logps/chosen": -283.001953125, |
|
"logps/rejected": -342.24493408203125, |
|
"loss": 0.3764, |
|
"rewards/chosen": 0.18532009422779083, |
|
"rewards/margins": 1.1129443645477295, |
|
"rewards/rejected": -0.9276243448257446, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 33.522525787353516, |
|
"kl": 0.0, |
|
"learning_rate": 1.43312101910828e-07, |
|
"logps/chosen": -263.763916015625, |
|
"logps/rejected": -356.7685852050781, |
|
"loss": 0.3194, |
|
"rewards/chosen": 0.22150889039039612, |
|
"rewards/margins": 1.852413535118103, |
|
"rewards/rejected": -1.6309047937393188, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 34.30215072631836, |
|
"kl": 0.0, |
|
"learning_rate": 1.592356687898089e-07, |
|
"logps/chosen": -305.10076904296875, |
|
"logps/rejected": -346.50970458984375, |
|
"loss": 0.3089, |
|
"rewards/chosen": 0.23655760288238525, |
|
"rewards/margins": 2.413478136062622, |
|
"rewards/rejected": -2.1769204139709473, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 63.321285247802734, |
|
"kl": 0.0, |
|
"learning_rate": 1.7515923566878978e-07, |
|
"logps/chosen": -304.7918701171875, |
|
"logps/rejected": -375.59271240234375, |
|
"loss": 0.289, |
|
"rewards/chosen": 0.26213210821151733, |
|
"rewards/margins": 2.895750045776367, |
|
"rewards/rejected": -2.633617877960205, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 31.712352752685547, |
|
"kl": 0.0, |
|
"learning_rate": 1.9108280254777072e-07, |
|
"logps/chosen": -261.5682067871094, |
|
"logps/rejected": -345.7394104003906, |
|
"loss": 0.2843, |
|
"rewards/chosen": 0.3677944540977478, |
|
"rewards/margins": 2.7658519744873047, |
|
"rewards/rejected": -2.398057460784912, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 33.49573516845703, |
|
"kl": 0.0, |
|
"learning_rate": 2.070063694267516e-07, |
|
"logps/chosen": -300.9778137207031, |
|
"logps/rejected": -324.8972473144531, |
|
"loss": 0.2645, |
|
"rewards/chosen": 0.3324030041694641, |
|
"rewards/margins": 3.486532688140869, |
|
"rewards/rejected": -3.1541295051574707, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 33.059207916259766, |
|
"kl": 0.0, |
|
"learning_rate": 2.2292993630573247e-07, |
|
"logps/chosen": -318.46124267578125, |
|
"logps/rejected": -387.02947998046875, |
|
"loss": 0.2496, |
|
"rewards/chosen": 0.3689573109149933, |
|
"rewards/margins": 3.851461887359619, |
|
"rewards/rejected": -3.482504367828369, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 31.94024658203125, |
|
"kl": 0.0, |
|
"learning_rate": 2.388535031847134e-07, |
|
"logps/chosen": -358.51654052734375, |
|
"logps/rejected": -330.6874694824219, |
|
"loss": 0.2602, |
|
"rewards/chosen": 0.41941699385643005, |
|
"rewards/margins": 3.710043430328369, |
|
"rewards/rejected": -3.2906270027160645, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 34.636436462402344, |
|
"kl": 0.0, |
|
"learning_rate": 2.5477707006369425e-07, |
|
"logps/chosen": -268.0237121582031, |
|
"logps/rejected": -380.11676025390625, |
|
"loss": 0.2606, |
|
"rewards/chosen": 0.410856157541275, |
|
"rewards/margins": 4.273175239562988, |
|
"rewards/rejected": -3.862318754196167, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 29.300586700439453, |
|
"kl": 0.0, |
|
"learning_rate": 2.7070063694267513e-07, |
|
"logps/chosen": -323.76416015625, |
|
"logps/rejected": -359.8213195800781, |
|
"loss": 0.2378, |
|
"rewards/chosen": 0.46109551191329956, |
|
"rewards/margins": 4.482287406921387, |
|
"rewards/rejected": -4.021191596984863, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 32.00096130371094, |
|
"kl": 0.0, |
|
"learning_rate": 2.86624203821656e-07, |
|
"logps/chosen": -292.0968322753906, |
|
"logps/rejected": -414.10980224609375, |
|
"loss": 0.2176, |
|
"rewards/chosen": 0.5200246572494507, |
|
"rewards/margins": 5.136369228363037, |
|
"rewards/rejected": -4.616344928741455, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 31.802518844604492, |
|
"kl": 0.0, |
|
"learning_rate": 3.0254777070063694e-07, |
|
"logps/chosen": -289.4820556640625, |
|
"logps/rejected": -367.87030029296875, |
|
"loss": 0.2372, |
|
"rewards/chosen": 0.5198050737380981, |
|
"rewards/margins": 5.272117614746094, |
|
"rewards/rejected": -4.752312183380127, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 37.19041061401367, |
|
"kl": 0.0, |
|
"learning_rate": 3.184713375796178e-07, |
|
"logps/chosen": -280.93896484375, |
|
"logps/rejected": -394.2848205566406, |
|
"loss": 0.2338, |
|
"rewards/chosen": 0.4976850152015686, |
|
"rewards/margins": 5.734658241271973, |
|
"rewards/rejected": -5.236973285675049, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 26.246625900268555, |
|
"kl": 0.0, |
|
"learning_rate": 3.343949044585987e-07, |
|
"logps/chosen": -315.616943359375, |
|
"logps/rejected": -338.24346923828125, |
|
"loss": 0.2378, |
|
"rewards/chosen": 0.5364874601364136, |
|
"rewards/margins": 4.776383399963379, |
|
"rewards/rejected": -4.239895820617676, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 34.44779586791992, |
|
"kl": 0.0, |
|
"learning_rate": 3.5031847133757957e-07, |
|
"logps/chosen": -253.6793975830078, |
|
"logps/rejected": -391.8357238769531, |
|
"loss": 0.2379, |
|
"rewards/chosen": 0.40439778566360474, |
|
"rewards/margins": 5.385909557342529, |
|
"rewards/rejected": -4.98151159286499, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 45.468196868896484, |
|
"kl": 0.0, |
|
"learning_rate": 3.6624203821656045e-07, |
|
"logps/chosen": -283.26025390625, |
|
"logps/rejected": -404.784423828125, |
|
"loss": 0.2448, |
|
"rewards/chosen": 0.5039983987808228, |
|
"rewards/margins": 5.760153770446777, |
|
"rewards/rejected": -5.256155967712402, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 29.123733520507812, |
|
"kl": 0.0, |
|
"learning_rate": 3.8216560509554143e-07, |
|
"logps/chosen": -292.062255859375, |
|
"logps/rejected": -350.0133056640625, |
|
"loss": 0.2361, |
|
"rewards/chosen": 0.5945883989334106, |
|
"rewards/margins": 5.297600746154785, |
|
"rewards/rejected": -4.703011989593506, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 31.618877410888672, |
|
"kl": 0.0, |
|
"learning_rate": 3.980891719745223e-07, |
|
"logps/chosen": -304.79302978515625, |
|
"logps/rejected": -412.56396484375, |
|
"loss": 0.2332, |
|
"rewards/chosen": 0.5294879674911499, |
|
"rewards/margins": 5.99056339263916, |
|
"rewards/rejected": -5.461075782775879, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 31.75819969177246, |
|
"kl": 0.0, |
|
"learning_rate": 4.140127388535032e-07, |
|
"logps/chosen": -343.9717712402344, |
|
"logps/rejected": -413.4139709472656, |
|
"loss": 0.2209, |
|
"rewards/chosen": 0.6387773752212524, |
|
"rewards/margins": 6.253381729125977, |
|
"rewards/rejected": -5.6146039962768555, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 80.12673950195312, |
|
"kl": 0.0, |
|
"learning_rate": 4.2993630573248406e-07, |
|
"logps/chosen": -276.16864013671875, |
|
"logps/rejected": -370.7424621582031, |
|
"loss": 0.2331, |
|
"rewards/chosen": 0.4045650362968445, |
|
"rewards/margins": 5.879421710968018, |
|
"rewards/rejected": -5.474856853485107, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 30.59686851501465, |
|
"kl": 0.0, |
|
"learning_rate": 4.4585987261146494e-07, |
|
"logps/chosen": -264.196533203125, |
|
"logps/rejected": -359.3782043457031, |
|
"loss": 0.2489, |
|
"rewards/chosen": 0.45963722467422485, |
|
"rewards/margins": 4.636446952819824, |
|
"rewards/rejected": -4.176809310913086, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 28.13033103942871, |
|
"kl": 0.0, |
|
"learning_rate": 4.6178343949044587e-07, |
|
"logps/chosen": -269.31793212890625, |
|
"logps/rejected": -414.6089782714844, |
|
"loss": 0.2443, |
|
"rewards/chosen": 0.4464253783226013, |
|
"rewards/margins": 5.700473308563232, |
|
"rewards/rejected": -5.2540483474731445, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 29.259023666381836, |
|
"kl": 0.0, |
|
"learning_rate": 4.777070063694267e-07, |
|
"logps/chosen": -315.52374267578125, |
|
"logps/rejected": -365.8375549316406, |
|
"loss": 0.2258, |
|
"rewards/chosen": 0.6224046349525452, |
|
"rewards/margins": 6.201630115509033, |
|
"rewards/rejected": -5.579225063323975, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 32.15901565551758, |
|
"kl": 0.0, |
|
"learning_rate": 4.936305732484076e-07, |
|
"logps/chosen": -284.3131103515625, |
|
"logps/rejected": -374.58770751953125, |
|
"loss": 0.2226, |
|
"rewards/chosen": 0.5719965100288391, |
|
"rewards/margins": 6.714544773101807, |
|
"rewards/rejected": -6.142548561096191, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 33.57276153564453, |
|
"kl": 0.0, |
|
"learning_rate": 4.989331436699858e-07, |
|
"logps/chosen": -291.7167053222656, |
|
"logps/rejected": -374.89031982421875, |
|
"loss": 0.2333, |
|
"rewards/chosen": 0.6062914133071899, |
|
"rewards/margins": 6.262972831726074, |
|
"rewards/rejected": -5.656682014465332, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 27.332050323486328, |
|
"kl": 0.0, |
|
"learning_rate": 4.971550497866287e-07, |
|
"logps/chosen": -309.1024475097656, |
|
"logps/rejected": -402.1785583496094, |
|
"loss": 0.2336, |
|
"rewards/chosen": 0.5264068841934204, |
|
"rewards/margins": 6.772003173828125, |
|
"rewards/rejected": -6.245596408843994, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 26.317598342895508, |
|
"kl": 0.0, |
|
"learning_rate": 4.953769559032717e-07, |
|
"logps/chosen": -277.711669921875, |
|
"logps/rejected": -364.07745361328125, |
|
"loss": 0.2229, |
|
"rewards/chosen": 0.6749172210693359, |
|
"rewards/margins": 6.215371131896973, |
|
"rewards/rejected": -5.540453910827637, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 26.295717239379883, |
|
"kl": 0.0, |
|
"learning_rate": 4.935988620199146e-07, |
|
"logps/chosen": -315.84600830078125, |
|
"logps/rejected": -419.64947509765625, |
|
"loss": 0.2055, |
|
"rewards/chosen": 0.8088045120239258, |
|
"rewards/margins": 7.324716091156006, |
|
"rewards/rejected": -6.515912055969238, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 28.048669815063477, |
|
"kl": 0.0, |
|
"learning_rate": 4.918207681365576e-07, |
|
"logps/chosen": -278.0254821777344, |
|
"logps/rejected": -392.7597351074219, |
|
"loss": 0.2087, |
|
"rewards/chosen": 0.7063338756561279, |
|
"rewards/margins": 7.6127119064331055, |
|
"rewards/rejected": -6.906378269195557, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 29.8698673248291, |
|
"kl": 0.0, |
|
"learning_rate": 4.900426742532006e-07, |
|
"logps/chosen": -272.4722900390625, |
|
"logps/rejected": -392.2569580078125, |
|
"loss": 0.2483, |
|
"rewards/chosen": 0.6673704385757446, |
|
"rewards/margins": 6.765078067779541, |
|
"rewards/rejected": -6.097708225250244, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 27.31839370727539, |
|
"kl": 0.0, |
|
"learning_rate": 4.882645803698435e-07, |
|
"logps/chosen": -295.7112731933594, |
|
"logps/rejected": -434.19317626953125, |
|
"loss": 0.2083, |
|
"rewards/chosen": 0.6688440442085266, |
|
"rewards/margins": 7.306863307952881, |
|
"rewards/rejected": -6.638019561767578, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 35.319053649902344, |
|
"kl": 0.0, |
|
"learning_rate": 4.864864864864865e-07, |
|
"logps/chosen": -283.3681945800781, |
|
"logps/rejected": -392.4473571777344, |
|
"loss": 0.2282, |
|
"rewards/chosen": 0.6834143400192261, |
|
"rewards/margins": 7.1807074546813965, |
|
"rewards/rejected": -6.497292995452881, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 28.466360092163086, |
|
"kl": 0.0, |
|
"learning_rate": 4.847083926031294e-07, |
|
"logps/chosen": -301.3221130371094, |
|
"logps/rejected": -402.22845458984375, |
|
"loss": 0.2128, |
|
"rewards/chosen": 0.8221393823623657, |
|
"rewards/margins": 7.278465270996094, |
|
"rewards/rejected": -6.456326484680176, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 27.28252601623535, |
|
"kl": 0.0, |
|
"learning_rate": 4.829302987197724e-07, |
|
"logps/chosen": -306.77642822265625, |
|
"logps/rejected": -413.36346435546875, |
|
"loss": 0.2073, |
|
"rewards/chosen": 0.6411277651786804, |
|
"rewards/margins": 7.287237644195557, |
|
"rewards/rejected": -6.646109580993652, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 32.64134979248047, |
|
"kl": 0.0, |
|
"learning_rate": 4.811522048364154e-07, |
|
"logps/chosen": -291.44818115234375, |
|
"logps/rejected": -427.9637756347656, |
|
"loss": 0.2164, |
|
"rewards/chosen": 0.8150373697280884, |
|
"rewards/margins": 7.651273250579834, |
|
"rewards/rejected": -6.836236476898193, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 23.997915267944336, |
|
"kl": 0.0, |
|
"learning_rate": 4.793741109530583e-07, |
|
"logps/chosen": -260.71527099609375, |
|
"logps/rejected": -391.9830627441406, |
|
"loss": 0.2175, |
|
"rewards/chosen": 0.8054065704345703, |
|
"rewards/margins": 7.5576653480529785, |
|
"rewards/rejected": -6.752259254455566, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 29.320816040039062, |
|
"kl": 0.0, |
|
"learning_rate": 4.775960170697012e-07, |
|
"logps/chosen": -268.3311462402344, |
|
"logps/rejected": -404.9643249511719, |
|
"loss": 0.2104, |
|
"rewards/chosen": 0.7653949856758118, |
|
"rewards/margins": 7.521485805511475, |
|
"rewards/rejected": -6.756091117858887, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 27.857921600341797, |
|
"kl": 0.0, |
|
"learning_rate": 4.7581792318634425e-07, |
|
"logps/chosen": -290.99407958984375, |
|
"logps/rejected": -405.1004943847656, |
|
"loss": 0.2164, |
|
"rewards/chosen": 0.7016764879226685, |
|
"rewards/margins": 7.881253242492676, |
|
"rewards/rejected": -7.179576873779297, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 26.333301544189453, |
|
"kl": 0.0, |
|
"learning_rate": 4.7403982930298717e-07, |
|
"logps/chosen": -246.4652557373047, |
|
"logps/rejected": -408.4164123535156, |
|
"loss": 0.2048, |
|
"rewards/chosen": 0.7065305709838867, |
|
"rewards/margins": 7.9227142333984375, |
|
"rewards/rejected": -7.216183662414551, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 29.780120849609375, |
|
"kl": 0.0, |
|
"learning_rate": 4.7226173541963014e-07, |
|
"logps/chosen": -300.751953125, |
|
"logps/rejected": -409.5931091308594, |
|
"loss": 0.2022, |
|
"rewards/chosen": 0.8955439329147339, |
|
"rewards/margins": 8.069990158081055, |
|
"rewards/rejected": -7.174446105957031, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 26.441162109375, |
|
"kl": 0.0, |
|
"learning_rate": 4.7048364153627306e-07, |
|
"logps/chosen": -298.4041442871094, |
|
"logps/rejected": -396.9103088378906, |
|
"loss": 0.2223, |
|
"rewards/chosen": 0.9372671842575073, |
|
"rewards/margins": 7.630146026611328, |
|
"rewards/rejected": -6.692878723144531, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 27.794498443603516, |
|
"kl": 0.0, |
|
"learning_rate": 4.6870554765291604e-07, |
|
"logps/chosen": -304.92340087890625, |
|
"logps/rejected": -444.83526611328125, |
|
"loss": 0.1866, |
|
"rewards/chosen": 0.7705713510513306, |
|
"rewards/margins": 8.343452453613281, |
|
"rewards/rejected": -7.57288122177124, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 27.902463912963867, |
|
"kl": 0.0, |
|
"learning_rate": 4.66927453769559e-07, |
|
"logps/chosen": -277.5145263671875, |
|
"logps/rejected": -426.4693908691406, |
|
"loss": 0.2184, |
|
"rewards/chosen": 0.719990074634552, |
|
"rewards/margins": 8.114697456359863, |
|
"rewards/rejected": -7.394706726074219, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 33.13041305541992, |
|
"kl": 0.0, |
|
"learning_rate": 4.65149359886202e-07, |
|
"logps/chosen": -333.77032470703125, |
|
"logps/rejected": -404.6194152832031, |
|
"loss": 0.2038, |
|
"rewards/chosen": 0.7432358860969543, |
|
"rewards/margins": 8.134955406188965, |
|
"rewards/rejected": -7.391718864440918, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 29.542213439941406, |
|
"kl": 0.0, |
|
"learning_rate": 4.633712660028449e-07, |
|
"logps/chosen": -286.2034606933594, |
|
"logps/rejected": -383.937744140625, |
|
"loss": 0.2245, |
|
"rewards/chosen": 0.6780925989151001, |
|
"rewards/margins": 6.890599727630615, |
|
"rewards/rejected": -6.212507724761963, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 25.915569305419922, |
|
"kl": 0.0, |
|
"learning_rate": 4.615931721194879e-07, |
|
"logps/chosen": -287.2189025878906, |
|
"logps/rejected": -431.18115234375, |
|
"loss": 0.2253, |
|
"rewards/chosen": 0.7109718918800354, |
|
"rewards/margins": 7.668128967285156, |
|
"rewards/rejected": -6.957158088684082, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 28.604249954223633, |
|
"kl": 0.0, |
|
"learning_rate": 4.5981507823613085e-07, |
|
"logps/chosen": -249.9962158203125, |
|
"logps/rejected": -427.6434631347656, |
|
"loss": 0.1922, |
|
"rewards/chosen": 0.9254335165023804, |
|
"rewards/margins": 8.525291442871094, |
|
"rewards/rejected": -7.599857330322266, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 32.13933563232422, |
|
"kl": 0.0, |
|
"learning_rate": 4.580369843527738e-07, |
|
"logps/chosen": -250.48397827148438, |
|
"logps/rejected": -437.86541748046875, |
|
"loss": 0.2104, |
|
"rewards/chosen": 0.916563868522644, |
|
"rewards/margins": 8.427824020385742, |
|
"rewards/rejected": -7.511260986328125, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 24.797935485839844, |
|
"kl": 0.0, |
|
"learning_rate": 4.562588904694168e-07, |
|
"logps/chosen": -289.09368896484375, |
|
"logps/rejected": -382.2262268066406, |
|
"loss": 0.2292, |
|
"rewards/chosen": 0.8693010210990906, |
|
"rewards/margins": 8.239991188049316, |
|
"rewards/rejected": -7.37069034576416, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 29.941953659057617, |
|
"kl": 0.0, |
|
"learning_rate": 4.544807965860597e-07, |
|
"logps/chosen": -286.82781982421875, |
|
"logps/rejected": -399.9955139160156, |
|
"loss": 0.2097, |
|
"rewards/chosen": 1.1082446575164795, |
|
"rewards/margins": 8.612449645996094, |
|
"rewards/rejected": -7.504205226898193, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 28.574798583984375, |
|
"kl": 0.0, |
|
"learning_rate": 4.5270270270270264e-07, |
|
"logps/chosen": -296.76751708984375, |
|
"logps/rejected": -426.483154296875, |
|
"loss": 0.201, |
|
"rewards/chosen": 1.1846811771392822, |
|
"rewards/margins": 8.457054138183594, |
|
"rewards/rejected": -7.272373199462891, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 24.285078048706055, |
|
"kl": 0.0, |
|
"learning_rate": 4.509246088193456e-07, |
|
"logps/chosen": -253.2152099609375, |
|
"logps/rejected": -407.7524719238281, |
|
"loss": 0.2124, |
|
"rewards/chosen": 0.8104816675186157, |
|
"rewards/margins": 8.143354415893555, |
|
"rewards/rejected": -7.332873344421387, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 28.82489585876465, |
|
"kl": 0.0, |
|
"learning_rate": 4.491465149359886e-07, |
|
"logps/chosen": -282.62725830078125, |
|
"logps/rejected": -399.4122009277344, |
|
"loss": 0.2058, |
|
"rewards/chosen": 0.829262375831604, |
|
"rewards/margins": 8.509421348571777, |
|
"rewards/rejected": -7.680159568786621, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 26.383644104003906, |
|
"kl": 0.0, |
|
"learning_rate": 4.4736842105263156e-07, |
|
"logps/chosen": -253.8015899658203, |
|
"logps/rejected": -423.59454345703125, |
|
"loss": 0.1916, |
|
"rewards/chosen": 0.9279552698135376, |
|
"rewards/margins": 8.754355430603027, |
|
"rewards/rejected": -7.8264007568359375, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 30.41068458557129, |
|
"kl": 0.0, |
|
"learning_rate": 4.4559032716927454e-07, |
|
"logps/chosen": -306.1068420410156, |
|
"logps/rejected": -404.15777587890625, |
|
"loss": 0.1763, |
|
"rewards/chosen": 1.2404173612594604, |
|
"rewards/margins": 8.863131523132324, |
|
"rewards/rejected": -7.622714042663574, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 27.128496170043945, |
|
"kl": 0.0, |
|
"learning_rate": 4.438122332859175e-07, |
|
"logps/chosen": -288.37420654296875, |
|
"logps/rejected": -434.837158203125, |
|
"loss": 0.2103, |
|
"rewards/chosen": 0.9712983965873718, |
|
"rewards/margins": 8.523191452026367, |
|
"rewards/rejected": -7.551893711090088, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 24.96108627319336, |
|
"kl": 0.0, |
|
"learning_rate": 4.420341394025605e-07, |
|
"logps/chosen": -225.9550018310547, |
|
"logps/rejected": -397.37725830078125, |
|
"loss": 0.1946, |
|
"rewards/chosen": 0.9277496337890625, |
|
"rewards/margins": 8.359576225280762, |
|
"rewards/rejected": -7.431826591491699, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 27.4498348236084, |
|
"kl": 0.0, |
|
"learning_rate": 4.4025604551920335e-07, |
|
"logps/chosen": -326.1064453125, |
|
"logps/rejected": -457.33917236328125, |
|
"loss": 0.1974, |
|
"rewards/chosen": 0.9113430976867676, |
|
"rewards/margins": 9.018231391906738, |
|
"rewards/rejected": -8.106887817382812, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 26.263656616210938, |
|
"kl": 0.0, |
|
"learning_rate": 4.384779516358463e-07, |
|
"logps/chosen": -264.4283752441406, |
|
"logps/rejected": -424.5361328125, |
|
"loss": 0.2007, |
|
"rewards/chosen": 0.9189130663871765, |
|
"rewards/margins": 8.478607177734375, |
|
"rewards/rejected": -7.559694766998291, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 32.865238189697266, |
|
"kl": 0.0, |
|
"learning_rate": 4.366998577524893e-07, |
|
"logps/chosen": -321.8217468261719, |
|
"logps/rejected": -422.74346923828125, |
|
"loss": 0.1828, |
|
"rewards/chosen": 1.213265299797058, |
|
"rewards/margins": 9.324636459350586, |
|
"rewards/rejected": -8.111371994018555, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 25.276630401611328, |
|
"kl": 0.0, |
|
"learning_rate": 4.3492176386913227e-07, |
|
"logps/chosen": -286.50762939453125, |
|
"logps/rejected": -416.09246826171875, |
|
"loss": 0.1918, |
|
"rewards/chosen": 1.121449589729309, |
|
"rewards/margins": 8.465108871459961, |
|
"rewards/rejected": -7.343659400939941, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 33.83981704711914, |
|
"kl": 0.0, |
|
"learning_rate": 4.3314366998577524e-07, |
|
"logps/chosen": -310.34197998046875, |
|
"logps/rejected": -378.6121826171875, |
|
"loss": 0.2018, |
|
"rewards/chosen": 1.2548576593399048, |
|
"rewards/margins": 8.125253677368164, |
|
"rewards/rejected": -6.870396614074707, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 33.48633575439453, |
|
"kl": 0.0, |
|
"learning_rate": 4.313655761024182e-07, |
|
"logps/chosen": -281.07489013671875, |
|
"logps/rejected": -429.463134765625, |
|
"loss": 0.193, |
|
"rewards/chosen": 1.1550464630126953, |
|
"rewards/margins": 9.02354907989502, |
|
"rewards/rejected": -7.868502616882324, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 24.945789337158203, |
|
"kl": 0.0, |
|
"learning_rate": 4.2958748221906114e-07, |
|
"logps/chosen": -272.5113830566406, |
|
"logps/rejected": -378.4676513671875, |
|
"loss": 0.1983, |
|
"rewards/chosen": 1.260040044784546, |
|
"rewards/margins": 8.773920059204102, |
|
"rewards/rejected": -7.513879299163818, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 27.782686233520508, |
|
"kl": 0.0, |
|
"learning_rate": 4.278093883357041e-07, |
|
"logps/chosen": -265.1444396972656, |
|
"logps/rejected": -415.40264892578125, |
|
"loss": 0.1671, |
|
"rewards/chosen": 1.351818323135376, |
|
"rewards/margins": 9.423505783081055, |
|
"rewards/rejected": -8.071688652038574, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 25.29680824279785, |
|
"kl": 0.0, |
|
"learning_rate": 4.260312944523471e-07, |
|
"logps/chosen": -320.04193115234375, |
|
"logps/rejected": -462.45574951171875, |
|
"loss": 0.1719, |
|
"rewards/chosen": 1.2879645824432373, |
|
"rewards/margins": 9.490580558776855, |
|
"rewards/rejected": -8.202616691589355, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 24.49195671081543, |
|
"kl": 0.0, |
|
"learning_rate": 4.2425320056899e-07, |
|
"logps/chosen": -289.80413818359375, |
|
"logps/rejected": -404.28094482421875, |
|
"loss": 0.1993, |
|
"rewards/chosen": 1.0154571533203125, |
|
"rewards/margins": 8.819131851196289, |
|
"rewards/rejected": -7.803675174713135, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 26.57324981689453, |
|
"kl": 0.0, |
|
"learning_rate": 4.22475106685633e-07, |
|
"logps/chosen": -249.9967041015625, |
|
"logps/rejected": -436.7953186035156, |
|
"loss": 0.1902, |
|
"rewards/chosen": 1.0482652187347412, |
|
"rewards/margins": 9.115781784057617, |
|
"rewards/rejected": -8.067517280578613, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 28.738710403442383, |
|
"kl": 0.0, |
|
"learning_rate": 4.2069701280227595e-07, |
|
"logps/chosen": -241.308349609375, |
|
"logps/rejected": -425.63970947265625, |
|
"loss": 0.2145, |
|
"rewards/chosen": 0.9155824780464172, |
|
"rewards/margins": 8.491414070129395, |
|
"rewards/rejected": -7.575830936431885, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 26.93291664123535, |
|
"kl": 0.0, |
|
"learning_rate": 4.189189189189189e-07, |
|
"logps/chosen": -298.3195495605469, |
|
"logps/rejected": -429.3912048339844, |
|
"loss": 0.1832, |
|
"rewards/chosen": 1.0986849069595337, |
|
"rewards/margins": 9.5584716796875, |
|
"rewards/rejected": -8.459787368774414, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 28.49407386779785, |
|
"kl": 0.0, |
|
"learning_rate": 4.1714082503556185e-07, |
|
"logps/chosen": -289.1352844238281, |
|
"logps/rejected": -421.0469665527344, |
|
"loss": 0.1782, |
|
"rewards/chosen": 0.9938220977783203, |
|
"rewards/margins": 9.06061840057373, |
|
"rewards/rejected": -8.066795349121094, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 23.859844207763672, |
|
"kl": 0.0, |
|
"learning_rate": 4.153627311522048e-07, |
|
"logps/chosen": -259.1231689453125, |
|
"logps/rejected": -452.96746826171875, |
|
"loss": 0.1904, |
|
"rewards/chosen": 0.8238496780395508, |
|
"rewards/margins": 9.638608932495117, |
|
"rewards/rejected": -8.814759254455566, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 32.167110443115234, |
|
"kl": 0.0, |
|
"learning_rate": 4.135846372688478e-07, |
|
"logps/chosen": -295.9509582519531, |
|
"logps/rejected": -448.93377685546875, |
|
"loss": 0.1753, |
|
"rewards/chosen": 0.963021457195282, |
|
"rewards/margins": 9.281633377075195, |
|
"rewards/rejected": -8.318610191345215, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 27.581504821777344, |
|
"kl": 0.0, |
|
"learning_rate": 4.1180654338549077e-07, |
|
"logps/chosen": -281.5043029785156, |
|
"logps/rejected": -388.4980163574219, |
|
"loss": 0.2066, |
|
"rewards/chosen": 1.0012767314910889, |
|
"rewards/margins": 8.807787895202637, |
|
"rewards/rejected": -7.806510925292969, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 29.895170211791992, |
|
"kl": 0.0, |
|
"learning_rate": 4.100284495021337e-07, |
|
"logps/chosen": -287.0378723144531, |
|
"logps/rejected": -427.29864501953125, |
|
"loss": 0.2007, |
|
"rewards/chosen": 0.8726502656936646, |
|
"rewards/margins": 9.106569290161133, |
|
"rewards/rejected": -8.233919143676758, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 24.92208480834961, |
|
"kl": 0.0, |
|
"learning_rate": 4.082503556187766e-07, |
|
"logps/chosen": -268.7417297363281, |
|
"logps/rejected": -411.2549743652344, |
|
"loss": 0.1975, |
|
"rewards/chosen": 0.902958869934082, |
|
"rewards/margins": 8.691009521484375, |
|
"rewards/rejected": -7.788050174713135, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 30.231552124023438, |
|
"kl": 0.0, |
|
"learning_rate": 4.064722617354196e-07, |
|
"logps/chosen": -286.7518310546875, |
|
"logps/rejected": -451.4786071777344, |
|
"loss": 0.1867, |
|
"rewards/chosen": 1.2976207733154297, |
|
"rewards/margins": 9.737428665161133, |
|
"rewards/rejected": -8.43980884552002, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 24.8756046295166, |
|
"kl": 0.0, |
|
"learning_rate": 4.0469416785206256e-07, |
|
"logps/chosen": -282.55706787109375, |
|
"logps/rejected": -433.0348205566406, |
|
"loss": 0.1846, |
|
"rewards/chosen": 1.047466516494751, |
|
"rewards/margins": 9.786076545715332, |
|
"rewards/rejected": -8.738609313964844, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 29.7546329498291, |
|
"kl": 0.0, |
|
"learning_rate": 4.0291607396870553e-07, |
|
"logps/chosen": -285.54852294921875, |
|
"logps/rejected": -450.5826110839844, |
|
"loss": 0.1708, |
|
"rewards/chosen": 1.2604820728302002, |
|
"rewards/margins": 9.911845207214355, |
|
"rewards/rejected": -8.651362419128418, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 26.89280891418457, |
|
"kl": 0.0, |
|
"learning_rate": 4.011379800853485e-07, |
|
"logps/chosen": -274.69195556640625, |
|
"logps/rejected": -415.19989013671875, |
|
"loss": 0.1939, |
|
"rewards/chosen": 1.1895002126693726, |
|
"rewards/margins": 9.175015449523926, |
|
"rewards/rejected": -7.985515594482422, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 49.50102996826172, |
|
"kl": 0.0, |
|
"learning_rate": 3.993598862019915e-07, |
|
"logps/chosen": -257.03985595703125, |
|
"logps/rejected": -437.6024475097656, |
|
"loss": 0.179, |
|
"rewards/chosen": 1.1160862445831299, |
|
"rewards/margins": 10.291946411132812, |
|
"rewards/rejected": -9.175861358642578, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 23.507465362548828, |
|
"kl": 0.0, |
|
"learning_rate": 3.975817923186344e-07, |
|
"logps/chosen": -244.9132537841797, |
|
"logps/rejected": -417.92010498046875, |
|
"loss": 0.1943, |
|
"rewards/chosen": 0.9916771054267883, |
|
"rewards/margins": 8.110875129699707, |
|
"rewards/rejected": -7.119197845458984, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 23.709091186523438, |
|
"kl": 0.0, |
|
"learning_rate": 3.9580369843527737e-07, |
|
"logps/chosen": -266.93243408203125, |
|
"logps/rejected": -474.86004638671875, |
|
"loss": 0.1754, |
|
"rewards/chosen": 1.332706093788147, |
|
"rewards/margins": 9.872304916381836, |
|
"rewards/rejected": -8.53959846496582, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 28.132789611816406, |
|
"kl": 0.0, |
|
"learning_rate": 3.940256045519203e-07, |
|
"logps/chosen": -269.0207824707031, |
|
"logps/rejected": -441.16259765625, |
|
"loss": 0.1764, |
|
"rewards/chosen": 1.136040449142456, |
|
"rewards/margins": 9.543194770812988, |
|
"rewards/rejected": -8.407154083251953, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 27.44135284423828, |
|
"kl": 0.0, |
|
"learning_rate": 3.9224751066856327e-07, |
|
"logps/chosen": -261.8495178222656, |
|
"logps/rejected": -416.13372802734375, |
|
"loss": 0.2019, |
|
"rewards/chosen": 1.0264383554458618, |
|
"rewards/margins": 9.626348495483398, |
|
"rewards/rejected": -8.599910736083984, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 34.613067626953125, |
|
"kl": 0.0, |
|
"learning_rate": 3.9046941678520624e-07, |
|
"logps/chosen": -305.896240234375, |
|
"logps/rejected": -436.7113342285156, |
|
"loss": 0.2045, |
|
"rewards/chosen": 1.227920651435852, |
|
"rewards/margins": 9.572298049926758, |
|
"rewards/rejected": -8.344377517700195, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 31.94422721862793, |
|
"kl": 0.0, |
|
"learning_rate": 3.886913229018492e-07, |
|
"logps/chosen": -275.5303955078125, |
|
"logps/rejected": -421.15020751953125, |
|
"loss": 0.2019, |
|
"rewards/chosen": 1.0201870203018188, |
|
"rewards/margins": 9.202049255371094, |
|
"rewards/rejected": -8.181862831115723, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 24.941471099853516, |
|
"kl": 0.0, |
|
"learning_rate": 3.8691322901849213e-07, |
|
"logps/chosen": -309.7427978515625, |
|
"logps/rejected": -456.80987548828125, |
|
"loss": 0.1867, |
|
"rewards/chosen": 1.4211246967315674, |
|
"rewards/margins": 9.695344924926758, |
|
"rewards/rejected": -8.274219512939453, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 25.9896240234375, |
|
"kl": 0.0, |
|
"learning_rate": 3.851351351351351e-07, |
|
"logps/chosen": -285.4349365234375, |
|
"logps/rejected": -400.69219970703125, |
|
"loss": 0.1817, |
|
"rewards/chosen": 1.505924105644226, |
|
"rewards/margins": 8.47026538848877, |
|
"rewards/rejected": -6.964341640472412, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 29.482158660888672, |
|
"kl": 0.0, |
|
"learning_rate": 3.833570412517781e-07, |
|
"logps/chosen": -262.8547058105469, |
|
"logps/rejected": -404.0810546875, |
|
"loss": 0.1838, |
|
"rewards/chosen": 1.2003206014633179, |
|
"rewards/margins": 8.89756965637207, |
|
"rewards/rejected": -7.697249412536621, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 24.927303314208984, |
|
"kl": 0.0, |
|
"learning_rate": 3.8157894736842105e-07, |
|
"logps/chosen": -279.3279724121094, |
|
"logps/rejected": -426.81646728515625, |
|
"loss": 0.1816, |
|
"rewards/chosen": 1.2700178623199463, |
|
"rewards/margins": 9.352417945861816, |
|
"rewards/rejected": -8.082399368286133, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 19.47848892211914, |
|
"kl": 0.0, |
|
"learning_rate": 3.7980085348506403e-07, |
|
"logps/chosen": -254.8472137451172, |
|
"logps/rejected": -410.69219970703125, |
|
"loss": 0.1617, |
|
"rewards/chosen": 1.251443862915039, |
|
"rewards/margins": 9.37706184387207, |
|
"rewards/rejected": -8.125617980957031, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 27.02752685546875, |
|
"kl": 0.0, |
|
"learning_rate": 3.7802275960170695e-07, |
|
"logps/chosen": -326.91485595703125, |
|
"logps/rejected": -427.81121826171875, |
|
"loss": 0.1869, |
|
"rewards/chosen": 1.2399226427078247, |
|
"rewards/margins": 9.168660163879395, |
|
"rewards/rejected": -7.928736686706543, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 23.25274658203125, |
|
"kl": 0.0, |
|
"learning_rate": 3.7624466571834987e-07, |
|
"logps/chosen": -263.9060363769531, |
|
"logps/rejected": -486.6805114746094, |
|
"loss": 0.1867, |
|
"rewards/chosen": 1.059422254562378, |
|
"rewards/margins": 9.522220611572266, |
|
"rewards/rejected": -8.462798118591309, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 28.608474731445312, |
|
"kl": 0.0, |
|
"learning_rate": 3.7446657183499284e-07, |
|
"logps/chosen": -294.48590087890625, |
|
"logps/rejected": -468.9127502441406, |
|
"loss": 0.1785, |
|
"rewards/chosen": 1.3862662315368652, |
|
"rewards/margins": 10.08003044128418, |
|
"rewards/rejected": -8.693764686584473, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 28.52597999572754, |
|
"kl": 0.0, |
|
"learning_rate": 3.726884779516358e-07, |
|
"logps/chosen": -303.76116943359375, |
|
"logps/rejected": -385.89471435546875, |
|
"loss": 0.1849, |
|
"rewards/chosen": 0.8810665011405945, |
|
"rewards/margins": 8.036872863769531, |
|
"rewards/rejected": -7.155806064605713, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 26.567203521728516, |
|
"kl": 0.0, |
|
"learning_rate": 3.709103840682788e-07, |
|
"logps/chosen": -300.5899963378906, |
|
"logps/rejected": -455.53887939453125, |
|
"loss": 0.1752, |
|
"rewards/chosen": 1.6550683975219727, |
|
"rewards/margins": 9.88080883026123, |
|
"rewards/rejected": -8.225740432739258, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 26.40464973449707, |
|
"kl": 0.0, |
|
"learning_rate": 3.6913229018492176e-07, |
|
"logps/chosen": -291.1101989746094, |
|
"logps/rejected": -429.8409118652344, |
|
"loss": 0.1738, |
|
"rewards/chosen": 1.6012027263641357, |
|
"rewards/margins": 10.346236228942871, |
|
"rewards/rejected": -8.745033264160156, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 25.405059814453125, |
|
"kl": 0.0, |
|
"learning_rate": 3.6735419630156474e-07, |
|
"logps/chosen": -277.75, |
|
"logps/rejected": -444.23687744140625, |
|
"loss": 0.1632, |
|
"rewards/chosen": 1.4320881366729736, |
|
"rewards/margins": 10.243904113769531, |
|
"rewards/rejected": -8.811816215515137, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 26.102277755737305, |
|
"kl": 0.0, |
|
"learning_rate": 3.655761024182077e-07, |
|
"logps/chosen": -275.7671813964844, |
|
"logps/rejected": -451.432861328125, |
|
"loss": 0.1818, |
|
"rewards/chosen": 1.5062631368637085, |
|
"rewards/margins": 9.949407577514648, |
|
"rewards/rejected": -8.443144798278809, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 27.81848907470703, |
|
"kl": 0.0, |
|
"learning_rate": 3.637980085348506e-07, |
|
"logps/chosen": -285.43463134765625, |
|
"logps/rejected": -456.70257568359375, |
|
"loss": 0.1667, |
|
"rewards/chosen": 1.1070717573165894, |
|
"rewards/margins": 10.036450386047363, |
|
"rewards/rejected": -8.9293794631958, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 26.733715057373047, |
|
"kl": 0.0, |
|
"learning_rate": 3.6201991465149355e-07, |
|
"logps/chosen": -275.365478515625, |
|
"logps/rejected": -447.39874267578125, |
|
"loss": 0.1584, |
|
"rewards/chosen": 1.669955849647522, |
|
"rewards/margins": 9.940733909606934, |
|
"rewards/rejected": -8.27077865600586, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 28.002460479736328, |
|
"kl": 0.0, |
|
"learning_rate": 3.602418207681365e-07, |
|
"logps/chosen": -275.7063903808594, |
|
"logps/rejected": -432.69097900390625, |
|
"loss": 0.1961, |
|
"rewards/chosen": 1.1493828296661377, |
|
"rewards/margins": 9.39341926574707, |
|
"rewards/rejected": -8.244035720825195, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 25.290157318115234, |
|
"kl": 0.0, |
|
"learning_rate": 3.584637268847795e-07, |
|
"logps/chosen": -276.50872802734375, |
|
"logps/rejected": -427.5359802246094, |
|
"loss": 0.1825, |
|
"rewards/chosen": 1.6107838153839111, |
|
"rewards/margins": 10.058174133300781, |
|
"rewards/rejected": -8.447389602661133, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 27.16476058959961, |
|
"kl": 0.0, |
|
"learning_rate": 3.5668563300142247e-07, |
|
"logps/chosen": -259.827880859375, |
|
"logps/rejected": -409.2623596191406, |
|
"loss": 0.1808, |
|
"rewards/chosen": 1.3736449480056763, |
|
"rewards/margins": 9.484289169311523, |
|
"rewards/rejected": -8.110644340515137, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 28.099790573120117, |
|
"kl": 0.0, |
|
"learning_rate": 3.5490753911806545e-07, |
|
"logps/chosen": -279.4620666503906, |
|
"logps/rejected": -404.58563232421875, |
|
"loss": 0.185, |
|
"rewards/chosen": 1.319478988647461, |
|
"rewards/margins": 9.3961181640625, |
|
"rewards/rejected": -8.076639175415039, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 28.673524856567383, |
|
"kl": 0.0, |
|
"learning_rate": 3.5312944523470837e-07, |
|
"logps/chosen": -283.595703125, |
|
"logps/rejected": -403.426025390625, |
|
"loss": 0.1838, |
|
"rewards/chosen": 1.2756729125976562, |
|
"rewards/margins": 9.318367958068848, |
|
"rewards/rejected": -8.042695999145508, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 24.276857376098633, |
|
"kl": 0.0, |
|
"learning_rate": 3.5135135135135134e-07, |
|
"logps/chosen": -278.02337646484375, |
|
"logps/rejected": -390.92987060546875, |
|
"loss": 0.189, |
|
"rewards/chosen": 1.1147047281265259, |
|
"rewards/margins": 9.350360870361328, |
|
"rewards/rejected": -8.235654830932617, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 24.185720443725586, |
|
"kl": 0.0, |
|
"learning_rate": 3.495732574679943e-07, |
|
"logps/chosen": -317.0126953125, |
|
"logps/rejected": -441.64324951171875, |
|
"loss": 0.1709, |
|
"rewards/chosen": 1.2199360132217407, |
|
"rewards/margins": 10.402612686157227, |
|
"rewards/rejected": -9.182676315307617, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 21.31283187866211, |
|
"kl": 0.0, |
|
"learning_rate": 3.4779516358463724e-07, |
|
"logps/chosen": -299.08355712890625, |
|
"logps/rejected": -408.64312744140625, |
|
"loss": 0.1669, |
|
"rewards/chosen": 1.33742356300354, |
|
"rewards/margins": 9.711603164672852, |
|
"rewards/rejected": -8.374178886413574, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 27.55930519104004, |
|
"kl": 0.0, |
|
"learning_rate": 3.460170697012802e-07, |
|
"logps/chosen": -358.379150390625, |
|
"logps/rejected": -390.5495910644531, |
|
"loss": 0.1711, |
|
"rewards/chosen": 1.5247728824615479, |
|
"rewards/margins": 10.199918746948242, |
|
"rewards/rejected": -8.675146102905273, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 22.836402893066406, |
|
"kl": 0.0, |
|
"learning_rate": 3.442389758179232e-07, |
|
"logps/chosen": -265.90155029296875, |
|
"logps/rejected": -407.1119079589844, |
|
"loss": 0.1813, |
|
"rewards/chosen": 1.2516599893569946, |
|
"rewards/margins": 9.78925895690918, |
|
"rewards/rejected": -8.53760051727295, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 22.36507225036621, |
|
"kl": 0.0, |
|
"learning_rate": 3.424608819345661e-07, |
|
"logps/chosen": -272.3090515136719, |
|
"logps/rejected": -486.1949157714844, |
|
"loss": 0.1542, |
|
"rewards/chosen": 1.5906411409378052, |
|
"rewards/margins": 10.901646614074707, |
|
"rewards/rejected": -9.311005592346191, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 25.3875732421875, |
|
"kl": 0.0, |
|
"learning_rate": 3.406827880512091e-07, |
|
"logps/chosen": -263.0488586425781, |
|
"logps/rejected": -443.8939514160156, |
|
"loss": 0.1639, |
|
"rewards/chosen": 1.4088127613067627, |
|
"rewards/margins": 10.464536666870117, |
|
"rewards/rejected": -9.05572509765625, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 49.29353713989258, |
|
"kl": 0.0, |
|
"learning_rate": 3.3890469416785205e-07, |
|
"logps/chosen": -277.53131103515625, |
|
"logps/rejected": -402.04229736328125, |
|
"loss": 0.1994, |
|
"rewards/chosen": 1.2445075511932373, |
|
"rewards/margins": 9.684932708740234, |
|
"rewards/rejected": -8.440424919128418, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 21.34065055847168, |
|
"kl": 0.0, |
|
"learning_rate": 3.37126600284495e-07, |
|
"logps/chosen": -231.4390106201172, |
|
"logps/rejected": -402.5743103027344, |
|
"loss": 0.1749, |
|
"rewards/chosen": 1.636435866355896, |
|
"rewards/margins": 9.378325462341309, |
|
"rewards/rejected": -7.741889953613281, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 28.961084365844727, |
|
"kl": 0.0, |
|
"learning_rate": 3.35348506401138e-07, |
|
"logps/chosen": -255.66268920898438, |
|
"logps/rejected": -418.0624084472656, |
|
"loss": 0.1836, |
|
"rewards/chosen": 1.2639700174331665, |
|
"rewards/margins": 9.351852416992188, |
|
"rewards/rejected": -8.087881088256836, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 28.00955581665039, |
|
"kl": 0.0, |
|
"learning_rate": 3.335704125177809e-07, |
|
"logps/chosen": -257.3089599609375, |
|
"logps/rejected": -375.06072998046875, |
|
"loss": 0.1711, |
|
"rewards/chosen": 1.5304003953933716, |
|
"rewards/margins": 9.457714080810547, |
|
"rewards/rejected": -7.927313804626465, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 27.666671752929688, |
|
"kl": 0.0, |
|
"learning_rate": 3.3179231863442384e-07, |
|
"logps/chosen": -310.2152404785156, |
|
"logps/rejected": -440.0098571777344, |
|
"loss": 0.1814, |
|
"rewards/chosen": 1.2127114534378052, |
|
"rewards/margins": 10.335000991821289, |
|
"rewards/rejected": -9.122289657592773, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 22.16933822631836, |
|
"kl": 0.0, |
|
"learning_rate": 3.300142247510668e-07, |
|
"logps/chosen": -260.0948181152344, |
|
"logps/rejected": -431.2792053222656, |
|
"loss": 0.1578, |
|
"rewards/chosen": 1.282593011856079, |
|
"rewards/margins": 9.947371482849121, |
|
"rewards/rejected": -8.664778709411621, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 25.740550994873047, |
|
"kl": 0.0, |
|
"learning_rate": 3.282361308677098e-07, |
|
"logps/chosen": -293.82623291015625, |
|
"logps/rejected": -439.79083251953125, |
|
"loss": 0.1757, |
|
"rewards/chosen": 1.2234735488891602, |
|
"rewards/margins": 10.45723819732666, |
|
"rewards/rejected": -9.233762741088867, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 23.11900520324707, |
|
"kl": 0.0, |
|
"learning_rate": 3.2645803698435276e-07, |
|
"logps/chosen": -284.302490234375, |
|
"logps/rejected": -444.50164794921875, |
|
"loss": 0.1794, |
|
"rewards/chosen": 1.5410137176513672, |
|
"rewards/margins": 10.157942771911621, |
|
"rewards/rejected": -8.616928100585938, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 25.599075317382812, |
|
"kl": 0.0, |
|
"learning_rate": 3.2467994310099573e-07, |
|
"logps/chosen": -297.5779724121094, |
|
"logps/rejected": -416.24603271484375, |
|
"loss": 0.188, |
|
"rewards/chosen": 1.241750955581665, |
|
"rewards/margins": 10.071483612060547, |
|
"rewards/rejected": -8.829732894897461, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 32.84114074707031, |
|
"kl": 0.0, |
|
"learning_rate": 3.229018492176387e-07, |
|
"logps/chosen": -295.51666259765625, |
|
"logps/rejected": -381.6005859375, |
|
"loss": 0.1772, |
|
"rewards/chosen": 1.4009755849838257, |
|
"rewards/margins": 10.052778244018555, |
|
"rewards/rejected": -8.651803016662598, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 25.808273315429688, |
|
"kl": 0.0, |
|
"learning_rate": 3.211237553342817e-07, |
|
"logps/chosen": -252.7512969970703, |
|
"logps/rejected": -456.89727783203125, |
|
"loss": 0.1781, |
|
"rewards/chosen": 1.603581190109253, |
|
"rewards/margins": 11.127111434936523, |
|
"rewards/rejected": -9.523530960083008, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 22.859973907470703, |
|
"kl": 0.0, |
|
"learning_rate": 3.193456614509246e-07, |
|
"logps/chosen": -257.5951232910156, |
|
"logps/rejected": -429.0784606933594, |
|
"loss": 0.1679, |
|
"rewards/chosen": 1.4137523174285889, |
|
"rewards/margins": 10.75071907043457, |
|
"rewards/rejected": -9.336965560913086, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 20.03971290588379, |
|
"kl": 0.0, |
|
"learning_rate": 3.175675675675675e-07, |
|
"logps/chosen": -275.7456359863281, |
|
"logps/rejected": -432.35675048828125, |
|
"loss": 0.166, |
|
"rewards/chosen": 1.5532034635543823, |
|
"rewards/margins": 10.516412734985352, |
|
"rewards/rejected": -8.963208198547363, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 25.613548278808594, |
|
"kl": 0.0, |
|
"learning_rate": 3.157894736842105e-07, |
|
"logps/chosen": -259.3094482421875, |
|
"logps/rejected": -428.52545166015625, |
|
"loss": 0.1715, |
|
"rewards/chosen": 1.5239208936691284, |
|
"rewards/margins": 10.248991012573242, |
|
"rewards/rejected": -8.72507095336914, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 24.860313415527344, |
|
"kl": 0.0, |
|
"learning_rate": 3.1401137980085347e-07, |
|
"logps/chosen": -253.43408203125, |
|
"logps/rejected": -421.2649841308594, |
|
"loss": 0.1661, |
|
"rewards/chosen": 1.7669661045074463, |
|
"rewards/margins": 10.6715726852417, |
|
"rewards/rejected": -8.904605865478516, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 20.709943771362305, |
|
"kl": 0.0, |
|
"learning_rate": 3.1223328591749644e-07, |
|
"logps/chosen": -309.0604248046875, |
|
"logps/rejected": -446.6376953125, |
|
"loss": 0.1677, |
|
"rewards/chosen": 1.6336666345596313, |
|
"rewards/margins": 10.273514747619629, |
|
"rewards/rejected": -8.639848709106445, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 24.48786163330078, |
|
"kl": 0.0, |
|
"learning_rate": 3.104551920341394e-07, |
|
"logps/chosen": -258.40936279296875, |
|
"logps/rejected": -425.4024963378906, |
|
"loss": 0.1901, |
|
"rewards/chosen": 1.3902490139007568, |
|
"rewards/margins": 10.214468002319336, |
|
"rewards/rejected": -8.824217796325684, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 23.349231719970703, |
|
"kl": 0.0, |
|
"learning_rate": 3.0867709815078234e-07, |
|
"logps/chosen": -283.63848876953125, |
|
"logps/rejected": -453.16619873046875, |
|
"loss": 0.1636, |
|
"rewards/chosen": 1.5939074754714966, |
|
"rewards/margins": 10.390283584594727, |
|
"rewards/rejected": -8.79637622833252, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 19.494522094726562, |
|
"kl": 0.0, |
|
"learning_rate": 3.068990042674253e-07, |
|
"logps/chosen": -273.1427917480469, |
|
"logps/rejected": -421.3829650878906, |
|
"loss": 0.1513, |
|
"rewards/chosen": 1.827331304550171, |
|
"rewards/margins": 10.396958351135254, |
|
"rewards/rejected": -8.569626808166504, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 23.5523624420166, |
|
"kl": 0.0, |
|
"learning_rate": 3.051209103840683e-07, |
|
"logps/chosen": -246.75692749023438, |
|
"logps/rejected": -434.19219970703125, |
|
"loss": 0.1616, |
|
"rewards/chosen": 1.8926893472671509, |
|
"rewards/margins": 10.59654712677002, |
|
"rewards/rejected": -8.703857421875, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 25.01667022705078, |
|
"kl": 0.0, |
|
"learning_rate": 3.033428165007112e-07, |
|
"logps/chosen": -266.95745849609375, |
|
"logps/rejected": -419.81640625, |
|
"loss": 0.1713, |
|
"rewards/chosen": 1.3775568008422852, |
|
"rewards/margins": 9.493722915649414, |
|
"rewards/rejected": -8.116167068481445, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 23.61768341064453, |
|
"kl": 0.0, |
|
"learning_rate": 3.015647226173542e-07, |
|
"logps/chosen": -261.8187561035156, |
|
"logps/rejected": -449.9925842285156, |
|
"loss": 0.15, |
|
"rewards/chosen": 1.6472257375717163, |
|
"rewards/margins": 10.508228302001953, |
|
"rewards/rejected": -8.861001968383789, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 22.56501007080078, |
|
"kl": 0.0, |
|
"learning_rate": 2.9978662873399715e-07, |
|
"logps/chosen": -332.85504150390625, |
|
"logps/rejected": -378.87347412109375, |
|
"loss": 0.1669, |
|
"rewards/chosen": 1.8122440576553345, |
|
"rewards/margins": 10.017203330993652, |
|
"rewards/rejected": -8.20495891571045, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 20.667505264282227, |
|
"kl": 0.0, |
|
"learning_rate": 2.9800853485064007e-07, |
|
"logps/chosen": -276.8742980957031, |
|
"logps/rejected": -414.7447204589844, |
|
"loss": 0.1585, |
|
"rewards/chosen": 1.9667861461639404, |
|
"rewards/margins": 10.267745971679688, |
|
"rewards/rejected": -8.300959587097168, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 24.693187713623047, |
|
"kl": 0.0, |
|
"learning_rate": 2.9623044096728305e-07, |
|
"logps/chosen": -274.4808044433594, |
|
"logps/rejected": -429.78558349609375, |
|
"loss": 0.1546, |
|
"rewards/chosen": 1.594257116317749, |
|
"rewards/margins": 9.889884948730469, |
|
"rewards/rejected": -8.29562759399414, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 30.14167022705078, |
|
"kl": 0.0, |
|
"learning_rate": 2.94452347083926e-07, |
|
"logps/chosen": -272.5168762207031, |
|
"logps/rejected": -415.374755859375, |
|
"loss": 0.1694, |
|
"rewards/chosen": 1.5294649600982666, |
|
"rewards/margins": 10.039915084838867, |
|
"rewards/rejected": -8.51045036315918, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 22.99307632446289, |
|
"kl": 0.0, |
|
"learning_rate": 2.92674253200569e-07, |
|
"logps/chosen": -246.6560821533203, |
|
"logps/rejected": -418.26104736328125, |
|
"loss": 0.1584, |
|
"rewards/chosen": 1.566481113433838, |
|
"rewards/margins": 9.754480361938477, |
|
"rewards/rejected": -8.18799877166748, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 19.63002586364746, |
|
"kl": 0.0, |
|
"learning_rate": 2.9089615931721197e-07, |
|
"logps/chosen": -289.92266845703125, |
|
"logps/rejected": -431.01336669921875, |
|
"loss": 0.152, |
|
"rewards/chosen": 1.7927067279815674, |
|
"rewards/margins": 9.82994270324707, |
|
"rewards/rejected": -8.037236213684082, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 21.843643188476562, |
|
"kl": 0.0, |
|
"learning_rate": 2.8911806543385494e-07, |
|
"logps/chosen": -272.4095764160156, |
|
"logps/rejected": -384.18939208984375, |
|
"loss": 0.1658, |
|
"rewards/chosen": 1.6324083805084229, |
|
"rewards/margins": 9.954187393188477, |
|
"rewards/rejected": -8.321779251098633, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 27.172988891601562, |
|
"kl": 0.0, |
|
"learning_rate": 2.873399715504978e-07, |
|
"logps/chosen": -260.12127685546875, |
|
"logps/rejected": -388.8544616699219, |
|
"loss": 0.163, |
|
"rewards/chosen": 1.7627140283584595, |
|
"rewards/margins": 10.121767044067383, |
|
"rewards/rejected": -8.359052658081055, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 23.648014068603516, |
|
"kl": 0.0, |
|
"learning_rate": 2.855618776671408e-07, |
|
"logps/chosen": -262.92486572265625, |
|
"logps/rejected": -410.4188537597656, |
|
"loss": 0.1686, |
|
"rewards/chosen": 1.493422508239746, |
|
"rewards/margins": 9.909089088439941, |
|
"rewards/rejected": -8.415667533874512, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 23.27745819091797, |
|
"kl": 0.0, |
|
"learning_rate": 2.8378378378378376e-07, |
|
"logps/chosen": -288.63421630859375, |
|
"logps/rejected": -406.29974365234375, |
|
"loss": 0.1504, |
|
"rewards/chosen": 1.8163105249404907, |
|
"rewards/margins": 10.033960342407227, |
|
"rewards/rejected": -8.217649459838867, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 22.61689567565918, |
|
"kl": 0.0, |
|
"learning_rate": 2.8200568990042673e-07, |
|
"logps/chosen": -246.67105102539062, |
|
"logps/rejected": -425.2874450683594, |
|
"loss": 0.1575, |
|
"rewards/chosen": 1.4416098594665527, |
|
"rewards/margins": 10.138015747070312, |
|
"rewards/rejected": -8.696405410766602, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 20.569795608520508, |
|
"kl": 0.0, |
|
"learning_rate": 2.802275960170697e-07, |
|
"logps/chosen": -291.814208984375, |
|
"logps/rejected": -399.8885192871094, |
|
"loss": 0.1491, |
|
"rewards/chosen": 1.771124243736267, |
|
"rewards/margins": 9.939413070678711, |
|
"rewards/rejected": -8.168289184570312, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 27.513673782348633, |
|
"kl": 0.0, |
|
"learning_rate": 2.784495021337127e-07, |
|
"logps/chosen": -264.59918212890625, |
|
"logps/rejected": -426.15185546875, |
|
"loss": 0.1835, |
|
"rewards/chosen": 1.4606132507324219, |
|
"rewards/margins": 9.935892105102539, |
|
"rewards/rejected": -8.475278854370117, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 25.520795822143555, |
|
"kl": 0.0, |
|
"learning_rate": 2.766714082503556e-07, |
|
"logps/chosen": -298.03466796875, |
|
"logps/rejected": -420.57635498046875, |
|
"loss": 0.1614, |
|
"rewards/chosen": 1.6418640613555908, |
|
"rewards/margins": 10.411275863647461, |
|
"rewards/rejected": -8.769411087036133, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 25.321683883666992, |
|
"kl": 0.0, |
|
"learning_rate": 2.7489331436699857e-07, |
|
"logps/chosen": -278.0859069824219, |
|
"logps/rejected": -383.1441650390625, |
|
"loss": 0.1725, |
|
"rewards/chosen": 1.6416689157485962, |
|
"rewards/margins": 9.834056854248047, |
|
"rewards/rejected": -8.192388534545898, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 18.801525115966797, |
|
"kl": 0.0, |
|
"learning_rate": 2.7311522048364154e-07, |
|
"logps/chosen": -293.8526306152344, |
|
"logps/rejected": -464.01580810546875, |
|
"loss": 0.1438, |
|
"rewards/chosen": 1.7998111248016357, |
|
"rewards/margins": 11.001166343688965, |
|
"rewards/rejected": -9.20135498046875, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 31.995546340942383, |
|
"kl": 0.0, |
|
"learning_rate": 2.7133712660028446e-07, |
|
"logps/chosen": -266.13580322265625, |
|
"logps/rejected": -452.4052734375, |
|
"loss": 0.1547, |
|
"rewards/chosen": 1.85707688331604, |
|
"rewards/margins": 10.742141723632812, |
|
"rewards/rejected": -8.885065078735352, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 28.300655364990234, |
|
"kl": 0.0, |
|
"learning_rate": 2.6955903271692744e-07, |
|
"logps/chosen": -276.36151123046875, |
|
"logps/rejected": -424.6527404785156, |
|
"loss": 0.1654, |
|
"rewards/chosen": 1.4731123447418213, |
|
"rewards/margins": 10.520303726196289, |
|
"rewards/rejected": -9.04719066619873, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 21.696815490722656, |
|
"kl": 0.0, |
|
"learning_rate": 2.677809388335704e-07, |
|
"logps/chosen": -248.7739715576172, |
|
"logps/rejected": -468.64617919921875, |
|
"loss": 0.14, |
|
"rewards/chosen": 1.3544524908065796, |
|
"rewards/margins": 10.655045509338379, |
|
"rewards/rejected": -9.300593376159668, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 26.037267684936523, |
|
"kl": 0.0, |
|
"learning_rate": 2.6600284495021333e-07, |
|
"logps/chosen": -278.1371765136719, |
|
"logps/rejected": -395.54315185546875, |
|
"loss": 0.1708, |
|
"rewards/chosen": 1.6430387496948242, |
|
"rewards/margins": 9.990675926208496, |
|
"rewards/rejected": -8.347638130187988, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 27.351716995239258, |
|
"kl": 0.0, |
|
"learning_rate": 2.642247510668563e-07, |
|
"logps/chosen": -296.94207763671875, |
|
"logps/rejected": -418.74951171875, |
|
"loss": 0.159, |
|
"rewards/chosen": 1.9864873886108398, |
|
"rewards/margins": 11.01016902923584, |
|
"rewards/rejected": -9.023681640625, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 27.478801727294922, |
|
"kl": 0.0, |
|
"learning_rate": 2.624466571834993e-07, |
|
"logps/chosen": -288.36370849609375, |
|
"logps/rejected": -431.9353942871094, |
|
"loss": 0.1483, |
|
"rewards/chosen": 2.1340115070343018, |
|
"rewards/margins": 11.531509399414062, |
|
"rewards/rejected": -9.397497177124023, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 30.95049285888672, |
|
"kl": 0.0, |
|
"learning_rate": 2.6066856330014225e-07, |
|
"logps/chosen": -252.1579132080078, |
|
"logps/rejected": -424.7654724121094, |
|
"loss": 0.1578, |
|
"rewards/chosen": 1.2540996074676514, |
|
"rewards/margins": 10.03384780883789, |
|
"rewards/rejected": -8.77974796295166, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 25.74098777770996, |
|
"kl": 0.0, |
|
"learning_rate": 2.5889046941678523e-07, |
|
"logps/chosen": -260.67315673828125, |
|
"logps/rejected": -398.57952880859375, |
|
"loss": 0.1554, |
|
"rewards/chosen": 1.9767462015151978, |
|
"rewards/margins": 10.994711875915527, |
|
"rewards/rejected": -9.017965316772461, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 24.75625991821289, |
|
"kl": 0.0, |
|
"learning_rate": 2.5711237553342815e-07, |
|
"logps/chosen": -297.8096923828125, |
|
"logps/rejected": -454.16265869140625, |
|
"loss": 0.1601, |
|
"rewards/chosen": 1.8933601379394531, |
|
"rewards/margins": 10.956032752990723, |
|
"rewards/rejected": -9.062673568725586, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 19.37633514404297, |
|
"kl": 0.0, |
|
"learning_rate": 2.5533428165007107e-07, |
|
"logps/chosen": -246.20755004882812, |
|
"logps/rejected": -417.218994140625, |
|
"loss": 0.1625, |
|
"rewards/chosen": 1.7149181365966797, |
|
"rewards/margins": 10.08183479309082, |
|
"rewards/rejected": -8.366915702819824, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 22.729887008666992, |
|
"kl": 0.0, |
|
"learning_rate": 2.5355618776671404e-07, |
|
"logps/chosen": -295.47833251953125, |
|
"logps/rejected": -383.05230712890625, |
|
"loss": 0.1556, |
|
"rewards/chosen": 1.9083483219146729, |
|
"rewards/margins": 10.142326354980469, |
|
"rewards/rejected": -8.233978271484375, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 20.790363311767578, |
|
"kl": 0.0, |
|
"learning_rate": 2.51778093883357e-07, |
|
"logps/chosen": -252.0298309326172, |
|
"logps/rejected": -439.210205078125, |
|
"loss": 0.1419, |
|
"rewards/chosen": 1.6935409307479858, |
|
"rewards/margins": 10.923540115356445, |
|
"rewards/rejected": -9.229999542236328, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 20.501516342163086, |
|
"kl": 0.0, |
|
"learning_rate": 2.5e-07, |
|
"logps/chosen": -305.12030029296875, |
|
"logps/rejected": -444.84796142578125, |
|
"loss": 0.1369, |
|
"rewards/chosen": 1.9432222843170166, |
|
"rewards/margins": 11.106147766113281, |
|
"rewards/rejected": -9.162925720214844, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 18.870508193969727, |
|
"kl": 0.0, |
|
"learning_rate": 2.4822190611664296e-07, |
|
"logps/chosen": -276.5811462402344, |
|
"logps/rejected": -418.9603576660156, |
|
"loss": 0.155, |
|
"rewards/chosen": 1.7197465896606445, |
|
"rewards/margins": 10.493929862976074, |
|
"rewards/rejected": -8.774185180664062, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 29.126863479614258, |
|
"kl": 0.0, |
|
"learning_rate": 2.4644381223328594e-07, |
|
"logps/chosen": -265.5671691894531, |
|
"logps/rejected": -456.1929626464844, |
|
"loss": 0.1591, |
|
"rewards/chosen": 1.5650779008865356, |
|
"rewards/margins": 10.798846244812012, |
|
"rewards/rejected": -9.233770370483398, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 24.848697662353516, |
|
"kl": 0.0, |
|
"learning_rate": 2.4466571834992886e-07, |
|
"logps/chosen": -254.2496337890625, |
|
"logps/rejected": -432.71038818359375, |
|
"loss": 0.1563, |
|
"rewards/chosen": 1.713888168334961, |
|
"rewards/margins": 10.464970588684082, |
|
"rewards/rejected": -8.751081466674805, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 25.702816009521484, |
|
"kl": 0.0, |
|
"learning_rate": 2.4288762446657183e-07, |
|
"logps/chosen": -275.37030029296875, |
|
"logps/rejected": -427.5972595214844, |
|
"loss": 0.1629, |
|
"rewards/chosen": 1.6576436758041382, |
|
"rewards/margins": 10.351664543151855, |
|
"rewards/rejected": -8.69402027130127, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 22.683273315429688, |
|
"kl": 0.0, |
|
"learning_rate": 2.411095305832148e-07, |
|
"logps/chosen": -282.93426513671875, |
|
"logps/rejected": -441.66064453125, |
|
"loss": 0.1249, |
|
"rewards/chosen": 1.9189395904541016, |
|
"rewards/margins": 11.167387008666992, |
|
"rewards/rejected": -9.248448371887207, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 24.9903564453125, |
|
"kl": 0.0, |
|
"learning_rate": 2.393314366998578e-07, |
|
"logps/chosen": -294.500732421875, |
|
"logps/rejected": -424.95391845703125, |
|
"loss": 0.1396, |
|
"rewards/chosen": 1.8850313425064087, |
|
"rewards/margins": 10.923759460449219, |
|
"rewards/rejected": -9.038727760314941, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 22.959312438964844, |
|
"kl": 0.0, |
|
"learning_rate": 2.375533428165007e-07, |
|
"logps/chosen": -276.75775146484375, |
|
"logps/rejected": -445.98052978515625, |
|
"loss": 0.1612, |
|
"rewards/chosen": 1.7431560754776, |
|
"rewards/margins": 10.924168586730957, |
|
"rewards/rejected": -9.181012153625488, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 21.280902862548828, |
|
"kl": 0.0, |
|
"learning_rate": 2.3577524893314365e-07, |
|
"logps/chosen": -266.2899475097656, |
|
"logps/rejected": -419.53594970703125, |
|
"loss": 0.1366, |
|
"rewards/chosen": 2.116002321243286, |
|
"rewards/margins": 10.943870544433594, |
|
"rewards/rejected": -8.82786750793457, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 23.134008407592773, |
|
"kl": 0.0, |
|
"learning_rate": 2.3399715504978662e-07, |
|
"logps/chosen": -295.0397033691406, |
|
"logps/rejected": -433.302978515625, |
|
"loss": 0.1485, |
|
"rewards/chosen": 1.6045787334442139, |
|
"rewards/margins": 10.714460372924805, |
|
"rewards/rejected": -9.109882354736328, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 21.066099166870117, |
|
"kl": 0.0, |
|
"learning_rate": 2.322190611664296e-07, |
|
"logps/chosen": -242.4779815673828, |
|
"logps/rejected": -413.7901306152344, |
|
"loss": 0.1463, |
|
"rewards/chosen": 1.6984901428222656, |
|
"rewards/margins": 10.702180862426758, |
|
"rewards/rejected": -9.003690719604492, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 23.61625099182129, |
|
"kl": 0.0, |
|
"learning_rate": 2.304409672830725e-07, |
|
"logps/chosen": -270.14599609375, |
|
"logps/rejected": -437.8699645996094, |
|
"loss": 0.1589, |
|
"rewards/chosen": 1.7722479104995728, |
|
"rewards/margins": 10.022378921508789, |
|
"rewards/rejected": -8.250131607055664, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 22.70294952392578, |
|
"kl": 0.0, |
|
"learning_rate": 2.2866287339971549e-07, |
|
"logps/chosen": -285.99383544921875, |
|
"logps/rejected": -425.30511474609375, |
|
"loss": 0.1263, |
|
"rewards/chosen": 2.163151502609253, |
|
"rewards/margins": 10.922938346862793, |
|
"rewards/rejected": -8.759786605834961, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 20.896127700805664, |
|
"kl": 0.0, |
|
"learning_rate": 2.2688477951635846e-07, |
|
"logps/chosen": -257.12646484375, |
|
"logps/rejected": -474.15789794921875, |
|
"loss": 0.1402, |
|
"rewards/chosen": 1.7868337631225586, |
|
"rewards/margins": 10.79575252532959, |
|
"rewards/rejected": -9.008918762207031, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 20.20517921447754, |
|
"kl": 0.0, |
|
"learning_rate": 2.251066856330014e-07, |
|
"logps/chosen": -281.36016845703125, |
|
"logps/rejected": -409.21807861328125, |
|
"loss": 0.1405, |
|
"rewards/chosen": 1.9572327136993408, |
|
"rewards/margins": 10.755359649658203, |
|
"rewards/rejected": -8.798127174377441, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 24.792766571044922, |
|
"kl": 0.0, |
|
"learning_rate": 2.2332859174964438e-07, |
|
"logps/chosen": -274.1449890136719, |
|
"logps/rejected": -440.4224548339844, |
|
"loss": 0.1612, |
|
"rewards/chosen": 1.4629716873168945, |
|
"rewards/margins": 10.279823303222656, |
|
"rewards/rejected": -8.816852569580078, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 25.996437072753906, |
|
"kl": 0.0, |
|
"learning_rate": 2.2155049786628733e-07, |
|
"logps/chosen": -258.3365783691406, |
|
"logps/rejected": -455.66571044921875, |
|
"loss": 0.1593, |
|
"rewards/chosen": 1.8357813358306885, |
|
"rewards/margins": 11.063421249389648, |
|
"rewards/rejected": -9.227640151977539, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 21.876049041748047, |
|
"kl": 0.0, |
|
"learning_rate": 2.1977240398293027e-07, |
|
"logps/chosen": -254.3147430419922, |
|
"logps/rejected": -438.74005126953125, |
|
"loss": 0.1293, |
|
"rewards/chosen": 1.6060841083526611, |
|
"rewards/margins": 10.991594314575195, |
|
"rewards/rejected": -9.38550853729248, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 27.55910301208496, |
|
"kl": 0.0, |
|
"learning_rate": 2.1799431009957325e-07, |
|
"logps/chosen": -275.89837646484375, |
|
"logps/rejected": -442.4541931152344, |
|
"loss": 0.1545, |
|
"rewards/chosen": 1.7877737283706665, |
|
"rewards/margins": 11.597868919372559, |
|
"rewards/rejected": -9.81009578704834, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 19.6149959564209, |
|
"kl": 0.0, |
|
"learning_rate": 2.1621621621621622e-07, |
|
"logps/chosen": -327.98602294921875, |
|
"logps/rejected": -448.33856201171875, |
|
"loss": 0.0995, |
|
"rewards/chosen": 2.8413889408111572, |
|
"rewards/margins": 11.999773025512695, |
|
"rewards/rejected": -9.158384323120117, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 20.440847396850586, |
|
"kl": 0.0, |
|
"learning_rate": 2.1443812233285914e-07, |
|
"logps/chosen": -273.84930419921875, |
|
"logps/rejected": -407.75018310546875, |
|
"loss": 0.1382, |
|
"rewards/chosen": 2.0720973014831543, |
|
"rewards/margins": 10.910283088684082, |
|
"rewards/rejected": -8.83818531036377, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 24.151046752929688, |
|
"kl": 0.0, |
|
"learning_rate": 2.1266002844950212e-07, |
|
"logps/chosen": -284.7245788574219, |
|
"logps/rejected": -433.08197021484375, |
|
"loss": 0.1285, |
|
"rewards/chosen": 2.2533042430877686, |
|
"rewards/margins": 11.679128646850586, |
|
"rewards/rejected": -9.425825119018555, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 24.470195770263672, |
|
"kl": 0.0, |
|
"learning_rate": 2.108819345661451e-07, |
|
"logps/chosen": -320.61468505859375, |
|
"logps/rejected": -442.22869873046875, |
|
"loss": 0.1429, |
|
"rewards/chosen": 1.9059406518936157, |
|
"rewards/margins": 11.600639343261719, |
|
"rewards/rejected": -9.69469928741455, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 26.81120491027832, |
|
"kl": 0.0, |
|
"learning_rate": 2.0910384068278806e-07, |
|
"logps/chosen": -257.6952209472656, |
|
"logps/rejected": -392.0545959472656, |
|
"loss": 0.1483, |
|
"rewards/chosen": 1.6184053421020508, |
|
"rewards/margins": 10.500303268432617, |
|
"rewards/rejected": -8.881898880004883, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 19.685026168823242, |
|
"kl": 0.0, |
|
"learning_rate": 2.0732574679943098e-07, |
|
"logps/chosen": -305.773681640625, |
|
"logps/rejected": -393.2521057128906, |
|
"loss": 0.1456, |
|
"rewards/chosen": 2.0023820400238037, |
|
"rewards/margins": 11.047486305236816, |
|
"rewards/rejected": -9.045104026794434, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 24.304468154907227, |
|
"kl": 0.0, |
|
"learning_rate": 2.0554765291607396e-07, |
|
"logps/chosen": -266.98773193359375, |
|
"logps/rejected": -409.9839782714844, |
|
"loss": 0.152, |
|
"rewards/chosen": 1.678897500038147, |
|
"rewards/margins": 10.779475212097168, |
|
"rewards/rejected": -9.100576400756836, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 23.530902862548828, |
|
"kl": 0.0, |
|
"learning_rate": 2.0376955903271693e-07, |
|
"logps/chosen": -268.0154724121094, |
|
"logps/rejected": -446.49041748046875, |
|
"loss": 0.1365, |
|
"rewards/chosen": 1.426351547241211, |
|
"rewards/margins": 10.987870216369629, |
|
"rewards/rejected": -9.561517715454102, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 25.060619354248047, |
|
"kl": 0.0, |
|
"learning_rate": 2.0199146514935988e-07, |
|
"logps/chosen": -304.2224426269531, |
|
"logps/rejected": -436.77734375, |
|
"loss": 0.1559, |
|
"rewards/chosen": 1.8927513360977173, |
|
"rewards/margins": 10.877464294433594, |
|
"rewards/rejected": -8.984713554382324, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 26.11311149597168, |
|
"kl": 0.0, |
|
"learning_rate": 2.0021337126600283e-07, |
|
"logps/chosen": -263.1839599609375, |
|
"logps/rejected": -427.57110595703125, |
|
"loss": 0.1546, |
|
"rewards/chosen": 1.825537919998169, |
|
"rewards/margins": 10.585159301757812, |
|
"rewards/rejected": -8.759620666503906, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 21.942873001098633, |
|
"kl": 0.0, |
|
"learning_rate": 1.984352773826458e-07, |
|
"logps/chosen": -278.3871765136719, |
|
"logps/rejected": -397.9619140625, |
|
"loss": 0.1374, |
|
"rewards/chosen": 1.7600288391113281, |
|
"rewards/margins": 10.372520446777344, |
|
"rewards/rejected": -8.612492561340332, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 21.323123931884766, |
|
"kl": 0.0, |
|
"learning_rate": 1.9665718349928875e-07, |
|
"logps/chosen": -238.81234741210938, |
|
"logps/rejected": -444.0513610839844, |
|
"loss": 0.1392, |
|
"rewards/chosen": 1.7811119556427002, |
|
"rewards/margins": 10.868303298950195, |
|
"rewards/rejected": -9.087190628051758, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 23.634305953979492, |
|
"kl": 0.0, |
|
"learning_rate": 1.9487908961593172e-07, |
|
"logps/chosen": -254.67422485351562, |
|
"logps/rejected": -439.06842041015625, |
|
"loss": 0.1281, |
|
"rewards/chosen": 1.955566167831421, |
|
"rewards/margins": 11.299899101257324, |
|
"rewards/rejected": -9.344332695007324, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 23.24001121520996, |
|
"kl": 0.0, |
|
"learning_rate": 1.931009957325747e-07, |
|
"logps/chosen": -261.807861328125, |
|
"logps/rejected": -439.89434814453125, |
|
"loss": 0.1331, |
|
"rewards/chosen": 1.958783745765686, |
|
"rewards/margins": 10.877193450927734, |
|
"rewards/rejected": -8.91840934753418, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 21.924964904785156, |
|
"kl": 0.0, |
|
"learning_rate": 1.9132290184921761e-07, |
|
"logps/chosen": -232.3147735595703, |
|
"logps/rejected": -428.94415283203125, |
|
"loss": 0.1509, |
|
"rewards/chosen": 1.5595568418502808, |
|
"rewards/margins": 10.47365951538086, |
|
"rewards/rejected": -8.914102554321289, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 22.37010383605957, |
|
"kl": 0.0, |
|
"learning_rate": 1.895448079658606e-07, |
|
"logps/chosen": -310.47308349609375, |
|
"logps/rejected": -423.1930236816406, |
|
"loss": 0.1239, |
|
"rewards/chosen": 2.3751728534698486, |
|
"rewards/margins": 11.746635437011719, |
|
"rewards/rejected": -9.371461868286133, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 18.277446746826172, |
|
"kl": 0.0, |
|
"learning_rate": 1.8776671408250356e-07, |
|
"logps/chosen": -238.3054656982422, |
|
"logps/rejected": -409.2549743652344, |
|
"loss": 0.1534, |
|
"rewards/chosen": 2.0489933490753174, |
|
"rewards/margins": 10.762951850891113, |
|
"rewards/rejected": -8.713958740234375, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 23.205121994018555, |
|
"kl": 0.0, |
|
"learning_rate": 1.859886201991465e-07, |
|
"logps/chosen": -312.9981994628906, |
|
"logps/rejected": -412.560791015625, |
|
"loss": 0.1551, |
|
"rewards/chosen": 1.8553714752197266, |
|
"rewards/margins": 10.774530410766602, |
|
"rewards/rejected": -8.919158935546875, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 22.926227569580078, |
|
"kl": 0.0, |
|
"learning_rate": 1.8421052631578946e-07, |
|
"logps/chosen": -326.86187744140625, |
|
"logps/rejected": -425.36798095703125, |
|
"loss": 0.1256, |
|
"rewards/chosen": 2.2971606254577637, |
|
"rewards/margins": 11.508152961730957, |
|
"rewards/rejected": -9.210992813110352, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 19.493038177490234, |
|
"kl": 0.0, |
|
"learning_rate": 1.8243243243243243e-07, |
|
"logps/chosen": -293.5108337402344, |
|
"logps/rejected": -433.5061950683594, |
|
"loss": 0.1287, |
|
"rewards/chosen": 1.9984190464019775, |
|
"rewards/margins": 11.403292655944824, |
|
"rewards/rejected": -9.40487289428711, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 20.409494400024414, |
|
"kl": 0.0, |
|
"learning_rate": 1.8065433854907538e-07, |
|
"logps/chosen": -254.408935546875, |
|
"logps/rejected": -463.6639099121094, |
|
"loss": 0.1377, |
|
"rewards/chosen": 2.059924602508545, |
|
"rewards/margins": 11.853521347045898, |
|
"rewards/rejected": -9.793596267700195, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 24.266460418701172, |
|
"kl": 0.0, |
|
"learning_rate": 1.7887624466571835e-07, |
|
"logps/chosen": -272.4609375, |
|
"logps/rejected": -444.1348571777344, |
|
"loss": 0.1345, |
|
"rewards/chosen": 1.9829723834991455, |
|
"rewards/margins": 11.607297897338867, |
|
"rewards/rejected": -9.6243257522583, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 22.047306060791016, |
|
"kl": 0.0, |
|
"learning_rate": 1.770981507823613e-07, |
|
"logps/chosen": -298.2177429199219, |
|
"logps/rejected": -434.88232421875, |
|
"loss": 0.1425, |
|
"rewards/chosen": 1.9892578125, |
|
"rewards/margins": 11.173238754272461, |
|
"rewards/rejected": -9.183981895446777, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 17.649913787841797, |
|
"kl": 0.0, |
|
"learning_rate": 1.7532005689900424e-07, |
|
"logps/chosen": -275.7359619140625, |
|
"logps/rejected": -409.9220275878906, |
|
"loss": 0.1268, |
|
"rewards/chosen": 1.9714463949203491, |
|
"rewards/margins": 10.847058296203613, |
|
"rewards/rejected": -8.875612258911133, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 21.967309951782227, |
|
"kl": 0.0, |
|
"learning_rate": 1.7354196301564722e-07, |
|
"logps/chosen": -281.526123046875, |
|
"logps/rejected": -468.4922790527344, |
|
"loss": 0.1484, |
|
"rewards/chosen": 1.7273063659667969, |
|
"rewards/margins": 11.125950813293457, |
|
"rewards/rejected": -9.39864444732666, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 24.07299041748047, |
|
"kl": 0.0, |
|
"learning_rate": 1.717638691322902e-07, |
|
"logps/chosen": -288.5100402832031, |
|
"logps/rejected": -438.12579345703125, |
|
"loss": 0.1292, |
|
"rewards/chosen": 2.2782421112060547, |
|
"rewards/margins": 11.389139175415039, |
|
"rewards/rejected": -9.110896110534668, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 24.698833465576172, |
|
"kl": 0.0, |
|
"learning_rate": 1.6998577524893314e-07, |
|
"logps/chosen": -262.21307373046875, |
|
"logps/rejected": -432.73095703125, |
|
"loss": 0.1417, |
|
"rewards/chosen": 1.5888358354568481, |
|
"rewards/margins": 11.009684562683105, |
|
"rewards/rejected": -9.42085075378418, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 19.981353759765625, |
|
"kl": 0.0, |
|
"learning_rate": 1.6820768136557609e-07, |
|
"logps/chosen": -248.55294799804688, |
|
"logps/rejected": -417.9847717285156, |
|
"loss": 0.142, |
|
"rewards/chosen": 2.168029308319092, |
|
"rewards/margins": 11.213571548461914, |
|
"rewards/rejected": -9.04554271697998, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 25.718555450439453, |
|
"kl": 0.0, |
|
"learning_rate": 1.6642958748221906e-07, |
|
"logps/chosen": -310.0647888183594, |
|
"logps/rejected": -412.52001953125, |
|
"loss": 0.1359, |
|
"rewards/chosen": 2.2913904190063477, |
|
"rewards/margins": 11.509147644042969, |
|
"rewards/rejected": -9.217756271362305, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 20.378263473510742, |
|
"kl": 0.0, |
|
"learning_rate": 1.64651493598862e-07, |
|
"logps/chosen": -271.46343994140625, |
|
"logps/rejected": -405.5977478027344, |
|
"loss": 0.1325, |
|
"rewards/chosen": 2.2153549194335938, |
|
"rewards/margins": 11.555242538452148, |
|
"rewards/rejected": -9.339888572692871, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 23.4803466796875, |
|
"kl": 0.0, |
|
"learning_rate": 1.6287339971550498e-07, |
|
"logps/chosen": -292.6392822265625, |
|
"logps/rejected": -450.56121826171875, |
|
"loss": 0.1272, |
|
"rewards/chosen": 1.7447988986968994, |
|
"rewards/margins": 11.052331924438477, |
|
"rewards/rejected": -9.307534217834473, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 21.62887954711914, |
|
"kl": 0.0, |
|
"learning_rate": 1.6109530583214793e-07, |
|
"logps/chosen": -255.4695281982422, |
|
"logps/rejected": -440.318115234375, |
|
"loss": 0.1474, |
|
"rewards/chosen": 2.0870609283447266, |
|
"rewards/margins": 11.327590942382812, |
|
"rewards/rejected": -9.240530014038086, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 20.0389461517334, |
|
"kl": 0.0, |
|
"learning_rate": 1.5931721194879087e-07, |
|
"logps/chosen": -288.50909423828125, |
|
"logps/rejected": -440.1390075683594, |
|
"loss": 0.1267, |
|
"rewards/chosen": 2.4988930225372314, |
|
"rewards/margins": 12.02427864074707, |
|
"rewards/rejected": -9.525384902954102, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 27.30327606201172, |
|
"kl": 0.0, |
|
"learning_rate": 1.5753911806543385e-07, |
|
"logps/chosen": -315.9127197265625, |
|
"logps/rejected": -439.2584533691406, |
|
"loss": 0.1366, |
|
"rewards/chosen": 2.387932300567627, |
|
"rewards/margins": 11.112966537475586, |
|
"rewards/rejected": -8.725034713745117, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 22.370424270629883, |
|
"kl": 0.0, |
|
"learning_rate": 1.5576102418207682e-07, |
|
"logps/chosen": -296.82879638671875, |
|
"logps/rejected": -427.3119201660156, |
|
"loss": 0.1312, |
|
"rewards/chosen": 2.379429817199707, |
|
"rewards/margins": 11.889947891235352, |
|
"rewards/rejected": -9.510516166687012, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 22.60016441345215, |
|
"kl": 0.0, |
|
"learning_rate": 1.5398293029871974e-07, |
|
"logps/chosen": -284.07672119140625, |
|
"logps/rejected": -433.29278564453125, |
|
"loss": 0.1373, |
|
"rewards/chosen": 1.8526710271835327, |
|
"rewards/margins": 10.757834434509277, |
|
"rewards/rejected": -8.905162811279297, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 21.129493713378906, |
|
"kl": 0.0, |
|
"learning_rate": 1.5220483641536272e-07, |
|
"logps/chosen": -293.85552978515625, |
|
"logps/rejected": -427.25091552734375, |
|
"loss": 0.1265, |
|
"rewards/chosen": 2.3632965087890625, |
|
"rewards/margins": 11.906625747680664, |
|
"rewards/rejected": -9.543329238891602, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 19.694997787475586, |
|
"kl": 0.0, |
|
"learning_rate": 1.504267425320057e-07, |
|
"logps/chosen": -303.8923645019531, |
|
"logps/rejected": -465.65948486328125, |
|
"loss": 0.1168, |
|
"rewards/chosen": 2.5519447326660156, |
|
"rewards/margins": 12.214879989624023, |
|
"rewards/rejected": -9.662935256958008, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 22.003957748413086, |
|
"kl": 0.0, |
|
"learning_rate": 1.4864864864864866e-07, |
|
"logps/chosen": -275.05450439453125, |
|
"logps/rejected": -431.39459228515625, |
|
"loss": 0.1346, |
|
"rewards/chosen": 1.9555597305297852, |
|
"rewards/margins": 11.127113342285156, |
|
"rewards/rejected": -9.171553611755371, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 22.918169021606445, |
|
"kl": 0.0, |
|
"learning_rate": 1.4687055476529158e-07, |
|
"logps/chosen": -302.3306579589844, |
|
"logps/rejected": -442.83282470703125, |
|
"loss": 0.1249, |
|
"rewards/chosen": 2.11725115776062, |
|
"rewards/margins": 10.936277389526367, |
|
"rewards/rejected": -8.819025039672852, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 21.72097396850586, |
|
"kl": 0.0, |
|
"learning_rate": 1.4509246088193456e-07, |
|
"logps/chosen": -287.4019470214844, |
|
"logps/rejected": -401.7447814941406, |
|
"loss": 0.1338, |
|
"rewards/chosen": 2.2235841751098633, |
|
"rewards/margins": 10.831747055053711, |
|
"rewards/rejected": -8.608162879943848, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 22.042200088500977, |
|
"kl": 0.0, |
|
"learning_rate": 1.4331436699857753e-07, |
|
"logps/chosen": -278.91546630859375, |
|
"logps/rejected": -414.25885009765625, |
|
"loss": 0.1356, |
|
"rewards/chosen": 2.200042724609375, |
|
"rewards/margins": 11.185142517089844, |
|
"rewards/rejected": -8.985098838806152, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 17.934968948364258, |
|
"kl": 0.0, |
|
"learning_rate": 1.4153627311522048e-07, |
|
"logps/chosen": -308.2718200683594, |
|
"logps/rejected": -443.8597717285156, |
|
"loss": 0.1129, |
|
"rewards/chosen": 2.3466713428497314, |
|
"rewards/margins": 11.657171249389648, |
|
"rewards/rejected": -9.31049919128418, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 24.983243942260742, |
|
"kl": 0.0, |
|
"learning_rate": 1.3975817923186345e-07, |
|
"logps/chosen": -260.9937438964844, |
|
"logps/rejected": -389.94403076171875, |
|
"loss": 0.1388, |
|
"rewards/chosen": 2.287144184112549, |
|
"rewards/margins": 10.213197708129883, |
|
"rewards/rejected": -7.926053524017334, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 23.24259376525879, |
|
"kl": 0.0, |
|
"learning_rate": 1.379800853485064e-07, |
|
"logps/chosen": -270.7221374511719, |
|
"logps/rejected": -430.4684143066406, |
|
"loss": 0.1291, |
|
"rewards/chosen": 2.354933977127075, |
|
"rewards/margins": 11.62955379486084, |
|
"rewards/rejected": -9.274620056152344, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 21.11787986755371, |
|
"kl": 0.0, |
|
"learning_rate": 1.3620199146514935e-07, |
|
"logps/chosen": -304.3623962402344, |
|
"logps/rejected": -417.33929443359375, |
|
"loss": 0.1336, |
|
"rewards/chosen": 2.0863852500915527, |
|
"rewards/margins": 11.270166397094727, |
|
"rewards/rejected": -9.183780670166016, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 23.130449295043945, |
|
"kl": 0.0, |
|
"learning_rate": 1.3442389758179232e-07, |
|
"logps/chosen": -289.29156494140625, |
|
"logps/rejected": -438.5927734375, |
|
"loss": 0.1334, |
|
"rewards/chosen": 1.9771530628204346, |
|
"rewards/margins": 11.332016944885254, |
|
"rewards/rejected": -9.354864120483398, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 21.25185775756836, |
|
"kl": 0.0, |
|
"learning_rate": 1.326458036984353e-07, |
|
"logps/chosen": -246.78213500976562, |
|
"logps/rejected": -435.413818359375, |
|
"loss": 0.1494, |
|
"rewards/chosen": 1.6873725652694702, |
|
"rewards/margins": 10.664718627929688, |
|
"rewards/rejected": -8.977346420288086, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 16.545639038085938, |
|
"kl": 0.0, |
|
"learning_rate": 1.3086770981507821e-07, |
|
"logps/chosen": -270.4247741699219, |
|
"logps/rejected": -447.81787109375, |
|
"loss": 0.1282, |
|
"rewards/chosen": 2.250525951385498, |
|
"rewards/margins": 11.644302368164062, |
|
"rewards/rejected": -9.393776893615723, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 18.8116512298584, |
|
"kl": 0.0, |
|
"learning_rate": 1.290896159317212e-07, |
|
"logps/chosen": -315.2632751464844, |
|
"logps/rejected": -431.31005859375, |
|
"loss": 0.1056, |
|
"rewards/chosen": 2.4115493297576904, |
|
"rewards/margins": 11.539453506469727, |
|
"rewards/rejected": -9.127903938293457, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 22.677959442138672, |
|
"kl": 0.0, |
|
"learning_rate": 1.2731152204836416e-07, |
|
"logps/chosen": -250.3134307861328, |
|
"logps/rejected": -441.03948974609375, |
|
"loss": 0.1227, |
|
"rewards/chosen": 2.1084024906158447, |
|
"rewards/margins": 11.651433944702148, |
|
"rewards/rejected": -9.543031692504883, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 21.911409378051758, |
|
"kl": 0.0, |
|
"learning_rate": 1.255334281650071e-07, |
|
"logps/chosen": -237.79061889648438, |
|
"logps/rejected": -430.192138671875, |
|
"loss": 0.1288, |
|
"rewards/chosen": 2.0766196250915527, |
|
"rewards/margins": 11.207183837890625, |
|
"rewards/rejected": -9.13056468963623, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 21.959409713745117, |
|
"kl": 0.0, |
|
"learning_rate": 1.2375533428165005e-07, |
|
"logps/chosen": -308.5229187011719, |
|
"logps/rejected": -417.3135681152344, |
|
"loss": 0.1202, |
|
"rewards/chosen": 2.373558759689331, |
|
"rewards/margins": 11.75732135772705, |
|
"rewards/rejected": -9.383761405944824, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 20.8580265045166, |
|
"kl": 0.0, |
|
"learning_rate": 1.2197724039829303e-07, |
|
"logps/chosen": -243.6355438232422, |
|
"logps/rejected": -430.52801513671875, |
|
"loss": 0.1265, |
|
"rewards/chosen": 2.0868420600891113, |
|
"rewards/margins": 11.455188751220703, |
|
"rewards/rejected": -9.36834716796875, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 17.877643585205078, |
|
"kl": 0.0, |
|
"learning_rate": 1.2019914651493598e-07, |
|
"logps/chosen": -261.396484375, |
|
"logps/rejected": -397.22882080078125, |
|
"loss": 0.1269, |
|
"rewards/chosen": 2.2846105098724365, |
|
"rewards/margins": 11.026288986206055, |
|
"rewards/rejected": -8.741678237915039, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 20.07047462463379, |
|
"kl": 0.0, |
|
"learning_rate": 1.1842105263157894e-07, |
|
"logps/chosen": -252.1815643310547, |
|
"logps/rejected": -434.57977294921875, |
|
"loss": 0.1224, |
|
"rewards/chosen": 2.228175640106201, |
|
"rewards/margins": 11.402864456176758, |
|
"rewards/rejected": -9.174688339233398, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 22.779888153076172, |
|
"kl": 0.0, |
|
"learning_rate": 1.166429587482219e-07, |
|
"logps/chosen": -310.24267578125, |
|
"logps/rejected": -429.3841857910156, |
|
"loss": 0.132, |
|
"rewards/chosen": 2.069182872772217, |
|
"rewards/margins": 11.373079299926758, |
|
"rewards/rejected": -9.3038969039917, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 20.758487701416016, |
|
"kl": 0.0, |
|
"learning_rate": 1.1486486486486487e-07, |
|
"logps/chosen": -254.3895263671875, |
|
"logps/rejected": -465.3462829589844, |
|
"loss": 0.1237, |
|
"rewards/chosen": 2.068655252456665, |
|
"rewards/margins": 11.601139068603516, |
|
"rewards/rejected": -9.532483100891113, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 19.828561782836914, |
|
"kl": 0.0, |
|
"learning_rate": 1.1308677098150782e-07, |
|
"logps/chosen": -282.51617431640625, |
|
"logps/rejected": -437.31103515625, |
|
"loss": 0.1296, |
|
"rewards/chosen": 2.1852059364318848, |
|
"rewards/margins": 11.828689575195312, |
|
"rewards/rejected": -9.64348316192627, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 19.10045623779297, |
|
"kl": 0.0, |
|
"learning_rate": 1.1130867709815078e-07, |
|
"logps/chosen": -290.7330017089844, |
|
"logps/rejected": -437.0845642089844, |
|
"loss": 0.1163, |
|
"rewards/chosen": 2.1793453693389893, |
|
"rewards/margins": 11.713521003723145, |
|
"rewards/rejected": -9.534174919128418, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 22.69818878173828, |
|
"kl": 0.0, |
|
"learning_rate": 1.0953058321479374e-07, |
|
"logps/chosen": -288.8890075683594, |
|
"logps/rejected": -411.38629150390625, |
|
"loss": 0.1308, |
|
"rewards/chosen": 2.0378472805023193, |
|
"rewards/margins": 10.46517276763916, |
|
"rewards/rejected": -8.427325248718262, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 21.247556686401367, |
|
"kl": 0.0, |
|
"learning_rate": 1.077524893314367e-07, |
|
"logps/chosen": -283.88751220703125, |
|
"logps/rejected": -429.2752990722656, |
|
"loss": 0.119, |
|
"rewards/chosen": 2.1524832248687744, |
|
"rewards/margins": 11.539883613586426, |
|
"rewards/rejected": -9.387399673461914, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 21.65992546081543, |
|
"kl": 0.0, |
|
"learning_rate": 1.0597439544807964e-07, |
|
"logps/chosen": -278.073486328125, |
|
"logps/rejected": -448.2752990722656, |
|
"loss": 0.1171, |
|
"rewards/chosen": 2.266263484954834, |
|
"rewards/margins": 12.238935470581055, |
|
"rewards/rejected": -9.972672462463379, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 19.32549285888672, |
|
"kl": 0.0, |
|
"learning_rate": 1.0419630156472262e-07, |
|
"logps/chosen": -303.88616943359375, |
|
"logps/rejected": -435.61810302734375, |
|
"loss": 0.1139, |
|
"rewards/chosen": 2.234591007232666, |
|
"rewards/margins": 11.52080249786377, |
|
"rewards/rejected": -9.286211013793945, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 20.137916564941406, |
|
"kl": 0.0, |
|
"learning_rate": 1.0241820768136557e-07, |
|
"logps/chosen": -281.22686767578125, |
|
"logps/rejected": -411.6228942871094, |
|
"loss": 0.1232, |
|
"rewards/chosen": 2.257664203643799, |
|
"rewards/margins": 11.2540864944458, |
|
"rewards/rejected": -8.996421813964844, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 15.928502082824707, |
|
"kl": 0.0, |
|
"learning_rate": 1.0064011379800854e-07, |
|
"logps/chosen": -316.901611328125, |
|
"logps/rejected": -428.92822265625, |
|
"loss": 0.1213, |
|
"rewards/chosen": 2.580500841140747, |
|
"rewards/margins": 11.729310989379883, |
|
"rewards/rejected": -9.148810386657715, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 24.323083877563477, |
|
"kl": 0.0, |
|
"learning_rate": 9.886201991465149e-08, |
|
"logps/chosen": -261.3116149902344, |
|
"logps/rejected": -456.38201904296875, |
|
"loss": 0.1232, |
|
"rewards/chosen": 1.9357669353485107, |
|
"rewards/margins": 11.504796981811523, |
|
"rewards/rejected": -9.569029808044434, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 19.80019760131836, |
|
"kl": 0.0, |
|
"learning_rate": 9.708392603129445e-08, |
|
"logps/chosen": -286.7679748535156, |
|
"logps/rejected": -409.57220458984375, |
|
"loss": 0.1177, |
|
"rewards/chosen": 1.9313856363296509, |
|
"rewards/margins": 11.41652774810791, |
|
"rewards/rejected": -9.48514175415039, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 15.446332931518555, |
|
"kl": 0.0, |
|
"learning_rate": 9.530583214793741e-08, |
|
"logps/chosen": -256.6762390136719, |
|
"logps/rejected": -445.1065368652344, |
|
"loss": 0.1037, |
|
"rewards/chosen": 2.22512149810791, |
|
"rewards/margins": 12.185127258300781, |
|
"rewards/rejected": -9.960004806518555, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 17.317325592041016, |
|
"kl": 0.0, |
|
"learning_rate": 9.352773826458037e-08, |
|
"logps/chosen": -248.8364715576172, |
|
"logps/rejected": -417.3561096191406, |
|
"loss": 0.1064, |
|
"rewards/chosen": 2.178281307220459, |
|
"rewards/margins": 12.139802932739258, |
|
"rewards/rejected": -9.96152114868164, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 16.94424057006836, |
|
"kl": 0.0, |
|
"learning_rate": 9.174964438122331e-08, |
|
"logps/chosen": -253.8694305419922, |
|
"logps/rejected": -388.809326171875, |
|
"loss": 0.1238, |
|
"rewards/chosen": 1.9573352336883545, |
|
"rewards/margins": 11.183786392211914, |
|
"rewards/rejected": -9.22645092010498, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 19.46892738342285, |
|
"kl": 0.0, |
|
"learning_rate": 8.997155049786629e-08, |
|
"logps/chosen": -299.6037902832031, |
|
"logps/rejected": -408.68701171875, |
|
"loss": 0.1313, |
|
"rewards/chosen": 2.082526683807373, |
|
"rewards/margins": 11.771112442016602, |
|
"rewards/rejected": -9.688586235046387, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 21.77025604248047, |
|
"kl": 0.0, |
|
"learning_rate": 8.819345661450925e-08, |
|
"logps/chosen": -276.5013122558594, |
|
"logps/rejected": -462.1937561035156, |
|
"loss": 0.1368, |
|
"rewards/chosen": 1.8290278911590576, |
|
"rewards/margins": 11.848515510559082, |
|
"rewards/rejected": -10.019487380981445, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 20.317564010620117, |
|
"kl": 0.0, |
|
"learning_rate": 8.64153627311522e-08, |
|
"logps/chosen": -275.123046875, |
|
"logps/rejected": -400.65899658203125, |
|
"loss": 0.1178, |
|
"rewards/chosen": 2.3277885913848877, |
|
"rewards/margins": 11.247326850891113, |
|
"rewards/rejected": -8.919538497924805, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 18.717248916625977, |
|
"kl": 0.0, |
|
"learning_rate": 8.463726884779517e-08, |
|
"logps/chosen": -261.8305358886719, |
|
"logps/rejected": -415.6966857910156, |
|
"loss": 0.1082, |
|
"rewards/chosen": 2.135937452316284, |
|
"rewards/margins": 11.28874683380127, |
|
"rewards/rejected": -9.152809143066406, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 21.309633255004883, |
|
"kl": 0.0, |
|
"learning_rate": 8.285917496443812e-08, |
|
"logps/chosen": -231.76626586914062, |
|
"logps/rejected": -448.5263671875, |
|
"loss": 0.1118, |
|
"rewards/chosen": 2.206472873687744, |
|
"rewards/margins": 12.001043319702148, |
|
"rewards/rejected": -9.794569969177246, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 19.966053009033203, |
|
"kl": 0.0, |
|
"learning_rate": 8.108108108108108e-08, |
|
"logps/chosen": -287.36712646484375, |
|
"logps/rejected": -427.26898193359375, |
|
"loss": 0.1209, |
|
"rewards/chosen": 2.056323528289795, |
|
"rewards/margins": 10.724042892456055, |
|
"rewards/rejected": -8.667718887329102, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 22.85637855529785, |
|
"kl": 0.0, |
|
"learning_rate": 7.930298719772404e-08, |
|
"logps/chosen": -253.1752471923828, |
|
"logps/rejected": -414.54827880859375, |
|
"loss": 0.1332, |
|
"rewards/chosen": 1.9090397357940674, |
|
"rewards/margins": 11.734771728515625, |
|
"rewards/rejected": -9.825732231140137, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 17.524154663085938, |
|
"kl": 0.0, |
|
"learning_rate": 7.7524893314367e-08, |
|
"logps/chosen": -271.58856201171875, |
|
"logps/rejected": -431.1787109375, |
|
"loss": 0.1257, |
|
"rewards/chosen": 2.0929927825927734, |
|
"rewards/margins": 11.595911026000977, |
|
"rewards/rejected": -9.502918243408203, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 20.388002395629883, |
|
"kl": 0.0, |
|
"learning_rate": 7.574679943100994e-08, |
|
"logps/chosen": -294.2145080566406, |
|
"logps/rejected": -427.0972595214844, |
|
"loss": 0.132, |
|
"rewards/chosen": 2.2338738441467285, |
|
"rewards/margins": 11.665974617004395, |
|
"rewards/rejected": -9.432100296020508, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 27.609825134277344, |
|
"kl": 0.0, |
|
"learning_rate": 7.396870554765292e-08, |
|
"logps/chosen": -277.4027404785156, |
|
"logps/rejected": -441.5484924316406, |
|
"loss": 0.1205, |
|
"rewards/chosen": 1.9443111419677734, |
|
"rewards/margins": 11.26426887512207, |
|
"rewards/rejected": -9.319957733154297, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 21.430208206176758, |
|
"kl": 0.0, |
|
"learning_rate": 7.219061166429587e-08, |
|
"logps/chosen": -232.32846069335938, |
|
"logps/rejected": -424.41668701171875, |
|
"loss": 0.143, |
|
"rewards/chosen": 2.0610389709472656, |
|
"rewards/margins": 10.865748405456543, |
|
"rewards/rejected": -8.804708480834961, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 17.94335174560547, |
|
"kl": 0.0, |
|
"learning_rate": 7.041251778093883e-08, |
|
"logps/chosen": -246.70504760742188, |
|
"logps/rejected": -452.76953125, |
|
"loss": 0.0997, |
|
"rewards/chosen": 2.304919481277466, |
|
"rewards/margins": 11.7770414352417, |
|
"rewards/rejected": -9.472122192382812, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 22.950284957885742, |
|
"kl": 0.0, |
|
"learning_rate": 6.863442389758179e-08, |
|
"logps/chosen": -267.05242919921875, |
|
"logps/rejected": -420.9137268066406, |
|
"loss": 0.1369, |
|
"rewards/chosen": 1.899043083190918, |
|
"rewards/margins": 11.120938301086426, |
|
"rewards/rejected": -9.221895217895508, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 18.250551223754883, |
|
"kl": 0.0, |
|
"learning_rate": 6.685633001422475e-08, |
|
"logps/chosen": -231.0555877685547, |
|
"logps/rejected": -449.4051818847656, |
|
"loss": 0.13, |
|
"rewards/chosen": 2.14901065826416, |
|
"rewards/margins": 11.469152450561523, |
|
"rewards/rejected": -9.320141792297363, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 20.363611221313477, |
|
"kl": 0.0, |
|
"learning_rate": 6.507823613086771e-08, |
|
"logps/chosen": -278.7294006347656, |
|
"logps/rejected": -418.37030029296875, |
|
"loss": 0.1272, |
|
"rewards/chosen": 2.073495626449585, |
|
"rewards/margins": 10.949335098266602, |
|
"rewards/rejected": -8.875838279724121, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 18.06746482849121, |
|
"kl": 0.0, |
|
"learning_rate": 6.330014224751067e-08, |
|
"logps/chosen": -283.6380920410156, |
|
"logps/rejected": -453.05987548828125, |
|
"loss": 0.1107, |
|
"rewards/chosen": 2.3998241424560547, |
|
"rewards/margins": 12.352110862731934, |
|
"rewards/rejected": -9.952287673950195, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 20.415699005126953, |
|
"kl": 0.0, |
|
"learning_rate": 6.152204836415363e-08, |
|
"logps/chosen": -305.8134765625, |
|
"logps/rejected": -397.2032775878906, |
|
"loss": 0.1191, |
|
"rewards/chosen": 2.6986749172210693, |
|
"rewards/margins": 11.584226608276367, |
|
"rewards/rejected": -8.885551452636719, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 20.92513084411621, |
|
"kl": 0.0, |
|
"learning_rate": 5.974395448079659e-08, |
|
"logps/chosen": -295.1770935058594, |
|
"logps/rejected": -471.5146484375, |
|
"loss": 0.1234, |
|
"rewards/chosen": 1.9177255630493164, |
|
"rewards/margins": 11.333423614501953, |
|
"rewards/rejected": -9.415698051452637, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 21.794998168945312, |
|
"kl": 0.0, |
|
"learning_rate": 5.796586059743954e-08, |
|
"logps/chosen": -271.8743591308594, |
|
"logps/rejected": -505.2965393066406, |
|
"loss": 0.1268, |
|
"rewards/chosen": 1.8860387802124023, |
|
"rewards/margins": 12.0774507522583, |
|
"rewards/rejected": -10.191411972045898, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 21.202049255371094, |
|
"kl": 0.0, |
|
"learning_rate": 5.61877667140825e-08, |
|
"logps/chosen": -277.30548095703125, |
|
"logps/rejected": -465.62744140625, |
|
"loss": 0.1104, |
|
"rewards/chosen": 2.4026882648468018, |
|
"rewards/margins": 12.06915283203125, |
|
"rewards/rejected": -9.666464805603027, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 23.27330780029297, |
|
"kl": 0.0, |
|
"learning_rate": 5.4409672830725456e-08, |
|
"logps/chosen": -276.54815673828125, |
|
"logps/rejected": -438.12005615234375, |
|
"loss": 0.1114, |
|
"rewards/chosen": 2.2622342109680176, |
|
"rewards/margins": 11.430865287780762, |
|
"rewards/rejected": -9.168631553649902, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 22.490863800048828, |
|
"kl": 0.0, |
|
"learning_rate": 5.2631578947368416e-08, |
|
"logps/chosen": -275.0463562011719, |
|
"logps/rejected": -461.47552490234375, |
|
"loss": 0.1348, |
|
"rewards/chosen": 2.0476670265197754, |
|
"rewards/margins": 12.236173629760742, |
|
"rewards/rejected": -10.188508033752441, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 22.146873474121094, |
|
"kl": 0.0, |
|
"learning_rate": 5.0853485064011376e-08, |
|
"logps/chosen": -285.8138122558594, |
|
"logps/rejected": -442.80169677734375, |
|
"loss": 0.114, |
|
"rewards/chosen": 2.3625457286834717, |
|
"rewards/margins": 11.950872421264648, |
|
"rewards/rejected": -9.588325500488281, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 18.684457778930664, |
|
"kl": 0.0, |
|
"learning_rate": 4.9075391180654337e-08, |
|
"logps/chosen": -281.9924011230469, |
|
"logps/rejected": -444.9959411621094, |
|
"loss": 0.1139, |
|
"rewards/chosen": 2.2367939949035645, |
|
"rewards/margins": 11.815010070800781, |
|
"rewards/rejected": -9.578217506408691, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 24.193016052246094, |
|
"kl": 0.0, |
|
"learning_rate": 4.72972972972973e-08, |
|
"logps/chosen": -319.1490783691406, |
|
"logps/rejected": -415.7984924316406, |
|
"loss": 0.1288, |
|
"rewards/chosen": 2.3501713275909424, |
|
"rewards/margins": 11.565084457397461, |
|
"rewards/rejected": -9.214912414550781, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 20.57202911376953, |
|
"kl": 0.0, |
|
"learning_rate": 4.551920341394026e-08, |
|
"logps/chosen": -267.69696044921875, |
|
"logps/rejected": -447.53302001953125, |
|
"loss": 0.139, |
|
"rewards/chosen": 2.0053253173828125, |
|
"rewards/margins": 11.46650218963623, |
|
"rewards/rejected": -9.461176872253418, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 20.329166412353516, |
|
"kl": 0.0, |
|
"learning_rate": 4.374110953058322e-08, |
|
"logps/chosen": -264.3152160644531, |
|
"logps/rejected": -474.037353515625, |
|
"loss": 0.117, |
|
"rewards/chosen": 2.193683624267578, |
|
"rewards/margins": 12.409022331237793, |
|
"rewards/rejected": -10.215338706970215, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 22.352367401123047, |
|
"kl": 0.0, |
|
"learning_rate": 4.196301564722617e-08, |
|
"logps/chosen": -286.8226623535156, |
|
"logps/rejected": -478.8553771972656, |
|
"loss": 0.1177, |
|
"rewards/chosen": 1.9940738677978516, |
|
"rewards/margins": 12.453526496887207, |
|
"rewards/rejected": -10.459451675415039, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 18.939308166503906, |
|
"kl": 0.0, |
|
"learning_rate": 4.018492176386913e-08, |
|
"logps/chosen": -276.21746826171875, |
|
"logps/rejected": -413.55389404296875, |
|
"loss": 0.1328, |
|
"rewards/chosen": 2.231210708618164, |
|
"rewards/margins": 11.543118476867676, |
|
"rewards/rejected": -9.311908721923828, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 18.116722106933594, |
|
"kl": 0.0, |
|
"learning_rate": 3.840682788051209e-08, |
|
"logps/chosen": -240.20028686523438, |
|
"logps/rejected": -416.8455505371094, |
|
"loss": 0.1142, |
|
"rewards/chosen": 2.345201253890991, |
|
"rewards/margins": 11.189725875854492, |
|
"rewards/rejected": -8.844523429870605, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 25.406269073486328, |
|
"kl": 0.0, |
|
"learning_rate": 3.6628733997155046e-08, |
|
"logps/chosen": -254.66464233398438, |
|
"logps/rejected": -418.0938415527344, |
|
"loss": 0.1186, |
|
"rewards/chosen": 2.0774874687194824, |
|
"rewards/margins": 11.201638221740723, |
|
"rewards/rejected": -9.124151229858398, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 18.636796951293945, |
|
"kl": 0.0, |
|
"learning_rate": 3.4850640113798006e-08, |
|
"logps/chosen": -275.15472412109375, |
|
"logps/rejected": -442.4652404785156, |
|
"loss": 0.1126, |
|
"rewards/chosen": 2.310152053833008, |
|
"rewards/margins": 12.194239616394043, |
|
"rewards/rejected": -9.884087562561035, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 27.727458953857422, |
|
"kl": 0.0, |
|
"learning_rate": 3.3072546230440967e-08, |
|
"logps/chosen": -282.36627197265625, |
|
"logps/rejected": -417.9161071777344, |
|
"loss": 0.1169, |
|
"rewards/chosen": 2.358586549758911, |
|
"rewards/margins": 11.822304725646973, |
|
"rewards/rejected": -9.463717460632324, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 18.799943923950195, |
|
"kl": 0.0, |
|
"learning_rate": 3.129445234708392e-08, |
|
"logps/chosen": -282.34100341796875, |
|
"logps/rejected": -400.6058044433594, |
|
"loss": 0.1164, |
|
"rewards/chosen": 2.1689610481262207, |
|
"rewards/margins": 11.757662773132324, |
|
"rewards/rejected": -9.588701248168945, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 18.94513702392578, |
|
"kl": 0.0, |
|
"learning_rate": 2.9516358463726884e-08, |
|
"logps/chosen": -265.2249755859375, |
|
"logps/rejected": -420.23870849609375, |
|
"loss": 0.1276, |
|
"rewards/chosen": 2.1199347972869873, |
|
"rewards/margins": 11.105647087097168, |
|
"rewards/rejected": -8.985712051391602, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 17.380094528198242, |
|
"kl": 0.0, |
|
"learning_rate": 2.7738264580369844e-08, |
|
"logps/chosen": -291.02392578125, |
|
"logps/rejected": -407.71038818359375, |
|
"loss": 0.1115, |
|
"rewards/chosen": 2.1979808807373047, |
|
"rewards/margins": 11.310641288757324, |
|
"rewards/rejected": -9.11266040802002, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 19.2188720703125, |
|
"kl": 0.0, |
|
"learning_rate": 2.59601706970128e-08, |
|
"logps/chosen": -280.51495361328125, |
|
"logps/rejected": -436.34893798828125, |
|
"loss": 0.1182, |
|
"rewards/chosen": 2.2310471534729004, |
|
"rewards/margins": 10.99431324005127, |
|
"rewards/rejected": -8.763265609741211, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 23.651437759399414, |
|
"kl": 0.0, |
|
"learning_rate": 2.418207681365576e-08, |
|
"logps/chosen": -260.20220947265625, |
|
"logps/rejected": -447.58489990234375, |
|
"loss": 0.1224, |
|
"rewards/chosen": 1.9462487697601318, |
|
"rewards/margins": 11.421347618103027, |
|
"rewards/rejected": -9.475099563598633, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 24.50865936279297, |
|
"kl": 0.0, |
|
"learning_rate": 2.240398293029872e-08, |
|
"logps/chosen": -273.6228942871094, |
|
"logps/rejected": -388.49114990234375, |
|
"loss": 0.1235, |
|
"rewards/chosen": 2.335383415222168, |
|
"rewards/margins": 10.98925495147705, |
|
"rewards/rejected": -8.653871536254883, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 17.999431610107422, |
|
"kl": 0.0, |
|
"learning_rate": 2.0625889046941676e-08, |
|
"logps/chosen": -287.52899169921875, |
|
"logps/rejected": -439.962646484375, |
|
"loss": 0.1142, |
|
"rewards/chosen": 2.2732601165771484, |
|
"rewards/margins": 11.966636657714844, |
|
"rewards/rejected": -9.693376541137695, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 22.014801025390625, |
|
"kl": 0.0, |
|
"learning_rate": 1.8847795163584636e-08, |
|
"logps/chosen": -259.125, |
|
"logps/rejected": -452.64752197265625, |
|
"loss": 0.1162, |
|
"rewards/chosen": 2.1289539337158203, |
|
"rewards/margins": 11.959836959838867, |
|
"rewards/rejected": -9.830883026123047, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 24.860143661499023, |
|
"kl": 0.0, |
|
"learning_rate": 1.7069701280227596e-08, |
|
"logps/chosen": -233.20724487304688, |
|
"logps/rejected": -443.26153564453125, |
|
"loss": 0.1203, |
|
"rewards/chosen": 1.8372972011566162, |
|
"rewards/margins": 11.322072982788086, |
|
"rewards/rejected": -9.484776496887207, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 16.33979034423828, |
|
"kl": 0.0, |
|
"learning_rate": 1.5291607396870554e-08, |
|
"logps/chosen": -242.3277587890625, |
|
"logps/rejected": -427.3155822753906, |
|
"loss": 0.1275, |
|
"rewards/chosen": 2.065825939178467, |
|
"rewards/margins": 11.441205978393555, |
|
"rewards/rejected": -9.375380516052246, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 22.828136444091797, |
|
"kl": 0.0, |
|
"learning_rate": 1.3513513513513514e-08, |
|
"logps/chosen": -296.04815673828125, |
|
"logps/rejected": -441.4925842285156, |
|
"loss": 0.1139, |
|
"rewards/chosen": 2.2007675170898438, |
|
"rewards/margins": 11.669252395629883, |
|
"rewards/rejected": -9.468484878540039, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 24.322763442993164, |
|
"kl": 0.0, |
|
"learning_rate": 1.1735419630156473e-08, |
|
"logps/chosen": -313.05377197265625, |
|
"logps/rejected": -380.57952880859375, |
|
"loss": 0.1281, |
|
"rewards/chosen": 2.0147297382354736, |
|
"rewards/margins": 10.899151802062988, |
|
"rewards/rejected": -8.884422302246094, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 15.227581977844238, |
|
"kl": 0.0, |
|
"learning_rate": 9.95732574679943e-09, |
|
"logps/chosen": -231.4268798828125, |
|
"logps/rejected": -411.5580139160156, |
|
"loss": 0.1128, |
|
"rewards/chosen": 2.0990829467773438, |
|
"rewards/margins": 11.027290344238281, |
|
"rewards/rejected": -8.928207397460938, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 21.88052749633789, |
|
"kl": 0.0, |
|
"learning_rate": 8.179231863442388e-09, |
|
"logps/chosen": -233.326416015625, |
|
"logps/rejected": -443.15234375, |
|
"loss": 0.1306, |
|
"rewards/chosen": 1.9721084833145142, |
|
"rewards/margins": 11.35097885131836, |
|
"rewards/rejected": -9.378870010375977, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 17.996585845947266, |
|
"kl": 0.0, |
|
"learning_rate": 6.401137980085348e-09, |
|
"logps/chosen": -264.0694274902344, |
|
"logps/rejected": -443.869384765625, |
|
"loss": 0.1047, |
|
"rewards/chosen": 2.1066718101501465, |
|
"rewards/margins": 11.750232696533203, |
|
"rewards/rejected": -9.643560409545898, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 14.86182689666748, |
|
"kl": 0.0, |
|
"learning_rate": 4.623044096728307e-09, |
|
"logps/chosen": -302.3523864746094, |
|
"logps/rejected": -436.1973571777344, |
|
"loss": 0.1027, |
|
"rewards/chosen": 2.3409790992736816, |
|
"rewards/margins": 12.13941764831543, |
|
"rewards/rejected": -9.79843807220459, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 20.058828353881836, |
|
"kl": 0.0, |
|
"learning_rate": 2.844950213371266e-09, |
|
"logps/chosen": -264.7875061035156, |
|
"logps/rejected": -401.29241943359375, |
|
"loss": 0.118, |
|
"rewards/chosen": 2.1413722038269043, |
|
"rewards/margins": 11.171432495117188, |
|
"rewards/rejected": -9.030058860778809, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 14.352324485778809, |
|
"kl": 0.0, |
|
"learning_rate": 1.0668563300142248e-09, |
|
"logps/chosen": -248.8438262939453, |
|
"logps/rejected": -420.352783203125, |
|
"loss": 0.1116, |
|
"rewards/chosen": 2.209176540374756, |
|
"rewards/margins": 12.117198944091797, |
|
"rewards/rejected": -9.9080228805542, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1563, |
|
"total_flos": 0.0, |
|
"train_loss": 0.1710020688315347, |
|
"train_runtime": 11106.5263, |
|
"train_samples_per_second": 9.004, |
|
"train_steps_per_second": 0.141 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1563, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|