{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984301412872841,
"eval_steps": 500,
"global_step": 159,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006279434850863423,
"grad_norm": 5.603575760096964,
"learning_rate": 3.125e-08,
"logits/chosen": 0.18015038967132568,
"logits/rejected": 0.2519298493862152,
"logps/chosen": -297.10906982421875,
"logps/pi_response": -130.58929443359375,
"logps/ref_response": -130.58929443359375,
"logps/rejected": -316.44769287109375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06279434850863422,
"grad_norm": 5.927013620517721,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": 0.16517189145088196,
"logits/rejected": 0.31397953629493713,
"logps/chosen": -243.69070434570312,
"logps/pi_response": -120.14439392089844,
"logps/ref_response": -120.15902709960938,
"logps/rejected": -281.0929870605469,
"loss": 0.6929,
"rewards/accuracies": 0.5069444179534912,
"rewards/chosen": 9.301294630859047e-05,
"rewards/margins": 0.0011727283708751202,
"rewards/rejected": -0.001079715322703123,
"step": 10
},
{
"epoch": 0.12558869701726844,
"grad_norm": 6.040197286277514,
"learning_rate": 4.990353313429303e-07,
"logits/chosen": 0.13175079226493835,
"logits/rejected": 0.32193881273269653,
"logps/chosen": -244.1867218017578,
"logps/pi_response": -121.63621520996094,
"logps/ref_response": -121.85536193847656,
"logps/rejected": -266.6852722167969,
"loss": 0.6886,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.01275191456079483,
"rewards/margins": 0.0108140604570508,
"rewards/rejected": -0.023565974086523056,
"step": 20
},
{
"epoch": 0.18838304552590268,
"grad_norm": 6.198937762628843,
"learning_rate": 4.882681251368548e-07,
"logits/chosen": 0.1714327037334442,
"logits/rejected": 0.3029390871524811,
"logps/chosen": -244.3697509765625,
"logps/pi_response": -109.6906967163086,
"logps/ref_response": -110.8894271850586,
"logps/rejected": -290.1263732910156,
"loss": 0.6686,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.042444389313459396,
"rewards/margins": 0.055226124823093414,
"rewards/rejected": -0.09767051041126251,
"step": 30
},
{
"epoch": 0.25117739403453687,
"grad_norm": 5.780662683878471,
"learning_rate": 4.6604720940421207e-07,
"logits/chosen": 0.2108462154865265,
"logits/rejected": 0.3984522521495819,
"logps/chosen": -287.33648681640625,
"logps/pi_response": -125.24674224853516,
"logps/ref_response": -129.86325073242188,
"logps/rejected": -316.37408447265625,
"loss": 0.6347,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.08861993253231049,
"rewards/margins": 0.1344626396894455,
"rewards/rejected": -0.22308258712291718,
"step": 40
},
{
"epoch": 0.3139717425431711,
"grad_norm": 5.870253990249712,
"learning_rate": 4.3344075855595097e-07,
"logits/chosen": 0.37238794565200806,
"logits/rejected": 0.5105336308479309,
"logps/chosen": -246.9188232421875,
"logps/pi_response": -108.8661117553711,
"logps/ref_response": -116.5090560913086,
"logps/rejected": -310.613037109375,
"loss": 0.606,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.1104748398065567,
"rewards/margins": 0.24330809712409973,
"rewards/rejected": -0.3537829518318176,
"step": 50
},
{
"epoch": 0.37676609105180536,
"grad_norm": 9.335664801309742,
"learning_rate": 3.920161866827889e-07,
"logits/chosen": 0.5169380903244019,
"logits/rejected": 0.6794592142105103,
"logps/chosen": -267.8638000488281,
"logps/pi_response": -116.31705474853516,
"logps/ref_response": -119.4989242553711,
"logps/rejected": -347.69482421875,
"loss": 0.5813,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.19944007694721222,
"rewards/margins": 0.4112206995487213,
"rewards/rejected": -0.6106608510017395,
"step": 60
},
{
"epoch": 0.43956043956043955,
"grad_norm": 7.251270500863869,
"learning_rate": 3.4376480090239047e-07,
"logits/chosen": 0.6026707887649536,
"logits/rejected": 0.7696335911750793,
"logps/chosen": -236.616455078125,
"logps/pi_response": -114.6134262084961,
"logps/ref_response": -116.70068359375,
"logps/rejected": -368.1206359863281,
"loss": 0.5567,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.262630432844162,
"rewards/margins": 0.5536423921585083,
"rewards/rejected": -0.8162728548049927,
"step": 70
},
{
"epoch": 0.5023547880690737,
"grad_norm": 7.652733380703419,
"learning_rate": 2.910060778827554e-07,
"logits/chosen": 0.4738255441188812,
"logits/rejected": 0.8158077001571655,
"logps/chosen": -325.28857421875,
"logps/pi_response": -128.89028930664062,
"logps/ref_response": -127.53900146484375,
"logps/rejected": -348.1357116699219,
"loss": 0.5848,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.36525511741638184,
"rewards/margins": 0.3771258294582367,
"rewards/rejected": -0.7423809766769409,
"step": 80
},
{
"epoch": 0.565149136577708,
"grad_norm": 6.2380923795979655,
"learning_rate": 2.3627616503391812e-07,
"logits/chosen": 0.43513059616088867,
"logits/rejected": 0.8127248883247375,
"logps/chosen": -301.32684326171875,
"logps/pi_response": -135.54782104492188,
"logps/ref_response": -129.38760375976562,
"logps/rejected": -420.8568420410156,
"loss": 0.5435,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.29517459869384766,
"rewards/margins": 0.6168515086174011,
"rewards/rejected": -0.9120261073112488,
"step": 90
},
{
"epoch": 0.6279434850863422,
"grad_norm": 7.580754321821852,
"learning_rate": 1.8220596619089573e-07,
"logits/chosen": 0.6774718165397644,
"logits/rejected": 0.8504177331924438,
"logps/chosen": -283.39569091796875,
"logps/pi_response": -121.60555267333984,
"logps/ref_response": -114.0061264038086,
"logps/rejected": -304.47467041015625,
"loss": 0.5426,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.3073732852935791,
"rewards/margins": 0.3342594504356384,
"rewards/rejected": -0.6416326761245728,
"step": 100
},
{
"epoch": 0.6907378335949764,
"grad_norm": 7.058762821080314,
"learning_rate": 1.3139467229135998e-07,
"logits/chosen": 0.6427871584892273,
"logits/rejected": 0.9476824998855591,
"logps/chosen": -297.91217041015625,
"logps/pi_response": -136.62130737304688,
"logps/ref_response": -125.8144760131836,
"logps/rejected": -383.70025634765625,
"loss": 0.5314,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.33735179901123047,
"rewards/margins": 0.5089818835258484,
"rewards/rejected": -0.8463336825370789,
"step": 110
},
{
"epoch": 0.7535321821036107,
"grad_norm": 7.301058922446218,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": 0.5713673830032349,
"logits/rejected": 0.8841819763183594,
"logps/chosen": -320.47723388671875,
"logps/pi_response": -131.65252685546875,
"logps/ref_response": -120.58707427978516,
"logps/rejected": -369.21661376953125,
"loss": 0.5542,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.39976996183395386,
"rewards/margins": 0.5199070572853088,
"rewards/rejected": -0.9196769595146179,
"step": 120
},
{
"epoch": 0.8163265306122449,
"grad_norm": 6.728836632952595,
"learning_rate": 4.904486005914027e-08,
"logits/chosen": 0.5993797779083252,
"logits/rejected": 0.8266558647155762,
"logps/chosen": -276.78338623046875,
"logps/pi_response": -135.93252563476562,
"logps/ref_response": -123.1449966430664,
"logps/rejected": -372.402587890625,
"loss": 0.5271,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.3279780447483063,
"rewards/margins": 0.5935764908790588,
"rewards/rejected": -0.921554446220398,
"step": 130
},
{
"epoch": 0.8791208791208791,
"grad_norm": 7.263275222680605,
"learning_rate": 2.1464952759020856e-08,
"logits/chosen": 0.5662352442741394,
"logits/rejected": 0.8682255744934082,
"logps/chosen": -283.9330139160156,
"logps/pi_response": -130.8988037109375,
"logps/ref_response": -121.63087463378906,
"logps/rejected": -404.0865173339844,
"loss": 0.5299,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.3388724625110626,
"rewards/margins": 0.7475099563598633,
"rewards/rejected": -1.0863823890686035,
"step": 140
},
{
"epoch": 0.9419152276295133,
"grad_norm": 7.996654815605861,
"learning_rate": 4.8708793644441086e-09,
"logits/chosen": 0.5043578147888184,
"logits/rejected": 0.8957873582839966,
"logps/chosen": -317.9568786621094,
"logps/pi_response": -145.6123504638672,
"logps/ref_response": -132.86119079589844,
"logps/rejected": -398.7013854980469,
"loss": 0.5193,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.3905355930328369,
"rewards/margins": 0.5843077898025513,
"rewards/rejected": -0.9748433232307434,
"step": 150
},
{
"epoch": 0.9984301412872841,
"step": 159,
"total_flos": 0.0,
"train_loss": 0.5789255676029613,
"train_runtime": 4418.8006,
"train_samples_per_second": 4.612,
"train_steps_per_second": 0.036
}
],
"logging_steps": 10,
"max_steps": 159,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}