{
  "best_metric": 1.288225769996643,
  "best_model_checkpoint": "output/the-king-and-the-jester/checkpoint-533",
  "epoch": 13.0,
  "global_step": 533,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12,
      "learning_rate": 0.00013197813593027427,
      "loss": 2.4716,
      "step": 5
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00011710752518939715,
      "loss": 2.4075,
      "step": 10
    },
    {
      "epoch": 0.38,
      "learning_rate": 9.485208346024516e-05,
      "loss": 2.2225,
      "step": 15
    },
    {
      "epoch": 0.5,
      "learning_rate": 6.86e-05,
      "loss": 2.2262,
      "step": 20
    },
    {
      "epoch": 0.62,
      "learning_rate": 4.2347916539754844e-05,
      "loss": 2.1374,
      "step": 25
    },
    {
      "epoch": 0.75,
      "learning_rate": 2.0092474810602843e-05,
      "loss": 2.168,
      "step": 30
    },
    {
      "epoch": 0.88,
      "learning_rate": 5.22186406972573e-06,
      "loss": 2.0691,
      "step": 35
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.0,
      "loss": 2.0637,
      "step": 40
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.0384345054626465,
      "eval_runtime": 3.02,
      "eval_samples_per_second": 22.848,
      "eval_steps_per_second": 2.98,
      "step": 40
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.0052154064178467,
      "eval_runtime": 2.7482,
      "eval_samples_per_second": 21.833,
      "eval_steps_per_second": 2.911,
      "step": 41
    },
    {
      "epoch": 1.1,
      "learning_rate": 3.197007505031765e-06,
      "loss": 2.0701,
      "step": 45
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.5675842264214697e-05,
      "loss": 2.1089,
      "step": 50
    },
    {
      "epoch": 1.34,
      "learning_rate": 3.58284204500588e-05,
      "loss": 2.1069,
      "step": 55
    },
    {
      "epoch": 1.46,
      "learning_rate": 6.0732717017669706e-05,
      "loss": 2.1029,
      "step": 60
    },
    {
      "epoch": 1.59,
      "learning_rate": 8.677773105069102e-05,
      "loss": 2.0072,
      "step": 65
    },
    {
      "epoch": 1.71,
      "learning_rate": 0.00011018706319231134,
      "loss": 2.0136,
      "step": 70
    },
    {
      "epoch": 1.83,
      "learning_rate": 0.00012756647503932202,
      "loss": 1.9948,
      "step": 75
    },
    {
      "epoch": 1.95,
      "learning_rate": 0.0001363960370713319,
      "loss": 2.0721,
      "step": 80
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.9534873962402344,
      "eval_runtime": 2.6646,
      "eval_samples_per_second": 22.517,
      "eval_steps_per_second": 3.002,
      "step": 82
    },
    {
      "epoch": 2.07,
      "learning_rate": 0.00013539550607801564,
      "loss": 1.954,
      "step": 85
    },
    {
      "epoch": 2.2,
      "learning_rate": 0.00012470995414859683,
      "loss": 1.9953,
      "step": 90
    },
    {
      "epoch": 2.32,
      "learning_rate": 0.00010588873393008382,
      "loss": 1.942,
      "step": 95
    },
    {
      "epoch": 2.44,
      "learning_rate": 8.16608300886963e-05,
      "loss": 1.9347,
      "step": 100
    },
    {
      "epoch": 2.56,
      "learning_rate": 5.553916991130374e-05,
      "loss": 1.9066,
      "step": 105
    },
    {
      "epoch": 2.68,
      "learning_rate": 3.131126606991618e-05,
      "loss": 1.8905,
      "step": 110
    },
    {
      "epoch": 2.8,
      "learning_rate": 1.249004585140324e-05,
      "loss": 1.9008,
      "step": 115
    },
    {
      "epoch": 2.93,
      "learning_rate": 1.8044939219843706e-06,
      "loss": 1.9337,
      "step": 120
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.8858658075332642,
      "eval_runtime": 2.6819,
      "eval_samples_per_second": 22.372,
      "eval_steps_per_second": 2.983,
      "step": 123
    },
    {
      "epoch": 3.05,
      "learning_rate": 8.03962928668091e-07,
      "loss": 1.8946,
      "step": 125
    },
    {
      "epoch": 3.17,
      "learning_rate": 9.633524960678029e-06,
      "loss": 1.8829,
      "step": 130
    },
    {
      "epoch": 3.29,
      "learning_rate": 2.7012936807688628e-05,
      "loss": 1.8463,
      "step": 135
    },
    {
      "epoch": 3.41,
      "learning_rate": 5.042226894930894e-05,
      "loss": 1.8504,
      "step": 140
    },
    {
      "epoch": 3.54,
      "learning_rate": 7.646728298233026e-05,
      "loss": 1.8816,
      "step": 145
    },
    {
      "epoch": 3.66,
      "learning_rate": 0.00010137157954994128,
      "loss": 1.8994,
      "step": 150
    },
    {
      "epoch": 3.78,
      "learning_rate": 0.00012152415773578527,
      "loss": 1.8732,
      "step": 155
    },
    {
      "epoch": 3.9,
      "learning_rate": 0.00013400299249496822,
      "loss": 1.8941,
      "step": 160
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.875728726387024,
      "eval_runtime": 2.6807,
      "eval_samples_per_second": 22.383,
      "eval_steps_per_second": 2.984,
      "step": 164
    },
    {
      "epoch": 4.02,
      "learning_rate": 0.00013699871396120457,
      "loss": 1.7863,
      "step": 165
    },
    {
      "epoch": 4.15,
      "learning_rate": 0.0001300769572075284,
      "loss": 1.7972,
      "step": 170
    },
    {
      "epoch": 4.27,
      "learning_rate": 0.0001142413430313578,
      "loss": 1.8453,
      "step": 175
    },
    {
      "epoch": 4.39,
      "learning_rate": 9.178795785882326e-05,
      "loss": 1.7723,
      "step": 180
    },
    {
      "epoch": 4.51,
      "learning_rate": 6.597243246886372e-05,
      "loss": 1.7477,
      "step": 185
    },
    {
      "epoch": 4.63,
      "learning_rate": 4.0537891490046174e-05,
      "loss": 1.8018,
      "step": 190
    },
    {
      "epoch": 4.76,
      "learning_rate": 1.917221867898604e-05,
      "loss": 1.8131,
      "step": 195
    },
    {
      "epoch": 4.88,
      "learning_rate": 4.9733318543963394e-06,
      "loss": 1.838,
      "step": 200
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.0,
      "loss": 1.7917,
      "step": 205
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.8161486387252808,
      "eval_runtime": 2.661,
      "eval_samples_per_second": 22.548,
      "eval_steps_per_second": 3.006,
      "step": 205
    },
    {
      "epoch": 5.12,
      "learning_rate": 4.973331854396309e-06,
      "loss": 1.7952,
      "step": 210
    },
    {
      "epoch": 5.24,
      "learning_rate": 1.917221867898606e-05,
      "loss": 1.7604,
      "step": 215
    },
    {
      "epoch": 5.37,
      "learning_rate": 4.053789149004621e-05,
      "loss": 1.7446,
      "step": 220
    },
    {
      "epoch": 5.49,
      "learning_rate": 6.597243246886352e-05,
      "loss": 1.6903,
      "step": 225
    },
    {
      "epoch": 5.61,
      "learning_rate": 9.178795785882305e-05,
      "loss": 1.7928,
      "step": 230
    },
    {
      "epoch": 5.73,
      "learning_rate": 0.00011424134303135765,
      "loss": 1.6792,
      "step": 235
    },
    {
      "epoch": 5.85,
      "learning_rate": 0.00013007695720752838,
      "loss": 1.8006,
      "step": 240
    },
    {
      "epoch": 5.98,
      "learning_rate": 0.00013699871396120457,
      "loss": 1.7115,
      "step": 245
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.8405648469924927,
      "eval_runtime": 2.6612,
      "eval_samples_per_second": 22.546,
      "eval_steps_per_second": 3.006,
      "step": 246
    },
    {
      "epoch": 6.1,
      "learning_rate": 0.00013400299249496822,
      "loss": 1.6111,
      "step": 250
    },
    {
      "epoch": 6.22,
      "learning_rate": 0.00012152415773578526,
      "loss": 1.7498,
      "step": 255
    },
    {
      "epoch": 6.34,
      "learning_rate": 0.00010137157954994115,
      "loss": 1.7173,
      "step": 260
    },
    {
      "epoch": 6.46,
      "learning_rate": 7.646728298233034e-05,
      "loss": 1.6387,
      "step": 265
    },
    {
      "epoch": 6.59,
      "learning_rate": 5.0422268949309024e-05,
      "loss": 1.7363,
      "step": 270
    },
    {
      "epoch": 6.71,
      "learning_rate": 2.7012936807688787e-05,
      "loss": 1.6338,
      "step": 275
    },
    {
      "epoch": 6.83,
      "learning_rate": 9.633524960678075e-06,
      "loss": 1.6839,
      "step": 280
    },
    {
      "epoch": 6.95,
      "learning_rate": 8.039629286681063e-07,
      "loss": 1.6574,
      "step": 285
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.7874430418014526,
      "eval_runtime": 2.6845,
      "eval_samples_per_second": 22.35,
      "eval_steps_per_second": 2.98,
      "step": 287
    },
    {
      "epoch": 7.07,
      "learning_rate": 1.8044939219843553e-06,
      "loss": 1.7106,
      "step": 290
    },
    {
      "epoch": 7.2,
      "learning_rate": 1.2490045851403185e-05,
      "loss": 1.651,
      "step": 295
    },
    {
      "epoch": 7.32,
      "learning_rate": 3.131126606991631e-05,
      "loss": 1.6335,
      "step": 300
    },
    {
      "epoch": 7.44,
      "learning_rate": 5.553916991130366e-05,
      "loss": 1.6561,
      "step": 305
    },
    {
      "epoch": 7.56,
      "learning_rate": 8.166083008869623e-05,
      "loss": 1.6365,
      "step": 310
    },
    {
      "epoch": 7.68,
      "learning_rate": 0.00010588873393008359,
      "loss": 1.5825,
      "step": 315
    },
    {
      "epoch": 7.8,
      "learning_rate": 0.00012470995414859675,
      "loss": 1.6183,
      "step": 320
    },
    {
      "epoch": 7.93,
      "learning_rate": 0.00013539550607801564,
      "loss": 1.6877,
      "step": 325
    },
    {
      "epoch": 8.0,
      "eval_loss": 1.809894323348999,
      "eval_runtime": 2.6825,
      "eval_samples_per_second": 22.367,
      "eval_steps_per_second": 2.982,
      "step": 328
    },
    {
      "epoch": 8.05,
      "learning_rate": 0.00013639603707133193,
      "loss": 1.6439,
      "step": 330
    },
    {
      "epoch": 8.17,
      "learning_rate": 0.0001275664750393221,
      "loss": 1.5897,
      "step": 335
    },
    {
      "epoch": 8.29,
      "learning_rate": 0.00011018706319231131,
      "loss": 1.5505,
      "step": 340
    },
    {
      "epoch": 8.41,
      "learning_rate": 8.67777310506911e-05,
      "loss": 1.6075,
      "step": 345
    },
    {
      "epoch": 8.54,
      "learning_rate": 6.073271701766978e-05,
      "loss": 1.6166,
      "step": 350
    },
    {
      "epoch": 8.66,
      "learning_rate": 3.5828420450058975e-05,
      "loss": 1.5752,
      "step": 355
    },
    {
      "epoch": 8.78,
      "learning_rate": 1.5675842264214674e-05,
      "loss": 1.5862,
      "step": 360
    },
    {
      "epoch": 8.9,
      "learning_rate": 3.1970075050318028e-06,
      "loss": 1.6337,
      "step": 365
    },
    {
      "epoch": 9.0,
      "eval_loss": 1.7743412256240845,
      "eval_runtime": 2.6816,
      "eval_samples_per_second": 22.374,
      "eval_steps_per_second": 2.983,
      "step": 369
    },
    {
      "epoch": 9.02,
      "learning_rate": 2.012860387953829e-07,
      "loss": 1.571,
      "step": 370
    },
    {
      "epoch": 9.15,
      "learning_rate": 7.123042792471586e-06,
      "loss": 1.5196,
      "step": 375
    },
    {
      "epoch": 9.27,
      "learning_rate": 2.295865696864207e-05,
      "loss": 1.5784,
      "step": 380
    },
    {
      "epoch": 9.39,
      "learning_rate": 4.541204214117682e-05,
      "loss": 1.4763,
      "step": 385
    },
    {
      "epoch": 9.51,
      "learning_rate": 7.122756753113636e-05,
      "loss": 1.5361,
      "step": 390
    },
    {
      "epoch": 9.63,
      "learning_rate": 9.66621085099539e-05,
      "loss": 1.5069,
      "step": 395
    },
    {
      "epoch": 9.76,
      "learning_rate": 0.00011802778132101384,
      "loss": 1.4928,
      "step": 400
    },
    {
      "epoch": 9.88,
      "learning_rate": 0.00013222666814560375,
      "loss": 1.5968,
      "step": 405
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.0001372,
      "loss": 1.5821,
      "step": 410
    },
    {
      "epoch": 10.0,
      "eval_loss": 1.8010404109954834,
      "eval_runtime": 2.6585,
      "eval_samples_per_second": 22.569,
      "eval_steps_per_second": 3.009,
      "step": 410
    },
    {
      "epoch": 10.12,
      "learning_rate": 0.0001322266681456038,
      "loss": 1.5347,
      "step": 415
    },
    {
      "epoch": 10.24,
      "learning_rate": 0.00011802778132101396,
      "loss": 1.4811,
      "step": 420
    },
    {
      "epoch": 10.37,
      "learning_rate": 9.666210850995405e-05,
      "loss": 1.5224,
      "step": 425
    },
    {
      "epoch": 10.49,
      "learning_rate": 7.122756753113628e-05,
      "loss": 1.5008,
      "step": 430
    },
    {
      "epoch": 10.61,
      "learning_rate": 4.541204214117674e-05,
      "loss": 1.5444,
      "step": 435
    },
    {
      "epoch": 10.73,
      "learning_rate": 2.2958656968642017e-05,
      "loss": 1.4584,
      "step": 440
    },
    {
      "epoch": 10.85,
      "learning_rate": 7.123042792471548e-06,
      "loss": 1.5025,
      "step": 445
    },
    {
      "epoch": 10.98,
      "learning_rate": 2.0128603879541336e-07,
      "loss": 1.4657,
      "step": 450
    },
    {
      "epoch": 11.0,
      "eval_loss": 1.7706276178359985,
      "eval_runtime": 2.6826,
      "eval_samples_per_second": 22.367,
      "eval_steps_per_second": 2.982,
      "step": 451
    },
    {
      "epoch": 11.38,
      "learning_rate": 4.234791653975473e-05,
      "loss": 1.6323,
      "step": 455
    },
    {
      "epoch": 11.5,
      "learning_rate": 6.859999999999978e-05,
      "loss": 1.5573,
      "step": 460
    },
    {
      "epoch": 11.62,
      "learning_rate": 9.485208346024488e-05,
      "loss": 1.5613,
      "step": 465
    },
    {
      "epoch": 11.75,
      "learning_rate": 0.00011710752518939722,
      "loss": 1.5808,
      "step": 470
    },
    {
      "epoch": 11.88,
      "learning_rate": 0.00013197813593027427,
      "loss": 1.5626,
      "step": 475
    },
    {
      "epoch": 12.0,
      "learning_rate": 0.0001372,
      "loss": 1.5582,
      "step": 480
    },
    {
      "epoch": 12.0,
      "eval_loss": 1.4257222414016724,
      "eval_runtime": 2.9531,
      "eval_samples_per_second": 22.688,
      "eval_steps_per_second": 3.048,
      "step": 480
    },
    {
      "epoch": 11.83,
      "learning_rate": 0.00012756647503932202,
      "loss": 1.532,
      "step": 485
    },
    {
      "epoch": 11.95,
      "learning_rate": 0.0001363960370713319,
      "loss": 1.6289,
      "step": 490
    },
    {
      "epoch": 12.0,
      "eval_loss": 1.3016469478607178,
      "eval_runtime": 2.8272,
      "eval_samples_per_second": 21.222,
      "eval_steps_per_second": 2.83,
      "step": 492
    },
    {
      "epoch": 12.07,
      "learning_rate": 0.00013539550607801572,
      "loss": 1.5711,
      "step": 495
    },
    {
      "epoch": 12.2,
      "learning_rate": 0.00012470995414859683,
      "loss": 1.5507,
      "step": 500
    },
    {
      "epoch": 12.32,
      "learning_rate": 0.00010588873393008394,
      "loss": 1.5444,
      "step": 505
    },
    {
      "epoch": 12.44,
      "learning_rate": 8.166083008869614e-05,
      "loss": 1.5625,
      "step": 510
    },
    {
      "epoch": 12.56,
      "learning_rate": 5.553916991130382e-05,
      "loss": 1.523,
      "step": 515
    },
    {
      "epoch": 12.68,
      "learning_rate": 3.131126606991604e-05,
      "loss": 1.5342,
      "step": 520
    },
    {
      "epoch": 12.8,
      "learning_rate": 1.2490045851403148e-05,
      "loss": 1.4935,
      "step": 525
    },
    {
      "epoch": 12.93,
      "learning_rate": 1.8044939219843934e-06,
      "loss": 1.5076,
      "step": 530
    },
    {
      "epoch": 13.0,
      "eval_loss": 1.288225769996643,
      "eval_runtime": 2.8008,
      "eval_samples_per_second": 21.423,
      "eval_steps_per_second": 2.856,
      "step": 533
    }
  ],
  "max_steps": 533,
  "num_train_epochs": 13,
  "total_flos": 545185824768000.0,
  "trial_name": null,
  "trial_params": null
}