{ "best_metric": 1.288225769996643, "best_model_checkpoint": "output/the-king-and-the-jester/checkpoint-533", "epoch": 13.0, "global_step": 533, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 0.00013197813593027427, "loss": 2.4716, "step": 5 }, { "epoch": 0.25, "learning_rate": 0.00011710752518939715, "loss": 2.4075, "step": 10 }, { "epoch": 0.38, "learning_rate": 9.485208346024516e-05, "loss": 2.2225, "step": 15 }, { "epoch": 0.5, "learning_rate": 6.86e-05, "loss": 2.2262, "step": 20 }, { "epoch": 0.62, "learning_rate": 4.2347916539754844e-05, "loss": 2.1374, "step": 25 }, { "epoch": 0.75, "learning_rate": 2.0092474810602843e-05, "loss": 2.168, "step": 30 }, { "epoch": 0.88, "learning_rate": 5.22186406972573e-06, "loss": 2.0691, "step": 35 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 2.0637, "step": 40 }, { "epoch": 1.0, "eval_loss": 2.0384345054626465, "eval_runtime": 3.02, "eval_samples_per_second": 22.848, "eval_steps_per_second": 2.98, "step": 40 }, { "epoch": 1.0, "eval_loss": 2.0052154064178467, "eval_runtime": 2.7482, "eval_samples_per_second": 21.833, "eval_steps_per_second": 2.911, "step": 41 }, { "epoch": 1.1, "learning_rate": 3.197007505031765e-06, "loss": 2.0701, "step": 45 }, { "epoch": 1.22, "learning_rate": 1.5675842264214697e-05, "loss": 2.1089, "step": 50 }, { "epoch": 1.34, "learning_rate": 3.58284204500588e-05, "loss": 2.1069, "step": 55 }, { "epoch": 1.46, "learning_rate": 6.0732717017669706e-05, "loss": 2.1029, "step": 60 }, { "epoch": 1.59, "learning_rate": 8.677773105069102e-05, "loss": 2.0072, "step": 65 }, { "epoch": 1.71, "learning_rate": 0.00011018706319231134, "loss": 2.0136, "step": 70 }, { "epoch": 1.83, "learning_rate": 0.00012756647503932202, "loss": 1.9948, "step": 75 }, { "epoch": 1.95, "learning_rate": 0.0001363960370713319, "loss": 2.0721, "step": 80 }, { "epoch": 2.0, "eval_loss": 1.9534873962402344, "eval_runtime": 2.6646, "eval_samples_per_second": 22.517, "eval_steps_per_second": 3.002, "step": 82 }, { "epoch": 2.07, "learning_rate": 0.00013539550607801564, "loss": 1.954, "step": 85 }, { "epoch": 2.2, "learning_rate": 0.00012470995414859683, "loss": 1.9953, "step": 90 }, { "epoch": 2.32, "learning_rate": 0.00010588873393008382, "loss": 1.942, "step": 95 }, { "epoch": 2.44, "learning_rate": 8.16608300886963e-05, "loss": 1.9347, "step": 100 }, { "epoch": 2.56, "learning_rate": 5.553916991130374e-05, "loss": 1.9066, "step": 105 }, { "epoch": 2.68, "learning_rate": 3.131126606991618e-05, "loss": 1.8905, "step": 110 }, { "epoch": 2.8, "learning_rate": 1.249004585140324e-05, "loss": 1.9008, "step": 115 }, { "epoch": 2.93, "learning_rate": 1.8044939219843706e-06, "loss": 1.9337, "step": 120 }, { "epoch": 3.0, "eval_loss": 1.8858658075332642, "eval_runtime": 2.6819, "eval_samples_per_second": 22.372, "eval_steps_per_second": 2.983, "step": 123 }, { "epoch": 3.05, "learning_rate": 8.03962928668091e-07, "loss": 1.8946, "step": 125 }, { "epoch": 3.17, "learning_rate": 9.633524960678029e-06, "loss": 1.8829, "step": 130 }, { "epoch": 3.29, "learning_rate": 2.7012936807688628e-05, "loss": 1.8463, "step": 135 }, { "epoch": 3.41, "learning_rate": 5.042226894930894e-05, "loss": 1.8504, "step": 140 }, { "epoch": 3.54, "learning_rate": 7.646728298233026e-05, "loss": 1.8816, "step": 145 }, { "epoch": 3.66, "learning_rate": 0.00010137157954994128, "loss": 1.8994, "step": 150 }, { "epoch": 3.78, "learning_rate": 0.00012152415773578527, "loss": 1.8732, "step": 155 }, { "epoch": 3.9, "learning_rate": 0.00013400299249496822, "loss": 1.8941, "step": 160 }, { "epoch": 4.0, "eval_loss": 1.875728726387024, "eval_runtime": 2.6807, "eval_samples_per_second": 22.383, "eval_steps_per_second": 2.984, "step": 164 }, { "epoch": 4.02, "learning_rate": 0.00013699871396120457, "loss": 1.7863, "step": 165 }, { "epoch": 4.15, "learning_rate": 0.0001300769572075284, "loss": 1.7972, "step": 170 }, { "epoch": 4.27, "learning_rate": 0.0001142413430313578, "loss": 1.8453, "step": 175 }, { "epoch": 4.39, "learning_rate": 9.178795785882326e-05, "loss": 1.7723, "step": 180 }, { "epoch": 4.51, "learning_rate": 6.597243246886372e-05, "loss": 1.7477, "step": 185 }, { "epoch": 4.63, "learning_rate": 4.0537891490046174e-05, "loss": 1.8018, "step": 190 }, { "epoch": 4.76, "learning_rate": 1.917221867898604e-05, "loss": 1.8131, "step": 195 }, { "epoch": 4.88, "learning_rate": 4.9733318543963394e-06, "loss": 1.838, "step": 200 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 1.7917, "step": 205 }, { "epoch": 5.0, "eval_loss": 1.8161486387252808, "eval_runtime": 2.661, "eval_samples_per_second": 22.548, "eval_steps_per_second": 3.006, "step": 205 }, { "epoch": 5.12, "learning_rate": 4.973331854396309e-06, "loss": 1.7952, "step": 210 }, { "epoch": 5.24, "learning_rate": 1.917221867898606e-05, "loss": 1.7604, "step": 215 }, { "epoch": 5.37, "learning_rate": 4.053789149004621e-05, "loss": 1.7446, "step": 220 }, { "epoch": 5.49, "learning_rate": 6.597243246886352e-05, "loss": 1.6903, "step": 225 }, { "epoch": 5.61, "learning_rate": 9.178795785882305e-05, "loss": 1.7928, "step": 230 }, { "epoch": 5.73, "learning_rate": 0.00011424134303135765, "loss": 1.6792, "step": 235 }, { "epoch": 5.85, "learning_rate": 0.00013007695720752838, "loss": 1.8006, "step": 240 }, { "epoch": 5.98, "learning_rate": 0.00013699871396120457, "loss": 1.7115, "step": 245 }, { "epoch": 6.0, "eval_loss": 1.8405648469924927, "eval_runtime": 2.6612, "eval_samples_per_second": 22.546, "eval_steps_per_second": 3.006, "step": 246 }, { "epoch": 6.1, "learning_rate": 0.00013400299249496822, "loss": 1.6111, "step": 250 }, { "epoch": 6.22, "learning_rate": 0.00012152415773578526, "loss": 1.7498, "step": 255 }, { "epoch": 6.34, "learning_rate": 0.00010137157954994115, "loss": 1.7173, "step": 260 }, { "epoch": 6.46, "learning_rate": 7.646728298233034e-05, "loss": 1.6387, "step": 265 }, { "epoch": 6.59, "learning_rate": 5.0422268949309024e-05, "loss": 1.7363, "step": 270 }, { "epoch": 6.71, "learning_rate": 2.7012936807688787e-05, "loss": 1.6338, "step": 275 }, { "epoch": 6.83, "learning_rate": 9.633524960678075e-06, "loss": 1.6839, "step": 280 }, { "epoch": 6.95, "learning_rate": 8.039629286681063e-07, "loss": 1.6574, "step": 285 }, { "epoch": 7.0, "eval_loss": 1.7874430418014526, "eval_runtime": 2.6845, "eval_samples_per_second": 22.35, "eval_steps_per_second": 2.98, "step": 287 }, { "epoch": 7.07, "learning_rate": 1.8044939219843553e-06, "loss": 1.7106, "step": 290 }, { "epoch": 7.2, "learning_rate": 1.2490045851403185e-05, "loss": 1.651, "step": 295 }, { "epoch": 7.32, "learning_rate": 3.131126606991631e-05, "loss": 1.6335, "step": 300 }, { "epoch": 7.44, "learning_rate": 5.553916991130366e-05, "loss": 1.6561, "step": 305 }, { "epoch": 7.56, "learning_rate": 8.166083008869623e-05, "loss": 1.6365, "step": 310 }, { "epoch": 7.68, "learning_rate": 0.00010588873393008359, "loss": 1.5825, "step": 315 }, { "epoch": 7.8, "learning_rate": 0.00012470995414859675, "loss": 1.6183, "step": 320 }, { "epoch": 7.93, "learning_rate": 0.00013539550607801564, "loss": 1.6877, "step": 325 }, { "epoch": 8.0, "eval_loss": 1.809894323348999, "eval_runtime": 2.6825, "eval_samples_per_second": 22.367, "eval_steps_per_second": 2.982, "step": 328 }, { "epoch": 8.05, "learning_rate": 0.00013639603707133193, "loss": 1.6439, "step": 330 }, { "epoch": 8.17, "learning_rate": 0.0001275664750393221, "loss": 1.5897, "step": 335 }, { "epoch": 8.29, "learning_rate": 0.00011018706319231131, "loss": 1.5505, "step": 340 }, { "epoch": 8.41, "learning_rate": 8.67777310506911e-05, "loss": 1.6075, "step": 345 }, { "epoch": 8.54, "learning_rate": 6.073271701766978e-05, "loss": 1.6166, "step": 350 }, { "epoch": 8.66, "learning_rate": 3.5828420450058975e-05, "loss": 1.5752, "step": 355 }, { "epoch": 8.78, "learning_rate": 1.5675842264214674e-05, "loss": 1.5862, "step": 360 }, { "epoch": 8.9, "learning_rate": 3.1970075050318028e-06, "loss": 1.6337, "step": 365 }, { "epoch": 9.0, "eval_loss": 1.7743412256240845, "eval_runtime": 2.6816, "eval_samples_per_second": 22.374, "eval_steps_per_second": 2.983, "step": 369 }, { "epoch": 9.02, "learning_rate": 2.012860387953829e-07, "loss": 1.571, "step": 370 }, { "epoch": 9.15, "learning_rate": 7.123042792471586e-06, "loss": 1.5196, "step": 375 }, { "epoch": 9.27, "learning_rate": 2.295865696864207e-05, "loss": 1.5784, "step": 380 }, { "epoch": 9.39, "learning_rate": 4.541204214117682e-05, "loss": 1.4763, "step": 385 }, { "epoch": 9.51, "learning_rate": 7.122756753113636e-05, "loss": 1.5361, "step": 390 }, { "epoch": 9.63, "learning_rate": 9.66621085099539e-05, "loss": 1.5069, "step": 395 }, { "epoch": 9.76, "learning_rate": 0.00011802778132101384, "loss": 1.4928, "step": 400 }, { "epoch": 9.88, "learning_rate": 0.00013222666814560375, "loss": 1.5968, "step": 405 }, { "epoch": 10.0, "learning_rate": 0.0001372, "loss": 1.5821, "step": 410 }, { "epoch": 10.0, "eval_loss": 1.8010404109954834, "eval_runtime": 2.6585, "eval_samples_per_second": 22.569, "eval_steps_per_second": 3.009, "step": 410 }, { "epoch": 10.12, "learning_rate": 0.0001322266681456038, "loss": 1.5347, "step": 415 }, { "epoch": 10.24, "learning_rate": 0.00011802778132101396, "loss": 1.4811, "step": 420 }, { "epoch": 10.37, "learning_rate": 9.666210850995405e-05, "loss": 1.5224, "step": 425 }, { "epoch": 10.49, "learning_rate": 7.122756753113628e-05, "loss": 1.5008, "step": 430 }, { "epoch": 10.61, "learning_rate": 4.541204214117674e-05, "loss": 1.5444, "step": 435 }, { "epoch": 10.73, "learning_rate": 2.2958656968642017e-05, "loss": 1.4584, "step": 440 }, { "epoch": 10.85, "learning_rate": 7.123042792471548e-06, "loss": 1.5025, "step": 445 }, { "epoch": 10.98, "learning_rate": 2.0128603879541336e-07, "loss": 1.4657, "step": 450 }, { "epoch": 11.0, "eval_loss": 1.7706276178359985, "eval_runtime": 2.6826, "eval_samples_per_second": 22.367, "eval_steps_per_second": 2.982, "step": 451 }, { "epoch": 11.38, "learning_rate": 4.234791653975473e-05, "loss": 1.6323, "step": 455 }, { "epoch": 11.5, "learning_rate": 6.859999999999978e-05, "loss": 1.5573, "step": 460 }, { "epoch": 11.62, "learning_rate": 9.485208346024488e-05, "loss": 1.5613, "step": 465 }, { "epoch": 11.75, "learning_rate": 0.00011710752518939722, "loss": 1.5808, "step": 470 }, { "epoch": 11.88, "learning_rate": 0.00013197813593027427, "loss": 1.5626, "step": 475 }, { "epoch": 12.0, "learning_rate": 0.0001372, "loss": 1.5582, "step": 480 }, { "epoch": 12.0, "eval_loss": 1.4257222414016724, "eval_runtime": 2.9531, "eval_samples_per_second": 22.688, "eval_steps_per_second": 3.048, "step": 480 }, { "epoch": 11.83, "learning_rate": 0.00012756647503932202, "loss": 1.532, "step": 485 }, { "epoch": 11.95, "learning_rate": 0.0001363960370713319, "loss": 1.6289, "step": 490 }, { "epoch": 12.0, "eval_loss": 1.3016469478607178, "eval_runtime": 2.8272, "eval_samples_per_second": 21.222, "eval_steps_per_second": 2.83, "step": 492 }, { "epoch": 12.07, "learning_rate": 0.00013539550607801572, "loss": 1.5711, "step": 495 }, { "epoch": 12.2, "learning_rate": 0.00012470995414859683, "loss": 1.5507, "step": 500 }, { "epoch": 12.32, "learning_rate": 0.00010588873393008394, "loss": 1.5444, "step": 505 }, { "epoch": 12.44, "learning_rate": 8.166083008869614e-05, "loss": 1.5625, "step": 510 }, { "epoch": 12.56, "learning_rate": 5.553916991130382e-05, "loss": 1.523, "step": 515 }, { "epoch": 12.68, "learning_rate": 3.131126606991604e-05, "loss": 1.5342, "step": 520 }, { "epoch": 12.8, "learning_rate": 1.2490045851403148e-05, "loss": 1.4935, "step": 525 }, { "epoch": 12.93, "learning_rate": 1.8044939219843934e-06, "loss": 1.5076, "step": 530 }, { "epoch": 13.0, "eval_loss": 1.288225769996643, "eval_runtime": 2.8008, "eval_samples_per_second": 21.423, "eval_steps_per_second": 2.856, "step": 533 } ], "max_steps": 533, "num_train_epochs": 13, "total_flos": 545185824768000.0, "trial_name": null, "trial_params": null }