{
  "best_metric": 1.3656095266342163,
  "best_model_checkpoint": "output/bring-me-the-horizon/checkpoint-216",
  "epoch": 8.0,
  "global_step": 216,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.19,
      "learning_rate": 0.00012505669320030482,
      "loss": 2.9107,
      "step": 5
    },
    {
      "epoch": 0.38,
      "learning_rate": 9.292589525111794e-05,
      "loss": 2.6407,
      "step": 10
    },
    {
      "epoch": 0.58,
      "learning_rate": 5.218294542987356e-05,
      "loss": 2.5181,
      "step": 15
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.725216267546246e-05,
      "loss": 2.4529,
      "step": 20
    },
    {
      "epoch": 0.96,
      "learning_rate": 5.001712368734975e-07,
      "loss": 2.466,
      "step": 25
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.72078013420105,
      "eval_runtime": 0.543,
      "eval_samples_per_second": 75.509,
      "eval_steps_per_second": 11.05,
      "step": 26
    },
    {
      "epoch": 1.15,
      "learning_rate": 7.857716640189785e-06,
      "loss": 2.5427,
      "step": 30
    },
    {
      "epoch": 1.35,
      "learning_rate": 3.671999039779749e-05,
      "loss": 2.3173,
      "step": 35
    },
    {
      "epoch": 1.54,
      "learning_rate": 7.686881626551516e-05,
      "loss": 2.2888,
      "step": 40
    },
    {
      "epoch": 1.73,
      "learning_rate": 0.00011409021435531856,
      "loss": 2.3614,
      "step": 45
    },
    {
      "epoch": 1.92,
      "learning_rate": 0.00013520660867542716,
      "loss": 2.1993,
      "step": 50
    },
    {
      "epoch": 2.0,
      "eval_loss": 2.672280788421631,
      "eval_runtime": 0.5476,
      "eval_samples_per_second": 74.866,
      "eval_steps_per_second": 10.956,
      "step": 52
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.00013274211424821946,
      "loss": 2.2735,
      "step": 55
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.00010756924162575734,
      "loss": 2.1555,
      "step": 60
    },
    {
      "epoch": 2.5,
      "learning_rate": 6.860000000000001e-05,
      "loss": 2.1802,
      "step": 65
    },
    {
      "epoch": 2.69,
      "learning_rate": 2.9630758374242683e-05,
      "loss": 1.978,
      "step": 70
    },
    {
      "epoch": 2.88,
      "learning_rate": 4.457885751780558e-06,
      "loss": 2.4223,
      "step": 75
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.926320195198059,
      "eval_runtime": 0.4807,
      "eval_samples_per_second": 74.883,
      "eval_steps_per_second": 10.4,
      "step": 78
    },
    {
      "epoch": 3.08,
      "learning_rate": 1.9933913245728396e-06,
      "loss": 2.1228,
      "step": 80
    },
    {
      "epoch": 3.27,
      "learning_rate": 2.310978564468141e-05,
      "loss": 1.9814,
      "step": 85
    },
    {
      "epoch": 3.46,
      "learning_rate": 6.033118373448485e-05,
      "loss": 2.1607,
      "step": 90
    },
    {
      "epoch": 3.65,
      "learning_rate": 0.00010048000960220248,
      "loss": 2.1746,
      "step": 95
    },
    {
      "epoch": 3.85,
      "learning_rate": 0.00012934228335981018,
      "loss": 1.8565,
      "step": 100
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.8828926086425781,
      "eval_runtime": 0.4815,
      "eval_samples_per_second": 74.766,
      "eval_steps_per_second": 10.384,
      "step": 104
    },
    {
      "epoch": 4.04,
      "learning_rate": 0.0001366998287631265,
      "loss": 2.1214,
      "step": 105
    },
    {
      "epoch": 4.23,
      "learning_rate": 0.00011994783732453755,
      "loss": 1.898,
      "step": 110
    },
    {
      "epoch": 4.42,
      "learning_rate": 8.501705457012643e-05,
      "loss": 1.8994,
      "step": 115
    },
    {
      "epoch": 4.62,
      "learning_rate": 4.42741047488822e-05,
      "loss": 1.9389,
      "step": 120
    },
    {
      "epoch": 4.81,
      "learning_rate": 1.2143306799695228e-05,
      "loss": 1.7737,
      "step": 125
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.0,
      "loss": 1.7275,
      "step": 130
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.8320238590240479,
      "eval_runtime": 0.4801,
      "eval_samples_per_second": 74.982,
      "eval_steps_per_second": 10.414,
      "step": 130
    },
    {
      "epoch": 5.19,
      "learning_rate": 1.2143306799695106e-05,
      "loss": 1.6759,
      "step": 135
    },
    {
      "epoch": 5.38,
      "learning_rate": 4.4274104748882125e-05,
      "loss": 1.934,
      "step": 140
    },
    {
      "epoch": 5.58,
      "learning_rate": 8.501705457012647e-05,
      "loss": 1.7796,
      "step": 145
    },
    {
      "epoch": 5.77,
      "learning_rate": 0.00011994783732453749,
      "loss": 1.5568,
      "step": 150
    },
    {
      "epoch": 5.96,
      "learning_rate": 0.00013669982876312649,
      "loss": 1.7681,
      "step": 155
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.8055299520492554,
      "eval_runtime": 0.4812,
      "eval_samples_per_second": 74.816,
      "eval_steps_per_second": 10.391,
      "step": 156
    },
    {
      "epoch": 6.15,
      "learning_rate": 0.00012934228335981018,
      "loss": 1.5298,
      "step": 160
    },
    {
      "epoch": 6.35,
      "learning_rate": 0.00010048000960220254,
      "loss": 1.5527,
      "step": 165
    },
    {
      "epoch": 6.54,
      "learning_rate": 6.033118373448493e-05,
      "loss": 1.6001,
      "step": 170
    },
    {
      "epoch": 6.73,
      "learning_rate": 2.3109785644681573e-05,
      "loss": 1.5066,
      "step": 175
    },
    {
      "epoch": 6.92,
      "learning_rate": 1.9933913245728244e-06,
      "loss": 1.5358,
      "step": 180
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.7874510288238525,
      "eval_runtime": 0.4807,
      "eval_samples_per_second": 74.894,
      "eval_steps_per_second": 10.402,
      "step": 182
    },
    {
      "epoch": 7.12,
      "learning_rate": 4.457885751780527e-06,
      "loss": 1.45,
      "step": 185
    },
    {
      "epoch": 7.31,
      "learning_rate": 2.963075837424261e-05,
      "loss": 1.4034,
      "step": 190
    },
    {
      "epoch": 7.5,
      "learning_rate": 6.859999999999982e-05,
      "loss": 1.3896,
      "step": 195
    },
    {
      "epoch": 7.69,
      "learning_rate": 0.00010756924162575728,
      "loss": 1.3993,
      "step": 200
    },
    {
      "epoch": 7.88,
      "learning_rate": 0.00013274211424821943,
      "loss": 1.4986,
      "step": 205
    },
    {
      "epoch": 8.0,
      "eval_loss": 1.761746883392334,
      "eval_runtime": 0.4842,
      "eval_samples_per_second": 74.353,
      "eval_steps_per_second": 10.327,
      "step": 208
    },
    {
      "epoch": 7.78,
      "learning_rate": 0.00012115064879796188,
      "loss": 1.688,
      "step": 210
    },
    {
      "epoch": 7.96,
      "learning_rate": 0.00013673615134109727,
      "loss": 1.5172,
      "step": 215
    },
    {
      "epoch": 8.0,
      "eval_loss": 1.3656095266342163,
      "eval_runtime": 0.4451,
      "eval_samples_per_second": 74.143,
      "eval_steps_per_second": 11.234,
      "step": 216
    }
  ],
  "max_steps": 2916,
  "num_train_epochs": 108,
  "total_flos": 222490165248000.0,
  "trial_name": null,
  "trial_params": null
}