|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 22678, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9823617602963227e-05, |
|
"loss": 1.393, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.964723520592645e-05, |
|
"loss": 1.3599, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9470852808889677e-05, |
|
"loss": 1.3453, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.92944704118529e-05, |
|
"loss": 1.3328, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9118088014816124e-05, |
|
"loss": 1.3271, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.8941705617779345e-05, |
|
"loss": 1.3291, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.876532322074257e-05, |
|
"loss": 1.3199, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.8588940823705796e-05, |
|
"loss": 1.3173, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.841255842666902e-05, |
|
"loss": 1.3141, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.8236176029632246e-05, |
|
"loss": 1.3151, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.8059793632595468e-05, |
|
"loss": 1.3105, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.7883411235558693e-05, |
|
"loss": 1.3042, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.7707028838521918e-05, |
|
"loss": 1.2985, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.7530646441485143e-05, |
|
"loss": 1.2946, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7354264044448365e-05, |
|
"loss": 1.2958, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.717788164741159e-05, |
|
"loss": 1.2867, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.7001499250374815e-05, |
|
"loss": 1.2938, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.682511685333804e-05, |
|
"loss": 1.2934, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.664873445630126e-05, |
|
"loss": 1.2892, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.6472352059264487e-05, |
|
"loss": 1.2835, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.6295969662227712e-05, |
|
"loss": 1.2838, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.6119587265190934e-05, |
|
"loss": 1.2842, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.2190536260604858, |
|
"eval_runtime": 28.9964, |
|
"eval_samples_per_second": 30.728, |
|
"eval_steps_per_second": 1.931, |
|
"step": 11339 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.594320486815416e-05, |
|
"loss": 1.2845, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.5766822471117384e-05, |
|
"loss": 1.276, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.559044007408061e-05, |
|
"loss": 1.2702, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.541405767704383e-05, |
|
"loss": 1.2726, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.5237675280007056e-05, |
|
"loss": 1.2744, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.5061292882970279e-05, |
|
"loss": 1.2741, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.4884910485933506e-05, |
|
"loss": 1.2642, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.470852808889673e-05, |
|
"loss": 1.2635, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.4532145691859953e-05, |
|
"loss": 1.2656, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.4355763294823178e-05, |
|
"loss": 1.2606, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.4179380897786401e-05, |
|
"loss": 1.2652, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.4002998500749626e-05, |
|
"loss": 1.2636, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.3826616103712851e-05, |
|
"loss": 1.2712, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.3650233706676075e-05, |
|
"loss": 1.2634, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.3473851309639298e-05, |
|
"loss": 1.2585, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.3297468912602522e-05, |
|
"loss": 1.2556, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.3121086515565748e-05, |
|
"loss": 1.2595, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.2944704118528972e-05, |
|
"loss": 1.2609, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.2768321721492197e-05, |
|
"loss": 1.2552, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.259193932445542e-05, |
|
"loss": 1.2548, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.2415556927418644e-05, |
|
"loss": 1.2599, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.223917453038187e-05, |
|
"loss": 1.2582, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.2062792133345094e-05, |
|
"loss": 1.2563, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.1791033744812012, |
|
"eval_runtime": 29.0168, |
|
"eval_samples_per_second": 30.706, |
|
"eval_steps_per_second": 1.93, |
|
"step": 22678 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 56695, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 9.552148881938842e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|