|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9296875, |
|
"eval_steps": 25, |
|
"global_step": 750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.43993993993994e-05, |
|
"loss": 1.1099, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.0085899829864502, |
|
"eval_runtime": 168.9968, |
|
"eval_samples_per_second": 1.456, |
|
"eval_steps_per_second": 0.183, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.3773773773773775e-05, |
|
"loss": 0.966, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 0.955312192440033, |
|
"eval_runtime": 169.2723, |
|
"eval_samples_per_second": 1.453, |
|
"eval_steps_per_second": 0.183, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 0.9251, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 0.9321236610412598, |
|
"eval_runtime": 169.3712, |
|
"eval_samples_per_second": 1.452, |
|
"eval_steps_per_second": 0.183, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.2547547547547548e-05, |
|
"loss": 0.9126, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 0.9122704267501831, |
|
"eval_runtime": 169.5064, |
|
"eval_samples_per_second": 1.451, |
|
"eval_steps_per_second": 0.183, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.1996996996997e-05, |
|
"loss": 0.884, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 0.9045689105987549, |
|
"eval_runtime": 169.4144, |
|
"eval_samples_per_second": 1.452, |
|
"eval_steps_per_second": 0.183, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.142142142142142e-05, |
|
"loss": 0.9249, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 0.9013924598693848, |
|
"eval_runtime": 169.5232, |
|
"eval_samples_per_second": 1.451, |
|
"eval_steps_per_second": 0.183, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.0845845845845847e-05, |
|
"loss": 0.8931, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 0.9005089402198792, |
|
"eval_runtime": 169.4928, |
|
"eval_samples_per_second": 1.451, |
|
"eval_steps_per_second": 0.183, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.0245245245245244e-05, |
|
"loss": 0.8724, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 0.8974775671958923, |
|
"eval_runtime": 169.6039, |
|
"eval_samples_per_second": 1.45, |
|
"eval_steps_per_second": 0.183, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.961961961961962e-05, |
|
"loss": 0.8692, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 0.8939986824989319, |
|
"eval_runtime": 169.6629, |
|
"eval_samples_per_second": 1.45, |
|
"eval_steps_per_second": 0.183, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.9019019019019018e-05, |
|
"loss": 0.8791, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 0.8978664875030518, |
|
"eval_runtime": 169.6122, |
|
"eval_samples_per_second": 1.45, |
|
"eval_steps_per_second": 0.183, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.8443443443443443e-05, |
|
"loss": 0.8534, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 0.8919960856437683, |
|
"eval_runtime": 169.6602, |
|
"eval_samples_per_second": 1.45, |
|
"eval_steps_per_second": 0.183, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.781781781781782e-05, |
|
"loss": 0.8173, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 0.8853669166564941, |
|
"eval_runtime": 169.6012, |
|
"eval_samples_per_second": 1.45, |
|
"eval_steps_per_second": 0.183, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.7192192192192195e-05, |
|
"loss": 0.7995, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 0.8796117305755615, |
|
"eval_runtime": 169.6226, |
|
"eval_samples_per_second": 1.45, |
|
"eval_steps_per_second": 0.183, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.6566566566566568e-05, |
|
"loss": 0.8031, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 0.8701273798942566, |
|
"eval_runtime": 169.668, |
|
"eval_samples_per_second": 1.45, |
|
"eval_steps_per_second": 0.183, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.594094094094094e-05, |
|
"loss": 0.7914, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 0.8678767681121826, |
|
"eval_runtime": 169.5954, |
|
"eval_samples_per_second": 1.451, |
|
"eval_steps_per_second": 0.183, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.5315315315315316e-05, |
|
"loss": 0.7792, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_loss": 0.8583410382270813, |
|
"eval_runtime": 169.6512, |
|
"eval_samples_per_second": 1.45, |
|
"eval_steps_per_second": 0.183, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.468968968968969e-05, |
|
"loss": 0.7739, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_loss": 0.8527249693870544, |
|
"eval_runtime": 169.6094, |
|
"eval_samples_per_second": 1.45, |
|
"eval_steps_per_second": 0.183, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.4064064064064064e-05, |
|
"loss": 0.7829, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 0.8476611971855164, |
|
"eval_runtime": 169.7218, |
|
"eval_samples_per_second": 1.449, |
|
"eval_steps_per_second": 0.183, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.3438438438438438e-05, |
|
"loss": 0.7881, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 0.8437426686286926, |
|
"eval_runtime": 169.563, |
|
"eval_samples_per_second": 1.451, |
|
"eval_steps_per_second": 0.183, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.2812812812812813e-05, |
|
"loss": 0.791, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 0.8393863439559937, |
|
"eval_runtime": 169.5296, |
|
"eval_samples_per_second": 1.451, |
|
"eval_steps_per_second": 0.183, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.2187187187187189e-05, |
|
"loss": 0.6864, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 0.863290548324585, |
|
"eval_runtime": 169.551, |
|
"eval_samples_per_second": 1.451, |
|
"eval_steps_per_second": 0.183, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.1561561561561563e-05, |
|
"loss": 0.6342, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_loss": 0.8623055219650269, |
|
"eval_runtime": 169.5813, |
|
"eval_samples_per_second": 1.451, |
|
"eval_steps_per_second": 0.183, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.0935935935935937e-05, |
|
"loss": 0.6208, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_loss": 0.8574148416519165, |
|
"eval_runtime": 169.5698, |
|
"eval_samples_per_second": 1.451, |
|
"eval_steps_per_second": 0.183, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.031031031031031e-05, |
|
"loss": 0.5885, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_loss": 0.8582069873809814, |
|
"eval_runtime": 169.5406, |
|
"eval_samples_per_second": 1.451, |
|
"eval_steps_per_second": 0.183, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.684684684684685e-06, |
|
"loss": 0.607, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_loss": 0.8599240779876709, |
|
"eval_runtime": 169.6355, |
|
"eval_samples_per_second": 1.45, |
|
"eval_steps_per_second": 0.183, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 9.05905905905906e-06, |
|
"loss": 0.6149, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_loss": 0.8572269678115845, |
|
"eval_runtime": 169.5236, |
|
"eval_samples_per_second": 1.451, |
|
"eval_steps_per_second": 0.183, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 8.433433433433434e-06, |
|
"loss": 0.5893, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_loss": 0.8562537431716919, |
|
"eval_runtime": 169.5361, |
|
"eval_samples_per_second": 1.451, |
|
"eval_steps_per_second": 0.183, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 7.807807807807808e-06, |
|
"loss": 0.6176, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_loss": 0.8548978567123413, |
|
"eval_runtime": 169.5309, |
|
"eval_samples_per_second": 1.451, |
|
"eval_steps_per_second": 0.183, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 7.182182182182183e-06, |
|
"loss": 0.5953, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 0.8483610153198242, |
|
"eval_runtime": 169.5898, |
|
"eval_samples_per_second": 1.451, |
|
"eval_steps_per_second": 0.183, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.556556556556556e-06, |
|
"loss": 0.6088, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_loss": 0.8442074060440063, |
|
"eval_runtime": 169.5261, |
|
"eval_samples_per_second": 1.451, |
|
"eval_steps_per_second": 0.183, |
|
"step": 750 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 1000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 25, |
|
"total_flos": 6.766615596367872e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|