|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9999572667834707, |
|
"eval_steps": 200, |
|
"global_step": 2925, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.942542412504543e-05, |
|
"loss": 1.0325, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 0.8921748995780945, |
|
"eval_runtime": 119.3845, |
|
"eval_samples_per_second": 43.565, |
|
"eval_steps_per_second": 21.787, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.77281074950681e-05, |
|
"loss": 0.9136, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 0.880075991153717, |
|
"eval_runtime": 119.7923, |
|
"eval_samples_per_second": 43.417, |
|
"eval_steps_per_second": 21.713, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.498606908508754e-05, |
|
"loss": 0.9043, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 0.8752565979957581, |
|
"eval_runtime": 120.3536, |
|
"eval_samples_per_second": 43.214, |
|
"eval_steps_per_second": 21.611, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.132534962458962e-05, |
|
"loss": 0.8903, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 0.8710557222366333, |
|
"eval_runtime": 120.031, |
|
"eval_samples_per_second": 43.33, |
|
"eval_steps_per_second": 21.669, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.69142180005327e-05, |
|
"loss": 0.9056, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 0.8682616949081421, |
|
"eval_runtime": 119.9746, |
|
"eval_samples_per_second": 43.351, |
|
"eval_steps_per_second": 21.68, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.195543659791132e-05, |
|
"loss": 0.9001, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 0.867277204990387, |
|
"eval_runtime": 119.7846, |
|
"eval_samples_per_second": 43.42, |
|
"eval_steps_per_second": 21.714, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.667694110977506e-05, |
|
"loss": 0.8949, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 0.8667991161346436, |
|
"eval_runtime": 120.2926, |
|
"eval_samples_per_second": 43.236, |
|
"eval_steps_per_second": 21.622, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.1321363229207096e-05, |
|
"loss": 0.8917, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 0.8655699491500854, |
|
"eval_runtime": 120.5955, |
|
"eval_samples_per_second": 43.128, |
|
"eval_steps_per_second": 21.568, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.613487782393661e-05, |
|
"loss": 0.8951, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 0.8647685050964355, |
|
"eval_runtime": 120.2019, |
|
"eval_samples_per_second": 43.269, |
|
"eval_steps_per_second": 21.639, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.1355887245137383e-05, |
|
"loss": 0.9014, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 0.8637479543685913, |
|
"eval_runtime": 119.9752, |
|
"eval_samples_per_second": 43.351, |
|
"eval_steps_per_second": 21.679, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.20406290826649e-06, |
|
"loss": 0.8874, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 0.8629816174507141, |
|
"eval_runtime": 120.3564, |
|
"eval_samples_per_second": 43.213, |
|
"eval_steps_per_second": 21.611, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.8702478614051355e-06, |
|
"loss": 0.8968, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 0.8628040552139282, |
|
"eval_runtime": 120.5881, |
|
"eval_samples_per_second": 43.13, |
|
"eval_steps_per_second": 21.569, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.5076844803522922e-06, |
|
"loss": 0.8755, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 0.8626384735107422, |
|
"eval_runtime": 120.166, |
|
"eval_samples_per_second": 43.282, |
|
"eval_steps_per_second": 21.645, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.2497051885228827e-07, |
|
"loss": 0.9029, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 0.8625245094299316, |
|
"eval_runtime": 120.5975, |
|
"eval_samples_per_second": 43.127, |
|
"eval_steps_per_second": 21.568, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2925, |
|
"total_flos": 1.4966547015951974e+17, |
|
"train_loss": 0.9060284554245126, |
|
"train_runtime": 4147.2863, |
|
"train_samples_per_second": 11.285, |
|
"train_steps_per_second": 0.705 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 2925, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"total_flos": 1.4966547015951974e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|