|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 625, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 5e-06, |
|
"loss": 2.1158, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3913, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.449378330373002e-06, |
|
"loss": 1.2838, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.898756660746004e-06, |
|
"loss": 1.2016, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 8.348134991119005e-06, |
|
"loss": 1.0974, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 7.797513321492007e-06, |
|
"loss": 0.8999, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 7.246891651865009e-06, |
|
"loss": 0.7662, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.6962699822380115e-06, |
|
"loss": 0.7183, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 6.145648312611013e-06, |
|
"loss": 0.6274, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.595026642984015e-06, |
|
"loss": 0.4377, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.044404973357016e-06, |
|
"loss": 0.2646, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.493783303730018e-06, |
|
"loss": 0.2512, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 3.94316163410302e-06, |
|
"loss": 0.2128, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 3.3925399644760213e-06, |
|
"loss": 0.1259, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 2.841918294849023e-06, |
|
"loss": 0.0753, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 2.291296625222025e-06, |
|
"loss": 0.0686, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 1.7406749555950267e-06, |
|
"loss": 0.054, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 1.1900532859680285e-06, |
|
"loss": 0.0332, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 6.394316163410303e-07, |
|
"loss": 0.0204, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 8.880994671403197e-08, |
|
"loss": 0.0144, |
|
"step": 620 |
|
} |
|
], |
|
"logging_steps": 31, |
|
"max_steps": 625, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 625, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|