{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 0.214, | |
"eval_steps": 500, | |
"global_step": 107, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.02, | |
"learning_rate": 4e-05, | |
"loss": 1.3524, | |
"step": 10 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 8e-05, | |
"loss": 1.1881, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 0.00012, | |
"loss": 1.1723, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 0.00016, | |
"loss": 1.0255, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 0.0002, | |
"loss": 0.9895, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 0.00019555555555555556, | |
"loss": 0.9051, | |
"step": 60 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 0.00019111111111111114, | |
"loss": 0.9445, | |
"step": 70 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 0.0001866666666666667, | |
"loss": 0.8694, | |
"step": 80 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 0.00018222222222222224, | |
"loss": 0.9219, | |
"step": 90 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 0.00017777777777777779, | |
"loss": 0.8704, | |
"step": 100 | |
} | |
], | |
"logging_steps": 10, | |
"max_steps": 500, | |
"num_train_epochs": 1, | |
"save_steps": 500, | |
"total_flos": 4441092837605376.0, | |
"trial_name": null, | |
"trial_params": null | |
} | |