|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.05790622665655238, |
|
"eval_steps": 500, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002895311332827619, |
|
"grad_norm": 1.7338896989822388, |
|
"learning_rate": 4.985523443335862e-05, |
|
"loss": 3.1507, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.005790622665655238, |
|
"grad_norm": 1.2579742670059204, |
|
"learning_rate": 4.9710468866717244e-05, |
|
"loss": 2.6357, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.008685933998482857, |
|
"grad_norm": 0.9996505975723267, |
|
"learning_rate": 4.956570330007586e-05, |
|
"loss": 2.4288, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.011581245331310476, |
|
"grad_norm": 0.9657206535339355, |
|
"learning_rate": 4.942093773343448e-05, |
|
"loss": 2.2119, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.014476556664138095, |
|
"grad_norm": 1.044640064239502, |
|
"learning_rate": 4.92761721667931e-05, |
|
"loss": 2.1225, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.017371867996965714, |
|
"grad_norm": 0.8428456783294678, |
|
"learning_rate": 4.913140660015172e-05, |
|
"loss": 2.0409, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.02026717932979333, |
|
"grad_norm": 1.2827749252319336, |
|
"learning_rate": 4.898664103351033e-05, |
|
"loss": 1.9031, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.023162490662620952, |
|
"grad_norm": 0.9291247129440308, |
|
"learning_rate": 4.884187546686895e-05, |
|
"loss": 1.8438, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.02605780199544857, |
|
"grad_norm": 0.7757951617240906, |
|
"learning_rate": 4.8697109900227575e-05, |
|
"loss": 1.7833, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.02895311332827619, |
|
"grad_norm": 1.520269751548767, |
|
"learning_rate": 4.855234433358619e-05, |
|
"loss": 1.6942, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.03184842466110381, |
|
"grad_norm": 1.2558553218841553, |
|
"learning_rate": 4.840757876694481e-05, |
|
"loss": 1.6669, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.03474373599393143, |
|
"grad_norm": 1.3526251316070557, |
|
"learning_rate": 4.826281320030343e-05, |
|
"loss": 1.6115, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.037639047326759045, |
|
"grad_norm": 2.5751099586486816, |
|
"learning_rate": 4.811804763366205e-05, |
|
"loss": 1.5629, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.04053435865958666, |
|
"grad_norm": 1.3569105863571167, |
|
"learning_rate": 4.797328206702067e-05, |
|
"loss": 1.5258, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.04342966999241429, |
|
"grad_norm": 1.7734428644180298, |
|
"learning_rate": 4.782851650037929e-05, |
|
"loss": 1.475, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.046324981325241904, |
|
"grad_norm": 0.7975415587425232, |
|
"learning_rate": 4.7683750933737905e-05, |
|
"loss": 1.437, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.04922029265806952, |
|
"grad_norm": 0.8590123057365417, |
|
"learning_rate": 4.753898536709653e-05, |
|
"loss": 1.4263, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.05211560399089714, |
|
"grad_norm": 1.1225152015686035, |
|
"learning_rate": 4.739421980045515e-05, |
|
"loss": 1.3528, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.05501091532372476, |
|
"grad_norm": 1.1342971324920654, |
|
"learning_rate": 4.7249454233813765e-05, |
|
"loss": 1.3576, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.05790622665655238, |
|
"grad_norm": 0.9445364475250244, |
|
"learning_rate": 4.710468866717238e-05, |
|
"loss": 1.3283, |
|
"step": 10000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 172693, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.180672512e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|