|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9951219512195122, |
|
"eval_steps": 500, |
|
"global_step": 102, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00975609756097561, |
|
"grad_norm": 6.261786177237342, |
|
"learning_rate": 9.090909090909092e-05, |
|
"loss": 1.1256, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04878048780487805, |
|
"grad_norm": 2.1112596954065914, |
|
"learning_rate": 0.00045454545454545455, |
|
"loss": 1.049, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0975609756097561, |
|
"grad_norm": 1.5895813487100507, |
|
"learning_rate": 0.0009090909090909091, |
|
"loss": 1.0974, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14634146341463414, |
|
"grad_norm": 0.32863787167794134, |
|
"learning_rate": 0.0009952402219937815, |
|
"loss": 0.9164, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.1951219512195122, |
|
"grad_norm": 0.23963144918280163, |
|
"learning_rate": 0.0009760588329553571, |
|
"loss": 0.8434, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.24390243902439024, |
|
"grad_norm": 0.14493890865001488, |
|
"learning_rate": 0.0009427280128266049, |
|
"loss": 0.8011, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.2926829268292683, |
|
"grad_norm": 0.12052712473534186, |
|
"learning_rate": 0.0008962384209755452, |
|
"loss": 0.7652, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.34146341463414637, |
|
"grad_norm": 0.0981844421832599, |
|
"learning_rate": 0.0008379718220723773, |
|
"loss": 0.7453, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.3902439024390244, |
|
"grad_norm": 0.18540719570949135, |
|
"learning_rate": 0.0007696600172495997, |
|
"loss": 0.7317, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.43902439024390244, |
|
"grad_norm": 0.12172997652512386, |
|
"learning_rate": 0.0006933333714707094, |
|
"loss": 0.7179, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4878048780487805, |
|
"grad_norm": 0.1050067175867305, |
|
"learning_rate": 0.0006112604669781572, |
|
"loss": 0.7047, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5365853658536586, |
|
"grad_norm": 0.0824375568337565, |
|
"learning_rate": 0.0005258806764421047, |
|
"loss": 0.6991, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5853658536585366, |
|
"grad_norm": 0.09661916029526949, |
|
"learning_rate": 0.00043973165987233853, |
|
"loss": 0.6865, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6341463414634146, |
|
"grad_norm": 0.07527219081930579, |
|
"learning_rate": 0.0003553739402317162, |
|
"loss": 0.6833, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6829268292682927, |
|
"grad_norm": 0.07406811411334399, |
|
"learning_rate": 0.00027531479951641924, |
|
"loss": 0.6793, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.7317073170731707, |
|
"grad_norm": 0.21265879474641486, |
|
"learning_rate": 0.00020193375726538737, |
|
"loss": 0.6726, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7804878048780488, |
|
"grad_norm": 0.06699755926217109, |
|
"learning_rate": 0.0001374118464283119, |
|
"loss": 0.6685, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.8292682926829268, |
|
"grad_norm": 0.0915919461803413, |
|
"learning_rate": 8.366678865639687e-05, |
|
"loss": 0.6716, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8780487804878049, |
|
"grad_norm": 0.06490947271665126, |
|
"learning_rate": 4.2295995737316854e-05, |
|
"loss": 0.6692, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.926829268292683, |
|
"grad_norm": 0.1069137841001841, |
|
"learning_rate": 1.4529091286973995e-05, |
|
"loss": 0.6654, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.975609756097561, |
|
"grad_norm": 0.043578368206132125, |
|
"learning_rate": 1.1913638493762368e-06, |
|
"loss": 0.6674, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9951219512195122, |
|
"eval_loss": 1.0117870569229126, |
|
"eval_runtime": 125.6267, |
|
"eval_samples_per_second": 20.871, |
|
"eval_steps_per_second": 0.653, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.9951219512195122, |
|
"step": 102, |
|
"total_flos": 128481799176192.0, |
|
"train_loss": 0.7558732021088693, |
|
"train_runtime": 2589.5829, |
|
"train_samples_per_second": 5.059, |
|
"train_steps_per_second": 0.039 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 102, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 128481799176192.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|