{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.6531149665213268,
  "eval_steps": 500,
  "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05539654694857354,
      "grad_norm": 3.9366531372070312,
      "learning_rate": 2.7700831024930747e-06,
      "loss": 12.1842,
      "step": 100
    },
    {
      "epoch": 0.11079309389714707,
      "grad_norm": 0.5843711495399475,
      "learning_rate": 5.540166204986149e-06,
      "loss": 5.2545,
      "step": 200
    },
    {
      "epoch": 0.1661896408457206,
      "grad_norm": 0.3186987638473511,
      "learning_rate": 8.310249307479224e-06,
      "loss": 1.5282,
      "step": 300
    },
    {
      "epoch": 0.22158618779429415,
      "grad_norm": 0.22694998979568481,
      "learning_rate": 1.1080332409972299e-05,
      "loss": 0.8633,
      "step": 400
    },
    {
      "epoch": 0.2769827347428677,
      "grad_norm": 0.4204826056957245,
      "learning_rate": 1.3850415512465375e-05,
      "loss": 0.6224,
      "step": 500
    },
    {
      "epoch": 0.3323792816914412,
      "grad_norm": 0.23341064155101776,
      "learning_rate": 1.6620498614958448e-05,
      "loss": 0.5006,
      "step": 600
    },
    {
      "epoch": 0.38777582864001475,
      "grad_norm": 0.29134589433670044,
      "learning_rate": 1.9390581717451524e-05,
      "loss": 0.4471,
      "step": 700
    },
    {
      "epoch": 0.4431723755885883,
      "grad_norm": 0.4506121277809143,
      "learning_rate": 2.2160664819944597e-05,
      "loss": 0.4142,
      "step": 800
    },
    {
      "epoch": 0.49856892253716184,
      "grad_norm": 0.46713459491729736,
      "learning_rate": 2.4930747922437674e-05,
      "loss": 0.3951,
      "step": 900
    },
    {
      "epoch": 0.5539654694857354,
      "grad_norm": 0.49562305212020874,
      "learning_rate": 2.770083102493075e-05,
      "loss": 0.3778,
      "step": 1000
    },
    {
      "epoch": 0.6093620164343089,
      "grad_norm": 0.4143618047237396,
      "learning_rate": 3.0470914127423823e-05,
      "loss": 0.374,
      "step": 1100
    },
    {
      "epoch": 0.6647585633828824,
      "grad_norm": 0.28604185581207275,
      "learning_rate": 3.3240997229916896e-05,
      "loss": 0.3631,
      "step": 1200
    },
    {
      "epoch": 0.720155110331456,
      "grad_norm": 0.3391754627227783,
      "learning_rate": 3.601108033240997e-05,
      "loss": 0.3613,
      "step": 1300
    },
    {
      "epoch": 0.7755516572800295,
      "grad_norm": 0.237514466047287,
      "learning_rate": 3.878116343490305e-05,
      "loss": 0.3551,
      "step": 1400
    },
    {
      "epoch": 0.830948204228603,
      "grad_norm": 0.2939525246620178,
      "learning_rate": 4.155124653739612e-05,
      "loss": 0.3504,
      "step": 1500
    },
    {
      "epoch": 0.8863447511771766,
      "grad_norm": 0.3228910267353058,
      "learning_rate": 4.4321329639889195e-05,
      "loss": 0.3489,
      "step": 1600
    },
    {
      "epoch": 0.9417412981257501,
      "grad_norm": 0.4955917000770569,
      "learning_rate": 4.709141274238227e-05,
      "loss": 0.346,
      "step": 1700
    },
    {
      "epoch": 0.9971378450743237,
      "grad_norm": 0.7099990248680115,
      "learning_rate": 4.986149584487535e-05,
      "loss": 0.3378,
      "step": 1800
    },
    {
      "epoch": 1.0525343920228973,
      "grad_norm": 0.39987462759017944,
      "learning_rate": 4.997864395968252e-05,
      "loss": 0.3337,
      "step": 1900
    },
    {
      "epoch": 1.1079309389714709,
      "grad_norm": 0.3917470872402191,
      "learning_rate": 4.991006183625085e-05,
      "loss": 0.3329,
      "step": 2000
    },
    {
      "epoch": 0.5486165718779146,
      "grad_norm": 0.07418235391378403,
      "learning_rate": 2.7436634439508755e-06,
      "loss": 0.3044,
      "step": 2100
    },
    {
      "epoch": 0.5747411705387676,
      "grad_norm": 0.09766950458288193,
      "learning_rate": 2.874314084139012e-06,
      "loss": 0.3074,
      "step": 2200
    },
    {
      "epoch": 0.6008657691996206,
      "grad_norm": 0.09052286297082901,
      "learning_rate": 3.0049647243271495e-06,
      "loss": 0.303,
      "step": 2300
    },
    {
      "epoch": 0.6269903678604738,
      "grad_norm": 0.08768751472234726,
      "learning_rate": 3.135615364515286e-06,
      "loss": 0.3006,
      "step": 2400
    },
    {
      "epoch": 0.6531149665213268,
      "grad_norm": 0.08635412901639938,
      "learning_rate": 3.2662660047034235e-06,
      "loss": 0.2994,
      "step": 2500
    }
  ],
  "logging_steps": 100,
  "max_steps": 191350,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3435564032e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}