{
  "best_metric": 0.5069935321807861,
  "best_model_checkpoint": "saves/Llama2-7B/lora/train_1/checkpoint-110",
  "epoch": 0.22969647251845776,
  "eval_steps": 10,
  "global_step": 210,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010937927262783703,
      "grad_norm": 0.5937469005584717,
      "learning_rate": 0.00029999015487222375,
      "loss": 1.2211,
      "step": 10
    },
    {
      "epoch": 0.010937927262783703,
      "eval_loss": 0.550807535648346,
      "eval_runtime": 234.7461,
      "eval_samples_per_second": 20.073,
      "eval_steps_per_second": 0.63,
      "step": 10
    },
    {
      "epoch": 0.021875854525567406,
      "grad_norm": 0.4648227393627167,
      "learning_rate": 0.00029996062078124905,
      "loss": 0.446,
      "step": 20
    },
    {
      "epoch": 0.021875854525567406,
      "eval_loss": 0.5179979205131531,
      "eval_runtime": 234.6313,
      "eval_samples_per_second": 20.083,
      "eval_steps_per_second": 0.631,
      "step": 20
    },
    {
      "epoch": 0.03281378178835111,
      "grad_norm": 0.42532142996788025,
      "learning_rate": 0.0002999114016039678,
      "loss": 0.3889,
      "step": 30
    },
    {
      "epoch": 0.03281378178835111,
      "eval_loss": 0.5139310956001282,
      "eval_runtime": 234.5844,
      "eval_samples_per_second": 20.087,
      "eval_steps_per_second": 0.631,
      "step": 30
    },
    {
      "epoch": 0.04375170905113481,
      "grad_norm": 0.47056564688682556,
      "learning_rate": 0.00029984250380130117,
      "loss": 0.3757,
      "step": 40
    },
    {
      "epoch": 0.04375170905113481,
      "eval_loss": 0.575846791267395,
      "eval_runtime": 234.7308,
      "eval_samples_per_second": 20.074,
      "eval_steps_per_second": 0.631,
      "step": 40
    },
    {
      "epoch": 0.05468963631391851,
      "grad_norm": 0.2359870821237564,
      "learning_rate": 0.0002997539364173515,
      "loss": 0.3551,
      "step": 50
    },
    {
      "epoch": 0.05468963631391851,
      "eval_loss": 0.5461050271987915,
      "eval_runtime": 234.7378,
      "eval_samples_per_second": 20.073,
      "eval_steps_per_second": 0.63,
      "step": 50
    },
    {
      "epoch": 0.06562756357670221,
      "grad_norm": 0.19186203181743622,
      "learning_rate": 0.00029964571107821494,
      "loss": 0.3351,
      "step": 60
    },
    {
      "epoch": 0.06562756357670221,
      "eval_loss": 0.5407155752182007,
      "eval_runtime": 234.7344,
      "eval_samples_per_second": 20.074,
      "eval_steps_per_second": 0.63,
      "step": 60
    },
    {
      "epoch": 0.07656549083948591,
      "grad_norm": 0.25036656856536865,
      "learning_rate": 0.00029951784199045534,
      "loss": 0.3269,
      "step": 70
    },
    {
      "epoch": 0.07656549083948591,
      "eval_loss": 0.5335067510604858,
      "eval_runtime": 234.559,
      "eval_samples_per_second": 20.089,
      "eval_steps_per_second": 0.631,
      "step": 70
    },
    {
      "epoch": 0.08750341810226962,
      "grad_norm": 0.3150342106819153,
      "learning_rate": 0.0002993703459392396,
      "loss": 0.3231,
      "step": 80
    },
    {
      "epoch": 0.08750341810226962,
      "eval_loss": 0.5120783448219299,
      "eval_runtime": 234.641,
      "eval_samples_per_second": 20.082,
      "eval_steps_per_second": 0.631,
      "step": 80
    },
    {
      "epoch": 0.09844134536505332,
      "grad_norm": 0.3419171869754791,
      "learning_rate": 0.00029920324228613376,
      "loss": 0.3324,
      "step": 90
    },
    {
      "epoch": 0.09844134536505332,
      "eval_loss": 0.5196456909179688,
      "eval_runtime": 234.6621,
      "eval_samples_per_second": 20.08,
      "eval_steps_per_second": 0.631,
      "step": 90
    },
    {
      "epoch": 0.10937927262783702,
      "grad_norm": 0.2197423279285431,
      "learning_rate": 0.0002990165529665622,
      "loss": 0.3039,
      "step": 100
    },
    {
      "epoch": 0.10937927262783702,
      "eval_loss": 0.5251961350440979,
      "eval_runtime": 234.8661,
      "eval_samples_per_second": 20.062,
      "eval_steps_per_second": 0.63,
      "step": 100
    },
    {
      "epoch": 0.12031719989062073,
      "grad_norm": 0.16334187984466553,
      "learning_rate": 0.0002988103024869277,
      "loss": 0.3083,
      "step": 110
    },
    {
      "epoch": 0.12031719989062073,
      "eval_loss": 0.5069935321807861,
      "eval_runtime": 234.5526,
      "eval_samples_per_second": 20.089,
      "eval_steps_per_second": 0.631,
      "step": 110
    },
    {
      "epoch": 0.13125512715340443,
      "grad_norm": 0.2504599392414093,
      "learning_rate": 0.00029858451792139453,
      "loss": 0.3034,
      "step": 120
    },
    {
      "epoch": 0.13125512715340443,
      "eval_loss": 0.5375232100486755,
      "eval_runtime": 234.9066,
      "eval_samples_per_second": 20.059,
      "eval_steps_per_second": 0.63,
      "step": 120
    },
    {
      "epoch": 0.14219305441618812,
      "grad_norm": 0.20455080270767212,
      "learning_rate": 0.0002983392289083346,
      "loss": 0.3061,
      "step": 130
    },
    {
      "epoch": 0.14219305441618812,
      "eval_loss": 0.5345537066459656,
      "eval_runtime": 234.955,
      "eval_samples_per_second": 20.055,
      "eval_steps_per_second": 0.63,
      "step": 130
    },
    {
      "epoch": 0.15313098167897182,
      "grad_norm": 0.20371787250041962,
      "learning_rate": 0.0002980744676464371,
      "loss": 0.3021,
      "step": 140
    },
    {
      "epoch": 0.15313098167897182,
      "eval_loss": 0.5111611485481262,
      "eval_runtime": 234.7152,
      "eval_samples_per_second": 20.075,
      "eval_steps_per_second": 0.631,
      "step": 140
    },
    {
      "epoch": 0.16406890894175555,
      "grad_norm": 0.14063598215579987,
      "learning_rate": 0.0002977902688904813,
      "loss": 0.3067,
      "step": 150
    },
    {
      "epoch": 0.16406890894175555,
      "eval_loss": 0.5191352367401123,
      "eval_runtime": 234.5955,
      "eval_samples_per_second": 20.086,
      "eval_steps_per_second": 0.631,
      "step": 150
    },
    {
      "epoch": 0.17500683620453925,
      "grad_norm": 0.20113405585289001,
      "learning_rate": 0.00029748666994677467,
      "loss": 0.2958,
      "step": 160
    },
    {
      "epoch": 0.17500683620453925,
      "eval_loss": 0.5278245210647583,
      "eval_runtime": 234.5747,
      "eval_samples_per_second": 20.087,
      "eval_steps_per_second": 0.631,
      "step": 160
    },
    {
      "epoch": 0.18594476346732294,
      "grad_norm": 0.20178332924842834,
      "learning_rate": 0.00029716371066825593,
      "loss": 0.3002,
      "step": 170
    },
    {
      "epoch": 0.18594476346732294,
      "eval_loss": 0.5170465111732483,
      "eval_runtime": 234.6689,
      "eval_samples_per_second": 20.079,
      "eval_steps_per_second": 0.631,
      "step": 170
    },
    {
      "epoch": 0.19688269073010664,
      "grad_norm": 0.19617249071598053,
      "learning_rate": 0.0002968214334492632,
      "loss": 0.2896,
      "step": 180
    },
    {
      "epoch": 0.19688269073010664,
      "eval_loss": 0.5085877180099487,
      "eval_runtime": 234.6318,
      "eval_samples_per_second": 20.083,
      "eval_steps_per_second": 0.631,
      "step": 180
    },
    {
      "epoch": 0.20782061799289034,
      "grad_norm": 0.2610602080821991,
      "learning_rate": 0.00029645988321996917,
      "loss": 0.2989,
      "step": 190
    },
    {
      "epoch": 0.20782061799289034,
      "eval_loss": 0.530036985874176,
      "eval_runtime": 234.599,
      "eval_samples_per_second": 20.085,
      "eval_steps_per_second": 0.631,
      "step": 190
    },
    {
      "epoch": 0.21875854525567404,
      "grad_norm": 0.1975240856409073,
      "learning_rate": 0.00029607910744048336,
      "loss": 0.3012,
      "step": 200
    },
    {
      "epoch": 0.21875854525567404,
      "eval_loss": 0.5137269496917725,
      "eval_runtime": 234.4763,
      "eval_samples_per_second": 20.096,
      "eval_steps_per_second": 0.631,
      "step": 200
    },
    {
      "epoch": 0.22969647251845776,
      "grad_norm": 0.19913341104984283,
      "learning_rate": 0.00029567915609462174,
      "loss": 0.2802,
      "step": 210
    },
    {
      "epoch": 0.22969647251845776,
      "eval_loss": 0.5159074068069458,
      "eval_runtime": 234.549,
      "eval_samples_per_second": 20.09,
      "eval_steps_per_second": 0.631,
      "step": 210
    },
    {
      "epoch": 0.22969647251845776,
      "step": 210,
      "total_flos": 3.9689172647569e+17,
      "train_loss": 0.36669377031780426,
      "train_runtime": 15000.4627,
      "train_samples_per_second": 46.807,
      "train_steps_per_second": 0.183
    }
  ],
  "logging_steps": 10,
  "max_steps": 2742,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 10,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 10,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.9689172647569e+17,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}