|
{
  "best_metric": 0.8432615399360657,
  "best_model_checkpoint": "Llama-3.2-1B-sportsqa-V1/checkpoint-150",
  "epoch": 6.511041009463723,
  "eval_steps": 10,
  "global_step": 170,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.3785488958990536,
      "grad_norm": 0.9724205136299133,
      "learning_rate": 0.0002,
      "loss": 1.4368,
      "step": 10
    },
    {
      "epoch": 0.3785488958990536,
      "eval_loss": 1.348753809928894,
      "eval_runtime": 6.5589,
      "eval_samples_per_second": 85.838,
      "eval_steps_per_second": 10.825,
      "step": 10
    },
    {
      "epoch": 0.7570977917981072,
      "grad_norm": 0.5118973255157471,
      "learning_rate": 0.0002,
      "loss": 1.2118,
      "step": 20
    },
    {
      "epoch": 0.7570977917981072,
      "eval_loss": 1.1572917699813843,
      "eval_runtime": 6.43,
      "eval_samples_per_second": 87.558,
      "eval_steps_per_second": 11.042,
      "step": 20
    },
    {
      "epoch": 1.1482649842271293,
      "grad_norm": 0.41862204670906067,
      "learning_rate": 0.0002,
      "loss": 1.1684,
      "step": 30
    },
    {
      "epoch": 1.1482649842271293,
      "eval_loss": 1.0873514413833618,
      "eval_runtime": 6.4834,
      "eval_samples_per_second": 86.838,
      "eval_steps_per_second": 10.951,
      "step": 30
    },
    {
      "epoch": 1.526813880126183,
      "grad_norm": 0.42144763469696045,
      "learning_rate": 0.0002,
      "loss": 0.9907,
      "step": 40
    },
    {
      "epoch": 1.526813880126183,
      "eval_loss": 1.0390795469284058,
      "eval_runtime": 6.4784,
      "eval_samples_per_second": 86.904,
      "eval_steps_per_second": 10.959,
      "step": 40
    },
    {
      "epoch": 1.9053627760252367,
      "grad_norm": 0.6625965237617493,
      "learning_rate": 0.0002,
      "loss": 0.8936,
      "step": 50
    },
    {
      "epoch": 1.9053627760252367,
      "eval_loss": 0.9832194447517395,
      "eval_runtime": 6.4575,
      "eval_samples_per_second": 87.185,
      "eval_steps_per_second": 10.995,
      "step": 50
    },
    {
      "epoch": 2.2965299684542586,
      "grad_norm": 0.5491335391998291,
      "learning_rate": 0.0002,
      "loss": 0.9537,
      "step": 60
    },
    {
      "epoch": 2.2965299684542586,
      "eval_loss": 0.9610708355903625,
      "eval_runtime": 6.4935,
      "eval_samples_per_second": 86.702,
      "eval_steps_per_second": 10.934,
      "step": 60
    },
    {
      "epoch": 2.6750788643533125,
      "grad_norm": 0.6222676038742065,
      "learning_rate": 0.0002,
      "loss": 0.7753,
      "step": 70
    },
    {
      "epoch": 2.6750788643533125,
      "eval_loss": 0.9286745190620422,
      "eval_runtime": 6.5271,
      "eval_samples_per_second": 86.256,
      "eval_steps_per_second": 10.878,
      "step": 70
    },
    {
      "epoch": 3.0662460567823344,
      "grad_norm": 0.47406917810440063,
      "learning_rate": 0.0002,
      "loss": 0.8796,
      "step": 80
    },
    {
      "epoch": 3.0662460567823344,
      "eval_loss": 0.9002482891082764,
      "eval_runtime": 6.5139,
      "eval_samples_per_second": 86.43,
      "eval_steps_per_second": 10.9,
      "step": 80
    },
    {
      "epoch": 3.444794952681388,
      "grad_norm": 0.54665207862854,
      "learning_rate": 0.0002,
      "loss": 0.6697,
      "step": 90
    },
    {
      "epoch": 3.444794952681388,
      "eval_loss": 0.8929345607757568,
      "eval_runtime": 6.349,
      "eval_samples_per_second": 88.676,
      "eval_steps_per_second": 11.183,
      "step": 90
    },
    {
      "epoch": 3.823343848580442,
      "grad_norm": 0.47858959436416626,
      "learning_rate": 0.0002,
      "loss": 0.7382,
      "step": 100
    },
    {
      "epoch": 3.823343848580442,
      "eval_loss": 0.8687711358070374,
      "eval_runtime": 6.4617,
      "eval_samples_per_second": 87.129,
      "eval_steps_per_second": 10.988,
      "step": 100
    },
    {
      "epoch": 4.214511041009464,
      "grad_norm": 0.6165306568145752,
      "learning_rate": 0.0002,
      "loss": 0.6795,
      "step": 110
    },
    {
      "epoch": 4.214511041009464,
      "eval_loss": 0.8820136189460754,
      "eval_runtime": 6.4456,
      "eval_samples_per_second": 87.346,
      "eval_steps_per_second": 11.015,
      "step": 110
    },
    {
      "epoch": 4.593059936908517,
      "grad_norm": 0.5470077395439148,
      "learning_rate": 0.0002,
      "loss": 0.646,
      "step": 120
    },
    {
      "epoch": 4.593059936908517,
      "eval_loss": 0.8543145060539246,
      "eval_runtime": 6.4186,
      "eval_samples_per_second": 87.713,
      "eval_steps_per_second": 11.062,
      "step": 120
    },
    {
      "epoch": 4.971608832807571,
      "grad_norm": 0.5192540287971497,
      "learning_rate": 0.0002,
      "loss": 0.5972,
      "step": 130
    },
    {
      "epoch": 4.971608832807571,
      "eval_loss": 0.8454616069793701,
      "eval_runtime": 6.3674,
      "eval_samples_per_second": 88.419,
      "eval_steps_per_second": 11.151,
      "step": 130
    },
    {
      "epoch": 5.3627760252365935,
      "grad_norm": 0.5664217472076416,
      "learning_rate": 0.0002,
      "loss": 0.6179,
      "step": 140
    },
    {
      "epoch": 5.3627760252365935,
      "eval_loss": 0.845314085483551,
      "eval_runtime": 6.4608,
      "eval_samples_per_second": 87.141,
      "eval_steps_per_second": 10.989,
      "step": 140
    },
    {
      "epoch": 5.7413249211356465,
      "grad_norm": 0.5141603350639343,
      "learning_rate": 0.0002,
      "loss": 0.5488,
      "step": 150
    },
    {
      "epoch": 5.7413249211356465,
      "eval_loss": 0.8432615399360657,
      "eval_runtime": 6.4351,
      "eval_samples_per_second": 87.489,
      "eval_steps_per_second": 11.033,
      "step": 150
    },
    {
      "epoch": 6.132492113564669,
      "grad_norm": 0.4900747835636139,
      "learning_rate": 0.0002,
      "loss": 0.5574,
      "step": 160
    },
    {
      "epoch": 6.132492113564669,
      "eval_loss": 0.8658251166343689,
      "eval_runtime": 6.4211,
      "eval_samples_per_second": 87.679,
      "eval_steps_per_second": 11.057,
      "step": 160
    },
    {
      "epoch": 6.511041009463723,
      "grad_norm": 0.5367661118507385,
      "learning_rate": 0.0002,
      "loss": 0.4778,
      "step": 170
    },
    {
      "epoch": 6.511041009463723,
      "eval_loss": 0.8633659482002258,
      "eval_runtime": 6.4456,
      "eval_samples_per_second": 87.346,
      "eval_steps_per_second": 11.015,
      "step": 170
    },
    {
      "epoch": 6.511041009463723,
      "step": 170,
      "total_flos": 1.5618225878728704e+16,
      "train_loss": 0.8142486740561092,
      "train_runtime": 890.4577,
      "train_samples_per_second": 39.817,
      "train_steps_per_second": 0.204
    }
  ],
  "logging_steps": 10,
  "max_steps": 182,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 7,
  "save_steps": 10,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 5
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.5618225878728704e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|