Llama-3.2-1B-sportsqa-V1 / trainer_state.json
Kwhale's picture
Training in progress, step 10
f1e9514 verified
raw
history blame
7.59 kB
{
"best_metric": 0.8432615399360657,
"best_model_checkpoint": "Llama-3.2-1B-sportsqa-V1/checkpoint-150",
"epoch": 6.511041009463723,
"eval_steps": 10,
"global_step": 170,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.3785488958990536,
"grad_norm": 0.9724205136299133,
"learning_rate": 0.0002,
"loss": 1.4368,
"step": 10
},
{
"epoch": 0.3785488958990536,
"eval_loss": 1.348753809928894,
"eval_runtime": 6.5589,
"eval_samples_per_second": 85.838,
"eval_steps_per_second": 10.825,
"step": 10
},
{
"epoch": 0.7570977917981072,
"grad_norm": 0.5118973255157471,
"learning_rate": 0.0002,
"loss": 1.2118,
"step": 20
},
{
"epoch": 0.7570977917981072,
"eval_loss": 1.1572917699813843,
"eval_runtime": 6.43,
"eval_samples_per_second": 87.558,
"eval_steps_per_second": 11.042,
"step": 20
},
{
"epoch": 1.1482649842271293,
"grad_norm": 0.41862204670906067,
"learning_rate": 0.0002,
"loss": 1.1684,
"step": 30
},
{
"epoch": 1.1482649842271293,
"eval_loss": 1.0873514413833618,
"eval_runtime": 6.4834,
"eval_samples_per_second": 86.838,
"eval_steps_per_second": 10.951,
"step": 30
},
{
"epoch": 1.526813880126183,
"grad_norm": 0.42144763469696045,
"learning_rate": 0.0002,
"loss": 0.9907,
"step": 40
},
{
"epoch": 1.526813880126183,
"eval_loss": 1.0390795469284058,
"eval_runtime": 6.4784,
"eval_samples_per_second": 86.904,
"eval_steps_per_second": 10.959,
"step": 40
},
{
"epoch": 1.9053627760252367,
"grad_norm": 0.6625965237617493,
"learning_rate": 0.0002,
"loss": 0.8936,
"step": 50
},
{
"epoch": 1.9053627760252367,
"eval_loss": 0.9832194447517395,
"eval_runtime": 6.4575,
"eval_samples_per_second": 87.185,
"eval_steps_per_second": 10.995,
"step": 50
},
{
"epoch": 2.2965299684542586,
"grad_norm": 0.5491335391998291,
"learning_rate": 0.0002,
"loss": 0.9537,
"step": 60
},
{
"epoch": 2.2965299684542586,
"eval_loss": 0.9610708355903625,
"eval_runtime": 6.4935,
"eval_samples_per_second": 86.702,
"eval_steps_per_second": 10.934,
"step": 60
},
{
"epoch": 2.6750788643533125,
"grad_norm": 0.6222676038742065,
"learning_rate": 0.0002,
"loss": 0.7753,
"step": 70
},
{
"epoch": 2.6750788643533125,
"eval_loss": 0.9286745190620422,
"eval_runtime": 6.5271,
"eval_samples_per_second": 86.256,
"eval_steps_per_second": 10.878,
"step": 70
},
{
"epoch": 3.0662460567823344,
"grad_norm": 0.47406917810440063,
"learning_rate": 0.0002,
"loss": 0.8796,
"step": 80
},
{
"epoch": 3.0662460567823344,
"eval_loss": 0.9002482891082764,
"eval_runtime": 6.5139,
"eval_samples_per_second": 86.43,
"eval_steps_per_second": 10.9,
"step": 80
},
{
"epoch": 3.444794952681388,
"grad_norm": 0.54665207862854,
"learning_rate": 0.0002,
"loss": 0.6697,
"step": 90
},
{
"epoch": 3.444794952681388,
"eval_loss": 0.8929345607757568,
"eval_runtime": 6.349,
"eval_samples_per_second": 88.676,
"eval_steps_per_second": 11.183,
"step": 90
},
{
"epoch": 3.823343848580442,
"grad_norm": 0.47858959436416626,
"learning_rate": 0.0002,
"loss": 0.7382,
"step": 100
},
{
"epoch": 3.823343848580442,
"eval_loss": 0.8687711358070374,
"eval_runtime": 6.4617,
"eval_samples_per_second": 87.129,
"eval_steps_per_second": 10.988,
"step": 100
},
{
"epoch": 4.214511041009464,
"grad_norm": 0.6165306568145752,
"learning_rate": 0.0002,
"loss": 0.6795,
"step": 110
},
{
"epoch": 4.214511041009464,
"eval_loss": 0.8820136189460754,
"eval_runtime": 6.4456,
"eval_samples_per_second": 87.346,
"eval_steps_per_second": 11.015,
"step": 110
},
{
"epoch": 4.593059936908517,
"grad_norm": 0.5470077395439148,
"learning_rate": 0.0002,
"loss": 0.646,
"step": 120
},
{
"epoch": 4.593059936908517,
"eval_loss": 0.8543145060539246,
"eval_runtime": 6.4186,
"eval_samples_per_second": 87.713,
"eval_steps_per_second": 11.062,
"step": 120
},
{
"epoch": 4.971608832807571,
"grad_norm": 0.5192540287971497,
"learning_rate": 0.0002,
"loss": 0.5972,
"step": 130
},
{
"epoch": 4.971608832807571,
"eval_loss": 0.8454616069793701,
"eval_runtime": 6.3674,
"eval_samples_per_second": 88.419,
"eval_steps_per_second": 11.151,
"step": 130
},
{
"epoch": 5.3627760252365935,
"grad_norm": 0.5664217472076416,
"learning_rate": 0.0002,
"loss": 0.6179,
"step": 140
},
{
"epoch": 5.3627760252365935,
"eval_loss": 0.845314085483551,
"eval_runtime": 6.4608,
"eval_samples_per_second": 87.141,
"eval_steps_per_second": 10.989,
"step": 140
},
{
"epoch": 5.7413249211356465,
"grad_norm": 0.5141603350639343,
"learning_rate": 0.0002,
"loss": 0.5488,
"step": 150
},
{
"epoch": 5.7413249211356465,
"eval_loss": 0.8432615399360657,
"eval_runtime": 6.4351,
"eval_samples_per_second": 87.489,
"eval_steps_per_second": 11.033,
"step": 150
},
{
"epoch": 6.132492113564669,
"grad_norm": 0.4900747835636139,
"learning_rate": 0.0002,
"loss": 0.5574,
"step": 160
},
{
"epoch": 6.132492113564669,
"eval_loss": 0.8658251166343689,
"eval_runtime": 6.4211,
"eval_samples_per_second": 87.679,
"eval_steps_per_second": 11.057,
"step": 160
},
{
"epoch": 6.511041009463723,
"grad_norm": 0.5367661118507385,
"learning_rate": 0.0002,
"loss": 0.4778,
"step": 170
},
{
"epoch": 6.511041009463723,
"eval_loss": 0.8633659482002258,
"eval_runtime": 6.4456,
"eval_samples_per_second": 87.346,
"eval_steps_per_second": 11.015,
"step": 170
},
{
"epoch": 6.511041009463723,
"step": 170,
"total_flos": 1.5618225878728704e+16,
"train_loss": 0.8142486740561092,
"train_runtime": 890.4577,
"train_samples_per_second": 39.817,
"train_steps_per_second": 0.204
}
],
"logging_steps": 10,
"max_steps": 182,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 10,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 5
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.5618225878728704e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}