|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 10560, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.21040940284729, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.79, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8595349299413694, |
|
"eval_f1": 0.13053613053613053, |
|
"eval_loss": 0.4638139605522156, |
|
"eval_precision": 0.330188679245283, |
|
"eval_recall": 0.08134805345729228, |
|
"eval_runtime": 4.4538, |
|
"eval_samples_per_second": 209.931, |
|
"eval_steps_per_second": 3.368, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.8728626370429993, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.3919, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9275067077412302, |
|
"eval_f1": 0.6317512274959083, |
|
"eval_loss": 0.2518700361251831, |
|
"eval_precision": 0.5953727506426735, |
|
"eval_recall": 0.6728646135967461, |
|
"eval_runtime": 4.5405, |
|
"eval_samples_per_second": 205.922, |
|
"eval_steps_per_second": 3.304, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.2418427467346191, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.2386, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9381894067375535, |
|
"eval_f1": 0.7159389794844818, |
|
"eval_loss": 0.1926584243774414, |
|
"eval_precision": 0.6540124939932724, |
|
"eval_recall": 0.7908192911098199, |
|
"eval_runtime": 4.5327, |
|
"eval_samples_per_second": 206.279, |
|
"eval_steps_per_second": 3.309, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.416585087776184, |
|
"learning_rate": 4e-05, |
|
"loss": 0.193, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.944797774023651, |
|
"eval_f1": 0.7463787200421385, |
|
"eval_loss": 0.16769319772720337, |
|
"eval_precision": 0.6825626204238922, |
|
"eval_recall": 0.8233585124927367, |
|
"eval_runtime": 4.5414, |
|
"eval_samples_per_second": 205.885, |
|
"eval_steps_per_second": 3.303, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.0125393867492676, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.1712, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9475802444599026, |
|
"eval_f1": 0.7629299028616435, |
|
"eval_loss": 0.1593998372554779, |
|
"eval_precision": 0.6958812260536399, |
|
"eval_recall": 0.8442765833817548, |
|
"eval_runtime": 4.5978, |
|
"eval_samples_per_second": 203.36, |
|
"eval_steps_per_second": 3.262, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.202476143836975, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.1596, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9497664712312431, |
|
"eval_f1": 0.7750591949486978, |
|
"eval_loss": 0.1543859839439392, |
|
"eval_precision": 0.708173076923077, |
|
"eval_recall": 0.8558977338756537, |
|
"eval_runtime": 4.6439, |
|
"eval_samples_per_second": 201.34, |
|
"eval_steps_per_second": 3.23, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.2051475048065186, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.1524, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.950561462784458, |
|
"eval_f1": 0.7727628489433862, |
|
"eval_loss": 0.15189574658870697, |
|
"eval_precision": 0.7012310606060606, |
|
"eval_recall": 0.8605461940732132, |
|
"eval_runtime": 4.7943, |
|
"eval_samples_per_second": 195.022, |
|
"eval_steps_per_second": 3.129, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.5321469306945801, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1452, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9521514458908874, |
|
"eval_f1": 0.7842202806460153, |
|
"eval_loss": 0.14606598019599915, |
|
"eval_precision": 0.7203307392996109, |
|
"eval_recall": 0.8605461940732132, |
|
"eval_runtime": 4.7586, |
|
"eval_samples_per_second": 196.486, |
|
"eval_steps_per_second": 3.152, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.277453064918518, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.1397, |
|
"step": 4752 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9535426811090132, |
|
"eval_f1": 0.7858095492131236, |
|
"eval_loss": 0.14319637417793274, |
|
"eval_precision": 0.7263313609467456, |
|
"eval_recall": 0.8558977338756537, |
|
"eval_runtime": 4.5883, |
|
"eval_samples_per_second": 203.779, |
|
"eval_steps_per_second": 3.269, |
|
"step": 4752 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.3887606859207153, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1369, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9539401768856206, |
|
"eval_f1": 0.784512683578104, |
|
"eval_loss": 0.13940192759037018, |
|
"eval_precision": 0.7257905138339921, |
|
"eval_recall": 0.8535735037768739, |
|
"eval_runtime": 4.5346, |
|
"eval_samples_per_second": 206.194, |
|
"eval_steps_per_second": 3.308, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 1.879906415939331, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1336, |
|
"step": 5808 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.954287985690152, |
|
"eval_f1": 0.7872111767866737, |
|
"eval_loss": 0.13752727210521698, |
|
"eval_precision": 0.7321339330334833, |
|
"eval_recall": 0.8512492736780941, |
|
"eval_runtime": 4.5356, |
|
"eval_samples_per_second": 206.145, |
|
"eval_steps_per_second": 3.307, |
|
"step": 5808 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.3187131881713867, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1305, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9547351684388353, |
|
"eval_f1": 0.7895726955119592, |
|
"eval_loss": 0.13747519254684448, |
|
"eval_precision": 0.7345, |
|
"eval_recall": 0.8535735037768739, |
|
"eval_runtime": 4.5267, |
|
"eval_samples_per_second": 206.554, |
|
"eval_steps_per_second": 3.314, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.600463628768921, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1281, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9547351684388353, |
|
"eval_f1": 0.7887248322147652, |
|
"eval_loss": 0.13505811989307404, |
|
"eval_precision": 0.7330339321357285, |
|
"eval_recall": 0.8535735037768739, |
|
"eval_runtime": 4.6346, |
|
"eval_samples_per_second": 201.745, |
|
"eval_steps_per_second": 3.237, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 1.121996283531189, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1252, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9553314121037464, |
|
"eval_f1": 0.794345158708989, |
|
"eval_loss": 0.13602255284786224, |
|
"eval_precision": 0.7342209072978304, |
|
"eval_recall": 0.8651946542707728, |
|
"eval_runtime": 4.5689, |
|
"eval_samples_per_second": 204.646, |
|
"eval_steps_per_second": 3.283, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.0593712329864502, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.124, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9541389247739243, |
|
"eval_f1": 0.7874899759422614, |
|
"eval_loss": 0.13636602461338043, |
|
"eval_precision": 0.7292079207920792, |
|
"eval_recall": 0.8558977338756537, |
|
"eval_runtime": 4.6077, |
|
"eval_samples_per_second": 202.922, |
|
"eval_steps_per_second": 3.255, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.4150090217590332, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1234, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.954933916327139, |
|
"eval_f1": 0.7875565009306035, |
|
"eval_loss": 0.13505925238132477, |
|
"eval_precision": 0.7259803921568627, |
|
"eval_recall": 0.8605461940732132, |
|
"eval_runtime": 4.5535, |
|
"eval_samples_per_second": 205.338, |
|
"eval_steps_per_second": 3.294, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.8472805619239807, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1224, |
|
"step": 8976 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.954933916327139, |
|
"eval_f1": 0.7918106886466365, |
|
"eval_loss": 0.13572688400745392, |
|
"eval_precision": 0.7299019607843137, |
|
"eval_recall": 0.8651946542707728, |
|
"eval_runtime": 4.5196, |
|
"eval_samples_per_second": 206.878, |
|
"eval_steps_per_second": 3.319, |
|
"step": 8976 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 1.521100401878357, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1208, |
|
"step": 9504 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9553314121037464, |
|
"eval_f1": 0.7947830715996806, |
|
"eval_loss": 0.1359829157590866, |
|
"eval_precision": 0.7333005893909627, |
|
"eval_recall": 0.8675188843695526, |
|
"eval_runtime": 4.5506, |
|
"eval_samples_per_second": 205.468, |
|
"eval_steps_per_second": 3.296, |
|
"step": 9504 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.9698516726493835, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1201, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.95553015999205, |
|
"eval_f1": 0.7956301625366373, |
|
"eval_loss": 0.13499899208545685, |
|
"eval_precision": 0.734744094488189, |
|
"eval_recall": 0.8675188843695526, |
|
"eval_runtime": 4.5437, |
|
"eval_samples_per_second": 205.78, |
|
"eval_steps_per_second": 3.301, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.61683189868927, |
|
"learning_rate": 0.0, |
|
"loss": 0.1205, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.95553015999205, |
|
"eval_f1": 0.794345158708989, |
|
"eval_loss": 0.1346217691898346, |
|
"eval_precision": 0.7342209072978304, |
|
"eval_recall": 0.8651946542707728, |
|
"eval_runtime": 4.6351, |
|
"eval_samples_per_second": 201.721, |
|
"eval_steps_per_second": 3.236, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 10560, |
|
"total_flos": 4541164131293502.0, |
|
"train_loss": 0.18834613236514006, |
|
"train_runtime": 1257.3573, |
|
"train_samples_per_second": 134.202, |
|
"train_steps_per_second": 8.399 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10560, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 4541164131293502.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|