|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 10560, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.3427704572677612, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.7018, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9115075027327835, |
|
"eval_f1": 0.5138413685847589, |
|
"eval_loss": 0.33528366684913635, |
|
"eval_precision": 0.5528781793842035, |
|
"eval_recall": 0.4799535153980244, |
|
"eval_runtime": 4.4519, |
|
"eval_samples_per_second": 210.021, |
|
"eval_steps_per_second": 3.369, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.260859727859497, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.2639, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9412203120341847, |
|
"eval_f1": 0.7251732101616628, |
|
"eval_loss": 0.1912250965833664, |
|
"eval_precision": 0.6493566176470589, |
|
"eval_recall": 0.821034282393957, |
|
"eval_runtime": 4.5644, |
|
"eval_samples_per_second": 204.846, |
|
"eval_steps_per_second": 3.286, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.1796255111694336, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.1862, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9465865050183842, |
|
"eval_f1": 0.7531402204562931, |
|
"eval_loss": 0.1671685427427292, |
|
"eval_precision": 0.6738532110091743, |
|
"eval_recall": 0.8535735037768739, |
|
"eval_runtime": 4.5056, |
|
"eval_samples_per_second": 207.52, |
|
"eval_steps_per_second": 3.329, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.1285958290100098, |
|
"learning_rate": 4e-05, |
|
"loss": 0.1612, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9539401768856206, |
|
"eval_f1": 0.7823765020026703, |
|
"eval_loss": 0.14461010694503784, |
|
"eval_precision": 0.7238142292490118, |
|
"eval_recall": 0.8512492736780941, |
|
"eval_runtime": 4.5298, |
|
"eval_samples_per_second": 206.41, |
|
"eval_steps_per_second": 3.311, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.1812392473220825, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.1439, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9545364205505317, |
|
"eval_f1": 0.7862656374767102, |
|
"eval_loss": 0.13903872668743134, |
|
"eval_precision": 0.7254420432220039, |
|
"eval_recall": 0.8582219639744335, |
|
"eval_runtime": 4.6125, |
|
"eval_samples_per_second": 202.712, |
|
"eval_steps_per_second": 3.252, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.9920209646224976, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.1358, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9551326642154427, |
|
"eval_f1": 0.7892923403127484, |
|
"eval_loss": 0.1392282098531723, |
|
"eval_precision": 0.7256335282651072, |
|
"eval_recall": 0.8651946542707728, |
|
"eval_runtime": 4.8436, |
|
"eval_samples_per_second": 193.037, |
|
"eval_steps_per_second": 3.097, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.6399483680725098, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.129, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9561264036569611, |
|
"eval_f1": 0.7918540068764879, |
|
"eval_loss": 0.13837336003780365, |
|
"eval_precision": 0.7266990291262136, |
|
"eval_recall": 0.8698431144683324, |
|
"eval_runtime": 4.5425, |
|
"eval_samples_per_second": 205.834, |
|
"eval_steps_per_second": 3.302, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.901687741279602, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1228, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.957517638875087, |
|
"eval_f1": 0.7969124301304232, |
|
"eval_loss": 0.13390584290027618, |
|
"eval_precision": 0.7352652259332023, |
|
"eval_recall": 0.8698431144683324, |
|
"eval_runtime": 4.5586, |
|
"eval_samples_per_second": 205.105, |
|
"eval_steps_per_second": 3.29, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.3437026739120483, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.1168, |
|
"step": 4752 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9577163867633907, |
|
"eval_f1": 0.7960010807889759, |
|
"eval_loss": 0.13209262490272522, |
|
"eval_precision": 0.7439393939393939, |
|
"eval_recall": 0.8558977338756537, |
|
"eval_runtime": 4.5415, |
|
"eval_samples_per_second": 205.88, |
|
"eval_steps_per_second": 3.303, |
|
"step": 4752 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.851469039916992, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1146, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.958113882539998, |
|
"eval_f1": 0.7973009446693656, |
|
"eval_loss": 0.12995323538780212, |
|
"eval_precision": 0.7444556451612904, |
|
"eval_recall": 0.8582219639744335, |
|
"eval_runtime": 4.5491, |
|
"eval_samples_per_second": 205.534, |
|
"eval_steps_per_second": 3.297, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.413081645965576, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1105, |
|
"step": 5808 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9571201430984796, |
|
"eval_f1": 0.7947830715996806, |
|
"eval_loss": 0.13270916044712067, |
|
"eval_precision": 0.7333005893909627, |
|
"eval_recall": 0.8675188843695526, |
|
"eval_runtime": 4.536, |
|
"eval_samples_per_second": 206.127, |
|
"eval_steps_per_second": 3.307, |
|
"step": 5808 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.1331512928009033, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1083, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9569213952101759, |
|
"eval_f1": 0.794345158708989, |
|
"eval_loss": 0.1333465278148651, |
|
"eval_precision": 0.7342209072978304, |
|
"eval_recall": 0.8651946542707728, |
|
"eval_runtime": 4.5053, |
|
"eval_samples_per_second": 207.533, |
|
"eval_steps_per_second": 3.329, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.8731575012207031, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.106, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9591076219815164, |
|
"eval_f1": 0.7998916869753587, |
|
"eval_loss": 0.12651574611663818, |
|
"eval_precision": 0.7489858012170385, |
|
"eval_recall": 0.8582219639744335, |
|
"eval_runtime": 4.4726, |
|
"eval_samples_per_second": 209.052, |
|
"eval_steps_per_second": 3.354, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.8700233697891235, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1032, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9589088740932128, |
|
"eval_f1": 0.7973009446693656, |
|
"eval_loss": 0.12690864503383636, |
|
"eval_precision": 0.7444556451612904, |
|
"eval_recall": 0.8582219639744335, |
|
"eval_runtime": 4.5513, |
|
"eval_samples_per_second": 205.436, |
|
"eval_steps_per_second": 3.296, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.2827842235565186, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1023, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9585113783166054, |
|
"eval_f1": 0.7998922704012928, |
|
"eval_loss": 0.12912563979625702, |
|
"eval_precision": 0.7454819277108434, |
|
"eval_recall": 0.862870424171993, |
|
"eval_runtime": 4.5324, |
|
"eval_samples_per_second": 206.293, |
|
"eval_steps_per_second": 3.31, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.694359302520752, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1014, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.957517638875087, |
|
"eval_f1": 0.7947269303201507, |
|
"eval_loss": 0.12707427144050598, |
|
"eval_precision": 0.7399799599198397, |
|
"eval_recall": 0.8582219639744335, |
|
"eval_runtime": 4.562, |
|
"eval_samples_per_second": 204.955, |
|
"eval_steps_per_second": 3.288, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 1.139172911643982, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1002, |
|
"step": 8976 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9589088740932128, |
|
"eval_f1": 0.8041789445486203, |
|
"eval_loss": 0.12810933589935303, |
|
"eval_precision": 0.7460238568588469, |
|
"eval_recall": 0.8721673445671121, |
|
"eval_runtime": 4.5254, |
|
"eval_samples_per_second": 206.614, |
|
"eval_steps_per_second": 3.315, |
|
"step": 8976 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 1.6876777410507202, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0986, |
|
"step": 9504 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9573188909867832, |
|
"eval_f1": 0.8016021361815754, |
|
"eval_loss": 0.13038571178913116, |
|
"eval_precision": 0.741600790513834, |
|
"eval_recall": 0.8721673445671121, |
|
"eval_runtime": 4.5245, |
|
"eval_samples_per_second": 206.653, |
|
"eval_steps_per_second": 3.315, |
|
"step": 9504 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.5088372230529785, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0978, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9589088740932128, |
|
"eval_f1": 0.8046473925965956, |
|
"eval_loss": 0.1270894557237625, |
|
"eval_precision": 0.752020202020202, |
|
"eval_recall": 0.8651946542707728, |
|
"eval_runtime": 4.5248, |
|
"eval_samples_per_second": 206.64, |
|
"eval_steps_per_second": 3.315, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 2.988950490951538, |
|
"learning_rate": 0.0, |
|
"loss": 0.0984, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9579151346516943, |
|
"eval_f1": 0.8007549204637368, |
|
"eval_loss": 0.12812790274620056, |
|
"eval_precision": 0.7469818913480886, |
|
"eval_recall": 0.862870424171993, |
|
"eval_runtime": 4.5277, |
|
"eval_samples_per_second": 206.507, |
|
"eval_steps_per_second": 3.313, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 10560, |
|
"total_flos": 4552961808488766.0, |
|
"train_loss": 0.1551312410470211, |
|
"train_runtime": 1231.1625, |
|
"train_samples_per_second": 137.057, |
|
"train_steps_per_second": 8.577 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10560, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 4552961808488766.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|