|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.2719526290893555, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5617, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7192982456140351, |
|
"eval_f1": 0.6543102914784331, |
|
"eval_loss": 0.511669397354126, |
|
"eval_precision": 0.6580196140461879, |
|
"eval_recall": 0.6513911620294599, |
|
"eval_runtime": 5.1602, |
|
"eval_samples_per_second": 77.323, |
|
"eval_steps_per_second": 9.69, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.8476994037628174, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5046, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7418546365914787, |
|
"eval_f1": 0.7111966887091448, |
|
"eval_loss": 0.49168047308921814, |
|
"eval_precision": 0.7042004048582996, |
|
"eval_recall": 0.7323604291689398, |
|
"eval_runtime": 5.0484, |
|
"eval_samples_per_second": 79.035, |
|
"eval_steps_per_second": 9.904, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.1617326736450195, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4798, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7593984962406015, |
|
"eval_f1": 0.7179215270413574, |
|
"eval_loss": 0.4465886950492859, |
|
"eval_precision": 0.7129198966408269, |
|
"eval_recall": 0.7247681396617567, |
|
"eval_runtime": 5.0834, |
|
"eval_samples_per_second": 78.491, |
|
"eval_steps_per_second": 9.836, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.705305337905884, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4374, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8195488721804511, |
|
"eval_f1": 0.7740798993394149, |
|
"eval_loss": 0.3993551731109619, |
|
"eval_precision": 0.7865881658357387, |
|
"eval_recall": 0.7648208765230042, |
|
"eval_runtime": 5.1044, |
|
"eval_samples_per_second": 78.167, |
|
"eval_steps_per_second": 9.795, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.9225200414657593, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4037, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7844611528822055, |
|
"eval_f1": 0.7575187969924813, |
|
"eval_loss": 0.41500648856163025, |
|
"eval_precision": 0.748013422818792, |
|
"eval_recall": 0.7800054555373704, |
|
"eval_runtime": 5.0621, |
|
"eval_samples_per_second": 78.822, |
|
"eval_steps_per_second": 9.877, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.181605339050293, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3741, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.8049369344976196, |
|
"eval_loss": 0.3736521899700165, |
|
"eval_precision": 0.8027777777777778, |
|
"eval_recall": 0.8072376795781051, |
|
"eval_runtime": 5.0549, |
|
"eval_samples_per_second": 78.934, |
|
"eval_steps_per_second": 9.891, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.7816860675811768, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3574, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8220551378446115, |
|
"eval_f1": 0.790906742443813, |
|
"eval_loss": 0.3775876462459564, |
|
"eval_precision": 0.7844931964944649, |
|
"eval_recall": 0.7990998363338788, |
|
"eval_runtime": 5.059, |
|
"eval_samples_per_second": 78.869, |
|
"eval_steps_per_second": 9.883, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 5.030299663543701, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3387, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8446115288220551, |
|
"eval_f1": 0.8134839254478557, |
|
"eval_loss": 0.3653636872768402, |
|
"eval_precision": 0.8119747899159664, |
|
"eval_recall": 0.8150572831423895, |
|
"eval_runtime": 5.1112, |
|
"eval_samples_per_second": 78.064, |
|
"eval_steps_per_second": 9.782, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 6.920233726501465, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.3293, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.8067969337812972, |
|
"eval_loss": 0.3626542389392853, |
|
"eval_precision": 0.8021114369501466, |
|
"eval_recall": 0.8122385888343335, |
|
"eval_runtime": 5.0586, |
|
"eval_samples_per_second": 78.875, |
|
"eval_steps_per_second": 9.884, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 6.010580539703369, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.3209, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.8039756622954537, |
|
"eval_loss": 0.35534289479255676, |
|
"eval_precision": 0.8032299897460643, |
|
"eval_recall": 0.8047372249499909, |
|
"eval_runtime": 5.0627, |
|
"eval_samples_per_second": 78.811, |
|
"eval_steps_per_second": 9.876, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 4.613595485687256, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2967, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8345864661654135, |
|
"eval_f1": 0.8051873113570456, |
|
"eval_loss": 0.3674112856388092, |
|
"eval_precision": 0.7989231125521075, |
|
"eval_recall": 0.8129659938170577, |
|
"eval_runtime": 5.0568, |
|
"eval_samples_per_second": 78.903, |
|
"eval_steps_per_second": 9.888, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 11.547273635864258, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2928, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8320802005012531, |
|
"eval_f1": 0.8026866442779643, |
|
"eval_loss": 0.37071213126182556, |
|
"eval_precision": 0.795995733394834, |
|
"eval_recall": 0.8111929441716675, |
|
"eval_runtime": 5.0714, |
|
"eval_samples_per_second": 78.677, |
|
"eval_steps_per_second": 9.859, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.8734816312789917, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.2967, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8471177944862155, |
|
"eval_f1": 0.8160386984618873, |
|
"eval_loss": 0.3514226973056793, |
|
"eval_precision": 0.8152632848784607, |
|
"eval_recall": 0.8168303327877796, |
|
"eval_runtime": 5.0516, |
|
"eval_samples_per_second": 78.984, |
|
"eval_steps_per_second": 9.898, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 11.493008613586426, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2934, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.8118502107020158, |
|
"eval_loss": 0.3507131040096283, |
|
"eval_precision": 0.8083091673078061, |
|
"eval_recall": 0.8157846881251136, |
|
"eval_runtime": 5.0578, |
|
"eval_samples_per_second": 78.888, |
|
"eval_steps_per_second": 9.886, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.0511583089828491, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.2811, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8345864661654135, |
|
"eval_f1": 0.8042838456507522, |
|
"eval_loss": 0.35527709126472473, |
|
"eval_precision": 0.7990802919708029, |
|
"eval_recall": 0.8104655391889435, |
|
"eval_runtime": 5.0928, |
|
"eval_samples_per_second": 78.345, |
|
"eval_steps_per_second": 9.818, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 7.683447360992432, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2738, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.8136136136136136, |
|
"eval_loss": 0.35545966029167175, |
|
"eval_precision": 0.8076923076923077, |
|
"eval_recall": 0.820785597381342, |
|
"eval_runtime": 5.1153, |
|
"eval_samples_per_second": 78.002, |
|
"eval_steps_per_second": 9.775, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 5.83898401260376, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.2717, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.8203781512605042, |
|
"eval_loss": 0.34679991006851196, |
|
"eval_precision": 0.8174088828111065, |
|
"eval_recall": 0.823604291689398, |
|
"eval_runtime": 5.1026, |
|
"eval_samples_per_second": 78.196, |
|
"eval_steps_per_second": 9.799, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 4.768757343292236, |
|
"learning_rate": 5e-06, |
|
"loss": 0.278, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.8127416435111035, |
|
"eval_loss": 0.3509637117385864, |
|
"eval_precision": 0.8079618768328446, |
|
"eval_recall": 0.8182851427532278, |
|
"eval_runtime": 5.0773, |
|
"eval_samples_per_second": 78.585, |
|
"eval_steps_per_second": 9.848, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 4.281126022338867, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2701, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8471177944862155, |
|
"eval_f1": 0.8178232198860786, |
|
"eval_loss": 0.34708452224731445, |
|
"eval_precision": 0.8142125821151684, |
|
"eval_recall": 0.8218312420440079, |
|
"eval_runtime": 5.0544, |
|
"eval_samples_per_second": 78.942, |
|
"eval_steps_per_second": 9.892, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 7.043084621429443, |
|
"learning_rate": 0.0, |
|
"loss": 0.2722, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8446115288220551, |
|
"eval_f1": 0.8152777777777778, |
|
"eval_loss": 0.34833839535713196, |
|
"eval_precision": 0.8110639802050195, |
|
"eval_recall": 0.8200581923986179, |
|
"eval_runtime": 5.0742, |
|
"eval_samples_per_second": 78.633, |
|
"eval_steps_per_second": 9.854, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7597037114448000.0, |
|
"train_loss": 0.34670459247026286, |
|
"train_runtime": 1953.5719, |
|
"train_samples_per_second": 37.245, |
|
"train_steps_per_second": 1.249 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7597037114448000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|