|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.120229244232178, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5609, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7192982456140351, |
|
"eval_f1": 0.6543102914784331, |
|
"eval_loss": 0.5086308717727661, |
|
"eval_precision": 0.6580196140461879, |
|
"eval_recall": 0.6513911620294599, |
|
"eval_runtime": 5.0861, |
|
"eval_samples_per_second": 78.449, |
|
"eval_steps_per_second": 9.831, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.929361581802368, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4986, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7493734335839599, |
|
"eval_f1": 0.7201178451178452, |
|
"eval_loss": 0.485486775636673, |
|
"eval_precision": 0.7127371273712737, |
|
"eval_recall": 0.7426804873613384, |
|
"eval_runtime": 5.0504, |
|
"eval_samples_per_second": 79.003, |
|
"eval_steps_per_second": 9.9, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.046614646911621, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4593, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7694235588972431, |
|
"eval_f1": 0.7308797653958945, |
|
"eval_loss": 0.42381197214126587, |
|
"eval_precision": 0.7249487296342714, |
|
"eval_recall": 0.7393617021276595, |
|
"eval_runtime": 5.0742, |
|
"eval_samples_per_second": 78.633, |
|
"eval_steps_per_second": 9.854, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.3827860355377197, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3957, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8070175438596491, |
|
"eval_f1": 0.7700391464135747, |
|
"eval_loss": 0.3916189670562744, |
|
"eval_precision": 0.7669852636562704, |
|
"eval_recall": 0.7734588106928533, |
|
"eval_runtime": 5.0559, |
|
"eval_samples_per_second": 78.918, |
|
"eval_steps_per_second": 9.889, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.527370810508728, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.3658, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7994987468671679, |
|
"eval_f1": 0.7744360902255639, |
|
"eval_loss": 0.4266420900821686, |
|
"eval_precision": 0.7640805369127517, |
|
"eval_recall": 0.7981451172940535, |
|
"eval_runtime": 5.0704, |
|
"eval_samples_per_second": 78.693, |
|
"eval_steps_per_second": 9.861, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.4858503341674805, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3345, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.8049369344976196, |
|
"eval_loss": 0.36663180589675903, |
|
"eval_precision": 0.8027777777777778, |
|
"eval_recall": 0.8072376795781051, |
|
"eval_runtime": 5.0547, |
|
"eval_samples_per_second": 78.937, |
|
"eval_steps_per_second": 9.892, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.170096516609192, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3237, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8395989974937343, |
|
"eval_f1": 0.8136058394160584, |
|
"eval_loss": 0.3713766634464264, |
|
"eval_precision": 0.8045112781954887, |
|
"eval_recall": 0.8265139116202946, |
|
"eval_runtime": 5.0463, |
|
"eval_samples_per_second": 79.068, |
|
"eval_steps_per_second": 9.908, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.0918803215026855, |
|
"learning_rate": 3e-05, |
|
"loss": 0.304, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.8118502107020158, |
|
"eval_loss": 0.35368239879608154, |
|
"eval_precision": 0.8083091673078061, |
|
"eval_recall": 0.8157846881251136, |
|
"eval_runtime": 5.0768, |
|
"eval_samples_per_second": 78.592, |
|
"eval_steps_per_second": 9.849, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 5.677598476409912, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.3027, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8446115288220551, |
|
"eval_f1": 0.8152777777777778, |
|
"eval_loss": 0.3530685007572174, |
|
"eval_precision": 0.8110639802050195, |
|
"eval_recall": 0.8200581923986179, |
|
"eval_runtime": 5.0522, |
|
"eval_samples_per_second": 78.975, |
|
"eval_steps_per_second": 9.897, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 6.779963493347168, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2962, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8521303258145363, |
|
"eval_f1": 0.8211781685593832, |
|
"eval_loss": 0.33822622895240784, |
|
"eval_precision": 0.8219964664310955, |
|
"eval_recall": 0.8203764320785598, |
|
"eval_runtime": 5.0724, |
|
"eval_samples_per_second": 78.661, |
|
"eval_steps_per_second": 9.857, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 4.037176132202148, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2721, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.8228975557791324, |
|
"eval_loss": 0.3489574193954468, |
|
"eval_precision": 0.8162488420565077, |
|
"eval_recall": 0.8311056555737406, |
|
"eval_runtime": 5.0646, |
|
"eval_samples_per_second": 78.783, |
|
"eval_steps_per_second": 9.873, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 10.375617027282715, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2693, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8288009705864946, |
|
"eval_loss": 0.35016030073165894, |
|
"eval_precision": 0.822024085224641, |
|
"eval_recall": 0.837152209492635, |
|
"eval_runtime": 5.055, |
|
"eval_samples_per_second": 78.932, |
|
"eval_steps_per_second": 9.891, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.8974263668060303, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.2745, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8263588263588264, |
|
"eval_loss": 0.3283708095550537, |
|
"eval_precision": 0.8289473684210527, |
|
"eval_recall": 0.8239225313693399, |
|
"eval_runtime": 5.1017, |
|
"eval_samples_per_second": 78.21, |
|
"eval_steps_per_second": 9.801, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 11.152755737304688, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2712, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8315338681464504, |
|
"eval_loss": 0.3297080397605896, |
|
"eval_precision": 0.8299369747899159, |
|
"eval_recall": 0.8331969448990726, |
|
"eval_runtime": 5.0488, |
|
"eval_samples_per_second": 79.029, |
|
"eval_steps_per_second": 9.903, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.1235237121582031, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.256, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8391129032258065, |
|
"eval_loss": 0.3356616199016571, |
|
"eval_precision": 0.8345705196182396, |
|
"eval_recall": 0.8442444080741953, |
|
"eval_runtime": 5.0635, |
|
"eval_samples_per_second": 78.8, |
|
"eval_steps_per_second": 9.875, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 6.625916004180908, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2504, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8305757727005222, |
|
"eval_loss": 0.3345828056335449, |
|
"eval_precision": 0.8255131964809383, |
|
"eval_recall": 0.8364248045099109, |
|
"eval_runtime": 5.1096, |
|
"eval_samples_per_second": 78.088, |
|
"eval_steps_per_second": 9.785, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 7.8184356689453125, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.2487, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8272399255573702, |
|
"eval_loss": 0.3242071270942688, |
|
"eval_precision": 0.8280735957109784, |
|
"eval_recall": 0.8264229859974541, |
|
"eval_runtime": 5.0697, |
|
"eval_samples_per_second": 78.703, |
|
"eval_steps_per_second": 9.863, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 4.3523454666137695, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2514, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8365204824303285, |
|
"eval_loss": 0.33086374402046204, |
|
"eval_precision": 0.8313636363636363, |
|
"eval_recall": 0.8424713584288053, |
|
"eval_runtime": 5.047, |
|
"eval_samples_per_second": 79.057, |
|
"eval_steps_per_second": 9.907, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 3.3547356128692627, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2451, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8341332527115377, |
|
"eval_loss": 0.32434186339378357, |
|
"eval_precision": 0.8333132275770553, |
|
"eval_recall": 0.8349699945444626, |
|
"eval_runtime": 5.0492, |
|
"eval_samples_per_second": 79.023, |
|
"eval_steps_per_second": 9.903, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 9.069262504577637, |
|
"learning_rate": 0.0, |
|
"loss": 0.2461, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8357422474382676, |
|
"eval_loss": 0.3260349631309509, |
|
"eval_precision": 0.8319228265372551, |
|
"eval_recall": 0.8399709038006911, |
|
"eval_runtime": 5.0489, |
|
"eval_samples_per_second": 79.026, |
|
"eval_steps_per_second": 9.903, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7609911792720000.0, |
|
"train_loss": 0.3213141730574311, |
|
"train_runtime": 1951.4043, |
|
"train_samples_per_second": 37.286, |
|
"train_steps_per_second": 1.25 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7609911792720000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|