|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 1750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9971428571428576e-05, |
|
"loss": 20.0833, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9e-05, |
|
"loss": 7.9373, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.614, |
|
"eval_f1": 0.7190684133915575, |
|
"eval_loss": 2.977444648742676, |
|
"eval_precision": 0.5652173913043478, |
|
"eval_recall": 0.988, |
|
"eval_runtime": 80.901, |
|
"eval_samples_per_second": 37.082, |
|
"eval_steps_per_second": 4.635, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.8e-05, |
|
"loss": 1.0358, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.6373333333333333, |
|
"eval_f1": 0.7332025502697401, |
|
"eval_loss": 3.09529972076416, |
|
"eval_precision": 0.5799069045771916, |
|
"eval_recall": 0.9966666666666667, |
|
"eval_runtime": 82.2367, |
|
"eval_samples_per_second": 36.48, |
|
"eval_steps_per_second": 4.56, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.7e-05, |
|
"loss": 1.8177, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.8706666666666667, |
|
"eval_f1": 0.8636683063949402, |
|
"eval_loss": 0.640018105506897, |
|
"eval_precision": 0.913075780089153, |
|
"eval_recall": 0.8193333333333334, |
|
"eval_runtime": 81.9619, |
|
"eval_samples_per_second": 36.602, |
|
"eval_steps_per_second": 4.575, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.7211, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.821, |
|
"eval_f1": 0.811777076761304, |
|
"eval_loss": 1.0340842008590698, |
|
"eval_precision": 0.8558758314855875, |
|
"eval_recall": 0.772, |
|
"eval_runtime": 82.2896, |
|
"eval_samples_per_second": 36.457, |
|
"eval_steps_per_second": 4.557, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.5e-05, |
|
"loss": 1.4119, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.751, |
|
"eval_f1": 0.6828025477707006, |
|
"eval_loss": 0.7746813893318176, |
|
"eval_precision": 0.9403508771929825, |
|
"eval_recall": 0.536, |
|
"eval_runtime": 82.2817, |
|
"eval_samples_per_second": 36.46, |
|
"eval_steps_per_second": 4.558, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.8471, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.854, |
|
"eval_f1": 0.8643122676579925, |
|
"eval_loss": 0.6877764463424683, |
|
"eval_precision": 0.8072916666666666, |
|
"eval_recall": 0.93, |
|
"eval_runtime": 81.2562, |
|
"eval_samples_per_second": 36.92, |
|
"eval_steps_per_second": 4.615, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.3e-05, |
|
"loss": 1.3883, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.7186666666666667, |
|
"eval_f1": 0.6114180478821363, |
|
"eval_loss": 2.117485284805298, |
|
"eval_precision": 0.9880952380952381, |
|
"eval_recall": 0.44266666666666665, |
|
"eval_runtime": 81.2535, |
|
"eval_samples_per_second": 36.921, |
|
"eval_steps_per_second": 4.615, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.9955, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.853, |
|
"eval_f1": 0.8333962976955043, |
|
"eval_loss": 0.9773063063621521, |
|
"eval_precision": 0.9616390584132519, |
|
"eval_recall": 0.7353333333333333, |
|
"eval_runtime": 82.2707, |
|
"eval_samples_per_second": 36.465, |
|
"eval_steps_per_second": 4.558, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.9704, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.8093333333333333, |
|
"eval_f1": 0.8361970217640321, |
|
"eval_loss": 0.7936842441558838, |
|
"eval_precision": 0.7329317269076305, |
|
"eval_recall": 0.9733333333333334, |
|
"eval_runtime": 81.186, |
|
"eval_samples_per_second": 36.952, |
|
"eval_steps_per_second": 4.619, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4e-05, |
|
"loss": 1.3156, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.7793333333333333, |
|
"eval_f1": 0.8049499116087213, |
|
"eval_loss": 1.306660771369934, |
|
"eval_precision": 0.7212249208025343, |
|
"eval_recall": 0.9106666666666666, |
|
"eval_runtime": 81.1695, |
|
"eval_samples_per_second": 36.96, |
|
"eval_steps_per_second": 4.62, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.9042, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.7506666666666667, |
|
"eval_f1": 0.6978998384491114, |
|
"eval_loss": 0.8157205581665039, |
|
"eval_precision": 0.8852459016393442, |
|
"eval_recall": 0.576, |
|
"eval_runtime": 81.9836, |
|
"eval_samples_per_second": 36.593, |
|
"eval_steps_per_second": 4.574, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.6122, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.861, |
|
"eval_f1": 0.8741322064594024, |
|
"eval_loss": 0.6521239280700684, |
|
"eval_precision": 0.7986762272476559, |
|
"eval_recall": 0.9653333333333334, |
|
"eval_runtime": 81.1912, |
|
"eval_samples_per_second": 36.95, |
|
"eval_steps_per_second": 4.619, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.9266, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.7576666666666667, |
|
"eval_f1": 0.8008764721993973, |
|
"eval_loss": 1.0884476900100708, |
|
"eval_precision": 0.6796838679683868, |
|
"eval_recall": 0.9746666666666667, |
|
"eval_runtime": 81.1743, |
|
"eval_samples_per_second": 36.958, |
|
"eval_steps_per_second": 4.62, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.5418, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.8626666666666667, |
|
"eval_f1": 0.8731527093596059, |
|
"eval_loss": 0.594514012336731, |
|
"eval_precision": 0.8112128146453089, |
|
"eval_recall": 0.9453333333333334, |
|
"eval_runtime": 81.6996, |
|
"eval_samples_per_second": 36.72, |
|
"eval_steps_per_second": 4.59, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.4564, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.9256666666666666, |
|
"eval_f1": 0.9236039739636861, |
|
"eval_loss": 0.2299291044473648, |
|
"eval_precision": 0.9499647639182522, |
|
"eval_recall": 0.8986666666666666, |
|
"eval_runtime": 81.1873, |
|
"eval_samples_per_second": 36.952, |
|
"eval_steps_per_second": 4.619, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.427, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.9156666666666666, |
|
"eval_f1": 0.9154694286668894, |
|
"eval_loss": 0.36590951681137085, |
|
"eval_precision": 0.9176155391828533, |
|
"eval_recall": 0.9133333333333333, |
|
"eval_runtime": 81.3624, |
|
"eval_samples_per_second": 36.872, |
|
"eval_steps_per_second": 4.609, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.7026, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_f1": 0.9150815217391304, |
|
"eval_loss": 0.509623646736145, |
|
"eval_precision": 0.932825484764543, |
|
"eval_recall": 0.898, |
|
"eval_runtime": 81.284, |
|
"eval_samples_per_second": 36.908, |
|
"eval_steps_per_second": 4.613, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.7019, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.9066666666666666, |
|
"eval_f1": 0.9068529607451763, |
|
"eval_loss": 0.4853467643260956, |
|
"eval_precision": 0.9050464807436919, |
|
"eval_recall": 0.9086666666666666, |
|
"eval_runtime": 81.2065, |
|
"eval_samples_per_second": 36.943, |
|
"eval_steps_per_second": 4.618, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.5116, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.9003333333333333, |
|
"eval_f1": 0.9047467346288627, |
|
"eval_loss": 0.4890832006931305, |
|
"eval_precision": 0.8663819402074435, |
|
"eval_recall": 0.9466666666666667, |
|
"eval_runtime": 81.3158, |
|
"eval_samples_per_second": 36.893, |
|
"eval_steps_per_second": 4.612, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4625, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.9276666666666666, |
|
"eval_f1": 0.92340275326509, |
|
"eval_loss": 0.391652911901474, |
|
"eval_precision": 0.981245311327832, |
|
"eval_recall": 0.872, |
|
"eval_runtime": 81.251, |
|
"eval_samples_per_second": 36.923, |
|
"eval_steps_per_second": 4.615, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.455, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.89, |
|
"eval_f1": 0.9003623188405797, |
|
"eval_loss": 0.5772602558135986, |
|
"eval_precision": 0.8228476821192053, |
|
"eval_recall": 0.994, |
|
"eval_runtime": 81.2335, |
|
"eval_samples_per_second": 36.931, |
|
"eval_steps_per_second": 4.616, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.3816, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.929, |
|
"eval_f1": 0.9305510270622758, |
|
"eval_loss": 0.32315143942832947, |
|
"eval_precision": 0.9106573069559668, |
|
"eval_recall": 0.9513333333333334, |
|
"eval_runtime": 81.1544, |
|
"eval_samples_per_second": 36.967, |
|
"eval_steps_per_second": 4.621, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.3276, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.894, |
|
"eval_f1": 0.9034608378870674, |
|
"eval_loss": 0.8502470850944519, |
|
"eval_precision": 0.8294314381270903, |
|
"eval_recall": 0.992, |
|
"eval_runtime": 81.1738, |
|
"eval_samples_per_second": 36.958, |
|
"eval_steps_per_second": 4.62, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.8028, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.8066666666666666, |
|
"eval_f1": 0.761904761904762, |
|
"eval_loss": 1.2200298309326172, |
|
"eval_precision": 0.9914529914529915, |
|
"eval_recall": 0.6186666666666667, |
|
"eval_runtime": 81.3151, |
|
"eval_samples_per_second": 36.894, |
|
"eval_steps_per_second": 4.612, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.4308, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.9213333333333333, |
|
"eval_f1": 0.9263880224578914, |
|
"eval_loss": 0.36110782623291016, |
|
"eval_precision": 0.8704572098475967, |
|
"eval_recall": 0.99, |
|
"eval_runtime": 81.936, |
|
"eval_samples_per_second": 36.614, |
|
"eval_steps_per_second": 4.577, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.3567, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.908, |
|
"eval_f1": 0.9000724112961622, |
|
"eval_loss": 0.6623110771179199, |
|
"eval_precision": 0.9849445324881141, |
|
"eval_recall": 0.8286666666666667, |
|
"eval_runtime": 81.4083, |
|
"eval_samples_per_second": 36.851, |
|
"eval_steps_per_second": 4.606, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 0.4286, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.9216666666666666, |
|
"eval_f1": 0.9264475743348982, |
|
"eval_loss": 0.3584176301956177, |
|
"eval_precision": 0.8731563421828908, |
|
"eval_recall": 0.9866666666666667, |
|
"eval_runtime": 81.0909, |
|
"eval_samples_per_second": 36.996, |
|
"eval_steps_per_second": 4.624, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.3559, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.944, |
|
"eval_f1": 0.945736434108527, |
|
"eval_loss": 0.21195654571056366, |
|
"eval_precision": 0.9172932330827067, |
|
"eval_recall": 0.976, |
|
"eval_runtime": 81.1261, |
|
"eval_samples_per_second": 36.979, |
|
"eval_steps_per_second": 4.622, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.3043, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.923, |
|
"eval_f1": 0.9262216544235068, |
|
"eval_loss": 0.23412927985191345, |
|
"eval_precision": 0.8890251379521765, |
|
"eval_recall": 0.9666666666666667, |
|
"eval_runtime": 81.1527, |
|
"eval_samples_per_second": 36.967, |
|
"eval_steps_per_second": 4.621, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2e-05, |
|
"loss": 0.207, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.9186666666666666, |
|
"eval_f1": 0.9122302158273381, |
|
"eval_loss": 0.4979284703731537, |
|
"eval_precision": 0.990625, |
|
"eval_recall": 0.8453333333333334, |
|
"eval_runtime": 81.616, |
|
"eval_samples_per_second": 36.757, |
|
"eval_steps_per_second": 4.595, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.4594, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.9316666666666666, |
|
"eval_f1": 0.9276385457112601, |
|
"eval_loss": 0.33773142099380493, |
|
"eval_precision": 0.9857464366091523, |
|
"eval_recall": 0.876, |
|
"eval_runtime": 81.8837, |
|
"eval_samples_per_second": 36.637, |
|
"eval_steps_per_second": 4.58, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.3886, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.952, |
|
"eval_f1": 0.9511533242876526, |
|
"eval_loss": 0.21586619317531586, |
|
"eval_precision": 0.9682320441988951, |
|
"eval_recall": 0.9346666666666666, |
|
"eval_runtime": 81.9925, |
|
"eval_samples_per_second": 36.589, |
|
"eval_steps_per_second": 4.574, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 0.3025, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.9516666666666667, |
|
"eval_f1": 0.9511620074099023, |
|
"eval_loss": 0.1657862514257431, |
|
"eval_precision": 0.9611980939414567, |
|
"eval_recall": 0.9413333333333334, |
|
"eval_runtime": 81.6092, |
|
"eval_samples_per_second": 36.761, |
|
"eval_steps_per_second": 4.595, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.3561, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.926, |
|
"eval_f1": 0.9304075235109718, |
|
"eval_loss": 0.2859382629394531, |
|
"eval_precision": 0.8781065088757396, |
|
"eval_recall": 0.9893333333333333, |
|
"eval_runtime": 81.4491, |
|
"eval_samples_per_second": 36.833, |
|
"eval_steps_per_second": 4.604, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2377, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.9123333333333333, |
|
"eval_f1": 0.9175290059579806, |
|
"eval_loss": 0.6120531558990479, |
|
"eval_precision": 0.8661930136175252, |
|
"eval_recall": 0.9753333333333334, |
|
"eval_runtime": 81.2586, |
|
"eval_samples_per_second": 36.919, |
|
"eval_steps_per_second": 4.615, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 0.5469, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.9583333333333334, |
|
"eval_f1": 0.9584855529724343, |
|
"eval_loss": 0.179586261510849, |
|
"eval_precision": 0.9549966909331569, |
|
"eval_recall": 0.962, |
|
"eval_runtime": 81.2049, |
|
"eval_samples_per_second": 36.944, |
|
"eval_steps_per_second": 4.618, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 0.1699, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.946, |
|
"eval_f1": 0.94375, |
|
"eval_loss": 0.24616345763206482, |
|
"eval_precision": 0.9847826086956522, |
|
"eval_recall": 0.906, |
|
"eval_runtime": 83.3002, |
|
"eval_samples_per_second": 36.014, |
|
"eval_steps_per_second": 4.502, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.2279, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.9576666666666667, |
|
"eval_f1": 0.9585644371941273, |
|
"eval_loss": 0.19657239317893982, |
|
"eval_precision": 0.9386581469648563, |
|
"eval_recall": 0.9793333333333333, |
|
"eval_runtime": 84.7312, |
|
"eval_samples_per_second": 35.406, |
|
"eval_steps_per_second": 4.426, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 0.1383, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.9373333333333334, |
|
"eval_f1": 0.9336626676076217, |
|
"eval_loss": 0.4285222887992859, |
|
"eval_precision": 0.9917541229385307, |
|
"eval_recall": 0.882, |
|
"eval_runtime": 81.4307, |
|
"eval_samples_per_second": 36.841, |
|
"eval_steps_per_second": 4.605, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2622, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.9606666666666667, |
|
"eval_f1": 0.9603227975790181, |
|
"eval_loss": 0.22786445915699005, |
|
"eval_precision": 0.9687924016282226, |
|
"eval_recall": 0.952, |
|
"eval_runtime": 81.1895, |
|
"eval_samples_per_second": 36.951, |
|
"eval_steps_per_second": 4.619, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9e-06, |
|
"loss": 0.1573, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.9636666666666667, |
|
"eval_f1": 0.9632873021219266, |
|
"eval_loss": 0.1860896348953247, |
|
"eval_precision": 0.9734513274336283, |
|
"eval_recall": 0.9533333333333334, |
|
"eval_runtime": 81.6749, |
|
"eval_samples_per_second": 36.731, |
|
"eval_steps_per_second": 4.591, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.1174, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.9636666666666667, |
|
"eval_f1": 0.9634350888963435, |
|
"eval_loss": 0.1978105902671814, |
|
"eval_precision": 0.9696151249155975, |
|
"eval_recall": 0.9573333333333334, |
|
"eval_runtime": 81.6791, |
|
"eval_samples_per_second": 36.729, |
|
"eval_steps_per_second": 4.591, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 0.1827, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.9623333333333334, |
|
"eval_f1": 0.962219993313273, |
|
"eval_loss": 0.215500146150589, |
|
"eval_precision": 0.9651240778001341, |
|
"eval_recall": 0.9593333333333334, |
|
"eval_runtime": 81.1055, |
|
"eval_samples_per_second": 36.989, |
|
"eval_steps_per_second": 4.624, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6e-06, |
|
"loss": 0.2172, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.962, |
|
"eval_f1": 0.9615643964935942, |
|
"eval_loss": 0.2241593450307846, |
|
"eval_precision": 0.9727148703956344, |
|
"eval_recall": 0.9506666666666667, |
|
"eval_runtime": 81.5602, |
|
"eval_samples_per_second": 36.783, |
|
"eval_steps_per_second": 4.598, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2261, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.9643333333333334, |
|
"eval_f1": 0.9644636333444038, |
|
"eval_loss": 0.19370576739311218, |
|
"eval_precision": 0.9609530112508272, |
|
"eval_recall": 0.968, |
|
"eval_runtime": 81.2706, |
|
"eval_samples_per_second": 36.914, |
|
"eval_steps_per_second": 4.614, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.1769, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.9576666666666667, |
|
"eval_f1": 0.9567291311754687, |
|
"eval_loss": 0.26763275265693665, |
|
"eval_precision": 0.978397212543554, |
|
"eval_recall": 0.936, |
|
"eval_runtime": 81.1748, |
|
"eval_samples_per_second": 36.957, |
|
"eval_steps_per_second": 4.62, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3e-06, |
|
"loss": 0.4139, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.9673333333333334, |
|
"eval_f1": 0.9674634794156707, |
|
"eval_loss": 0.1558542400598526, |
|
"eval_precision": 0.9636243386243386, |
|
"eval_recall": 0.9713333333333334, |
|
"eval_runtime": 81.3051, |
|
"eval_samples_per_second": 36.898, |
|
"eval_steps_per_second": 4.612, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.2209, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.9706666666666667, |
|
"eval_f1": 0.9705488621151273, |
|
"eval_loss": 0.1362292617559433, |
|
"eval_precision": 0.9744623655913979, |
|
"eval_recall": 0.9666666666666667, |
|
"eval_runtime": 81.1766, |
|
"eval_samples_per_second": 36.956, |
|
"eval_steps_per_second": 4.62, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1792, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.9696666666666667, |
|
"eval_f1": 0.9696363029696363, |
|
"eval_loss": 0.13592535257339478, |
|
"eval_precision": 0.9706078824315297, |
|
"eval_recall": 0.9686666666666667, |
|
"eval_runtime": 81.8567, |
|
"eval_samples_per_second": 36.649, |
|
"eval_steps_per_second": 4.581, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.1015, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9696666666666667, |
|
"eval_f1": 0.9696160267111853, |
|
"eval_loss": 0.1375236213207245, |
|
"eval_precision": 0.9712374581939799, |
|
"eval_recall": 0.968, |
|
"eval_runtime": 81.3216, |
|
"eval_samples_per_second": 36.891, |
|
"eval_steps_per_second": 4.611, |
|
"step": 1750 |
|
} |
|
], |
|
"max_steps": 1750, |
|
"num_train_epochs": 1, |
|
"total_flos": 1.300198588416e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|