|
{ |
|
"best_metric": 0.6166583378418276, |
|
"best_model_checkpoint": "13E-affecthq-fer-balanced/checkpoint-1729", |
|
"epoch": 12.99625468164794, |
|
"global_step": 1729, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5.780346820809249e-07, |
|
"loss": 1.9634, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.1560693641618499e-06, |
|
"loss": 1.9552, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.7341040462427746e-06, |
|
"loss": 1.96, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.3121387283236997e-06, |
|
"loss": 1.9515, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.890173410404625e-06, |
|
"loss": 1.9503, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.468208092485549e-06, |
|
"loss": 1.9404, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.046242774566474e-06, |
|
"loss": 1.9321, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.6242774566473994e-06, |
|
"loss": 1.92, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.202312138728324e-06, |
|
"loss": 1.9068, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.78034682080925e-06, |
|
"loss": 1.8931, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 6.358381502890174e-06, |
|
"loss": 1.8641, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.936416184971098e-06, |
|
"loss": 1.8289, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 7.5144508670520235e-06, |
|
"loss": 1.7863, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.40046838407494145, |
|
"eval_f1": 0.3058443007386033, |
|
"eval_loss": 1.7632062435150146, |
|
"eval_precision": 0.36173402622129774, |
|
"eval_recall": 0.40046838407494145, |
|
"eval_runtime": 67.8192, |
|
"eval_samples_per_second": 31.481, |
|
"eval_steps_per_second": 0.988, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 8.092485549132949e-06, |
|
"loss": 1.8586, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.670520231213873e-06, |
|
"loss": 1.7136, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 9.248554913294799e-06, |
|
"loss": 1.6728, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 9.826589595375723e-06, |
|
"loss": 1.6475, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 9.955012853470438e-06, |
|
"loss": 1.5912, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 9.890745501285348e-06, |
|
"loss": 1.545, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 9.826478149100258e-06, |
|
"loss": 1.5187, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.762210796915168e-06, |
|
"loss": 1.5123, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.697943444730078e-06, |
|
"loss": 1.4574, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 9.633676092544988e-06, |
|
"loss": 1.4483, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 9.569408740359899e-06, |
|
"loss": 1.4114, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 9.505141388174809e-06, |
|
"loss": 1.3853, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 9.440874035989717e-06, |
|
"loss": 1.3653, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5049180327868853, |
|
"eval_f1": 0.44452526643283874, |
|
"eval_loss": 1.3629579544067383, |
|
"eval_precision": 0.48383855087708, |
|
"eval_recall": 0.5049180327868853, |
|
"eval_runtime": 67.0004, |
|
"eval_samples_per_second": 31.865, |
|
"eval_steps_per_second": 1.0, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.376606683804627e-06, |
|
"loss": 1.44, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.312339331619537e-06, |
|
"loss": 1.3714, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 9.248071979434447e-06, |
|
"loss": 1.3243, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 9.183804627249359e-06, |
|
"loss": 1.3214, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 9.119537275064269e-06, |
|
"loss": 1.2917, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.055269922879179e-06, |
|
"loss": 1.3092, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.991002570694089e-06, |
|
"loss": 1.2699, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.926735218508999e-06, |
|
"loss": 1.2668, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 8.862467866323907e-06, |
|
"loss": 1.2747, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 8.798200514138818e-06, |
|
"loss": 1.2415, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 8.733933161953728e-06, |
|
"loss": 1.2671, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 8.669665809768638e-06, |
|
"loss": 1.2717, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 8.605398457583548e-06, |
|
"loss": 1.2468, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5466042154566745, |
|
"eval_f1": 0.5114853306120848, |
|
"eval_loss": 1.2475332021713257, |
|
"eval_precision": 0.5451199399022204, |
|
"eval_recall": 0.5466042154566745, |
|
"eval_runtime": 66.2985, |
|
"eval_samples_per_second": 32.203, |
|
"eval_steps_per_second": 1.011, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 8.541131105398458e-06, |
|
"loss": 1.2806, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 8.476863753213368e-06, |
|
"loss": 1.2417, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 8.412596401028278e-06, |
|
"loss": 1.2221, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 8.348329048843188e-06, |
|
"loss": 1.1889, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 8.2840616966581e-06, |
|
"loss": 1.1994, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 8.219794344473008e-06, |
|
"loss": 1.196, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 8.155526992287918e-06, |
|
"loss": 1.1762, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 8.091259640102828e-06, |
|
"loss": 1.1714, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 8.026992287917738e-06, |
|
"loss": 1.1773, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 7.962724935732648e-06, |
|
"loss": 1.1672, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 7.898457583547558e-06, |
|
"loss": 1.1577, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 7.834190231362468e-06, |
|
"loss": 1.1438, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 7.769922879177378e-06, |
|
"loss": 1.1594, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 7.705655526992289e-06, |
|
"loss": 1.1527, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5761124121779859, |
|
"eval_f1": 0.5579541032814586, |
|
"eval_loss": 1.1865291595458984, |
|
"eval_precision": 0.561218961307198, |
|
"eval_recall": 0.5761124121779859, |
|
"eval_runtime": 65.9861, |
|
"eval_samples_per_second": 32.355, |
|
"eval_steps_per_second": 1.015, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 7.641388174807199e-06, |
|
"loss": 1.2137, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 7.577120822622109e-06, |
|
"loss": 1.1637, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 7.512853470437018e-06, |
|
"loss": 1.108, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 7.448586118251928e-06, |
|
"loss": 1.0764, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 7.384318766066839e-06, |
|
"loss": 1.1141, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 7.320051413881749e-06, |
|
"loss": 1.1119, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 7.255784061696659e-06, |
|
"loss": 1.0904, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 7.191516709511569e-06, |
|
"loss": 1.1055, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 7.127249357326479e-06, |
|
"loss": 1.1115, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 7.062982005141389e-06, |
|
"loss": 1.1029, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 6.998714652956299e-06, |
|
"loss": 1.1017, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 6.934447300771208e-06, |
|
"loss": 1.0723, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 6.870179948586118e-06, |
|
"loss": 1.0862, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5784543325526932, |
|
"eval_f1": 0.5658700696765325, |
|
"eval_loss": 1.1448348760604858, |
|
"eval_precision": 0.5686897608132185, |
|
"eval_recall": 0.5784543325526932, |
|
"eval_runtime": 66.3078, |
|
"eval_samples_per_second": 32.198, |
|
"eval_steps_per_second": 1.01, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 6.8059125964010285e-06, |
|
"loss": 1.1261, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 6.7416452442159385e-06, |
|
"loss": 1.0695, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 6.677377892030849e-06, |
|
"loss": 1.0273, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 6.613110539845759e-06, |
|
"loss": 1.0905, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 6.548843187660669e-06, |
|
"loss": 1.0512, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 6.48457583547558e-06, |
|
"loss": 1.0329, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 6.42030848329049e-06, |
|
"loss": 1.0337, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 6.3560411311054e-06, |
|
"loss": 1.0676, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 6.291773778920309e-06, |
|
"loss": 1.0171, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 6.227506426735219e-06, |
|
"loss": 1.0644, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 6.163239074550129e-06, |
|
"loss": 1.048, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 6.098971722365039e-06, |
|
"loss": 1.0129, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 6.034704370179949e-06, |
|
"loss": 1.064, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5971896955503513, |
|
"eval_f1": 0.5853025369018718, |
|
"eval_loss": 1.1107723712921143, |
|
"eval_precision": 0.5866546554640366, |
|
"eval_recall": 0.5971896955503513, |
|
"eval_runtime": 66.5889, |
|
"eval_samples_per_second": 32.062, |
|
"eval_steps_per_second": 1.006, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 5.970437017994859e-06, |
|
"loss": 1.0766, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 5.906169665809769e-06, |
|
"loss": 1.0108, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 5.8419023136246785e-06, |
|
"loss": 1.0335, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 5.7776349614395885e-06, |
|
"loss": 1.007, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 5.713367609254499e-06, |
|
"loss": 1.0071, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 5.649100257069409e-06, |
|
"loss": 1.0096, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 5.5848329048843196e-06, |
|
"loss": 0.9766, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 5.52056555269923e-06, |
|
"loss": 0.9753, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 5.45629820051414e-06, |
|
"loss": 0.9808, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 5.39203084832905e-06, |
|
"loss": 1.0024, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 5.32776349614396e-06, |
|
"loss": 0.9615, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 5.26349614395887e-06, |
|
"loss": 0.9698, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 5.199228791773779e-06, |
|
"loss": 1.0006, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 5.134961439588689e-06, |
|
"loss": 1.0037, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6018735362997658, |
|
"eval_f1": 0.5945820873638747, |
|
"eval_loss": 1.0968866348266602, |
|
"eval_precision": 0.5968403329145666, |
|
"eval_recall": 0.6018735362997658, |
|
"eval_runtime": 65.51, |
|
"eval_samples_per_second": 32.59, |
|
"eval_steps_per_second": 1.023, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 5.070694087403599e-06, |
|
"loss": 0.9839, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 5.006426735218509e-06, |
|
"loss": 0.9645, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 4.942159383033419e-06, |
|
"loss": 0.9692, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 4.877892030848329e-06, |
|
"loss": 0.9593, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 4.813624678663239e-06, |
|
"loss": 1.0119, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 4.7493573264781495e-06, |
|
"loss": 0.9453, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 4.6850899742930595e-06, |
|
"loss": 0.933, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 4.62082262210797e-06, |
|
"loss": 0.9249, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 4.55655526992288e-06, |
|
"loss": 0.9538, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 4.49228791773779e-06, |
|
"loss": 0.9535, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 4.4280205655527e-06, |
|
"loss": 0.9383, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 4.36375321336761e-06, |
|
"loss": 0.9449, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 4.29948586118252e-06, |
|
"loss": 0.9533, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6126463700234192, |
|
"eval_f1": 0.6046320973113557, |
|
"eval_loss": 1.0763905048370361, |
|
"eval_precision": 0.6033502517484726, |
|
"eval_recall": 0.6126463700234192, |
|
"eval_runtime": 66.8256, |
|
"eval_samples_per_second": 31.949, |
|
"eval_steps_per_second": 1.003, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 4.23521850899743e-06, |
|
"loss": 0.9792, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 4.170951156812339e-06, |
|
"loss": 0.9325, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 4.106683804627249e-06, |
|
"loss": 0.9394, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 4.04241645244216e-06, |
|
"loss": 0.8888, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 3.97814910025707e-06, |
|
"loss": 0.9058, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 3.91388174807198e-06, |
|
"loss": 0.9316, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 3.849614395886889e-06, |
|
"loss": 0.9179, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 3.7853470437018e-06, |
|
"loss": 0.885, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 3.7210796915167095e-06, |
|
"loss": 0.933, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 3.6568123393316196e-06, |
|
"loss": 0.9388, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 3.59254498714653e-06, |
|
"loss": 0.9112, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 3.52827763496144e-06, |
|
"loss": 0.9207, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 3.46401028277635e-06, |
|
"loss": 0.9063, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.615456674473068, |
|
"eval_f1": 0.6046961104183071, |
|
"eval_loss": 1.071085810661316, |
|
"eval_precision": 0.6035147338471722, |
|
"eval_recall": 0.615456674473068, |
|
"eval_runtime": 66.7376, |
|
"eval_samples_per_second": 31.991, |
|
"eval_steps_per_second": 1.004, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 3.39974293059126e-06, |
|
"loss": 0.9774, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 3.33547557840617e-06, |
|
"loss": 0.8738, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 3.27120822622108e-06, |
|
"loss": 0.8628, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 3.2069408740359896e-06, |
|
"loss": 0.8989, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 3.1426735218509e-06, |
|
"loss": 0.8994, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 3.07840616966581e-06, |
|
"loss": 0.8749, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 3.01413881748072e-06, |
|
"loss": 0.8741, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 2.9498714652956302e-06, |
|
"loss": 0.8934, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 2.88560411311054e-06, |
|
"loss": 0.8906, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 2.82133676092545e-06, |
|
"loss": 0.9228, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 2.75706940874036e-06, |
|
"loss": 0.867, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 2.6928020565552705e-06, |
|
"loss": 0.9146, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 2.6285347043701805e-06, |
|
"loss": 0.9205, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2.56426735218509e-06, |
|
"loss": 0.8666, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6173302107728337, |
|
"eval_f1": 0.61075488056197, |
|
"eval_loss": 1.0589306354522705, |
|
"eval_precision": 0.6106645901461774, |
|
"eval_recall": 0.6173302107728337, |
|
"eval_runtime": 66.4667, |
|
"eval_samples_per_second": 32.121, |
|
"eval_steps_per_second": 1.008, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 10.07, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.8981, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 10.15, |
|
"learning_rate": 2.4357326478149103e-06, |
|
"loss": 0.8835, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 2.3714652956298203e-06, |
|
"loss": 0.8641, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 2.3071979434447304e-06, |
|
"loss": 0.8887, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"learning_rate": 2.2429305912596404e-06, |
|
"loss": 0.878, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 10.45, |
|
"learning_rate": 2.17866323907455e-06, |
|
"loss": 0.8524, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 10.52, |
|
"learning_rate": 2.1143958868894605e-06, |
|
"loss": 0.8656, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 2.05012853470437e-06, |
|
"loss": 0.8966, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"learning_rate": 1.9858611825192802e-06, |
|
"loss": 0.8858, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 1.9215938303341903e-06, |
|
"loss": 0.8647, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"learning_rate": 1.8573264781491004e-06, |
|
"loss": 0.8278, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 1.7930591259640104e-06, |
|
"loss": 0.8571, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 1.7287917737789203e-06, |
|
"loss": 0.8364, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6177985948477752, |
|
"eval_f1": 0.6108344054636982, |
|
"eval_loss": 1.0555986166000366, |
|
"eval_precision": 0.6110212240412639, |
|
"eval_recall": 0.6177985948477752, |
|
"eval_runtime": 66.1391, |
|
"eval_samples_per_second": 32.28, |
|
"eval_steps_per_second": 1.013, |
|
"step": 1463 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"learning_rate": 1.6645244215938305e-06, |
|
"loss": 0.8808, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"learning_rate": 1.6002570694087406e-06, |
|
"loss": 0.8383, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 1.5359897172236504e-06, |
|
"loss": 0.863, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"learning_rate": 1.4717223650385607e-06, |
|
"loss": 0.8312, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 1.4074550128534705e-06, |
|
"loss": 0.8933, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 1.3431876606683806e-06, |
|
"loss": 0.8563, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 1.2789203084832904e-06, |
|
"loss": 0.8588, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"learning_rate": 1.2146529562982007e-06, |
|
"loss": 0.8611, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 11.65, |
|
"learning_rate": 1.1503856041131106e-06, |
|
"loss": 0.8356, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 1.0861182519280206e-06, |
|
"loss": 0.8371, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 1.0218508997429307e-06, |
|
"loss": 0.8177, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 11.88, |
|
"learning_rate": 9.575835475578407e-07, |
|
"loss": 0.8477, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"learning_rate": 8.933161953727507e-07, |
|
"loss": 0.8659, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6196721311475409, |
|
"eval_f1": 0.6150725729241909, |
|
"eval_loss": 1.0520738363265991, |
|
"eval_precision": 0.6140597586468644, |
|
"eval_recall": 0.6196721311475409, |
|
"eval_runtime": 66.4481, |
|
"eval_samples_per_second": 32.13, |
|
"eval_steps_per_second": 1.008, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 8.290488431876607e-07, |
|
"loss": 0.9068, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"learning_rate": 7.647814910025708e-07, |
|
"loss": 0.8314, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 12.18, |
|
"learning_rate": 7.005141388174808e-07, |
|
"loss": 0.8761, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 12.25, |
|
"learning_rate": 6.362467866323908e-07, |
|
"loss": 0.8346, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"learning_rate": 5.719794344473009e-07, |
|
"loss": 0.8345, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 5.077120822622108e-07, |
|
"loss": 0.8641, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 4.434447300771209e-07, |
|
"loss": 0.8213, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"learning_rate": 3.791773778920309e-07, |
|
"loss": 0.8584, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 3.149100257069409e-07, |
|
"loss": 0.8402, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 12.7, |
|
"learning_rate": 2.5064267352185095e-07, |
|
"loss": 0.8239, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"learning_rate": 1.8637532133676095e-07, |
|
"loss": 0.7957, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"learning_rate": 1.2210796915167095e-07, |
|
"loss": 0.8504, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 5.784061696658098e-08, |
|
"loss": 0.8383, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6224824355971897, |
|
"eval_f1": 0.6166583378418276, |
|
"eval_loss": 1.0525678396224976, |
|
"eval_precision": 0.6161446612041342, |
|
"eval_recall": 0.6224824355971897, |
|
"eval_runtime": 65.747, |
|
"eval_samples_per_second": 32.473, |
|
"eval_steps_per_second": 1.019, |
|
"step": 1729 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"step": 1729, |
|
"total_flos": 1.7197173028930535e+19, |
|
"train_loss": 1.109088444861461, |
|
"train_runtime": 12754.1331, |
|
"train_samples_per_second": 17.403, |
|
"train_steps_per_second": 0.136 |
|
} |
|
], |
|
"max_steps": 1729, |
|
"num_train_epochs": 13, |
|
"total_flos": 1.7197173028930535e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|