|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"global_step": 1424, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9297752808988766e-05, |
|
"loss": 2.3997, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.6560856864654333, |
|
"eval_f1": 0.2732362821948488, |
|
"eval_loss": 2.096482753753662, |
|
"eval_precision": 0.5434298440979956, |
|
"eval_recall": 0.1824981301421092, |
|
"eval_runtime": 2.8754, |
|
"eval_samples_per_second": 123.811, |
|
"eval_steps_per_second": 30.953, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.859550561797753e-05, |
|
"loss": 1.9889, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.6641674780915288, |
|
"eval_f1": 0.23144399303321833, |
|
"eval_loss": 1.841402530670166, |
|
"eval_precision": 0.35626471686727634, |
|
"eval_recall": 0.201949860724234, |
|
"eval_runtime": 3.0688, |
|
"eval_samples_per_second": 116.007, |
|
"eval_steps_per_second": 29.002, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.7893258426966292e-05, |
|
"loss": 1.7798, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.678286270691334, |
|
"eval_f1": 0.30889901953592575, |
|
"eval_loss": 1.7000586986541748, |
|
"eval_precision": 0.46442073397410666, |
|
"eval_recall": 0.30013927576601673, |
|
"eval_runtime": 3.0638, |
|
"eval_samples_per_second": 116.196, |
|
"eval_steps_per_second": 29.049, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.7191011235955056e-05, |
|
"loss": 1.6758, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.6851022395326193, |
|
"eval_f1": 0.4112622913384574, |
|
"eval_loss": 1.5615102052688599, |
|
"eval_precision": 0.5168803704334585, |
|
"eval_recall": 0.46160794941282746, |
|
"eval_runtime": 3.0543, |
|
"eval_samples_per_second": 116.558, |
|
"eval_steps_per_second": 29.139, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.648876404494382e-05, |
|
"loss": 1.5373, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.7037974683544304, |
|
"eval_f1": 0.4145163524364727, |
|
"eval_loss": 1.5320079326629639, |
|
"eval_precision": 0.4973354384146672, |
|
"eval_recall": 0.4218608852755194, |
|
"eval_runtime": 3.106, |
|
"eval_samples_per_second": 114.618, |
|
"eval_steps_per_second": 28.654, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.5786516853932585e-05, |
|
"loss": 1.5237, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.6741966893865629, |
|
"eval_f1": 0.4215682986210665, |
|
"eval_loss": 1.436463713645935, |
|
"eval_precision": 0.4890496360861274, |
|
"eval_recall": 0.5289735099337748, |
|
"eval_runtime": 3.3008, |
|
"eval_samples_per_second": 107.852, |
|
"eval_steps_per_second": 26.963, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.508426966292135e-05, |
|
"loss": 1.4836, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.7168451801363194, |
|
"eval_f1": 0.4672566121066839, |
|
"eval_loss": 1.3614311218261719, |
|
"eval_precision": 0.48757191926720095, |
|
"eval_recall": 0.49944812362030905, |
|
"eval_runtime": 3.0542, |
|
"eval_samples_per_second": 116.561, |
|
"eval_steps_per_second": 29.14, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.4382022471910113e-05, |
|
"loss": 1.2214, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_accuracy": 0.6756572541382668, |
|
"eval_f1": 0.46131327709157666, |
|
"eval_loss": 1.359670877456665, |
|
"eval_precision": 0.446314989587013, |
|
"eval_recall": 0.5764348785871964, |
|
"eval_runtime": 3.229, |
|
"eval_samples_per_second": 110.251, |
|
"eval_steps_per_second": 27.563, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.3679775280898877e-05, |
|
"loss": 1.1844, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_accuracy": 0.6653359298928919, |
|
"eval_f1": 0.47653942777539116, |
|
"eval_loss": 1.3439290523529053, |
|
"eval_precision": 0.4441930828791319, |
|
"eval_recall": 0.5929911699779249, |
|
"eval_runtime": 3.2134, |
|
"eval_samples_per_second": 110.785, |
|
"eval_steps_per_second": 27.696, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.2977528089887642e-05, |
|
"loss": 1.2122, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_accuracy": 0.7283349561830574, |
|
"eval_f1": 0.49999878403851467, |
|
"eval_loss": 1.279309630393982, |
|
"eval_precision": 0.5213615961013602, |
|
"eval_recall": 0.5292494481236203, |
|
"eval_runtime": 3.3746, |
|
"eval_samples_per_second": 105.495, |
|
"eval_steps_per_second": 26.374, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.2275280898876405e-05, |
|
"loss": 1.1619, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_accuracy": 0.7230769230769231, |
|
"eval_f1": 0.4887836337789245, |
|
"eval_loss": 1.3024553060531616, |
|
"eval_precision": 0.5019528496497354, |
|
"eval_recall": 0.5135209713024282, |
|
"eval_runtime": 3.4149, |
|
"eval_samples_per_second": 104.248, |
|
"eval_steps_per_second": 26.062, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.157303370786517e-05, |
|
"loss": 1.0476, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_accuracy": 0.7195715676728335, |
|
"eval_f1": 0.5047823749536667, |
|
"eval_loss": 1.2519958019256592, |
|
"eval_precision": 0.4975374148688318, |
|
"eval_recall": 0.5524282560706402, |
|
"eval_runtime": 3.0914, |
|
"eval_samples_per_second": 115.16, |
|
"eval_steps_per_second": 28.79, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.0870786516853932e-05, |
|
"loss": 1.1271, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_accuracy": 0.7352482960077896, |
|
"eval_f1": 0.5283455171867876, |
|
"eval_loss": 1.233576774597168, |
|
"eval_precision": 0.5226202101961099, |
|
"eval_recall": 0.5400110375275938, |
|
"eval_runtime": 2.9844, |
|
"eval_samples_per_second": 119.289, |
|
"eval_steps_per_second": 29.822, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.0168539325842697e-05, |
|
"loss": 1.065, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.7147030185004869, |
|
"eval_f1": 0.526531014037714, |
|
"eval_loss": 1.2058743238449097, |
|
"eval_precision": 0.4963744012861858, |
|
"eval_recall": 0.5905077262693157, |
|
"eval_runtime": 2.9747, |
|
"eval_samples_per_second": 119.675, |
|
"eval_steps_per_second": 29.919, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 9.466292134831461e-06, |
|
"loss": 0.9841, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_accuracy": 0.7189873417721518, |
|
"eval_f1": 0.5335199131439152, |
|
"eval_loss": 1.2104876041412354, |
|
"eval_precision": 0.497289342239156, |
|
"eval_recall": 0.6012693156732892, |
|
"eval_runtime": 2.9664, |
|
"eval_samples_per_second": 120.012, |
|
"eval_steps_per_second": 30.003, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 8.764044943820226e-06, |
|
"loss": 0.8402, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_accuracy": 0.7241480038948394, |
|
"eval_f1": 0.5318695086433913, |
|
"eval_loss": 1.2313593626022339, |
|
"eval_precision": 0.49658531359956104, |
|
"eval_recall": 0.5869205298013245, |
|
"eval_runtime": 3.0358, |
|
"eval_samples_per_second": 117.269, |
|
"eval_steps_per_second": 29.317, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 8.06179775280899e-06, |
|
"loss": 0.8774, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_accuracy": 0.7349561830574489, |
|
"eval_f1": 0.5437299456127834, |
|
"eval_loss": 1.232950210571289, |
|
"eval_precision": 0.5219091313104574, |
|
"eval_recall": 0.5800220750551877, |
|
"eval_runtime": 2.9613, |
|
"eval_samples_per_second": 120.219, |
|
"eval_steps_per_second": 30.055, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.359550561797754e-06, |
|
"loss": 0.8705, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.736222005842259, |
|
"eval_f1": 0.5357813426509713, |
|
"eval_loss": 1.23160719871521, |
|
"eval_precision": 0.5233646751377105, |
|
"eval_recall": 0.5629139072847682, |
|
"eval_runtime": 2.9653, |
|
"eval_samples_per_second": 120.057, |
|
"eval_steps_per_second": 30.014, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.6573033707865175e-06, |
|
"loss": 0.8113, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_accuracy": 0.7201557935735151, |
|
"eval_f1": 0.537801212068117, |
|
"eval_loss": 1.2007496356964111, |
|
"eval_precision": 0.4931233755174561, |
|
"eval_recall": 0.6128587196467992, |
|
"eval_runtime": 2.9774, |
|
"eval_samples_per_second": 119.567, |
|
"eval_steps_per_second": 29.892, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5.955056179775281e-06, |
|
"loss": 0.838, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_accuracy": 0.7356377799415774, |
|
"eval_f1": 0.5536859104767312, |
|
"eval_loss": 1.2019802331924438, |
|
"eval_precision": 0.5212984586666464, |
|
"eval_recall": 0.6026490066225165, |
|
"eval_runtime": 2.9737, |
|
"eval_samples_per_second": 119.715, |
|
"eval_steps_per_second": 29.929, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.252808988764046e-06, |
|
"loss": 0.8193, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_accuracy": 0.7207400194741966, |
|
"eval_f1": 0.5384993944708429, |
|
"eval_loss": 1.1946783065795898, |
|
"eval_precision": 0.5114682731180041, |
|
"eval_recall": 0.6004415011037527, |
|
"eval_runtime": 2.9881, |
|
"eval_samples_per_second": 119.139, |
|
"eval_steps_per_second": 29.785, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 4.550561797752809e-06, |
|
"loss": 0.7141, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"eval_accuracy": 0.7366114897760467, |
|
"eval_f1": 0.5502255793929967, |
|
"eval_loss": 1.2296370267868042, |
|
"eval_precision": 0.5259775856114555, |
|
"eval_recall": 0.5963024282560706, |
|
"eval_runtime": 3.0151, |
|
"eval_samples_per_second": 118.071, |
|
"eval_steps_per_second": 29.518, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 3.848314606741573e-06, |
|
"loss": 0.6905, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_accuracy": 0.7345666991236611, |
|
"eval_f1": 0.5571197569439074, |
|
"eval_loss": 1.2248950004577637, |
|
"eval_precision": 0.5197886755754393, |
|
"eval_recall": 0.6109271523178808, |
|
"eval_runtime": 3.0012, |
|
"eval_samples_per_second": 118.617, |
|
"eval_steps_per_second": 29.654, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 3.146067415730337e-06, |
|
"loss": 0.6951, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_accuracy": 0.7339824732229796, |
|
"eval_f1": 0.5589116173956266, |
|
"eval_loss": 1.229973316192627, |
|
"eval_precision": 0.5182250638435781, |
|
"eval_recall": 0.6103752759381899, |
|
"eval_runtime": 2.9574, |
|
"eval_samples_per_second": 120.376, |
|
"eval_steps_per_second": 30.094, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 2.4438202247191012e-06, |
|
"loss": 0.6387, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_accuracy": 0.7345666991236611, |
|
"eval_f1": 0.5571000509958351, |
|
"eval_loss": 1.2234023809432983, |
|
"eval_precision": 0.5270142770488044, |
|
"eval_recall": 0.6084437086092715, |
|
"eval_runtime": 2.9722, |
|
"eval_samples_per_second": 119.776, |
|
"eval_steps_per_second": 29.944, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 1.7415730337078653e-06, |
|
"loss": 0.6742, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_accuracy": 0.7401168451801363, |
|
"eval_f1": 0.5594133884864436, |
|
"eval_loss": 1.2352250814437866, |
|
"eval_precision": 0.5322645232058062, |
|
"eval_recall": 0.5965783664459161, |
|
"eval_runtime": 2.9614, |
|
"eval_samples_per_second": 120.215, |
|
"eval_steps_per_second": 30.054, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.0393258426966294e-06, |
|
"loss": 0.6565, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_accuracy": 0.7366114897760467, |
|
"eval_f1": 0.5585737547750994, |
|
"eval_loss": 1.2389932870864868, |
|
"eval_precision": 0.5244359046194068, |
|
"eval_recall": 0.6026490066225165, |
|
"eval_runtime": 2.9582, |
|
"eval_samples_per_second": 120.344, |
|
"eval_steps_per_second": 30.086, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 3.3707865168539325e-07, |
|
"loss": 0.675, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_accuracy": 0.7382667964946446, |
|
"eval_f1": 0.5614680191047273, |
|
"eval_loss": 1.2423402070999146, |
|
"eval_precision": 0.5289138247855666, |
|
"eval_recall": 0.6056843267108167, |
|
"eval_runtime": 2.9515, |
|
"eval_samples_per_second": 120.618, |
|
"eval_steps_per_second": 30.154, |
|
"step": 1400 |
|
} |
|
], |
|
"max_steps": 1424, |
|
"num_train_epochs": 4, |
|
"total_flos": 133980467748696.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|