{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "global_step": 1424, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 1.9297752808988766e-05, "loss": 2.3997, "step": 50 }, { "epoch": 0.14, "eval_accuracy": 0.6560856864654333, "eval_f1": 0.2732362821948488, "eval_loss": 2.096482753753662, "eval_precision": 0.5434298440979956, "eval_recall": 0.1824981301421092, "eval_runtime": 2.8754, "eval_samples_per_second": 123.811, "eval_steps_per_second": 30.953, "step": 50 }, { "epoch": 0.28, "learning_rate": 1.859550561797753e-05, "loss": 1.9889, "step": 100 }, { "epoch": 0.28, "eval_accuracy": 0.6641674780915288, "eval_f1": 0.23144399303321833, "eval_loss": 1.841402530670166, "eval_precision": 0.35626471686727634, "eval_recall": 0.201949860724234, "eval_runtime": 3.0688, "eval_samples_per_second": 116.007, "eval_steps_per_second": 29.002, "step": 100 }, { "epoch": 0.42, "learning_rate": 1.7893258426966292e-05, "loss": 1.7798, "step": 150 }, { "epoch": 0.42, "eval_accuracy": 0.678286270691334, "eval_f1": 0.30889901953592575, "eval_loss": 1.7000586986541748, "eval_precision": 0.46442073397410666, "eval_recall": 0.30013927576601673, "eval_runtime": 3.0638, "eval_samples_per_second": 116.196, "eval_steps_per_second": 29.049, "step": 150 }, { "epoch": 0.56, "learning_rate": 1.7191011235955056e-05, "loss": 1.6758, "step": 200 }, { "epoch": 0.56, "eval_accuracy": 0.6851022395326193, "eval_f1": 0.4112622913384574, "eval_loss": 1.5615102052688599, "eval_precision": 0.5168803704334585, "eval_recall": 0.46160794941282746, "eval_runtime": 3.0543, "eval_samples_per_second": 116.558, "eval_steps_per_second": 29.139, "step": 200 }, { "epoch": 0.7, "learning_rate": 1.648876404494382e-05, "loss": 1.5373, "step": 250 }, { "epoch": 0.7, "eval_accuracy": 0.7037974683544304, "eval_f1": 0.4145163524364727, "eval_loss": 1.5320079326629639, "eval_precision": 0.4973354384146672, "eval_recall": 0.4218608852755194, "eval_runtime": 3.106, "eval_samples_per_second": 114.618, "eval_steps_per_second": 28.654, "step": 250 }, { "epoch": 0.84, "learning_rate": 1.5786516853932585e-05, "loss": 1.5237, "step": 300 }, { "epoch": 0.84, "eval_accuracy": 0.6741966893865629, "eval_f1": 0.4215682986210665, "eval_loss": 1.436463713645935, "eval_precision": 0.4890496360861274, "eval_recall": 0.5289735099337748, "eval_runtime": 3.3008, "eval_samples_per_second": 107.852, "eval_steps_per_second": 26.963, "step": 300 }, { "epoch": 0.98, "learning_rate": 1.508426966292135e-05, "loss": 1.4836, "step": 350 }, { "epoch": 0.98, "eval_accuracy": 0.7168451801363194, "eval_f1": 0.4672566121066839, "eval_loss": 1.3614311218261719, "eval_precision": 0.48757191926720095, "eval_recall": 0.49944812362030905, "eval_runtime": 3.0542, "eval_samples_per_second": 116.561, "eval_steps_per_second": 29.14, "step": 350 }, { "epoch": 1.12, "learning_rate": 1.4382022471910113e-05, "loss": 1.2214, "step": 400 }, { "epoch": 1.12, "eval_accuracy": 0.6756572541382668, "eval_f1": 0.46131327709157666, "eval_loss": 1.359670877456665, "eval_precision": 0.446314989587013, "eval_recall": 0.5764348785871964, "eval_runtime": 3.229, "eval_samples_per_second": 110.251, "eval_steps_per_second": 27.563, "step": 400 }, { "epoch": 1.26, "learning_rate": 1.3679775280898877e-05, "loss": 1.1844, "step": 450 }, { "epoch": 1.26, "eval_accuracy": 0.6653359298928919, "eval_f1": 0.47653942777539116, "eval_loss": 1.3439290523529053, "eval_precision": 0.4441930828791319, "eval_recall": 0.5929911699779249, "eval_runtime": 3.2134, "eval_samples_per_second": 110.785, "eval_steps_per_second": 27.696, "step": 450 }, { "epoch": 1.4, "learning_rate": 1.2977528089887642e-05, "loss": 1.2122, "step": 500 }, { "epoch": 1.4, "eval_accuracy": 0.7283349561830574, "eval_f1": 0.49999878403851467, "eval_loss": 1.279309630393982, "eval_precision": 0.5213615961013602, "eval_recall": 0.5292494481236203, "eval_runtime": 3.3746, "eval_samples_per_second": 105.495, "eval_steps_per_second": 26.374, "step": 500 }, { "epoch": 1.54, "learning_rate": 1.2275280898876405e-05, "loss": 1.1619, "step": 550 }, { "epoch": 1.54, "eval_accuracy": 0.7230769230769231, "eval_f1": 0.4887836337789245, "eval_loss": 1.3024553060531616, "eval_precision": 0.5019528496497354, "eval_recall": 0.5135209713024282, "eval_runtime": 3.4149, "eval_samples_per_second": 104.248, "eval_steps_per_second": 26.062, "step": 550 }, { "epoch": 1.69, "learning_rate": 1.157303370786517e-05, "loss": 1.0476, "step": 600 }, { "epoch": 1.69, "eval_accuracy": 0.7195715676728335, "eval_f1": 0.5047823749536667, "eval_loss": 1.2519958019256592, "eval_precision": 0.4975374148688318, "eval_recall": 0.5524282560706402, "eval_runtime": 3.0914, "eval_samples_per_second": 115.16, "eval_steps_per_second": 28.79, "step": 600 }, { "epoch": 1.83, "learning_rate": 1.0870786516853932e-05, "loss": 1.1271, "step": 650 }, { "epoch": 1.83, "eval_accuracy": 0.7352482960077896, "eval_f1": 0.5283455171867876, "eval_loss": 1.233576774597168, "eval_precision": 0.5226202101961099, "eval_recall": 0.5400110375275938, "eval_runtime": 2.9844, "eval_samples_per_second": 119.289, "eval_steps_per_second": 29.822, "step": 650 }, { "epoch": 1.97, "learning_rate": 1.0168539325842697e-05, "loss": 1.065, "step": 700 }, { "epoch": 1.97, "eval_accuracy": 0.7147030185004869, "eval_f1": 0.526531014037714, "eval_loss": 1.2058743238449097, "eval_precision": 0.4963744012861858, "eval_recall": 0.5905077262693157, "eval_runtime": 2.9747, "eval_samples_per_second": 119.675, "eval_steps_per_second": 29.919, "step": 700 }, { "epoch": 2.11, "learning_rate": 9.466292134831461e-06, "loss": 0.9841, "step": 750 }, { "epoch": 2.11, "eval_accuracy": 0.7189873417721518, "eval_f1": 0.5335199131439152, "eval_loss": 1.2104876041412354, "eval_precision": 0.497289342239156, "eval_recall": 0.6012693156732892, "eval_runtime": 2.9664, "eval_samples_per_second": 120.012, "eval_steps_per_second": 30.003, "step": 750 }, { "epoch": 2.25, "learning_rate": 8.764044943820226e-06, "loss": 0.8402, "step": 800 }, { "epoch": 2.25, "eval_accuracy": 0.7241480038948394, "eval_f1": 0.5318695086433913, "eval_loss": 1.2313593626022339, "eval_precision": 0.49658531359956104, "eval_recall": 0.5869205298013245, "eval_runtime": 3.0358, "eval_samples_per_second": 117.269, "eval_steps_per_second": 29.317, "step": 800 }, { "epoch": 2.39, "learning_rate": 8.06179775280899e-06, "loss": 0.8774, "step": 850 }, { "epoch": 2.39, "eval_accuracy": 0.7349561830574489, "eval_f1": 0.5437299456127834, "eval_loss": 1.232950210571289, "eval_precision": 0.5219091313104574, "eval_recall": 0.5800220750551877, "eval_runtime": 2.9613, "eval_samples_per_second": 120.219, "eval_steps_per_second": 30.055, "step": 850 }, { "epoch": 2.53, "learning_rate": 7.359550561797754e-06, "loss": 0.8705, "step": 900 }, { "epoch": 2.53, "eval_accuracy": 0.736222005842259, "eval_f1": 0.5357813426509713, "eval_loss": 1.23160719871521, "eval_precision": 0.5233646751377105, "eval_recall": 0.5629139072847682, "eval_runtime": 2.9653, "eval_samples_per_second": 120.057, "eval_steps_per_second": 30.014, "step": 900 }, { "epoch": 2.67, "learning_rate": 6.6573033707865175e-06, "loss": 0.8113, "step": 950 }, { "epoch": 2.67, "eval_accuracy": 0.7201557935735151, "eval_f1": 0.537801212068117, "eval_loss": 1.2007496356964111, "eval_precision": 0.4931233755174561, "eval_recall": 0.6128587196467992, "eval_runtime": 2.9774, "eval_samples_per_second": 119.567, "eval_steps_per_second": 29.892, "step": 950 }, { "epoch": 2.81, "learning_rate": 5.955056179775281e-06, "loss": 0.838, "step": 1000 }, { "epoch": 2.81, "eval_accuracy": 0.7356377799415774, "eval_f1": 0.5536859104767312, "eval_loss": 1.2019802331924438, "eval_precision": 0.5212984586666464, "eval_recall": 0.6026490066225165, "eval_runtime": 2.9737, "eval_samples_per_second": 119.715, "eval_steps_per_second": 29.929, "step": 1000 }, { "epoch": 2.95, "learning_rate": 5.252808988764046e-06, "loss": 0.8193, "step": 1050 }, { "epoch": 2.95, "eval_accuracy": 0.7207400194741966, "eval_f1": 0.5384993944708429, "eval_loss": 1.1946783065795898, "eval_precision": 0.5114682731180041, "eval_recall": 0.6004415011037527, "eval_runtime": 2.9881, "eval_samples_per_second": 119.139, "eval_steps_per_second": 29.785, "step": 1050 }, { "epoch": 3.09, "learning_rate": 4.550561797752809e-06, "loss": 0.7141, "step": 1100 }, { "epoch": 3.09, "eval_accuracy": 0.7366114897760467, "eval_f1": 0.5502255793929967, "eval_loss": 1.2296370267868042, "eval_precision": 0.5259775856114555, "eval_recall": 0.5963024282560706, "eval_runtime": 3.0151, "eval_samples_per_second": 118.071, "eval_steps_per_second": 29.518, "step": 1100 }, { "epoch": 3.23, "learning_rate": 3.848314606741573e-06, "loss": 0.6905, "step": 1150 }, { "epoch": 3.23, "eval_accuracy": 0.7345666991236611, "eval_f1": 0.5571197569439074, "eval_loss": 1.2248950004577637, "eval_precision": 0.5197886755754393, "eval_recall": 0.6109271523178808, "eval_runtime": 3.0012, "eval_samples_per_second": 118.617, "eval_steps_per_second": 29.654, "step": 1150 }, { "epoch": 3.37, "learning_rate": 3.146067415730337e-06, "loss": 0.6951, "step": 1200 }, { "epoch": 3.37, "eval_accuracy": 0.7339824732229796, "eval_f1": 0.5589116173956266, "eval_loss": 1.229973316192627, "eval_precision": 0.5182250638435781, "eval_recall": 0.6103752759381899, "eval_runtime": 2.9574, "eval_samples_per_second": 120.376, "eval_steps_per_second": 30.094, "step": 1200 }, { "epoch": 3.51, "learning_rate": 2.4438202247191012e-06, "loss": 0.6387, "step": 1250 }, { "epoch": 3.51, "eval_accuracy": 0.7345666991236611, "eval_f1": 0.5571000509958351, "eval_loss": 1.2234023809432983, "eval_precision": 0.5270142770488044, "eval_recall": 0.6084437086092715, "eval_runtime": 2.9722, "eval_samples_per_second": 119.776, "eval_steps_per_second": 29.944, "step": 1250 }, { "epoch": 3.65, "learning_rate": 1.7415730337078653e-06, "loss": 0.6742, "step": 1300 }, { "epoch": 3.65, "eval_accuracy": 0.7401168451801363, "eval_f1": 0.5594133884864436, "eval_loss": 1.2352250814437866, "eval_precision": 0.5322645232058062, "eval_recall": 0.5965783664459161, "eval_runtime": 2.9614, "eval_samples_per_second": 120.215, "eval_steps_per_second": 30.054, "step": 1300 }, { "epoch": 3.79, "learning_rate": 1.0393258426966294e-06, "loss": 0.6565, "step": 1350 }, { "epoch": 3.79, "eval_accuracy": 0.7366114897760467, "eval_f1": 0.5585737547750994, "eval_loss": 1.2389932870864868, "eval_precision": 0.5244359046194068, "eval_recall": 0.6026490066225165, "eval_runtime": 2.9582, "eval_samples_per_second": 120.344, "eval_steps_per_second": 30.086, "step": 1350 }, { "epoch": 3.93, "learning_rate": 3.3707865168539325e-07, "loss": 0.675, "step": 1400 }, { "epoch": 3.93, "eval_accuracy": 0.7382667964946446, "eval_f1": 0.5614680191047273, "eval_loss": 1.2423402070999146, "eval_precision": 0.5289138247855666, "eval_recall": 0.6056843267108167, "eval_runtime": 2.9515, "eval_samples_per_second": 120.618, "eval_steps_per_second": 30.154, "step": 1400 } ], "max_steps": 1424, "num_train_epochs": 4, "total_flos": 133980467748696.0, "trial_name": null, "trial_params": null }