{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.936, "global_step": 496, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 1.0000000000000002e-06, "loss": 2.1204, "step": 1 }, { "epoch": 0.13, "learning_rate": 8.000000000000001e-06, "loss": 2.1463, "step": 8 }, { "epoch": 0.26, "learning_rate": 1.6000000000000003e-05, "loss": 1.8788, "step": 16 }, { "epoch": 0.38, "learning_rate": 2.4e-05, "loss": 1.6339, "step": 24 }, { "epoch": 0.51, "learning_rate": 3.2000000000000005e-05, "loss": 1.4717, "step": 32 }, { "epoch": 0.64, "learning_rate": 4e-05, "loss": 1.2297, "step": 40 }, { "epoch": 0.77, "learning_rate": 4.8e-05, "loss": 1.008, "step": 48 }, { "epoch": 0.9, "learning_rate": 4.9327354260089685e-05, "loss": 0.8316, "step": 56 }, { "epoch": 0.99, "eval_Macro F1": 0.7014511124514329, "eval_Macro Precision": 0.6819163828588641, "eval_Macro Recall": 0.7429670943209584, "eval_Micro F1": 0.743, "eval_Micro Precision": 0.743, "eval_Micro Recall": 0.743, "eval_Weighted F1": 0.702015038120142, "eval_Weighted Precision": 0.6827499598363416, "eval_Weighted Recall": 0.743, "eval_accuracy": 0.743, "eval_loss": 0.7518972158432007, "eval_runtime": 447.9624, "eval_samples_per_second": 4.465, "eval_steps_per_second": 0.141, "step": 62 }, { "epoch": 1.02, "learning_rate": 4.8430493273542606e-05, "loss": 0.751, "step": 64 }, { "epoch": 1.15, "learning_rate": 4.7533632286995514e-05, "loss": 0.6157, "step": 72 }, { "epoch": 1.28, "learning_rate": 4.6636771300448435e-05, "loss": 0.534, "step": 80 }, { "epoch": 1.41, "learning_rate": 4.573991031390134e-05, "loss": 0.4818, "step": 88 }, { "epoch": 1.54, "learning_rate": 4.4843049327354265e-05, "loss": 0.4119, "step": 96 }, { "epoch": 1.66, "learning_rate": 4.394618834080718e-05, "loss": 0.3738, "step": 104 }, { "epoch": 1.79, "learning_rate": 4.3049327354260094e-05, "loss": 0.3613, "step": 112 }, { "epoch": 1.92, "learning_rate": 4.215246636771301e-05, "loss": 0.3561, "step": 120 }, { "epoch": 2.0, "eval_Macro F1": 0.9399641065415552, "eval_Macro Precision": 0.9480023804705043, "eval_Macro Recall": 0.9394177169921687, "eval_Micro F1": 0.9395, "eval_Micro Precision": 0.9395, "eval_Micro Recall": 0.9395, "eval_Weighted F1": 0.9400723727432211, "eval_Weighted Precision": 0.9482020576131688, "eval_Weighted Recall": 0.9395, "eval_accuracy": 0.9395, "eval_loss": 0.23021972179412842, "eval_runtime": 447.7687, "eval_samples_per_second": 4.467, "eval_steps_per_second": 0.141, "step": 125 }, { "epoch": 2.05, "learning_rate": 4.125560538116592e-05, "loss": 0.343, "step": 128 }, { "epoch": 2.18, "learning_rate": 4.035874439461884e-05, "loss": 0.2871, "step": 136 }, { "epoch": 2.3, "learning_rate": 3.9461883408071745e-05, "loss": 0.2976, "step": 144 }, { "epoch": 2.43, "learning_rate": 3.8565022421524667e-05, "loss": 0.2644, "step": 152 }, { "epoch": 2.56, "learning_rate": 3.766816143497758e-05, "loss": 0.2489, "step": 160 }, { "epoch": 2.69, "learning_rate": 3.6771300448430496e-05, "loss": 0.2646, "step": 168 }, { "epoch": 2.82, "learning_rate": 3.587443946188341e-05, "loss": 0.2206, "step": 176 }, { "epoch": 2.94, "learning_rate": 3.4977578475336325e-05, "loss": 0.2222, "step": 184 }, { "epoch": 2.99, "eval_Macro F1": 0.9560784374426261, "eval_Macro Precision": 0.960026790280936, "eval_Macro Recall": 0.9551284727306868, "eval_Micro F1": 0.956, "eval_Micro Precision": 0.956, "eval_Micro Recall": 0.956, "eval_Weighted F1": 0.9564097977894885, "eval_Weighted Precision": 0.9597888158665016, "eval_Weighted Recall": 0.956, "eval_accuracy": 0.956, "eval_loss": 0.1349564790725708, "eval_runtime": 446.9116, "eval_samples_per_second": 4.475, "eval_steps_per_second": 0.141, "step": 187 }, { "epoch": 3.07, "learning_rate": 3.408071748878924e-05, "loss": 0.2258, "step": 192 }, { "epoch": 3.2, "learning_rate": 3.3183856502242154e-05, "loss": 0.207, "step": 200 }, { "epoch": 3.33, "learning_rate": 3.228699551569507e-05, "loss": 0.1826, "step": 208 }, { "epoch": 3.46, "learning_rate": 3.139013452914798e-05, "loss": 0.1929, "step": 216 }, { "epoch": 3.58, "learning_rate": 3.0493273542600898e-05, "loss": 0.2159, "step": 224 }, { "epoch": 3.71, "learning_rate": 2.9596412556053816e-05, "loss": 0.1813, "step": 232 }, { "epoch": 3.84, "learning_rate": 2.8699551569506727e-05, "loss": 0.1723, "step": 240 }, { "epoch": 3.97, "learning_rate": 2.7802690582959645e-05, "loss": 0.1705, "step": 248 }, { "epoch": 4.0, "eval_Macro F1": 0.9725373484087296, "eval_Macro Precision": 0.9740041726370726, "eval_Macro Recall": 0.9721459242124089, "eval_Micro F1": 0.9725, "eval_Micro Precision": 0.9725, "eval_Micro Recall": 0.9725, "eval_Weighted F1": 0.9727108492700939, "eval_Weighted Precision": 0.9739894365164001, "eval_Weighted Recall": 0.9725, "eval_accuracy": 0.9725, "eval_loss": 0.0872766375541687, "eval_runtime": 435.148, "eval_samples_per_second": 4.596, "eval_steps_per_second": 0.145, "step": 250 }, { "epoch": 4.1, "learning_rate": 2.6905829596412556e-05, "loss": 0.1824, "step": 256 }, { "epoch": 4.22, "learning_rate": 2.600896860986547e-05, "loss": 0.1877, "step": 264 }, { "epoch": 4.35, "learning_rate": 2.511210762331839e-05, "loss": 0.2047, "step": 272 }, { "epoch": 4.48, "learning_rate": 2.4215246636771303e-05, "loss": 0.1814, "step": 280 }, { "epoch": 4.61, "learning_rate": 2.3318385650224218e-05, "loss": 0.1396, "step": 288 }, { "epoch": 4.74, "learning_rate": 2.2421524663677132e-05, "loss": 0.1233, "step": 296 }, { "epoch": 4.86, "learning_rate": 2.1524663677130047e-05, "loss": 0.1612, "step": 304 }, { "epoch": 4.99, "learning_rate": 2.062780269058296e-05, "loss": 0.1541, "step": 312 }, { "epoch": 4.99, "eval_Macro F1": 0.9823741759080236, "eval_Macro Precision": 0.9829774434613384, "eval_Macro Recall": 0.9821795541998369, "eval_Micro F1": 0.9825, "eval_Micro Precision": 0.9825, "eval_Micro Recall": 0.9825, "eval_Weighted F1": 0.982525654398398, "eval_Weighted Precision": 0.9829534667560904, "eval_Weighted Recall": 0.9825, "eval_accuracy": 0.9825, "eval_loss": 0.06422679126262665, "eval_runtime": 424.7541, "eval_samples_per_second": 4.709, "eval_steps_per_second": 0.148, "step": 312 }, { "epoch": 5.12, "learning_rate": 1.9730941704035873e-05, "loss": 0.137, "step": 320 }, { "epoch": 5.25, "learning_rate": 1.883408071748879e-05, "loss": 0.1577, "step": 328 }, { "epoch": 5.38, "learning_rate": 1.7937219730941705e-05, "loss": 0.1378, "step": 336 }, { "epoch": 5.5, "learning_rate": 1.704035874439462e-05, "loss": 0.1795, "step": 344 }, { "epoch": 5.63, "learning_rate": 1.6143497757847534e-05, "loss": 0.1447, "step": 352 }, { "epoch": 5.76, "learning_rate": 1.5246636771300449e-05, "loss": 0.1235, "step": 360 }, { "epoch": 5.89, "learning_rate": 1.4349775784753363e-05, "loss": 0.1253, "step": 368 }, { "epoch": 6.0, "eval_Macro F1": 0.991431216491566, "eval_Macro Precision": 0.9916198483282233, "eval_Macro Recall": 0.9913450460193864, "eval_Micro F1": 0.9915, "eval_Micro Precision": 0.9915, "eval_Micro Recall": 0.9915, "eval_Weighted F1": 0.9915159731866887, "eval_Weighted Precision": 0.9916339514381117, "eval_Weighted Recall": 0.9915, "eval_accuracy": 0.9915, "eval_loss": 0.033043112605810165, "eval_runtime": 424.2354, "eval_samples_per_second": 4.714, "eval_steps_per_second": 0.149, "step": 375 }, { "epoch": 6.02, "learning_rate": 1.3452914798206278e-05, "loss": 0.1134, "step": 376 }, { "epoch": 6.14, "learning_rate": 1.2556053811659194e-05, "loss": 0.1393, "step": 384 }, { "epoch": 6.27, "learning_rate": 1.1659192825112109e-05, "loss": 0.0912, "step": 392 }, { "epoch": 6.4, "learning_rate": 1.0762331838565023e-05, "loss": 0.1131, "step": 400 }, { "epoch": 6.53, "learning_rate": 9.865470852017936e-06, "loss": 0.1255, "step": 408 }, { "epoch": 6.66, "learning_rate": 8.968609865470853e-06, "loss": 0.1418, "step": 416 }, { "epoch": 6.78, "learning_rate": 8.071748878923767e-06, "loss": 0.1399, "step": 424 }, { "epoch": 6.91, "learning_rate": 7.174887892376682e-06, "loss": 0.1196, "step": 432 }, { "epoch": 6.99, "eval_Macro F1": 0.9820075650260679, "eval_Macro Precision": 0.983170950573056, "eval_Macro Recall": 0.9817144393341324, "eval_Micro F1": 0.982, "eval_Micro Precision": 0.982, "eval_Micro Recall": 0.982, "eval_Weighted F1": 0.9821570480740702, "eval_Weighted Precision": 0.9831749197494307, "eval_Weighted Recall": 0.982, "eval_accuracy": 0.982, "eval_loss": 0.05244705080986023, "eval_runtime": 424.1947, "eval_samples_per_second": 4.715, "eval_steps_per_second": 0.149, "step": 437 }, { "epoch": 7.04, "learning_rate": 6.278026905829597e-06, "loss": 0.1201, "step": 440 }, { "epoch": 7.17, "learning_rate": 5.381165919282512e-06, "loss": 0.1111, "step": 448 }, { "epoch": 7.3, "learning_rate": 4.484304932735426e-06, "loss": 0.1021, "step": 456 }, { "epoch": 7.42, "learning_rate": 3.587443946188341e-06, "loss": 0.1158, "step": 464 }, { "epoch": 7.55, "learning_rate": 2.690582959641256e-06, "loss": 0.1321, "step": 472 }, { "epoch": 7.68, "learning_rate": 1.7937219730941704e-06, "loss": 0.1429, "step": 480 }, { "epoch": 7.81, "learning_rate": 8.968609865470852e-07, "loss": 0.1103, "step": 488 }, { "epoch": 7.94, "learning_rate": 0.0, "loss": 0.0896, "step": 496 }, { "epoch": 7.94, "eval_Macro F1": 0.9863352307981634, "eval_Macro Precision": 0.9869554360352077, "eval_Macro Recall": 0.986134153009335, "eval_Micro F1": 0.9865, "eval_Micro Precision": 0.9865, "eval_Micro Recall": 0.9865, "eval_Weighted F1": 0.9865091518400995, "eval_Weighted Precision": 0.9869273604184196, "eval_Weighted Recall": 0.9865, "eval_accuracy": 0.9865, "eval_loss": 0.04359065368771553, "eval_runtime": 424.3089, "eval_samples_per_second": 4.714, "eval_steps_per_second": 0.148, "step": 496 }, { "epoch": 7.94, "step": 496, "total_flos": 4.920648490788323e+18, "train_loss": 0.35526800215724974, "train_runtime": 45263.0947, "train_samples_per_second": 1.414, "train_steps_per_second": 0.011 } ], "max_steps": 496, "num_train_epochs": 8, "total_flos": 4.920648490788323e+18, "trial_name": null, "trial_params": null }