{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 500, "global_step": 9600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.0567405223846436, "learning_rate": 4.9500000000000004e-05, "loss": 0.8567, "step": 96 }, { "epoch": 1.0, "eval_LOCATION_f1": 0.0, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.0, "eval_LOCATION_recall": 0.0, "eval_ORGANIZATION_f1": 0.13953488372093023, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.225, "eval_ORGANIZATION_recall": 0.10112359550561797, "eval_PERSON_f1": 0.10062893081761007, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.25806451612903225, "eval_PERSON_recall": 0.0625, "eval_loss": 0.40856924653053284, "eval_overall_accuracy": 0.8507422402159244, "eval_overall_f1": 0.10317460317460317, "eval_overall_precision": 0.23214285714285715, "eval_overall_recall": 0.0663265306122449, "eval_runtime": 0.2722, "eval_samples_per_second": 624.587, "eval_steps_per_second": 11.022, "step": 96 }, { "epoch": 2.0, "grad_norm": 0.710187554359436, "learning_rate": 4.9e-05, "loss": 0.3561, "step": 192 }, { "epoch": 2.0, "eval_LOCATION_f1": 0.5376344086021505, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.5, "eval_LOCATION_recall": 0.5813953488372093, "eval_ORGANIZATION_f1": 0.6109510086455331, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.6272189349112426, "eval_ORGANIZATION_recall": 0.5955056179775281, "eval_PERSON_f1": 0.846441947565543, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.8129496402877698, "eval_PERSON_recall": 0.8828125, "eval_loss": 0.1959131509065628, "eval_overall_accuracy": 0.9443994601889338, "eval_overall_f1": 0.6725, "eval_overall_precision": 0.6593137254901961, "eval_overall_recall": 0.6862244897959183, "eval_runtime": 0.263, "eval_samples_per_second": 646.495, "eval_steps_per_second": 11.409, "step": 192 }, { "epoch": 3.0, "grad_norm": 0.9030072689056396, "learning_rate": 4.85e-05, "loss": 0.1947, "step": 288 }, { "epoch": 3.0, "eval_LOCATION_f1": 0.7727272727272727, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7555555555555555, "eval_LOCATION_recall": 0.7906976744186046, "eval_ORGANIZATION_f1": 0.8075880758807588, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.7801047120418848, "eval_ORGANIZATION_recall": 0.8370786516853933, "eval_PERSON_f1": 0.9545454545454545, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9264705882352942, "eval_PERSON_recall": 0.984375, "eval_loss": 0.10297086089849472, "eval_overall_accuracy": 0.9676113360323887, "eval_overall_f1": 0.8479604449938195, "eval_overall_precision": 0.8225419664268585, "eval_overall_recall": 0.875, "eval_runtime": 0.2641, "eval_samples_per_second": 643.805, "eval_steps_per_second": 11.361, "step": 288 }, { "epoch": 4.0, "grad_norm": 0.8738144040107727, "learning_rate": 4.8e-05, "loss": 0.1377, "step": 384 }, { "epoch": 4.0, "eval_LOCATION_f1": 0.7865168539325844, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7608695652173914, "eval_LOCATION_recall": 0.813953488372093, "eval_ORGANIZATION_f1": 0.7977839335180056, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.7868852459016393, "eval_ORGANIZATION_recall": 0.8089887640449438, "eval_PERSON_f1": 0.9612403100775193, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9538461538461539, "eval_PERSON_recall": 0.96875, "eval_loss": 0.08727145195007324, "eval_overall_accuracy": 0.9727395411605938, "eval_overall_f1": 0.848180677540778, "eval_overall_precision": 0.8345679012345679, "eval_overall_recall": 0.8622448979591837, "eval_runtime": 0.2667, "eval_samples_per_second": 637.45, "eval_steps_per_second": 11.249, "step": 384 }, { "epoch": 5.0, "grad_norm": 0.5742180347442627, "learning_rate": 4.75e-05, "loss": 0.1135, "step": 480 }, { "epoch": 5.0, "eval_LOCATION_f1": 0.8087431693989071, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7628865979381443, "eval_LOCATION_recall": 0.8604651162790697, "eval_ORGANIZATION_f1": 0.8626373626373626, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8440860215053764, "eval_ORGANIZATION_recall": 0.8820224719101124, "eval_PERSON_f1": 0.9612403100775193, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9538461538461539, "eval_PERSON_recall": 0.96875, "eval_loss": 0.07276300340890884, "eval_overall_accuracy": 0.9754385964912281, "eval_overall_f1": 0.8819875776397517, "eval_overall_precision": 0.8595641646489104, "eval_overall_recall": 0.9056122448979592, "eval_runtime": 0.2634, "eval_samples_per_second": 645.418, "eval_steps_per_second": 11.39, "step": 480 }, { "epoch": 6.0, "grad_norm": 0.9918156862258911, "learning_rate": 4.7e-05, "loss": 0.0987, "step": 576 }, { "epoch": 6.0, "eval_LOCATION_f1": 0.861878453038674, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8210526315789474, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.8795518207282913, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8770949720670391, "eval_ORGANIZATION_recall": 0.8820224719101124, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.06594817340373993, "eval_overall_accuracy": 0.9789473684210527, "eval_overall_f1": 0.9042821158690176, "eval_overall_precision": 0.8930348258706468, "eval_overall_recall": 0.9158163265306123, "eval_runtime": 0.264, "eval_samples_per_second": 643.865, "eval_steps_per_second": 11.362, "step": 576 }, { "epoch": 7.0, "grad_norm": 0.7604576349258423, "learning_rate": 4.6500000000000005e-05, "loss": 0.0878, "step": 672 }, { "epoch": 7.0, "eval_LOCATION_f1": 0.8700564971751413, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8461538461538461, "eval_LOCATION_recall": 0.8953488372093024, "eval_ORGANIZATION_f1": 0.8901098901098902, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8709677419354839, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9649805447470817, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9612403100775194, "eval_PERSON_recall": 0.96875, "eval_loss": 0.05883262678980827, "eval_overall_accuracy": 0.9808367071524966, "eval_overall_f1": 0.9097744360902256, "eval_overall_precision": 0.8940886699507389, "eval_overall_recall": 0.9260204081632653, "eval_runtime": 0.2639, "eval_samples_per_second": 644.304, "eval_steps_per_second": 11.37, "step": 672 }, { "epoch": 8.0, "grad_norm": 0.18556399643421173, "learning_rate": 4.600000000000001e-05, "loss": 0.08, "step": 768 }, { "epoch": 8.0, "eval_LOCATION_f1": 0.88268156424581, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8494623655913979, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.888283378746594, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8624338624338624, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9723320158102766, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.055362310260534286, "eval_overall_accuracy": 0.9813765182186235, "eval_overall_f1": 0.9136420525657072, "eval_overall_precision": 0.8968058968058968, "eval_overall_recall": 0.9311224489795918, "eval_runtime": 0.2636, "eval_samples_per_second": 644.95, "eval_steps_per_second": 11.381, "step": 768 }, { "epoch": 9.0, "grad_norm": 0.48036935925483704, "learning_rate": 4.55e-05, "loss": 0.0735, "step": 864 }, { "epoch": 9.0, "eval_LOCATION_f1": 0.8715083798882681, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8387096774193549, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.9050279329608939, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05202125012874603, "eval_overall_accuracy": 0.9840755735492578, "eval_overall_f1": 0.9193954659949622, "eval_overall_precision": 0.9079601990049752, "eval_overall_recall": 0.9311224489795918, "eval_runtime": 0.2652, "eval_samples_per_second": 640.948, "eval_steps_per_second": 11.311, "step": 864 }, { "epoch": 10.0, "grad_norm": 0.7441970109939575, "learning_rate": 4.5e-05, "loss": 0.0645, "step": 960 }, { "epoch": 10.0, "eval_LOCATION_f1": 0.9060773480662985, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8631578947368421, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.907563025210084, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9050279329608939, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05197153240442276, "eval_overall_accuracy": 0.9835357624831309, "eval_overall_f1": 0.929471032745592, "eval_overall_precision": 0.917910447761194, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.264, "eval_samples_per_second": 644.038, "eval_steps_per_second": 11.365, "step": 960 }, { "epoch": 11.0, "grad_norm": 0.559394359588623, "learning_rate": 4.4500000000000004e-05, "loss": 0.0597, "step": 1056 }, { "epoch": 11.0, "eval_LOCATION_f1": 0.9120879120879122, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8645833333333334, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9152542372881356, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9204545454545454, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.968503937007874, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.05179161578416824, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9316455696202531, "eval_overall_precision": 0.9246231155778895, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2643, "eval_samples_per_second": 643.116, "eval_steps_per_second": 11.349, "step": 1056 }, { "epoch": 12.0, "grad_norm": 0.29234591126441956, "learning_rate": 4.4000000000000006e-05, "loss": 0.056, "step": 1152 }, { "epoch": 12.0, "eval_LOCATION_f1": 0.9257142857142857, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9101123595505618, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9090909090909092, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8918918918918919, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.046369876712560654, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9333333333333335, "eval_overall_precision": 0.9205955334987593, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2638, "eval_samples_per_second": 644.535, "eval_steps_per_second": 11.374, "step": 1152 }, { "epoch": 13.0, "grad_norm": 0.7817858457565308, "learning_rate": 4.35e-05, "loss": 0.0532, "step": 1248 }, { "epoch": 13.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9141274238227147, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9016393442622951, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9841269841269841, "eval_PERSON_recall": 0.96875, "eval_loss": 0.049661148339509964, "eval_overall_accuracy": 0.9835357624831309, "eval_overall_f1": 0.9355246523388117, "eval_overall_precision": 0.9273182957393483, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2635, "eval_samples_per_second": 645.173, "eval_steps_per_second": 11.385, "step": 1248 }, { "epoch": 14.0, "grad_norm": 0.2573387324810028, "learning_rate": 4.3e-05, "loss": 0.0512, "step": 1344 }, { "epoch": 14.0, "eval_LOCATION_f1": 0.9069767441860465, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9069767441860465, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.9187675070028011, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9162011173184358, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9649805447470817, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9612403100775194, "eval_PERSON_recall": 0.96875, "eval_loss": 0.047788966447114944, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9312977099236641, "eval_overall_precision": 0.9289340101522843, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2628, "eval_samples_per_second": 646.988, "eval_steps_per_second": 11.417, "step": 1344 }, { "epoch": 15.0, "grad_norm": 0.7714856863021851, "learning_rate": 4.25e-05, "loss": 0.0459, "step": 1440 }, { "epoch": 15.0, "eval_LOCATION_f1": 0.9090909090909092, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9291784702549575, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9371428571428572, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9647058823529412, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.04869839549064636, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9362244897959183, "eval_overall_precision": 0.9362244897959183, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2629, "eval_samples_per_second": 646.653, "eval_steps_per_second": 11.412, "step": 1440 }, { "epoch": 16.0, "grad_norm": 0.875140368938446, "learning_rate": 4.2e-05, "loss": 0.0462, "step": 1536 }, { "epoch": 16.0, "eval_LOCATION_f1": 0.8961748633879781, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.845360824742268, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9126760563380282, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9152542372881356, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.05018707737326622, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.9281210592686003, "eval_overall_precision": 0.9177057356608479, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2636, "eval_samples_per_second": 644.852, "eval_steps_per_second": 11.38, "step": 1536 }, { "epoch": 17.0, "grad_norm": 0.3233906030654907, "learning_rate": 4.15e-05, "loss": 0.044, "step": 1632 }, { "epoch": 17.0, "eval_LOCATION_f1": 0.8977272727272728, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8777777777777778, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9247910863509748, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9171270718232044, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9649805447470817, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9612403100775194, "eval_PERSON_recall": 0.96875, "eval_loss": 0.05254907160997391, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9318181818181819, "eval_overall_precision": 0.9225, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2627, "eval_samples_per_second": 647.149, "eval_steps_per_second": 11.42, "step": 1632 }, { "epoch": 18.0, "grad_norm": 0.8678757548332214, "learning_rate": 4.1e-05, "loss": 0.0424, "step": 1728 }, { "epoch": 18.0, "eval_LOCATION_f1": 0.9101123595505618, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8804347826086957, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9106145251396648, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9055555555555556, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.055523961782455444, "eval_overall_accuracy": 0.9832658569500675, "eval_overall_f1": 0.9304677623261693, "eval_overall_precision": 0.9223057644110275, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.264, "eval_samples_per_second": 644.04, "eval_steps_per_second": 11.365, "step": 1728 }, { "epoch": 19.0, "grad_norm": 0.5588005781173706, "learning_rate": 4.05e-05, "loss": 0.0409, "step": 1824 }, { "epoch": 19.0, "eval_LOCATION_f1": 0.9028571428571427, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8876404494382022, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.902506963788301, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8950276243093923, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.04912838712334633, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9240506329113924, "eval_overall_precision": 0.9170854271356784, "eval_overall_recall": 0.9311224489795918, "eval_runtime": 0.264, "eval_samples_per_second": 643.97, "eval_steps_per_second": 11.364, "step": 1824 }, { "epoch": 20.0, "grad_norm": 0.11216077953577042, "learning_rate": 4e-05, "loss": 0.0373, "step": 1920 }, { "epoch": 20.0, "eval_LOCATION_f1": 0.8465608465608466, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7766990291262136, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.8870056497175142, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8920454545454546, "eval_ORGANIZATION_recall": 0.8820224719101124, "eval_PERSON_f1": 0.9606299212598425, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9682539682539683, "eval_PERSON_recall": 0.953125, "eval_loss": 0.06171651557087898, "eval_overall_accuracy": 0.9805668016194332, "eval_overall_f1": 0.9008782936010038, "eval_overall_precision": 0.8864197530864197, "eval_overall_recall": 0.9158163265306123, "eval_runtime": 0.2645, "eval_samples_per_second": 642.683, "eval_steps_per_second": 11.341, "step": 1920 }, { "epoch": 21.0, "grad_norm": 0.8559902906417847, "learning_rate": 3.9500000000000005e-05, "loss": 0.0367, "step": 2016 }, { "epoch": 21.0, "eval_LOCATION_f1": 0.8852459016393444, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8350515463917526, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.8876080691642652, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9112426035502958, "eval_ORGANIZATION_recall": 0.8651685393258427, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05349059775471687, "eval_overall_accuracy": 0.9840755735492578, "eval_overall_f1": 0.916030534351145, "eval_overall_precision": 0.9137055837563451, "eval_overall_recall": 0.9183673469387755, "eval_runtime": 0.2629, "eval_samples_per_second": 646.572, "eval_steps_per_second": 11.41, "step": 2016 }, { "epoch": 22.0, "grad_norm": 0.7268219590187073, "learning_rate": 3.9000000000000006e-05, "loss": 0.0351, "step": 2112 }, { "epoch": 22.0, "eval_LOCATION_f1": 0.8709677419354839, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.81, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9173789173789175, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.930635838150289, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.05706202983856201, "eval_overall_accuracy": 0.982995951417004, "eval_overall_f1": 0.923076923076923, "eval_overall_precision": 0.912718204488778, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2646, "eval_samples_per_second": 642.552, "eval_steps_per_second": 11.339, "step": 2112 }, { "epoch": 23.0, "grad_norm": 0.27873164415359497, "learning_rate": 3.85e-05, "loss": 0.0328, "step": 2208 }, { "epoch": 23.0, "eval_LOCATION_f1": 0.9050279329608939, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8709677419354839, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9226361031518625, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9415204678362573, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.050778698176145554, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9362244897959183, "eval_overall_precision": 0.9362244897959183, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2631, "eval_samples_per_second": 646.205, "eval_steps_per_second": 11.404, "step": 2208 }, { "epoch": 24.0, "grad_norm": 0.03576788678765297, "learning_rate": 3.8e-05, "loss": 0.0323, "step": 2304 }, { "epoch": 24.0, "eval_LOCATION_f1": 0.8999999999999999, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8617021276595744, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9273743016759777, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9222222222222223, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05190228670835495, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9370277078085643, "eval_overall_precision": 0.9253731343283582, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2626, "eval_samples_per_second": 647.33, "eval_steps_per_second": 11.423, "step": 2304 }, { "epoch": 25.0, "grad_norm": 0.3854944407939911, "learning_rate": 3.7500000000000003e-05, "loss": 0.0298, "step": 2400 }, { "epoch": 25.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.898876404494382, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9178470254957507, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9257142857142857, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.049816932529211044, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.9362244897959183, "eval_overall_precision": 0.9362244897959183, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2636, "eval_samples_per_second": 644.916, "eval_steps_per_second": 11.381, "step": 2400 }, { "epoch": 26.0, "grad_norm": 0.06994820386171341, "learning_rate": 3.7e-05, "loss": 0.0274, "step": 2496 }, { "epoch": 26.0, "eval_LOCATION_f1": 0.9302325581395349, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9302325581395349, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9256198347107438, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9081081081081082, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.05151611194014549, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9405815423514539, "eval_overall_precision": 0.9323308270676691, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2634, "eval_samples_per_second": 645.333, "eval_steps_per_second": 11.388, "step": 2496 }, { "epoch": 27.0, "grad_norm": 0.05162970349192619, "learning_rate": 3.65e-05, "loss": 0.0292, "step": 2592 }, { "epoch": 27.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9247910863509748, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9171270718232044, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05607521906495094, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9382093316519546, "eval_overall_precision": 0.9276807980049875, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2634, "eval_samples_per_second": 645.339, "eval_steps_per_second": 11.388, "step": 2592 }, { "epoch": 28.0, "grad_norm": 0.4818032681941986, "learning_rate": 3.6e-05, "loss": 0.0271, "step": 2688 }, { "epoch": 28.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.898876404494382, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9222222222222223, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9120879120879121, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.054379165172576904, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9368686868686869, "eval_overall_precision": 0.9275, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2622, "eval_samples_per_second": 648.467, "eval_steps_per_second": 11.444, "step": 2688 }, { "epoch": 29.0, "grad_norm": 0.5184191465377808, "learning_rate": 3.55e-05, "loss": 0.0271, "step": 2784 }, { "epoch": 29.0, "eval_LOCATION_f1": 0.9090909090909092, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9171270718232043, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9021739130434783, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05031682923436165, "eval_overall_accuracy": 0.9862348178137652, "eval_overall_f1": 0.9345088161209069, "eval_overall_precision": 0.9228855721393034, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2626, "eval_samples_per_second": 647.306, "eval_steps_per_second": 11.423, "step": 2784 }, { "epoch": 30.0, "grad_norm": 0.301739364862442, "learning_rate": 3.5e-05, "loss": 0.025, "step": 2880 }, { "epoch": 30.0, "eval_LOCATION_f1": 0.8913043478260869, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8367346938775511, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.898550724637681, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9281437125748503, "eval_ORGANIZATION_recall": 0.8707865168539326, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.056635547429323196, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9222929936305732, "eval_overall_precision": 0.9211195928753181, "eval_overall_recall": 0.923469387755102, "eval_runtime": 0.2629, "eval_samples_per_second": 646.63, "eval_steps_per_second": 11.411, "step": 2880 }, { "epoch": 31.0, "grad_norm": 0.05222267284989357, "learning_rate": 3.45e-05, "loss": 0.0244, "step": 2976 }, { "epoch": 31.0, "eval_LOCATION_f1": 0.8999999999999999, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8617021276595744, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.905982905982906, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9190751445086706, "eval_ORGANIZATION_recall": 0.8932584269662921, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05337852984666824, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9275730622617534, "eval_overall_precision": 0.9240506329113924, "eval_overall_recall": 0.9311224489795918, "eval_runtime": 0.2633, "eval_samples_per_second": 645.609, "eval_steps_per_second": 11.393, "step": 2976 }, { "epoch": 32.0, "grad_norm": 0.2674584984779358, "learning_rate": 3.4000000000000007e-05, "loss": 0.0241, "step": 3072 }, { "epoch": 32.0, "eval_LOCATION_f1": 0.8817204301075269, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.82, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9080459770114941, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9294117647058824, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.06110076978802681, "eval_overall_accuracy": 0.9840755735492578, "eval_overall_f1": 0.9215189873417722, "eval_overall_precision": 0.914572864321608, "eval_overall_recall": 0.9285714285714286, "eval_runtime": 0.2629, "eval_samples_per_second": 646.662, "eval_steps_per_second": 11.412, "step": 3072 }, { "epoch": 33.0, "grad_norm": 0.7777827382087708, "learning_rate": 3.35e-05, "loss": 0.0229, "step": 3168 }, { "epoch": 33.0, "eval_LOCATION_f1": 0.8804347826086957, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.826530612244898, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9080459770114941, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9294117647058824, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.9647058823529412, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.05559757351875305, "eval_overall_accuracy": 0.9843454790823212, "eval_overall_f1": 0.9199491740787801, "eval_overall_precision": 0.9164556962025316, "eval_overall_recall": 0.923469387755102, "eval_runtime": 0.2628, "eval_samples_per_second": 646.773, "eval_steps_per_second": 11.414, "step": 3168 }, { "epoch": 34.0, "grad_norm": 0.3666849434375763, "learning_rate": 3.3e-05, "loss": 0.0227, "step": 3264 }, { "epoch": 34.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9199999999999999, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.936046511627907, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.05445397272706032, "eval_overall_accuracy": 0.9859649122807017, "eval_overall_f1": 0.9323116219667943, "eval_overall_precision": 0.9335038363171355, "eval_overall_recall": 0.9311224489795918, "eval_runtime": 0.2641, "eval_samples_per_second": 643.763, "eval_steps_per_second": 11.361, "step": 3264 }, { "epoch": 35.0, "grad_norm": 0.6137359738349915, "learning_rate": 3.2500000000000004e-05, "loss": 0.0223, "step": 3360 }, { "epoch": 35.0, "eval_LOCATION_f1": 0.9162011173184358, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8817204301075269, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9235127478753541, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9314285714285714, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.05303780734539032, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9377382465057178, "eval_overall_precision": 0.9341772151898734, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2635, "eval_samples_per_second": 645.076, "eval_steps_per_second": 11.384, "step": 3360 }, { "epoch": 36.0, "grad_norm": 0.42580920457839966, "learning_rate": 3.2000000000000005e-05, "loss": 0.022, "step": 3456 }, { "epoch": 36.0, "eval_LOCATION_f1": 0.8901098901098902, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.84375, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9235127478753541, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9314285714285714, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9649805447470817, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9612403100775194, "eval_PERSON_recall": 0.96875, "eval_loss": 0.057867929339408875, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9292929292929293, "eval_overall_precision": 0.92, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2619, "eval_samples_per_second": 649.085, "eval_steps_per_second": 11.454, "step": 3456 }, { "epoch": 37.0, "grad_norm": 0.42478522658348083, "learning_rate": 3.15e-05, "loss": 0.0221, "step": 3552 }, { "epoch": 37.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.898876404494382, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9295774647887324, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9322033898305084, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05257951095700264, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.94147582697201, "eval_overall_precision": 0.9390862944162437, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2635, "eval_samples_per_second": 645.165, "eval_steps_per_second": 11.385, "step": 3552 }, { "epoch": 38.0, "grad_norm": 0.19741423428058624, "learning_rate": 3.1e-05, "loss": 0.0213, "step": 3648 }, { "epoch": 38.0, "eval_LOCATION_f1": 0.9101123595505618, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8804347826086957, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9121813031161473, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.92, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.057214513421058655, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9314720812182741, "eval_overall_precision": 0.9267676767676768, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.263, "eval_samples_per_second": 646.468, "eval_steps_per_second": 11.408, "step": 3648 }, { "epoch": 39.0, "grad_norm": 0.21297980844974518, "learning_rate": 3.05e-05, "loss": 0.0198, "step": 3744 }, { "epoch": 39.0, "eval_LOCATION_f1": 0.9302325581395349, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9302325581395349, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9230769230769231, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9032258064516129, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.04810687154531479, "eval_overall_accuracy": 0.9873144399460189, "eval_overall_f1": 0.9393939393939393, "eval_overall_precision": 0.93, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2651, "eval_samples_per_second": 641.3, "eval_steps_per_second": 11.317, "step": 3744 }, { "epoch": 40.0, "grad_norm": 0.42829686403274536, "learning_rate": 3e-05, "loss": 0.02, "step": 3840 }, { "epoch": 40.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9204545454545454, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9310344827586207, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.052087049931287766, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.935031847133758, "eval_overall_precision": 0.9338422391857506, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2631, "eval_samples_per_second": 646.121, "eval_steps_per_second": 11.402, "step": 3840 }, { "epoch": 41.0, "grad_norm": 0.18620462715625763, "learning_rate": 2.95e-05, "loss": 0.0182, "step": 3936 }, { "epoch": 41.0, "eval_LOCATION_f1": 0.9111111111111112, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8723404255319149, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9257142857142857, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9418604651162791, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.05100242421030998, "eval_overall_accuracy": 0.9875843454790824, "eval_overall_f1": 0.937579617834395, "eval_overall_precision": 0.9363867684478372, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2619, "eval_samples_per_second": 649.032, "eval_steps_per_second": 11.454, "step": 3936 }, { "epoch": 42.0, "grad_norm": 0.03817855566740036, "learning_rate": 2.9e-05, "loss": 0.0194, "step": 4032 }, { "epoch": 42.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9318181818181818, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9425287356321839, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.05455116555094719, "eval_overall_accuracy": 0.9859649122807017, "eval_overall_f1": 0.9387755102040817, "eval_overall_precision": 0.9387755102040817, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2629, "eval_samples_per_second": 646.516, "eval_steps_per_second": 11.409, "step": 4032 }, { "epoch": 43.0, "grad_norm": 0.08467834442853928, "learning_rate": 2.8499999999999998e-05, "loss": 0.0164, "step": 4128 }, { "epoch": 43.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9257142857142857, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9418604651162791, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05664708837866783, "eval_overall_accuracy": 0.9875843454790824, "eval_overall_f1": 0.9411764705882353, "eval_overall_precision": 0.9435897435897436, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2632, "eval_samples_per_second": 645.998, "eval_steps_per_second": 11.4, "step": 4128 }, { "epoch": 44.0, "grad_norm": 0.9716057181358337, "learning_rate": 2.8000000000000003e-05, "loss": 0.0182, "step": 4224 }, { "epoch": 44.0, "eval_LOCATION_f1": 0.8901098901098902, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.84375, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9075144508670521, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9345238095238095, "eval_ORGANIZATION_recall": 0.8820224719101124, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.06421507894992828, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.923469387755102, "eval_overall_precision": 0.923469387755102, "eval_overall_recall": 0.923469387755102, "eval_runtime": 0.2627, "eval_samples_per_second": 647.21, "eval_steps_per_second": 11.421, "step": 4224 }, { "epoch": 45.0, "grad_norm": 0.4442441165447235, "learning_rate": 2.7500000000000004e-05, "loss": 0.0171, "step": 4320 }, { "epoch": 45.0, "eval_LOCATION_f1": 0.8901098901098902, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.84375, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9287749287749287, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9421965317919075, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.059622135013341904, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9328263624841572, "eval_overall_precision": 0.9269521410579346, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2641, "eval_samples_per_second": 643.734, "eval_steps_per_second": 11.36, "step": 4320 }, { "epoch": 46.0, "grad_norm": 0.045812927186489105, "learning_rate": 2.7000000000000002e-05, "loss": 0.0171, "step": 4416 }, { "epoch": 46.0, "eval_LOCATION_f1": 0.8864864864864866, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8282828282828283, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9132947976878611, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9404761904761905, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.05522875115275383, "eval_overall_accuracy": 0.9859649122807017, "eval_overall_f1": 0.9250317662007623, "eval_overall_precision": 0.9215189873417722, "eval_overall_recall": 0.9285714285714286, "eval_runtime": 0.2625, "eval_samples_per_second": 647.715, "eval_steps_per_second": 11.43, "step": 4416 }, { "epoch": 47.0, "grad_norm": 0.2865196466445923, "learning_rate": 2.6500000000000004e-05, "loss": 0.0171, "step": 4512 }, { "epoch": 47.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9322033898305084, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9375, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9723320158102766, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.0554683580994606, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9413265306122449, "eval_overall_precision": 0.9413265306122449, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.262, "eval_samples_per_second": 648.854, "eval_steps_per_second": 11.45, "step": 4512 }, { "epoch": 48.0, "grad_norm": 2.1837306022644043, "learning_rate": 2.6000000000000002e-05, "loss": 0.0143, "step": 4608 }, { "epoch": 48.0, "eval_LOCATION_f1": 0.9050279329608939, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8709677419354839, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9318181818181818, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9425287356321839, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.968503937007874, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.06268592178821564, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.937579617834395, "eval_overall_precision": 0.9363867684478372, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2641, "eval_samples_per_second": 643.781, "eval_steps_per_second": 11.361, "step": 4608 }, { "epoch": 49.0, "grad_norm": 0.037817440927028656, "learning_rate": 2.5500000000000003e-05, "loss": 0.016, "step": 4704 }, { "epoch": 49.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9405099150141643, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9485714285714286, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.056825291365385056, "eval_overall_accuracy": 0.9870445344129555, "eval_overall_f1": 0.9453621346886911, "eval_overall_precision": 0.9417721518987342, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.263, "eval_samples_per_second": 646.509, "eval_steps_per_second": 11.409, "step": 4704 }, { "epoch": 50.0, "grad_norm": 0.06557326763868332, "learning_rate": 2.5e-05, "loss": 0.0133, "step": 4800 }, { "epoch": 50.0, "eval_LOCATION_f1": 0.9101123595505618, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8804347826086957, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9204545454545454, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9310344827586207, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.060878023505210876, "eval_overall_accuracy": 0.9859649122807017, "eval_overall_f1": 0.9351969504447268, "eval_overall_precision": 0.9316455696202531, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2623, "eval_samples_per_second": 648.03, "eval_steps_per_second": 11.436, "step": 4800 }, { "epoch": 51.0, "grad_norm": 0.025485187768936157, "learning_rate": 2.45e-05, "loss": 0.0149, "step": 4896 }, { "epoch": 51.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9257142857142857, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9418604651162791, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05756629630923271, "eval_overall_accuracy": 0.9859649122807017, "eval_overall_f1": 0.935031847133758, "eval_overall_precision": 0.9338422391857506, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2634, "eval_samples_per_second": 645.426, "eval_steps_per_second": 11.39, "step": 4896 }, { "epoch": 52.0, "grad_norm": 1.6103955507278442, "learning_rate": 2.4e-05, "loss": 0.0146, "step": 4992 }, { "epoch": 52.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9464788732394366, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9491525423728814, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05451711639761925, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.949238578680203, "eval_overall_precision": 0.9444444444444444, "eval_overall_recall": 0.9540816326530612, "eval_runtime": 0.2617, "eval_samples_per_second": 649.567, "eval_steps_per_second": 11.463, "step": 4992 }, { "epoch": 53.0, "grad_norm": 1.6491730213165283, "learning_rate": 2.35e-05, "loss": 0.0149, "step": 5088 }, { "epoch": 53.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.898876404494382, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9441340782122906, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9388888888888889, "eval_ORGANIZATION_recall": 0.949438202247191, "eval_PERSON_f1": 0.9609375, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9609375, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.06067636236548424, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9429657794676806, "eval_overall_precision": 0.9370277078085643, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.262, "eval_samples_per_second": 648.897, "eval_steps_per_second": 11.451, "step": 5088 }, { "epoch": 54.0, "grad_norm": 0.623364269733429, "learning_rate": 2.3000000000000003e-05, "loss": 0.0131, "step": 5184 }, { "epoch": 54.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9371428571428573, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9534883720930233, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.06415931135416031, "eval_overall_accuracy": 0.9862348178137652, "eval_overall_f1": 0.9438775510204082, "eval_overall_precision": 0.9438775510204082, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2622, "eval_samples_per_second": 648.291, "eval_steps_per_second": 11.44, "step": 5184 }, { "epoch": 55.0, "grad_norm": 0.4180052876472473, "learning_rate": 2.25e-05, "loss": 0.013, "step": 5280 }, { "epoch": 55.0, "eval_LOCATION_f1": 0.8876404494382023, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8586956521739131, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9230769230769231, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9364161849710982, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.06132206693291664, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.9324840764331209, "eval_overall_precision": 0.9312977099236641, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2635, "eval_samples_per_second": 645.207, "eval_steps_per_second": 11.386, "step": 5280 }, { "epoch": 56.0, "grad_norm": 0.720609188079834, "learning_rate": 2.2000000000000003e-05, "loss": 0.0138, "step": 5376 }, { "epoch": 56.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.898876404494382, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9269662921348315, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9269662921348315, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.054878633469343185, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9389312977099236, "eval_overall_precision": 0.9365482233502538, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2625, "eval_samples_per_second": 647.694, "eval_steps_per_second": 11.43, "step": 5376 }, { "epoch": 57.0, "grad_norm": 0.060024455189704895, "learning_rate": 2.15e-05, "loss": 0.013, "step": 5472 }, { "epoch": 57.0, "eval_LOCATION_f1": 0.9090909090909092, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9291784702549575, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9371428571428572, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.06312891095876694, "eval_overall_accuracy": 0.9859649122807017, "eval_overall_f1": 0.937579617834395, "eval_overall_precision": 0.9363867684478372, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2645, "eval_samples_per_second": 642.83, "eval_steps_per_second": 11.344, "step": 5472 }, { "epoch": 58.0, "grad_norm": 0.46193939447402954, "learning_rate": 2.1e-05, "loss": 0.0122, "step": 5568 }, { "epoch": 58.0, "eval_LOCATION_f1": 0.9213483146067417, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8913043478260869, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9314285714285714, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9476744186046512, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.06275834143161774, "eval_overall_accuracy": 0.9870445344129555, "eval_overall_f1": 0.9426751592356687, "eval_overall_precision": 0.9414758269720102, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2637, "eval_samples_per_second": 644.751, "eval_steps_per_second": 11.378, "step": 5568 }, { "epoch": 59.0, "grad_norm": 0.6101306676864624, "learning_rate": 2.05e-05, "loss": 0.012, "step": 5664 }, { "epoch": 59.0, "eval_LOCATION_f1": 0.9310344827586208, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9204545454545454, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9378531073446328, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9431818181818182, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.0650852769613266, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9464285714285714, "eval_overall_precision": 0.9464285714285714, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2622, "eval_samples_per_second": 648.274, "eval_steps_per_second": 11.44, "step": 5664 }, { "epoch": 60.0, "grad_norm": 0.8312122225761414, "learning_rate": 2e-05, "loss": 0.0122, "step": 5760 }, { "epoch": 60.0, "eval_LOCATION_f1": 0.9171270718232045, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8736842105263158, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9190751445086704, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9464285714285714, "eval_ORGANIZATION_recall": 0.8932584269662921, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.06510728597640991, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9348659003831418, "eval_overall_precision": 0.9360613810741688, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2628, "eval_samples_per_second": 646.935, "eval_steps_per_second": 11.416, "step": 5760 }, { "epoch": 61.0, "grad_norm": 0.4985395669937134, "learning_rate": 1.9500000000000003e-05, "loss": 0.0115, "step": 5856 }, { "epoch": 61.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9464788732394366, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9491525423728814, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.064664326608181, "eval_overall_accuracy": 0.9875843454790824, "eval_overall_f1": 0.949238578680203, "eval_overall_precision": 0.9444444444444444, "eval_overall_recall": 0.9540816326530612, "eval_runtime": 0.2629, "eval_samples_per_second": 646.652, "eval_steps_per_second": 11.412, "step": 5856 }, { "epoch": 62.0, "grad_norm": 0.2893165946006775, "learning_rate": 1.9e-05, "loss": 0.0121, "step": 5952 }, { "epoch": 62.0, "eval_LOCATION_f1": 0.9257142857142857, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9101123595505618, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9291784702549575, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9371428571428572, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.06350239366292953, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.9426751592356687, "eval_overall_precision": 0.9414758269720102, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2636, "eval_samples_per_second": 644.994, "eval_steps_per_second": 11.382, "step": 5952 }, { "epoch": 63.0, "grad_norm": 0.4730461537837982, "learning_rate": 1.85e-05, "loss": 0.011, "step": 6048 }, { "epoch": 63.0, "eval_LOCATION_f1": 0.9195402298850575, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9090909090909091, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9405099150141643, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9485714285714286, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.06381069123744965, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9464285714285714, "eval_overall_precision": 0.9464285714285714, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.262, "eval_samples_per_second": 648.871, "eval_steps_per_second": 11.451, "step": 6048 }, { "epoch": 64.0, "grad_norm": 0.5198408961296082, "learning_rate": 1.8e-05, "loss": 0.0113, "step": 6144 }, { "epoch": 64.0, "eval_LOCATION_f1": 0.9213483146067417, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8913043478260869, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9261363636363636, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9367816091954023, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.05832422897219658, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.940127388535032, "eval_overall_precision": 0.9389312977099237, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2619, "eval_samples_per_second": 649.128, "eval_steps_per_second": 11.455, "step": 6144 }, { "epoch": 65.0, "grad_norm": 0.07396053522825241, "learning_rate": 1.75e-05, "loss": 0.0107, "step": 6240 }, { "epoch": 65.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9322033898305084, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9375, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.06743788719177246, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.94147582697201, "eval_overall_precision": 0.9390862944162437, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2629, "eval_samples_per_second": 646.741, "eval_steps_per_second": 11.413, "step": 6240 }, { "epoch": 66.0, "grad_norm": 0.051047179847955704, "learning_rate": 1.7000000000000003e-05, "loss": 0.0106, "step": 6336 }, { "epoch": 66.0, "eval_LOCATION_f1": 0.9265536723163842, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9010989010989011, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9283667621776504, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9473684210526315, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.06871438026428223, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9425287356321839, "eval_overall_precision": 0.9437340153452686, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2631, "eval_samples_per_second": 646.053, "eval_steps_per_second": 11.401, "step": 6336 }, { "epoch": 67.0, "grad_norm": 0.038774944841861725, "learning_rate": 1.65e-05, "loss": 0.0102, "step": 6432 }, { "epoch": 67.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9287749287749287, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9421965317919075, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.06630941480398178, "eval_overall_accuracy": 0.9859649122807017, "eval_overall_f1": 0.9413265306122449, "eval_overall_precision": 0.9413265306122449, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2633, "eval_samples_per_second": 645.553, "eval_steps_per_second": 11.392, "step": 6432 }, { "epoch": 68.0, "grad_norm": 0.24254558980464935, "learning_rate": 1.6000000000000003e-05, "loss": 0.0099, "step": 6528 }, { "epoch": 68.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.898876404494382, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9431818181818182, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9540229885057471, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.06650377064943314, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.9464285714285714, "eval_overall_precision": 0.9464285714285714, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2631, "eval_samples_per_second": 646.049, "eval_steps_per_second": 11.401, "step": 6528 }, { "epoch": 69.0, "grad_norm": 0.4754196107387543, "learning_rate": 1.55e-05, "loss": 0.0108, "step": 6624 }, { "epoch": 69.0, "eval_LOCATION_f1": 0.9248554913294798, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9195402298850575, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9378531073446328, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9431818181818182, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05997459217905998, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9476372924648786, "eval_overall_precision": 0.948849104859335, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2627, "eval_samples_per_second": 647.059, "eval_steps_per_second": 11.419, "step": 6624 }, { "epoch": 70.0, "grad_norm": 0.0765451043844223, "learning_rate": 1.5e-05, "loss": 0.0096, "step": 6720 }, { "epoch": 70.0, "eval_LOCATION_f1": 0.9257142857142857, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9101123595505618, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9355742296918768, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9329608938547486, "eval_ORGANIZATION_recall": 0.9382022471910112, "eval_PERSON_f1": 0.9647058823529412, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.06229870766401291, "eval_overall_accuracy": 0.9875843454790824, "eval_overall_f1": 0.9428208386277002, "eval_overall_precision": 0.9392405063291139, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2629, "eval_samples_per_second": 646.634, "eval_steps_per_second": 11.411, "step": 6720 }, { "epoch": 71.0, "grad_norm": 0.33183351159095764, "learning_rate": 1.45e-05, "loss": 0.0105, "step": 6816 }, { "epoch": 71.0, "eval_LOCATION_f1": 0.9265536723163842, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9010989010989011, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9318181818181818, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9425287356321839, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.06661175936460495, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9440203562340966, "eval_overall_precision": 0.9416243654822335, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2631, "eval_samples_per_second": 646.129, "eval_steps_per_second": 11.402, "step": 6816 }, { "epoch": 72.0, "grad_norm": 0.4176276922225952, "learning_rate": 1.4000000000000001e-05, "loss": 0.0088, "step": 6912 }, { "epoch": 72.0, "eval_LOCATION_f1": 0.9318181818181819, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9111111111111111, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9137931034482758, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9352941176470588, "eval_ORGANIZATION_recall": 0.8932584269662921, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.06664171069860458, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9384615384615385, "eval_overall_precision": 0.9432989690721649, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2639, "eval_samples_per_second": 644.2, "eval_steps_per_second": 11.368, "step": 6912 }, { "epoch": 73.0, "grad_norm": 0.08331205695867538, "learning_rate": 1.3500000000000001e-05, "loss": 0.0104, "step": 7008 }, { "epoch": 73.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.898876404494382, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9199999999999999, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.936046511627907, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.06986453384160995, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9360613810741688, "eval_overall_precision": 0.9384615384615385, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2622, "eval_samples_per_second": 648.363, "eval_steps_per_second": 11.442, "step": 7008 }, { "epoch": 74.0, "grad_norm": 0.12321806699037552, "learning_rate": 1.3000000000000001e-05, "loss": 0.0087, "step": 7104 }, { "epoch": 74.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.898876404494382, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9291784702549575, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9371428571428572, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.06694254279136658, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.940127388535032, "eval_overall_precision": 0.9389312977099237, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2623, "eval_samples_per_second": 648.128, "eval_steps_per_second": 11.438, "step": 7104 }, { "epoch": 75.0, "grad_norm": 0.8054729700088501, "learning_rate": 1.25e-05, "loss": 0.0093, "step": 7200 }, { "epoch": 75.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.898876404494382, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9261363636363636, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9367816091954023, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.07096198946237564, "eval_overall_accuracy": 0.9862348178137652, "eval_overall_f1": 0.9387755102040817, "eval_overall_precision": 0.9387755102040817, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2624, "eval_samples_per_second": 647.753, "eval_steps_per_second": 11.431, "step": 7200 }, { "epoch": 76.0, "grad_norm": 0.03994647040963173, "learning_rate": 1.2e-05, "loss": 0.0098, "step": 7296 }, { "epoch": 76.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.898876404494382, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9230769230769231, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9364161849710982, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0709138810634613, "eval_overall_accuracy": 0.9862348178137652, "eval_overall_f1": 0.937420178799489, "eval_overall_precision": 0.9386189258312021, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2651, "eval_samples_per_second": 641.372, "eval_steps_per_second": 11.318, "step": 7296 }, { "epoch": 77.0, "grad_norm": 0.18597504496574402, "learning_rate": 1.1500000000000002e-05, "loss": 0.0091, "step": 7392 }, { "epoch": 77.0, "eval_LOCATION_f1": 0.9265536723163842, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9010989010989011, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9340974212034385, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9532163742690059, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.07089944928884506, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9450830140485312, "eval_overall_precision": 0.9462915601023018, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2621, "eval_samples_per_second": 648.499, "eval_steps_per_second": 11.444, "step": 7392 }, { "epoch": 78.0, "grad_norm": 0.022241957485675812, "learning_rate": 1.1000000000000001e-05, "loss": 0.0082, "step": 7488 }, { "epoch": 78.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.898876404494382, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9318181818181818, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9425287356321839, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.07111123949289322, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9425287356321839, "eval_overall_precision": 0.9437340153452686, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2633, "eval_samples_per_second": 645.584, "eval_steps_per_second": 11.393, "step": 7488 }, { "epoch": 79.0, "grad_norm": 0.1433478742837906, "learning_rate": 1.05e-05, "loss": 0.0099, "step": 7584 }, { "epoch": 79.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.898876404494382, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9352112676056338, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9378531073446328, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.06799092888832092, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9428208386277002, "eval_overall_precision": 0.9392405063291139, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.264, "eval_samples_per_second": 643.84, "eval_steps_per_second": 11.362, "step": 7584 }, { "epoch": 80.0, "grad_norm": 0.8019375801086426, "learning_rate": 1e-05, "loss": 0.008, "step": 7680 }, { "epoch": 80.0, "eval_LOCATION_f1": 0.9257142857142857, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9101123595505618, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9261363636363636, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9367816091954023, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.07171770185232162, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.9413265306122449, "eval_overall_precision": 0.9413265306122449, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2625, "eval_samples_per_second": 647.697, "eval_steps_per_second": 11.43, "step": 7680 }, { "epoch": 81.0, "grad_norm": 0.18681468069553375, "learning_rate": 9.5e-06, "loss": 0.0094, "step": 7776 }, { "epoch": 81.0, "eval_LOCATION_f1": 0.9318181818181819, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9111111111111111, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9375000000000001, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9482758620689655, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.07289179414510727, "eval_overall_accuracy": 0.9870445344129555, "eval_overall_f1": 0.9477707006369427, "eval_overall_precision": 0.9465648854961832, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2625, "eval_samples_per_second": 647.641, "eval_steps_per_second": 11.429, "step": 7776 }, { "epoch": 82.0, "grad_norm": 0.05996530503034592, "learning_rate": 9e-06, "loss": 0.0081, "step": 7872 }, { "epoch": 82.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.898876404494382, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9235127478753541, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9314285714285714, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.07142689824104309, "eval_overall_accuracy": 0.9859649122807017, "eval_overall_f1": 0.937579617834395, "eval_overall_precision": 0.9363867684478372, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2625, "eval_samples_per_second": 647.656, "eval_steps_per_second": 11.429, "step": 7872 }, { "epoch": 83.0, "grad_norm": 0.039266638457775116, "learning_rate": 8.500000000000002e-06, "loss": 0.0087, "step": 7968 }, { "epoch": 83.0, "eval_LOCATION_f1": 0.9265536723163842, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9010989010989011, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9261363636363636, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9367816091954023, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.06957918405532837, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9426751592356687, "eval_overall_precision": 0.9414758269720102, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2629, "eval_samples_per_second": 646.615, "eval_steps_per_second": 11.411, "step": 7968 }, { "epoch": 84.0, "grad_norm": 0.3734685778617859, "learning_rate": 8.000000000000001e-06, "loss": 0.0072, "step": 8064 }, { "epoch": 84.0, "eval_LOCATION_f1": 0.9378531073446328, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9120879120879121, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9318181818181818, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9425287356321839, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.07126548886299133, "eval_overall_accuracy": 0.9870445344129555, "eval_overall_f1": 0.9452229299363057, "eval_overall_precision": 0.9440203562340967, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2622, "eval_samples_per_second": 648.318, "eval_steps_per_second": 11.441, "step": 8064 }, { "epoch": 85.0, "grad_norm": 0.20688194036483765, "learning_rate": 7.5e-06, "loss": 0.0072, "step": 8160 }, { "epoch": 85.0, "eval_LOCATION_f1": 0.9265536723163842, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9010989010989011, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9287749287749287, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9421965317919075, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.0726483091711998, "eval_overall_accuracy": 0.9862348178137652, "eval_overall_f1": 0.9425287356321839, "eval_overall_precision": 0.9437340153452686, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2624, "eval_samples_per_second": 647.815, "eval_steps_per_second": 11.432, "step": 8160 }, { "epoch": 86.0, "grad_norm": 0.01083430927246809, "learning_rate": 7.000000000000001e-06, "loss": 0.0069, "step": 8256 }, { "epoch": 86.0, "eval_LOCATION_f1": 0.9265536723163842, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9010989010989011, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9204545454545454, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9310344827586207, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9649805447470817, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9612403100775194, "eval_PERSON_recall": 0.96875, "eval_loss": 0.07003360986709595, "eval_overall_accuracy": 0.9859649122807017, "eval_overall_f1": 0.9363867684478372, "eval_overall_precision": 0.934010152284264, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2631, "eval_samples_per_second": 646.06, "eval_steps_per_second": 11.401, "step": 8256 }, { "epoch": 87.0, "grad_norm": 0.27668076753616333, "learning_rate": 6.5000000000000004e-06, "loss": 0.0075, "step": 8352 }, { "epoch": 87.0, "eval_LOCATION_f1": 0.9257142857142857, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9101123595505618, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9204545454545454, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9310344827586207, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9649805447470817, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9612403100775194, "eval_PERSON_recall": 0.96875, "eval_loss": 0.07422462850809097, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9362244897959183, "eval_overall_precision": 0.9362244897959183, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2623, "eval_samples_per_second": 648.024, "eval_steps_per_second": 11.436, "step": 8352 }, { "epoch": 88.0, "grad_norm": 0.5170222520828247, "learning_rate": 6e-06, "loss": 0.0081, "step": 8448 }, { "epoch": 88.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.898876404494382, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9322033898305084, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9375, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.07327169924974442, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.94147582697201, "eval_overall_precision": 0.9390862944162437, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2621, "eval_samples_per_second": 648.619, "eval_steps_per_second": 11.446, "step": 8448 }, { "epoch": 89.0, "grad_norm": 0.02253754250705242, "learning_rate": 5.500000000000001e-06, "loss": 0.0076, "step": 8544 }, { "epoch": 89.0, "eval_LOCATION_f1": 0.9257142857142857, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9101123595505618, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9291784702549575, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9371428571428572, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.07149529457092285, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.9413265306122449, "eval_overall_precision": 0.9413265306122449, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.262, "eval_samples_per_second": 648.882, "eval_steps_per_second": 11.451, "step": 8544 }, { "epoch": 90.0, "grad_norm": 0.38953787088394165, "learning_rate": 5e-06, "loss": 0.007, "step": 8640 }, { "epoch": 90.0, "eval_LOCATION_f1": 0.9257142857142857, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9101123595505618, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.934844192634561, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9428571428571428, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.07308602333068848, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9438775510204082, "eval_overall_precision": 0.9438775510204082, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2633, "eval_samples_per_second": 645.591, "eval_steps_per_second": 11.393, "step": 8640 }, { "epoch": 91.0, "grad_norm": 0.13587290048599243, "learning_rate": 4.5e-06, "loss": 0.0067, "step": 8736 }, { "epoch": 91.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9318181818181818, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9425287356321839, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.07349184155464172, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.9413265306122449, "eval_overall_precision": 0.9413265306122449, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2627, "eval_samples_per_second": 647.03, "eval_steps_per_second": 11.418, "step": 8736 }, { "epoch": 92.0, "grad_norm": 0.22132927179336548, "learning_rate": 4.000000000000001e-06, "loss": 0.0063, "step": 8832 }, { "epoch": 92.0, "eval_LOCATION_f1": 0.9090909090909092, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9291784702549575, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9371428571428572, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.07292331010103226, "eval_overall_accuracy": 0.9862348178137652, "eval_overall_f1": 0.937579617834395, "eval_overall_precision": 0.9363867684478372, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2629, "eval_samples_per_second": 646.695, "eval_steps_per_second": 11.412, "step": 8832 }, { "epoch": 93.0, "grad_norm": 0.12352707982063293, "learning_rate": 3.5000000000000004e-06, "loss": 0.0071, "step": 8928 }, { "epoch": 93.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9318181818181818, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9425287356321839, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.07484764605760574, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.9413265306122449, "eval_overall_precision": 0.9413265306122449, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.264, "eval_samples_per_second": 643.997, "eval_steps_per_second": 11.365, "step": 8928 }, { "epoch": 94.0, "grad_norm": 0.4933042824268341, "learning_rate": 3e-06, "loss": 0.0069, "step": 9024 }, { "epoch": 94.0, "eval_LOCATION_f1": 0.9090909090909092, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9318181818181818, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9425287356321839, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.072416290640831, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.9387755102040817, "eval_overall_precision": 0.9387755102040817, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2627, "eval_samples_per_second": 647.22, "eval_steps_per_second": 11.422, "step": 9024 }, { "epoch": 95.0, "grad_norm": 0.11264283955097198, "learning_rate": 2.5e-06, "loss": 0.0079, "step": 9120 }, { "epoch": 95.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9371428571428573, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9534883720930233, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.07443219423294067, "eval_overall_accuracy": 0.9870445344129555, "eval_overall_f1": 0.9462915601023018, "eval_overall_precision": 0.9487179487179487, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2643, "eval_samples_per_second": 643.133, "eval_steps_per_second": 11.349, "step": 9120 }, { "epoch": 96.0, "grad_norm": 0.6433180570602417, "learning_rate": 2.0000000000000003e-06, "loss": 0.0066, "step": 9216 }, { "epoch": 96.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9344729344729344, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9479768786127167, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.07433334738016129, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9450830140485312, "eval_overall_precision": 0.9462915601023018, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2643, "eval_samples_per_second": 643.276, "eval_steps_per_second": 11.352, "step": 9216 }, { "epoch": 97.0, "grad_norm": 0.012902840040624142, "learning_rate": 1.5e-06, "loss": 0.0077, "step": 9312 }, { "epoch": 97.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9344729344729344, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9479768786127167, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.07318369299173355, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9450830140485312, "eval_overall_precision": 0.9462915601023018, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2626, "eval_samples_per_second": 647.324, "eval_steps_per_second": 11.423, "step": 9312 }, { "epoch": 98.0, "grad_norm": 0.11238887161016464, "learning_rate": 1.0000000000000002e-06, "loss": 0.0063, "step": 9408 }, { "epoch": 98.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9344729344729344, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9479768786127167, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.07359299808740616, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9450830140485312, "eval_overall_precision": 0.9462915601023018, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2632, "eval_samples_per_second": 645.778, "eval_steps_per_second": 11.396, "step": 9408 }, { "epoch": 99.0, "grad_norm": 0.13547368347644806, "learning_rate": 5.000000000000001e-07, "loss": 0.0068, "step": 9504 }, { "epoch": 99.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9344729344729344, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9479768786127167, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0736822858452797, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9450830140485312, "eval_overall_precision": 0.9462915601023018, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2623, "eval_samples_per_second": 647.999, "eval_steps_per_second": 11.435, "step": 9504 }, { "epoch": 100.0, "grad_norm": 0.017845168709754944, "learning_rate": 0.0, "loss": 0.0077, "step": 9600 }, { "epoch": 100.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9344729344729344, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9479768786127167, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.07381308823823929, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9450830140485312, "eval_overall_precision": 0.9462915601023018, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2622, "eval_samples_per_second": 648.446, "eval_steps_per_second": 11.443, "step": 9600 }, { "epoch": 100.0, "step": 9600, "total_flos": 4315798421360676.0, "train_loss": 0.03713263006880879, "train_runtime": 544.4775, "train_samples_per_second": 281.187, "train_steps_per_second": 17.632 } ], "logging_steps": 500, "max_steps": 9600, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 4315798421360676.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }