diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3130 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 100.0, + "eval_steps": 500, + "global_step": 9600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "grad_norm": 1.1709129810333252, + "learning_rate": 4.9500000000000004e-05, + "loss": 0.8476, + "step": 96 + }, + { + "epoch": 1.0, + "eval_LOCATION_f1": 0.0, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.0, + "eval_LOCATION_recall": 0.0, + "eval_ORGANIZATION_f1": 0.0, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.0, + "eval_ORGANIZATION_recall": 0.0, + "eval_PERSON_f1": 0.0, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.0, + "eval_PERSON_recall": 0.0, + "eval_loss": 0.5341979265213013, + "eval_overall_accuracy": 0.8394180620367828, + "eval_overall_f1": 0.0, + "eval_overall_precision": 0.0, + "eval_overall_recall": 0.0, + "eval_runtime": 0.2518, + "eval_samples_per_second": 675.219, + "eval_steps_per_second": 11.916, + "step": 96 + }, + { + "epoch": 2.0, + "grad_norm": 1.5588200092315674, + "learning_rate": 4.9e-05, + "loss": 0.4619, + "step": 192 + }, + { + "epoch": 2.0, + "eval_LOCATION_f1": 0.2674418604651163, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.4107142857142857, + "eval_LOCATION_recall": 0.19827586206896552, + "eval_ORGANIZATION_f1": 0.40226628895184136, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.3641025641025641, + "eval_ORGANIZATION_recall": 0.44936708860759494, + "eval_PERSON_f1": 0.3975535168195719, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.32019704433497537, + "eval_PERSON_recall": 0.5241935483870968, + "eval_loss": 0.31055521965026855, + "eval_overall_accuracy": 0.8998078506725227, + "eval_overall_f1": 0.37323943661971837, + "eval_overall_precision": 0.3502202643171806, + "eval_overall_recall": 0.39949748743718594, + "eval_runtime": 0.247, + "eval_samples_per_second": 688.226, + "eval_steps_per_second": 12.145, + "step": 192 + }, + { + "epoch": 3.0, + "grad_norm": 1.156894326210022, + "learning_rate": 4.85e-05, + "loss": 0.309, + "step": 288 + }, + { + "epoch": 3.0, + "eval_LOCATION_f1": 0.42608695652173917, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.4298245614035088, + "eval_LOCATION_recall": 0.4224137931034483, + "eval_ORGANIZATION_f1": 0.6005089058524172, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.502127659574468, + "eval_ORGANIZATION_recall": 0.7468354430379747, + "eval_PERSON_f1": 0.684931506849315, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.5952380952380952, + "eval_PERSON_recall": 0.8064516129032258, + "eval_loss": 0.22810646891593933, + "eval_overall_accuracy": 0.9385122152072468, + "eval_overall_f1": 0.5836065573770491, + "eval_overall_precision": 0.5164410058027079, + "eval_overall_recall": 0.6708542713567839, + "eval_runtime": 0.2476, + "eval_samples_per_second": 686.479, + "eval_steps_per_second": 12.114, + "step": 288 + }, + { + "epoch": 4.0, + "grad_norm": 1.3299918174743652, + "learning_rate": 4.8e-05, + "loss": 0.2191, + "step": 384 + }, + { + "epoch": 4.0, + "eval_LOCATION_f1": 0.6285714285714284, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.5968992248062015, + "eval_LOCATION_recall": 0.6637931034482759, + "eval_ORGANIZATION_f1": 0.736842105263158, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.6847826086956522, + "eval_ORGANIZATION_recall": 0.7974683544303798, + "eval_PERSON_f1": 0.890566037735849, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.8368794326241135, + "eval_PERSON_recall": 0.9516129032258065, + "eval_loss": 0.13724759221076965, + "eval_overall_accuracy": 0.9623936316222893, + "eval_overall_f1": 0.7535211267605635, + "eval_overall_precision": 0.7070484581497798, + "eval_overall_recall": 0.8065326633165829, + "eval_runtime": 0.2484, + "eval_samples_per_second": 684.395, + "eval_steps_per_second": 12.078, + "step": 384 + }, + { + "epoch": 5.0, + "grad_norm": 0.9350135922431946, + "learning_rate": 4.75e-05, + "loss": 0.1534, + "step": 480 + }, + { + "epoch": 5.0, + "eval_LOCATION_f1": 0.7800829875518671, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.752, + "eval_LOCATION_recall": 0.8103448275862069, + "eval_ORGANIZATION_f1": 0.7987804878048781, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.7705882352941177, + "eval_ORGANIZATION_recall": 0.8291139240506329, + "eval_PERSON_f1": 0.9457364341085271, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9104477611940298, + "eval_PERSON_recall": 0.9838709677419355, + "eval_loss": 0.09758525341749191, + "eval_overall_accuracy": 0.9728245951139171, + "eval_overall_f1": 0.8391777509068923, + "eval_overall_precision": 0.8088578088578089, + "eval_overall_recall": 0.871859296482412, + "eval_runtime": 0.2494, + "eval_samples_per_second": 681.598, + "eval_steps_per_second": 12.028, + "step": 480 + }, + { + "epoch": 6.0, + "grad_norm": 0.6846897006034851, + "learning_rate": 4.7e-05, + "loss": 0.1245, + "step": 576 + }, + { + "epoch": 6.0, + "eval_LOCATION_f1": 0.8512396694214875, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.8174603174603174, + "eval_LOCATION_recall": 0.8879310344827587, + "eval_ORGANIZATION_f1": 0.8159509202453988, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.7916666666666666, + "eval_ORGANIZATION_recall": 0.8417721518987342, + "eval_PERSON_f1": 0.9609375, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9318181818181818, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.07943477481603622, + "eval_overall_accuracy": 0.9763930826242108, + "eval_overall_f1": 0.8713592233009709, + "eval_overall_precision": 0.8427230046948356, + "eval_overall_recall": 0.9020100502512562, + "eval_runtime": 0.2485, + "eval_samples_per_second": 684.087, + "eval_steps_per_second": 12.072, + "step": 576 + }, + { + "epoch": 7.0, + "grad_norm": 1.0963987112045288, + "learning_rate": 4.6500000000000005e-05, + "loss": 0.1074, + "step": 672 + }, + { + "epoch": 7.0, + "eval_LOCATION_f1": 0.8630705394190871, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.832, + "eval_LOCATION_recall": 0.896551724137931, + "eval_ORGANIZATION_f1": 0.8535825545171339, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.8404907975460123, + "eval_ORGANIZATION_recall": 0.8670886075949367, + "eval_PERSON_f1": 0.9723320158102766, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9534883720930233, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.07009263336658478, + "eval_overall_accuracy": 0.9799615701345046, + "eval_overall_f1": 0.8932515337423312, + "eval_overall_precision": 0.8729016786570744, + "eval_overall_recall": 0.914572864321608, + "eval_runtime": 0.2473, + "eval_samples_per_second": 687.342, + "eval_steps_per_second": 12.13, + "step": 672 + }, + { + "epoch": 8.0, + "grad_norm": 0.4685334265232086, + "learning_rate": 4.600000000000001e-05, + "loss": 0.0956, + "step": 768 + }, + { + "epoch": 8.0, + "eval_LOCATION_f1": 0.8760330578512397, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.8412698412698413, + "eval_LOCATION_recall": 0.9137931034482759, + "eval_ORGANIZATION_f1": 0.86875, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.8580246913580247, + "eval_ORGANIZATION_recall": 0.879746835443038, + "eval_PERSON_f1": 0.9761904761904763, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9609375, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.06126611307263374, + "eval_overall_accuracy": 0.9818830634092781, + "eval_overall_f1": 0.9041769041769041, + "eval_overall_precision": 0.8846153846153846, + "eval_overall_recall": 0.9246231155778895, + "eval_runtime": 0.2473, + "eval_samples_per_second": 687.399, + "eval_steps_per_second": 12.131, + "step": 768 + }, + { + "epoch": 9.0, + "grad_norm": 0.7007037401199341, + "learning_rate": 4.55e-05, + "loss": 0.0854, + "step": 864 + }, + { + "epoch": 9.0, + "eval_LOCATION_f1": 0.8962655601659751, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.864, + "eval_LOCATION_recall": 0.9310344827586207, + "eval_ORGANIZATION_f1": 0.8554216867469879, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.8160919540229885, + "eval_ORGANIZATION_recall": 0.8987341772151899, + "eval_PERSON_f1": 0.9723320158102766, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9534883720930233, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.06154508516192436, + "eval_overall_accuracy": 0.9802360691737579, + "eval_overall_f1": 0.9031476997578693, + "eval_overall_precision": 0.8714953271028038, + "eval_overall_recall": 0.9371859296482412, + "eval_runtime": 0.2482, + "eval_samples_per_second": 685.053, + "eval_steps_per_second": 12.089, + "step": 864 + }, + { + "epoch": 10.0, + "grad_norm": 0.7966454029083252, + "learning_rate": 4.5e-05, + "loss": 0.0781, + "step": 960 + }, + { + "epoch": 10.0, + "eval_LOCATION_f1": 0.8991596638655462, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.8770491803278688, + "eval_LOCATION_recall": 0.9224137931034483, + "eval_ORGANIZATION_f1": 0.8703703703703705, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.8493975903614458, + "eval_ORGANIZATION_recall": 0.8924050632911392, + "eval_PERSON_f1": 0.9723320158102766, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9534883720930233, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.055938586592674255, + "eval_overall_accuracy": 0.9818830634092781, + "eval_overall_f1": 0.9104294478527607, + "eval_overall_precision": 0.8896882494004796, + "eval_overall_recall": 0.9321608040201005, + "eval_runtime": 0.248, + "eval_samples_per_second": 685.387, + "eval_steps_per_second": 12.095, + "step": 960 + }, + { + "epoch": 11.0, + "grad_norm": 0.35707637667655945, + "learning_rate": 4.4500000000000004e-05, + "loss": 0.0773, + "step": 1056 + }, + { + "epoch": 11.0, + "eval_LOCATION_f1": 0.9045643153526971, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.872, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.8909657320872275, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.8773006134969326, + "eval_ORGANIZATION_recall": 0.9050632911392406, + "eval_PERSON_f1": 0.9761904761904763, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9609375, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.05021374672651291, + "eval_overall_accuracy": 0.9838045566840516, + "eval_overall_f1": 0.9213759213759215, + "eval_overall_precision": 0.9014423076923077, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 0.248, + "eval_samples_per_second": 685.429, + "eval_steps_per_second": 12.096, + "step": 1056 + }, + { + "epoch": 12.0, + "grad_norm": 1.7560417652130127, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.0701, + "step": 1152 + }, + { + "epoch": 12.0, + "eval_LOCATION_f1": 0.9198312236286921, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9008264462809917, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.9034267912772586, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.8895705521472392, + "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_PERSON_f1": 0.9682539682539683, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.953125, + "eval_PERSON_recall": 0.9838709677419355, + "eval_loss": 0.049287330359220505, + "eval_overall_accuracy": 0.9846280538018117, + "eval_overall_f1": 0.928395061728395, + "eval_overall_precision": 0.912621359223301, + "eval_overall_recall": 0.9447236180904522, + "eval_runtime": 0.2469, + "eval_samples_per_second": 688.485, + "eval_steps_per_second": 12.15, + "step": 1152 + }, + { + "epoch": 13.0, + "grad_norm": 0.5027121305465698, + "learning_rate": 4.35e-05, + "loss": 0.0653, + "step": 1248 + }, + { + "epoch": 13.0, + "eval_LOCATION_f1": 0.9356223175965666, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9316239316239316, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.9177215189873418, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9177215189873418, + "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_PERSON_f1": 0.9800796812749005, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.968503937007874, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.043551068753004074, + "eval_overall_accuracy": 0.9862750480373319, + "eval_overall_f1": 0.9425, + "eval_overall_precision": 0.9378109452736318, + "eval_overall_recall": 0.9472361809045227, + "eval_runtime": 0.2468, + "eval_samples_per_second": 688.943, + "eval_steps_per_second": 12.158, + "step": 1248 + }, + { + "epoch": 14.0, + "grad_norm": 0.517197847366333, + "learning_rate": 4.3e-05, + "loss": 0.0636, + "step": 1344 + }, + { + "epoch": 14.0, + "eval_LOCATION_f1": 0.9211618257261411, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.888, + "eval_LOCATION_recall": 0.9568965517241379, + "eval_ORGANIZATION_f1": 0.8881789137380192, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.896774193548387, + "eval_ORGANIZATION_recall": 0.879746835443038, + "eval_PERSON_f1": 0.9800796812749005, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.968503937007874, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04625400900840759, + "eval_overall_accuracy": 0.9838045566840516, + "eval_overall_f1": 0.9267080745341615, + "eval_overall_precision": 0.9164619164619164, + "eval_overall_recall": 0.9371859296482412, + "eval_runtime": 0.2479, + "eval_samples_per_second": 685.857, + "eval_steps_per_second": 12.103, + "step": 1344 + }, + { + "epoch": 15.0, + "grad_norm": 0.8750516176223755, + "learning_rate": 4.25e-05, + "loss": 0.0611, + "step": 1440 + }, + { + "epoch": 15.0, + "eval_LOCATION_f1": 0.9276595744680851, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9159663865546218, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.9182389937106918, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9125, + "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_PERSON_f1": 0.9800796812749005, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.968503937007874, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.0423351414501667, + "eval_overall_accuracy": 0.9868240461158386, + "eval_overall_f1": 0.9402985074626866, + "eval_overall_precision": 0.9310344827586207, + "eval_overall_recall": 0.949748743718593, + "eval_runtime": 0.2476, + "eval_samples_per_second": 686.512, + "eval_steps_per_second": 12.115, + "step": 1440 + }, + { + "epoch": 16.0, + "grad_norm": 1.7253036499023438, + "learning_rate": 4.2e-05, + "loss": 0.0613, + "step": 1536 + }, + { + "epoch": 16.0, + "eval_LOCATION_f1": 0.9333333333333333, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9032258064516129, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.8929663608562691, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.863905325443787, + "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.043704453855752945, + "eval_overall_accuracy": 0.9857260499588252, + "eval_overall_f1": 0.9326805385556914, + "eval_overall_precision": 0.9093078758949881, + "eval_overall_recall": 0.957286432160804, + "eval_runtime": 0.2472, + "eval_samples_per_second": 687.688, + "eval_steps_per_second": 12.136, + "step": 1536 + }, + { + "epoch": 17.0, + "grad_norm": 0.6101785898208618, + "learning_rate": 4.15e-05, + "loss": 0.0625, + "step": 1632 + }, + { + "epoch": 17.0, + "eval_LOCATION_f1": 0.9491525423728815, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9333333333333333, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9240506329113924, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9240506329113924, + "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_PERSON_f1": 0.9800796812749005, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.968503937007874, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04202444851398468, + "eval_overall_accuracy": 0.9873730441943454, + "eval_overall_f1": 0.9489414694894147, + "eval_overall_precision": 0.9407407407407408, + "eval_overall_recall": 0.957286432160804, + "eval_runtime": 0.2479, + "eval_samples_per_second": 685.698, + "eval_steps_per_second": 12.101, + "step": 1632 + }, + { + "epoch": 18.0, + "grad_norm": 1.0076349973678589, + "learning_rate": 4.1e-05, + "loss": 0.0532, + "step": 1728 + }, + { + "epoch": 18.0, + "eval_LOCATION_f1": 0.94017094017094, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9322033898305084, + "eval_LOCATION_recall": 0.9482758620689655, + "eval_ORGANIZATION_f1": 0.91875, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9074074074074074, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9879518072289156, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.984, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03683561459183693, + "eval_overall_accuracy": 0.9879220422728521, + "eval_overall_f1": 0.946450809464508, + "eval_overall_precision": 0.9382716049382716, + "eval_overall_recall": 0.9547738693467337, + "eval_runtime": 0.2463, + "eval_samples_per_second": 690.18, + "eval_steps_per_second": 12.18, + "step": 1728 + }, + { + "epoch": 19.0, + "grad_norm": 0.8145723342895508, + "learning_rate": 4.05e-05, + "loss": 0.0501, + "step": 1824 + }, + { + "epoch": 19.0, + "eval_LOCATION_f1": 0.961038961038961, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9652173913043478, + "eval_LOCATION_recall": 0.9568965517241379, + "eval_ORGANIZATION_f1": 0.9216300940438872, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9130434782608695, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9879518072289156, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.984, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03693071007728577, + "eval_overall_accuracy": 0.9890200384298655, + "eval_overall_f1": 0.9536921151439299, + "eval_overall_precision": 0.9501246882793017, + "eval_overall_recall": 0.957286432160804, + "eval_runtime": 0.2488, + "eval_samples_per_second": 683.283, + "eval_steps_per_second": 12.058, + "step": 1824 + }, + { + "epoch": 20.0, + "grad_norm": 0.6327467560768127, + "learning_rate": 4e-05, + "loss": 0.047, + "step": 1920 + }, + { + "epoch": 20.0, + "eval_LOCATION_f1": 0.9205020920502092, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.8943089430894309, + "eval_LOCATION_recall": 0.9482758620689655, + "eval_ORGANIZATION_f1": 0.9079754601226993, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.8809523809523809, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9800796812749005, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.968503937007874, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04086912423372269, + "eval_overall_accuracy": 0.9854515509195718, + "eval_overall_f1": 0.9338235294117648, + "eval_overall_precision": 0.9114832535885168, + "eval_overall_recall": 0.957286432160804, + "eval_runtime": 0.2463, + "eval_samples_per_second": 690.091, + "eval_steps_per_second": 12.178, + "step": 1920 + }, + { + "epoch": 21.0, + "grad_norm": 1.3883323669433594, + "learning_rate": 3.9500000000000005e-05, + "loss": 0.0464, + "step": 2016 + }, + { + "epoch": 21.0, + "eval_LOCATION_f1": 0.9205020920502092, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.8943089430894309, + "eval_LOCATION_recall": 0.9482758620689655, + "eval_ORGANIZATION_f1": 0.9119496855345911, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.90625, + "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03654232993721962, + "eval_overall_accuracy": 0.9865495470765853, + "eval_overall_f1": 0.9368029739776953, + "eval_overall_precision": 0.9242053789731052, + "eval_overall_recall": 0.949748743718593, + "eval_runtime": 0.2418, + "eval_samples_per_second": 703.11, + "eval_steps_per_second": 12.408, + "step": 2016 + }, + { + "epoch": 22.0, + "grad_norm": 1.259803056716919, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.045, + "step": 2112 + }, + { + "epoch": 22.0, + "eval_LOCATION_f1": 0.9446808510638298, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9327731092436975, + "eval_LOCATION_recall": 0.9568965517241379, + "eval_ORGANIZATION_f1": 0.9107692307692307, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.8862275449101796, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9879518072289156, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.984, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.036260925233364105, + "eval_overall_accuracy": 0.9876475432335987, + "eval_overall_f1": 0.9443757725587145, + "eval_overall_precision": 0.9294403892944039, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.2468, + "eval_samples_per_second": 688.713, + "eval_steps_per_second": 12.154, + "step": 2112 + }, + { + "epoch": 23.0, + "grad_norm": 0.4472838044166565, + "learning_rate": 3.85e-05, + "loss": 0.0416, + "step": 2208 + }, + { + "epoch": 23.0, + "eval_LOCATION_f1": 0.9451476793248945, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9256198347107438, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9211356466876972, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9182389937106918, + "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_PERSON_f1": 0.9879518072289156, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.984, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03565502166748047, + "eval_overall_accuracy": 0.9887455393906122, + "eval_overall_f1": 0.9489414694894147, + "eval_overall_precision": 0.9407407407407408, + "eval_overall_recall": 0.957286432160804, + "eval_runtime": 0.2474, + "eval_samples_per_second": 687.138, + "eval_steps_per_second": 12.126, + "step": 2208 + }, + { + "epoch": 24.0, + "grad_norm": 1.2622013092041016, + "learning_rate": 3.8e-05, + "loss": 0.042, + "step": 2304 + }, + { + "epoch": 24.0, + "eval_LOCATION_f1": 0.9406779661016949, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.925, + "eval_LOCATION_recall": 0.9568965517241379, + "eval_ORGANIZATION_f1": 0.9171974522292994, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9230769230769231, + "eval_ORGANIZATION_recall": 0.9113924050632911, + "eval_PERSON_f1": 0.9879518072289156, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.984, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.037344712764024734, + "eval_overall_accuracy": 0.9876475432335987, + "eval_overall_f1": 0.9461827284105131, + "eval_overall_precision": 0.942643391521197, + "eval_overall_recall": 0.949748743718593, + "eval_runtime": 0.2477, + "eval_samples_per_second": 686.199, + "eval_steps_per_second": 12.109, + "step": 2304 + }, + { + "epoch": 25.0, + "grad_norm": 0.7534035444259644, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.0394, + "step": 2400 + }, + { + "epoch": 25.0, + "eval_LOCATION_f1": 0.9446808510638298, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9327731092436975, + "eval_LOCATION_recall": 0.9568965517241379, + "eval_ORGANIZATION_f1": 0.9371069182389938, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.93125, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9800796812749005, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.968503937007874, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03631773963570595, + "eval_overall_accuracy": 0.9884710403513588, + "eval_overall_f1": 0.9527363184079602, + "eval_overall_precision": 0.9433497536945813, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2479, + "eval_samples_per_second": 685.628, + "eval_steps_per_second": 12.099, + "step": 2400 + }, + { + "epoch": 26.0, + "grad_norm": 0.5307872295379639, + "learning_rate": 3.7e-05, + "loss": 0.0362, + "step": 2496 + }, + { + "epoch": 26.0, + "eval_LOCATION_f1": 0.9613733905579399, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9572649572649573, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9375000000000001, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9259259259259259, + "eval_ORGANIZATION_recall": 0.9493670886075949, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.033846255391836166, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9589041095890412, + "eval_overall_precision": 0.9506172839506173, + "eval_overall_recall": 0.9673366834170855, + "eval_runtime": 0.2465, + "eval_samples_per_second": 689.727, + "eval_steps_per_second": 12.172, + "step": 2496 + }, + { + "epoch": 27.0, + "grad_norm": 0.8220003247261047, + "learning_rate": 3.65e-05, + "loss": 0.0379, + "step": 2592 + }, + { + "epoch": 27.0, + "eval_LOCATION_f1": 0.9613733905579399, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9572649572649573, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9254658385093167, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9085365853658537, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.033654991537332535, + "eval_overall_accuracy": 0.9890200384298655, + "eval_overall_f1": 0.9540372670807453, + "eval_overall_precision": 0.9434889434889435, + "eval_overall_recall": 0.964824120603015, + "eval_runtime": 0.2408, + "eval_samples_per_second": 706.111, + "eval_steps_per_second": 12.461, + "step": 2592 + }, + { + "epoch": 28.0, + "grad_norm": 0.4288746416568756, + "learning_rate": 3.6e-05, + "loss": 0.0328, + "step": 2688 + }, + { + "epoch": 28.0, + "eval_LOCATION_f1": 0.9356223175965666, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9316239316239316, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.9141104294478527, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.8869047619047619, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9800796812749005, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.968503937007874, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03623175993561745, + "eval_overall_accuracy": 0.9865495470765853, + "eval_overall_f1": 0.9407407407407408, + "eval_overall_precision": 0.9247572815533981, + "eval_overall_recall": 0.957286432160804, + "eval_runtime": 0.2477, + "eval_samples_per_second": 686.434, + "eval_steps_per_second": 12.114, + "step": 2688 + }, + { + "epoch": 29.0, + "grad_norm": 0.5559950470924377, + "learning_rate": 3.55e-05, + "loss": 0.0347, + "step": 2784 + }, + { + "epoch": 29.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.923076923076923, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.8982035928143712, + "eval_ORGANIZATION_recall": 0.9493670886075949, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03539116308093071, + "eval_overall_accuracy": 0.9876475432335987, + "eval_overall_f1": 0.9506172839506173, + "eval_overall_precision": 0.9344660194174758, + "eval_overall_recall": 0.9673366834170855, + "eval_runtime": 0.247, + "eval_samples_per_second": 688.311, + "eval_steps_per_second": 12.147, + "step": 2784 + }, + { + "epoch": 30.0, + "grad_norm": 0.954803466796875, + "learning_rate": 3.5e-05, + "loss": 0.0314, + "step": 2880 + }, + { + "epoch": 30.0, + "eval_LOCATION_f1": 0.9482758620689655, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9482758620689655, + "eval_LOCATION_recall": 0.9482758620689655, + "eval_ORGANIZATION_f1": 0.916923076923077, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.8922155688622755, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.035969048738479614, + "eval_overall_accuracy": 0.9873730441943454, + "eval_overall_f1": 0.9467162329615861, + "eval_overall_precision": 0.9339853300733496, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.2469, + "eval_samples_per_second": 688.587, + "eval_steps_per_second": 12.152, + "step": 2880 + }, + { + "epoch": 31.0, + "grad_norm": 0.7396743893623352, + "learning_rate": 3.45e-05, + "loss": 0.0312, + "step": 2976 + }, + { + "epoch": 31.0, + "eval_LOCATION_f1": 0.9491525423728815, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9333333333333333, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9278996865203761, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9192546583850931, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03233965113759041, + "eval_overall_accuracy": 0.9879220422728521, + "eval_overall_f1": 0.9515527950310558, + "eval_overall_precision": 0.941031941031941, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2409, + "eval_samples_per_second": 705.718, + "eval_steps_per_second": 12.454, + "step": 2976 + }, + { + "epoch": 32.0, + "grad_norm": 1.0835587978363037, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.0295, + "step": 3072 + }, + { + "epoch": 32.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9523809523809524, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9554140127388535, + "eval_ORGANIZATION_recall": 0.9493670886075949, + "eval_PERSON_f1": 0.9879518072289156, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.984, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03135540708899498, + "eval_overall_accuracy": 0.9906670326653857, + "eval_overall_f1": 0.9637046307884857, + "eval_overall_precision": 0.9600997506234414, + "eval_overall_recall": 0.9673366834170855, + "eval_runtime": 0.2473, + "eval_samples_per_second": 687.523, + "eval_steps_per_second": 12.133, + "step": 3072 + }, + { + "epoch": 33.0, + "grad_norm": 0.5249152183532715, + "learning_rate": 3.35e-05, + "loss": 0.0276, + "step": 3168 + }, + { + "epoch": 33.0, + "eval_LOCATION_f1": 0.9572649572649573, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9491525423728814, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9337539432176657, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9308176100628931, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.0347798690199852, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9563046192259675, + "eval_overall_precision": 0.9503722084367245, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2464, + "eval_samples_per_second": 689.925, + "eval_steps_per_second": 12.175, + "step": 3168 + }, + { + "epoch": 34.0, + "grad_norm": 0.9126577377319336, + "learning_rate": 3.3e-05, + "loss": 0.0291, + "step": 3264 + }, + { + "epoch": 34.0, + "eval_LOCATION_f1": 0.9535864978902953, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9338842975206612, + "eval_LOCATION_recall": 0.9741379310344828, + "eval_ORGANIZATION_f1": 0.9430379746835443, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9430379746835443, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03333434835076332, + "eval_overall_accuracy": 0.9903925336261323, + "eval_overall_f1": 0.9589041095890412, + "eval_overall_precision": 0.9506172839506173, + "eval_overall_recall": 0.9673366834170855, + "eval_runtime": 0.247, + "eval_samples_per_second": 688.383, + "eval_steps_per_second": 12.148, + "step": 3264 + }, + { + "epoch": 35.0, + "grad_norm": 0.9074527621269226, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.0271, + "step": 3360 + }, + { + "epoch": 35.0, + "eval_LOCATION_f1": 0.944206008583691, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9401709401709402, + "eval_LOCATION_recall": 0.9482758620689655, + "eval_ORGANIZATION_f1": 0.9400630914826499, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9371069182389937, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.033455148339271545, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9550000000000001, + "eval_overall_precision": 0.9502487562189055, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.2445, + "eval_samples_per_second": 695.369, + "eval_steps_per_second": 12.271, + "step": 3360 + }, + { + "epoch": 36.0, + "grad_norm": 0.6352573037147522, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.0256, + "step": 3456 + }, + { + "epoch": 36.0, + "eval_LOCATION_f1": 0.9576271186440678, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9416666666666667, + "eval_LOCATION_recall": 0.9741379310344828, + "eval_ORGANIZATION_f1": 0.9371069182389938, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.93125, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03598781302571297, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9577114427860698, + "eval_overall_precision": 0.9482758620689655, + "eval_overall_recall": 0.9673366834170855, + "eval_runtime": 0.2468, + "eval_samples_per_second": 688.708, + "eval_steps_per_second": 12.154, + "step": 3456 + }, + { + "epoch": 37.0, + "grad_norm": 2.211890697479248, + "learning_rate": 3.15e-05, + "loss": 0.0251, + "step": 3552 + }, + { + "epoch": 37.0, + "eval_LOCATION_f1": 0.9576271186440678, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9416666666666667, + "eval_LOCATION_recall": 0.9741379310344828, + "eval_ORGANIZATION_f1": 0.9490445859872612, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9551282051282052, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03474033996462822, + "eval_overall_accuracy": 0.990118034586879, + "eval_overall_f1": 0.9625, + "eval_overall_precision": 0.9577114427860697, + "eval_overall_recall": 0.9673366834170855, + "eval_runtime": 0.2481, + "eval_samples_per_second": 685.286, + "eval_steps_per_second": 12.093, + "step": 3552 + }, + { + "epoch": 38.0, + "grad_norm": 0.5349846482276917, + "learning_rate": 3.1e-05, + "loss": 0.0253, + "step": 3648 + }, + { + "epoch": 38.0, + "eval_LOCATION_f1": 0.9446808510638298, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9327731092436975, + "eval_LOCATION_recall": 0.9568965517241379, + "eval_ORGANIZATION_f1": 0.9367088607594937, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9367088607594937, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.0352802537381649, + "eval_overall_accuracy": 0.9887455393906122, + "eval_overall_f1": 0.9538077403245941, + "eval_overall_precision": 0.9478908188585607, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.247, + "eval_samples_per_second": 688.284, + "eval_steps_per_second": 12.146, + "step": 3648 + }, + { + "epoch": 39.0, + "grad_norm": 0.31611329317092896, + "learning_rate": 3.05e-05, + "loss": 0.0248, + "step": 3744 + }, + { + "epoch": 39.0, + "eval_LOCATION_f1": 0.9322033898305084, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9166666666666666, + "eval_LOCATION_recall": 0.9482758620689655, + "eval_ORGANIZATION_f1": 0.9371069182389938, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.93125, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9800796812749005, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.968503937007874, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.033827971667051315, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9490683229813666, + "eval_overall_precision": 0.9385749385749386, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.2476, + "eval_samples_per_second": 686.66, + "eval_steps_per_second": 12.118, + "step": 3744 + }, + { + "epoch": 40.0, + "grad_norm": 0.34425315260887146, + "learning_rate": 3e-05, + "loss": 0.0222, + "step": 3840 + }, + { + "epoch": 40.0, + "eval_LOCATION_f1": 0.9487179487179486, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.940677966101695, + "eval_LOCATION_recall": 0.9568965517241379, + "eval_ORGANIZATION_f1": 0.9496855345911951, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.94375, + "eval_ORGANIZATION_recall": 0.9556962025316456, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.033412422984838486, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9600997506234414, + "eval_overall_precision": 0.9529702970297029, + "eval_overall_recall": 0.9673366834170855, + "eval_runtime": 0.2468, + "eval_samples_per_second": 688.778, + "eval_steps_per_second": 12.155, + "step": 3840 + }, + { + "epoch": 41.0, + "grad_norm": 0.09360247850418091, + "learning_rate": 2.95e-05, + "loss": 0.0226, + "step": 3936 + }, + { + "epoch": 41.0, + "eval_LOCATION_f1": 0.9572649572649573, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9491525423728814, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9341692789968652, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9254658385093167, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.034495361149311066, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9564134495641344, + "eval_overall_precision": 0.9481481481481482, + "eval_overall_recall": 0.964824120603015, + "eval_runtime": 0.2473, + "eval_samples_per_second": 687.373, + "eval_steps_per_second": 12.13, + "step": 3936 + }, + { + "epoch": 42.0, + "grad_norm": 0.0345722995698452, + "learning_rate": 2.9e-05, + "loss": 0.0208, + "step": 4032 + }, + { + "epoch": 42.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9490445859872612, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9551282051282052, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03448282554745674, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9612015018773465, + "eval_overall_precision": 0.9576059850374065, + "eval_overall_recall": 0.964824120603015, + "eval_runtime": 0.2469, + "eval_samples_per_second": 688.498, + "eval_steps_per_second": 12.15, + "step": 4032 + }, + { + "epoch": 43.0, + "grad_norm": 1.2983958721160889, + "learning_rate": 2.8499999999999998e-05, + "loss": 0.0227, + "step": 4128 + }, + { + "epoch": 43.0, + "eval_LOCATION_f1": 0.9482758620689655, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9482758620689655, + "eval_LOCATION_recall": 0.9482758620689655, + "eval_ORGANIZATION_f1": 0.9426751592356688, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9487179487179487, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03551476076245308, + "eval_overall_accuracy": 0.9887455393906122, + "eval_overall_f1": 0.957286432160804, + "eval_overall_precision": 0.957286432160804, + "eval_overall_recall": 0.957286432160804, + "eval_runtime": 0.2423, + "eval_samples_per_second": 701.643, + "eval_steps_per_second": 12.382, + "step": 4128 + }, + { + "epoch": 44.0, + "grad_norm": 1.2352434396743774, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.0222, + "step": 4224 + }, + { + "epoch": 44.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9367088607594937, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9367088607594937, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03700908645987511, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9563046192259675, + "eval_overall_precision": 0.9503722084367245, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2468, + "eval_samples_per_second": 688.93, + "eval_steps_per_second": 12.158, + "step": 4224 + }, + { + "epoch": 45.0, + "grad_norm": 0.7490431070327759, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.0197, + "step": 4320 + }, + { + "epoch": 45.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9423076923076923, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9545454545454546, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03624693304300308, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.958594730238394, + "eval_overall_precision": 0.9573934837092731, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.2464, + "eval_samples_per_second": 690.072, + "eval_steps_per_second": 12.178, + "step": 4320 + }, + { + "epoch": 46.0, + "grad_norm": 0.05337309092283249, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.0189, + "step": 4416 + }, + { + "epoch": 46.0, + "eval_LOCATION_f1": 0.9491525423728815, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9333333333333333, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9337539432176657, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9308176100628931, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03748781979084015, + "eval_overall_accuracy": 0.9890200384298655, + "eval_overall_f1": 0.9539227895392278, + "eval_overall_precision": 0.945679012345679, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2467, + "eval_samples_per_second": 689.106, + "eval_steps_per_second": 12.161, + "step": 4416 + }, + { + "epoch": 47.0, + "grad_norm": 0.3433239459991455, + "learning_rate": 2.6500000000000004e-05, + "loss": 0.02, + "step": 4512 + }, + { + "epoch": 47.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9341692789968652, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9254658385093167, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03626282513141632, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9552238805970149, + "eval_overall_precision": 0.9458128078817734, + "eval_overall_recall": 0.964824120603015, + "eval_runtime": 0.2466, + "eval_samples_per_second": 689.361, + "eval_steps_per_second": 12.165, + "step": 4512 + }, + { + "epoch": 48.0, + "grad_norm": 0.4474566578865051, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.0178, + "step": 4608 + }, + { + "epoch": 48.0, + "eval_LOCATION_f1": 0.9572649572649573, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9491525423728814, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9456869009584665, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9548387096774194, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03692612424492836, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9611041405269761, + "eval_overall_precision": 0.9598997493734336, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2435, + "eval_samples_per_second": 698.283, + "eval_steps_per_second": 12.323, + "step": 4608 + }, + { + "epoch": 49.0, + "grad_norm": 1.4820588827133179, + "learning_rate": 2.5500000000000003e-05, + "loss": 0.02, + "step": 4704 + }, + { + "epoch": 49.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9456869009584665, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9548387096774194, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.0357038751244545, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9598997493734336, + "eval_overall_precision": 0.9575, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2475, + "eval_samples_per_second": 686.947, + "eval_steps_per_second": 12.123, + "step": 4704 + }, + { + "epoch": 50.0, + "grad_norm": 1.35971999168396, + "learning_rate": 2.5e-05, + "loss": 0.0178, + "step": 4800 + }, + { + "epoch": 50.0, + "eval_LOCATION_f1": 0.9572649572649573, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9491525423728814, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9517684887459807, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9673202614379085, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03682655468583107, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9635220125786164, + "eval_overall_precision": 0.964735516372796, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2483, + "eval_samples_per_second": 684.73, + "eval_steps_per_second": 12.083, + "step": 4800 + }, + { + "epoch": 51.0, + "grad_norm": 0.6279324293136597, + "learning_rate": 2.45e-05, + "loss": 0.0192, + "step": 4896 + }, + { + "epoch": 51.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9430379746835443, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9430379746835443, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03370204195380211, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.958801498127341, + "eval_overall_precision": 0.9528535980148883, + "eval_overall_recall": 0.964824120603015, + "eval_runtime": 0.2465, + "eval_samples_per_second": 689.656, + "eval_steps_per_second": 12.17, + "step": 4896 + }, + { + "epoch": 52.0, + "grad_norm": 0.9447174668312073, + "learning_rate": 2.4e-05, + "loss": 0.0173, + "step": 4992 + }, + { + "epoch": 52.0, + "eval_LOCATION_f1": 0.9572649572649573, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9491525423728814, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9554140127388535, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9615384615384616, + "eval_ORGANIZATION_recall": 0.9493670886075949, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03517177328467369, + "eval_overall_accuracy": 0.9903925336261323, + "eval_overall_f1": 0.9649122807017544, + "eval_overall_precision": 0.9625, + "eval_overall_recall": 0.9673366834170855, + "eval_runtime": 0.2475, + "eval_samples_per_second": 686.827, + "eval_steps_per_second": 12.12, + "step": 4992 + }, + { + "epoch": 53.0, + "grad_norm": 0.6928880214691162, + "learning_rate": 2.35e-05, + "loss": 0.0169, + "step": 5088 + }, + { + "epoch": 53.0, + "eval_LOCATION_f1": 0.9572649572649573, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9491525423728814, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9463722397476341, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9433962264150944, + "eval_ORGANIZATION_recall": 0.9493670886075949, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03526773303747177, + "eval_overall_accuracy": 0.9890200384298655, + "eval_overall_f1": 0.9612983770287142, + "eval_overall_precision": 0.9553349875930521, + "eval_overall_recall": 0.9673366834170855, + "eval_runtime": 0.2468, + "eval_samples_per_second": 688.789, + "eval_steps_per_second": 12.155, + "step": 5088 + }, + { + "epoch": 54.0, + "grad_norm": 0.22660917043685913, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.0153, + "step": 5184 + }, + { + "epoch": 54.0, + "eval_LOCATION_f1": 0.9613733905579399, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9572649572649573, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9587301587301587, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9617834394904459, + "eval_ORGANIZATION_recall": 0.9556962025316456, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.035062070935964584, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9674185463659147, + "eval_overall_precision": 0.965, + "eval_overall_recall": 0.9698492462311558, + "eval_runtime": 0.2468, + "eval_samples_per_second": 688.761, + "eval_steps_per_second": 12.155, + "step": 5184 + }, + { + "epoch": 55.0, + "grad_norm": 1.0093673467636108, + "learning_rate": 2.25e-05, + "loss": 0.0145, + "step": 5280 + }, + { + "epoch": 55.0, + "eval_LOCATION_f1": 0.9572649572649573, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9491525423728814, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.952076677316294, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9612903225806452, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03487938269972801, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9636135508155583, + "eval_overall_precision": 0.9624060150375939, + "eval_overall_recall": 0.964824120603015, + "eval_runtime": 0.2478, + "eval_samples_per_second": 686.117, + "eval_steps_per_second": 12.108, + "step": 5280 + }, + { + "epoch": 56.0, + "grad_norm": 1.7693700790405273, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.0155, + "step": 5376 + }, + { + "epoch": 56.0, + "eval_LOCATION_f1": 0.9576271186440678, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9416666666666667, + "eval_LOCATION_recall": 0.9741379310344828, + "eval_ORGANIZATION_f1": 0.946031746031746, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9490445859872612, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03596748039126396, + "eval_overall_accuracy": 0.990118034586879, + "eval_overall_f1": 0.9612983770287142, + "eval_overall_precision": 0.9553349875930521, + "eval_overall_recall": 0.9673366834170855, + "eval_runtime": 0.2484, + "eval_samples_per_second": 684.305, + "eval_steps_per_second": 12.076, + "step": 5376 + }, + { + "epoch": 57.0, + "grad_norm": 0.3393550217151642, + "learning_rate": 2.15e-05, + "loss": 0.016, + "step": 5472 + }, + { + "epoch": 57.0, + "eval_LOCATION_f1": 0.9613733905579399, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9572649572649573, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.952076677316294, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9612903225806452, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03762773424386978, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.964824120603015, + "eval_overall_precision": 0.964824120603015, + "eval_overall_recall": 0.964824120603015, + "eval_runtime": 0.2475, + "eval_samples_per_second": 686.779, + "eval_steps_per_second": 12.12, + "step": 5472 + }, + { + "epoch": 58.0, + "grad_norm": 0.24553614854812622, + "learning_rate": 2.1e-05, + "loss": 0.0143, + "step": 5568 + }, + { + "epoch": 58.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9483870967741935, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9671052631578947, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03893188014626503, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9610062893081761, + "eval_overall_precision": 0.9622166246851386, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.2463, + "eval_samples_per_second": 690.092, + "eval_steps_per_second": 12.178, + "step": 5568 + }, + { + "epoch": 59.0, + "grad_norm": 0.4906691908836365, + "learning_rate": 2.05e-05, + "loss": 0.0156, + "step": 5664 + }, + { + "epoch": 59.0, + "eval_LOCATION_f1": 0.9491525423728815, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9333333333333333, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9396825396825397, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9426751592356688, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.037039317190647125, + "eval_overall_accuracy": 0.990118034586879, + "eval_overall_f1": 0.9563046192259675, + "eval_overall_precision": 0.9503722084367245, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2473, + "eval_samples_per_second": 687.477, + "eval_steps_per_second": 12.132, + "step": 5664 + }, + { + "epoch": 60.0, + "grad_norm": 0.331061989068985, + "learning_rate": 2e-05, + "loss": 0.0152, + "step": 5760 + }, + { + "epoch": 60.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9423076923076923, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9545454545454546, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03670826926827431, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.958594730238394, + "eval_overall_precision": 0.9573934837092731, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.2461, + "eval_samples_per_second": 690.711, + "eval_steps_per_second": 12.189, + "step": 5760 + }, + { + "epoch": 61.0, + "grad_norm": 0.49488845467567444, + "learning_rate": 1.9500000000000003e-05, + "loss": 0.0144, + "step": 5856 + }, + { + "epoch": 61.0, + "eval_LOCATION_f1": 0.9491525423728815, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9333333333333333, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9423076923076923, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9545454545454546, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04074358195066452, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9573934837092732, + "eval_overall_precision": 0.955, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.247, + "eval_samples_per_second": 688.271, + "eval_steps_per_second": 12.146, + "step": 5856 + }, + { + "epoch": 62.0, + "grad_norm": 0.553154706954956, + "learning_rate": 1.9e-05, + "loss": 0.0127, + "step": 5952 + }, + { + "epoch": 62.0, + "eval_LOCATION_f1": 0.9487179487179486, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.940677966101695, + "eval_LOCATION_recall": 0.9568965517241379, + "eval_ORGANIZATION_f1": 0.9423076923076923, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9545454545454546, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.038874927908182144, + "eval_overall_accuracy": 0.9890200384298655, + "eval_overall_f1": 0.957286432160804, + "eval_overall_precision": 0.957286432160804, + "eval_overall_recall": 0.957286432160804, + "eval_runtime": 0.2476, + "eval_samples_per_second": 686.542, + "eval_steps_per_second": 12.115, + "step": 5952 + }, + { + "epoch": 63.0, + "grad_norm": 0.11591355502605438, + "learning_rate": 1.85e-05, + "loss": 0.0149, + "step": 6048 + }, + { + "epoch": 63.0, + "eval_LOCATION_f1": 0.9446808510638298, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9327731092436975, + "eval_LOCATION_recall": 0.9568965517241379, + "eval_ORGANIZATION_f1": 0.930379746835443, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.930379746835443, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04020368307828903, + "eval_overall_accuracy": 0.9879220422728521, + "eval_overall_f1": 0.951310861423221, + "eval_overall_precision": 0.9454094292803971, + "eval_overall_recall": 0.957286432160804, + "eval_runtime": 0.2468, + "eval_samples_per_second": 688.949, + "eval_steps_per_second": 12.158, + "step": 6048 + }, + { + "epoch": 64.0, + "grad_norm": 0.20438244938850403, + "learning_rate": 1.8e-05, + "loss": 0.0119, + "step": 6144 + }, + { + "epoch": 64.0, + "eval_LOCATION_f1": 0.9356223175965666, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9316239316239316, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.9423076923076923, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9545454545454546, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9800796812749005, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.968503937007874, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.041177552193403244, + "eval_overall_accuracy": 0.9887455393906122, + "eval_overall_f1": 0.9522613065326633, + "eval_overall_precision": 0.9522613065326633, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2472, + "eval_samples_per_second": 687.765, + "eval_steps_per_second": 12.137, + "step": 6144 + }, + { + "epoch": 65.0, + "grad_norm": 0.5410135984420776, + "learning_rate": 1.75e-05, + "loss": 0.0117, + "step": 6240 + }, + { + "epoch": 65.0, + "eval_LOCATION_f1": 0.9487179487179486, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.940677966101695, + "eval_LOCATION_recall": 0.9568965517241379, + "eval_ORGANIZATION_f1": 0.9389067524115756, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.954248366013072, + "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.038046203553676605, + "eval_overall_accuracy": 0.9887455393906122, + "eval_overall_f1": 0.9559748427672956, + "eval_overall_precision": 0.9571788413098237, + "eval_overall_recall": 0.9547738693467337, + "eval_runtime": 0.2477, + "eval_samples_per_second": 686.221, + "eval_steps_per_second": 12.11, + "step": 6240 + }, + { + "epoch": 66.0, + "grad_norm": 0.3558520972728729, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.013, + "step": 6336 + }, + { + "epoch": 66.0, + "eval_LOCATION_f1": 0.944206008583691, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9401709401709402, + "eval_LOCATION_recall": 0.9482758620689655, + "eval_ORGANIZATION_f1": 0.9389067524115756, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.954248366013072, + "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.0384184867143631, + "eval_overall_accuracy": 0.9884710403513588, + "eval_overall_f1": 0.9546599496221662, + "eval_overall_precision": 0.9570707070707071, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.248, + "eval_samples_per_second": 685.427, + "eval_steps_per_second": 12.096, + "step": 6336 + }, + { + "epoch": 67.0, + "grad_norm": 0.38077038526535034, + "learning_rate": 1.65e-05, + "loss": 0.012, + "step": 6432 + }, + { + "epoch": 67.0, + "eval_LOCATION_f1": 0.9572649572649573, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9491525423728814, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9514563106796117, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9735099337748344, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04134724289178848, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9634300126103404, + "eval_overall_precision": 0.9670886075949368, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.2469, + "eval_samples_per_second": 688.528, + "eval_steps_per_second": 12.15, + "step": 6432 + }, + { + "epoch": 68.0, + "grad_norm": 0.7169849276542664, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.0112, + "step": 6528 + }, + { + "epoch": 68.0, + "eval_LOCATION_f1": 0.9572649572649573, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9491525423728814, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9456869009584665, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9548387096774194, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.0398847796022892, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9611041405269761, + "eval_overall_precision": 0.9598997493734336, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2471, + "eval_samples_per_second": 687.852, + "eval_steps_per_second": 12.139, + "step": 6528 + }, + { + "epoch": 69.0, + "grad_norm": 0.055364880710840225, + "learning_rate": 1.55e-05, + "loss": 0.0129, + "step": 6624 + }, + { + "epoch": 69.0, + "eval_LOCATION_f1": 0.961038961038961, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9652173913043478, + "eval_LOCATION_recall": 0.9568965517241379, + "eval_ORGANIZATION_f1": 0.946031746031746, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9490445859872612, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.036749452352523804, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9623115577889447, + "eval_overall_precision": 0.9623115577889447, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2468, + "eval_samples_per_second": 688.723, + "eval_steps_per_second": 12.154, + "step": 6624 + }, + { + "epoch": 70.0, + "grad_norm": 0.9877629280090332, + "learning_rate": 1.5e-05, + "loss": 0.0115, + "step": 6720 + }, + { + "epoch": 70.0, + "eval_LOCATION_f1": 0.9491525423728815, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9333333333333333, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9396825396825397, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9426751592356688, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03813130408525467, + "eval_overall_accuracy": 0.9890200384298655, + "eval_overall_f1": 0.9563046192259675, + "eval_overall_precision": 0.9503722084367245, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2471, + "eval_samples_per_second": 687.942, + "eval_steps_per_second": 12.14, + "step": 6720 + }, + { + "epoch": 71.0, + "grad_norm": 0.07566643506288528, + "learning_rate": 1.45e-05, + "loss": 0.0119, + "step": 6816 + }, + { + "epoch": 71.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9453376205787781, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9607843137254902, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03809782862663269, + "eval_overall_accuracy": 0.9890200384298655, + "eval_overall_f1": 0.9597989949748744, + "eval_overall_precision": 0.9597989949748744, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.2462, + "eval_samples_per_second": 690.455, + "eval_steps_per_second": 12.185, + "step": 6816 + }, + { + "epoch": 72.0, + "grad_norm": 0.2541041076183319, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.0127, + "step": 6912 + }, + { + "epoch": 72.0, + "eval_LOCATION_f1": 0.9572649572649573, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9491525423728814, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9423076923076923, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9545454545454546, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.041823167353868484, + "eval_overall_accuracy": 0.9890200384298655, + "eval_overall_f1": 0.9597989949748744, + "eval_overall_precision": 0.9597989949748744, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.2462, + "eval_samples_per_second": 690.632, + "eval_steps_per_second": 12.188, + "step": 6912 + }, + { + "epoch": 73.0, + "grad_norm": 0.5936521887779236, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.011, + "step": 7008 + }, + { + "epoch": 73.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9426751592356688, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9487179487179487, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04079050570726395, + "eval_overall_accuracy": 0.9890200384298655, + "eval_overall_f1": 0.9586983729662076, + "eval_overall_precision": 0.9551122194513716, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2466, + "eval_samples_per_second": 689.268, + "eval_steps_per_second": 12.164, + "step": 7008 + }, + { + "epoch": 74.0, + "grad_norm": 0.0879291296005249, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.0121, + "step": 7104 + }, + { + "epoch": 74.0, + "eval_LOCATION_f1": 0.9491525423728815, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9333333333333333, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9423076923076923, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9545454545454546, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04260127618908882, + "eval_overall_accuracy": 0.9890200384298655, + "eval_overall_f1": 0.9573934837092732, + "eval_overall_precision": 0.955, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.2478, + "eval_samples_per_second": 686.03, + "eval_steps_per_second": 12.106, + "step": 7104 + }, + { + "epoch": 75.0, + "grad_norm": 0.38872087001800537, + "learning_rate": 1.25e-05, + "loss": 0.0122, + "step": 7200 + }, + { + "epoch": 75.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.939297124600639, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9483870967741935, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03913264349102974, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9573934837092732, + "eval_overall_precision": 0.955, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.2468, + "eval_samples_per_second": 688.868, + "eval_steps_per_second": 12.156, + "step": 7200 + }, + { + "epoch": 76.0, + "grad_norm": 0.8778914213180542, + "learning_rate": 1.2e-05, + "loss": 0.0108, + "step": 7296 + }, + { + "epoch": 76.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9426751592356688, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9487179487179487, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.040109992027282715, + "eval_overall_accuracy": 0.9890200384298655, + "eval_overall_f1": 0.9586983729662076, + "eval_overall_precision": 0.9551122194513716, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.247, + "eval_samples_per_second": 688.371, + "eval_steps_per_second": 12.148, + "step": 7296 + }, + { + "epoch": 77.0, + "grad_norm": 0.6291911005973816, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.0103, + "step": 7392 + }, + { + "epoch": 77.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9423076923076923, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9545454545454546, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.040241751819849014, + "eval_overall_accuracy": 0.9887455393906122, + "eval_overall_f1": 0.958594730238394, + "eval_overall_precision": 0.9573934837092731, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.2468, + "eval_samples_per_second": 688.859, + "eval_steps_per_second": 12.156, + "step": 7392 + }, + { + "epoch": 78.0, + "grad_norm": 0.7284227609634399, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.0105, + "step": 7488 + }, + { + "epoch": 78.0, + "eval_LOCATION_f1": 0.9576271186440678, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9416666666666667, + "eval_LOCATION_recall": 0.9741379310344828, + "eval_ORGANIZATION_f1": 0.9456869009584665, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9548387096774194, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.040437355637550354, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9612015018773465, + "eval_overall_precision": 0.9576059850374065, + "eval_overall_recall": 0.964824120603015, + "eval_runtime": 0.247, + "eval_samples_per_second": 688.3, + "eval_steps_per_second": 12.146, + "step": 7488 + }, + { + "epoch": 79.0, + "grad_norm": 0.5486218333244324, + "learning_rate": 1.05e-05, + "loss": 0.0106, + "step": 7584 + }, + { + "epoch": 79.0, + "eval_LOCATION_f1": 0.9576271186440678, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9416666666666667, + "eval_LOCATION_recall": 0.9741379310344828, + "eval_ORGANIZATION_f1": 0.9483870967741935, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9671052631578947, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04188862815499306, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9623115577889447, + "eval_overall_precision": 0.9623115577889447, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2465, + "eval_samples_per_second": 689.713, + "eval_steps_per_second": 12.171, + "step": 7584 + }, + { + "epoch": 80.0, + "grad_norm": 1.4418476819992065, + "learning_rate": 1e-05, + "loss": 0.0111, + "step": 7680 + }, + { + "epoch": 80.0, + "eval_LOCATION_f1": 0.9572649572649573, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9491525423728814, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.946031746031746, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9490445859872612, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04007694125175476, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9612015018773465, + "eval_overall_precision": 0.9576059850374065, + "eval_overall_recall": 0.964824120603015, + "eval_runtime": 0.2468, + "eval_samples_per_second": 688.947, + "eval_steps_per_second": 12.158, + "step": 7680 + }, + { + "epoch": 81.0, + "grad_norm": 0.2255631983280182, + "learning_rate": 9.5e-06, + "loss": 0.0109, + "step": 7776 + }, + { + "epoch": 81.0, + "eval_LOCATION_f1": 0.9572649572649573, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9491525423728814, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9371069182389938, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.93125, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.039456337690353394, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9576059850374065, + "eval_overall_precision": 0.9504950495049505, + "eval_overall_recall": 0.964824120603015, + "eval_runtime": 0.2472, + "eval_samples_per_second": 687.573, + "eval_steps_per_second": 12.134, + "step": 7776 + }, + { + "epoch": 82.0, + "grad_norm": 0.080887071788311, + "learning_rate": 9e-06, + "loss": 0.0093, + "step": 7872 + }, + { + "epoch": 82.0, + "eval_LOCATION_f1": 0.9491525423728815, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9333333333333333, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9426751592356688, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9487179487179487, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.040067095309495926, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9575, + "eval_overall_precision": 0.9527363184079602, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2468, + "eval_samples_per_second": 688.721, + "eval_steps_per_second": 12.154, + "step": 7872 + }, + { + "epoch": 83.0, + "grad_norm": 0.3396219313144684, + "learning_rate": 8.500000000000002e-06, + "loss": 0.0097, + "step": 7968 + }, + { + "epoch": 83.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9430379746835443, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9430379746835443, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.039701949805021286, + "eval_overall_accuracy": 0.990118034586879, + "eval_overall_f1": 0.958801498127341, + "eval_overall_precision": 0.9528535980148883, + "eval_overall_recall": 0.964824120603015, + "eval_runtime": 0.2482, + "eval_samples_per_second": 684.796, + "eval_steps_per_second": 12.085, + "step": 7968 + }, + { + "epoch": 84.0, + "grad_norm": 0.0596945583820343, + "learning_rate": 8.000000000000001e-06, + "loss": 0.0099, + "step": 8064 + }, + { + "epoch": 84.0, + "eval_LOCATION_f1": 0.9576271186440678, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9416666666666667, + "eval_LOCATION_recall": 0.9741379310344828, + "eval_ORGANIZATION_f1": 0.9426751592356688, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9487179487179487, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.041147805750370026, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.96, + "eval_overall_precision": 0.9552238805970149, + "eval_overall_recall": 0.964824120603015, + "eval_runtime": 0.2462, + "eval_samples_per_second": 690.505, + "eval_steps_per_second": 12.185, + "step": 8064 + }, + { + "epoch": 85.0, + "grad_norm": 0.9234394431114197, + "learning_rate": 7.5e-06, + "loss": 0.0109, + "step": 8160 + }, + { + "epoch": 85.0, + "eval_LOCATION_f1": 0.9572649572649573, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9491525423728814, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9400630914826499, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9371069182389937, + "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03986668586730957, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.958801498127341, + "eval_overall_precision": 0.9528535980148883, + "eval_overall_recall": 0.964824120603015, + "eval_runtime": 0.2471, + "eval_samples_per_second": 688.011, + "eval_steps_per_second": 12.141, + "step": 8160 + }, + { + "epoch": 86.0, + "grad_norm": 0.11307190358638763, + "learning_rate": 7.000000000000001e-06, + "loss": 0.0086, + "step": 8256 + }, + { + "epoch": 86.0, + "eval_LOCATION_f1": 0.9491525423728815, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9333333333333333, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.939297124600639, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9483870967741935, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04153381288051605, + "eval_overall_accuracy": 0.9890200384298655, + "eval_overall_f1": 0.9561952440550688, + "eval_overall_precision": 0.9526184538653366, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.2463, + "eval_samples_per_second": 690.215, + "eval_steps_per_second": 12.18, + "step": 8256 + }, + { + "epoch": 87.0, + "grad_norm": 0.3918910026550293, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.0099, + "step": 8352 + }, + { + "epoch": 87.0, + "eval_LOCATION_f1": 0.9491525423728815, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9333333333333333, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9337539432176657, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9308176100628931, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.041307542473077774, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9539227895392278, + "eval_overall_precision": 0.945679012345679, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2465, + "eval_samples_per_second": 689.606, + "eval_steps_per_second": 12.17, + "step": 8352 + }, + { + "epoch": 88.0, + "grad_norm": 0.43542471528053284, + "learning_rate": 6e-06, + "loss": 0.0096, + "step": 8448 + }, + { + "epoch": 88.0, + "eval_LOCATION_f1": 0.9527896995708154, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9487179487179487, + "eval_LOCATION_recall": 0.9568965517241379, + "eval_ORGANIZATION_f1": 0.9308176100628932, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.925, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04124666377902031, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9538077403245941, + "eval_overall_precision": 0.9478908188585607, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.2467, + "eval_samples_per_second": 689.229, + "eval_steps_per_second": 12.163, + "step": 8448 + }, + { + "epoch": 89.0, + "grad_norm": 1.2714601755142212, + "learning_rate": 5.500000000000001e-06, + "loss": 0.0092, + "step": 8544 + }, + { + "epoch": 89.0, + "eval_LOCATION_f1": 0.9576271186440678, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9416666666666667, + "eval_LOCATION_recall": 0.9741379310344828, + "eval_ORGANIZATION_f1": 0.9456869009584665, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9548387096774194, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.042353034019470215, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9612015018773465, + "eval_overall_precision": 0.9576059850374065, + "eval_overall_recall": 0.964824120603015, + "eval_runtime": 0.2409, + "eval_samples_per_second": 705.786, + "eval_steps_per_second": 12.455, + "step": 8544 + }, + { + "epoch": 90.0, + "grad_norm": 0.14771705865859985, + "learning_rate": 5e-06, + "loss": 0.0095, + "step": 8640 + }, + { + "epoch": 90.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.939297124600639, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9483870967741935, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04223255440592766, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9573934837092732, + "eval_overall_precision": 0.955, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.2465, + "eval_samples_per_second": 689.6, + "eval_steps_per_second": 12.169, + "step": 8640 + }, + { + "epoch": 91.0, + "grad_norm": 0.2939094305038452, + "learning_rate": 4.5e-06, + "loss": 0.0097, + "step": 8736 + }, + { + "epoch": 91.0, + "eval_LOCATION_f1": 0.9491525423728815, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9333333333333333, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9453376205787781, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9607843137254902, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04201522842049599, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.958594730238394, + "eval_overall_precision": 0.9573934837092731, + "eval_overall_recall": 0.9597989949748744, + "eval_runtime": 0.2467, + "eval_samples_per_second": 688.991, + "eval_steps_per_second": 12.159, + "step": 8736 + }, + { + "epoch": 92.0, + "grad_norm": 0.8788660168647766, + "learning_rate": 4.000000000000001e-06, + "loss": 0.0089, + "step": 8832 + }, + { + "epoch": 92.0, + "eval_LOCATION_f1": 0.9487179487179486, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.940677966101695, + "eval_LOCATION_recall": 0.9568965517241379, + "eval_ORGANIZATION_f1": 0.9363057324840764, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9423076923076923, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04096130654215813, + "eval_overall_accuracy": 0.9887455393906122, + "eval_overall_f1": 0.9548872180451128, + "eval_overall_precision": 0.9525, + "eval_overall_recall": 0.957286432160804, + "eval_runtime": 0.2483, + "eval_samples_per_second": 684.578, + "eval_steps_per_second": 12.081, + "step": 8832 + }, + { + "epoch": 93.0, + "grad_norm": 0.46748608350753784, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.009, + "step": 8928 + }, + { + "epoch": 93.0, + "eval_LOCATION_f1": 0.9491525423728815, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9333333333333333, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9426751592356688, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9487179487179487, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04002050682902336, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9575, + "eval_overall_precision": 0.9527363184079602, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2473, + "eval_samples_per_second": 687.499, + "eval_steps_per_second": 12.132, + "step": 8928 + }, + { + "epoch": 94.0, + "grad_norm": 0.2738216519355774, + "learning_rate": 3e-06, + "loss": 0.0087, + "step": 9024 + }, + { + "epoch": 94.0, + "eval_LOCATION_f1": 0.9487179487179486, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.940677966101695, + "eval_LOCATION_recall": 0.9568965517241379, + "eval_ORGANIZATION_f1": 0.9333333333333332, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9363057324840764, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03994812071323395, + "eval_overall_accuracy": 0.9890200384298655, + "eval_overall_f1": 0.9536921151439299, + "eval_overall_precision": 0.9501246882793017, + "eval_overall_recall": 0.957286432160804, + "eval_runtime": 0.2462, + "eval_samples_per_second": 690.604, + "eval_steps_per_second": 12.187, + "step": 9024 + }, + { + "epoch": 95.0, + "grad_norm": 0.16727368533611298, + "learning_rate": 2.5e-06, + "loss": 0.0104, + "step": 9120 + }, + { + "epoch": 95.0, + "eval_LOCATION_f1": 0.9491525423728815, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9333333333333333, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9426751592356688, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9487179487179487, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04057507961988449, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9575, + "eval_overall_precision": 0.9527363184079602, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2459, + "eval_samples_per_second": 691.47, + "eval_steps_per_second": 12.202, + "step": 9120 + }, + { + "epoch": 96.0, + "grad_norm": 0.6071892976760864, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.0096, + "step": 9216 + }, + { + "epoch": 96.0, + "eval_LOCATION_f1": 0.9576271186440678, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9416666666666667, + "eval_LOCATION_recall": 0.9741379310344828, + "eval_ORGANIZATION_f1": 0.9456869009584665, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9548387096774194, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.04074835404753685, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9612015018773465, + "eval_overall_precision": 0.9576059850374065, + "eval_overall_recall": 0.964824120603015, + "eval_runtime": 0.2473, + "eval_samples_per_second": 687.491, + "eval_steps_per_second": 12.132, + "step": 9216 + }, + { + "epoch": 97.0, + "grad_norm": 0.0196926798671484, + "learning_rate": 1.5e-06, + "loss": 0.009, + "step": 9312 + }, + { + "epoch": 97.0, + "eval_LOCATION_f1": 0.9531914893617022, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9411764705882353, + "eval_LOCATION_recall": 0.9655172413793104, + "eval_ORGANIZATION_f1": 0.9426751592356688, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9487179487179487, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.040599845349788666, + "eval_overall_accuracy": 0.9890200384298655, + "eval_overall_f1": 0.9586983729662076, + "eval_overall_precision": 0.9551122194513716, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2468, + "eval_samples_per_second": 688.844, + "eval_steps_per_second": 12.156, + "step": 9312 + }, + { + "epoch": 98.0, + "grad_norm": 0.8363510966300964, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.0077, + "step": 9408 + }, + { + "epoch": 98.0, + "eval_LOCATION_f1": 0.9576271186440678, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9416666666666667, + "eval_LOCATION_recall": 0.9741379310344828, + "eval_ORGANIZATION_f1": 0.9456869009584665, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9548387096774194, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.041095513850450516, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9612015018773465, + "eval_overall_precision": 0.9576059850374065, + "eval_overall_recall": 0.964824120603015, + "eval_runtime": 0.2462, + "eval_samples_per_second": 690.501, + "eval_steps_per_second": 12.185, + "step": 9408 + }, + { + "epoch": 99.0, + "grad_norm": 0.2748110890388489, + "learning_rate": 5.000000000000001e-07, + "loss": 0.0089, + "step": 9504 + }, + { + "epoch": 99.0, + "eval_LOCATION_f1": 0.9576271186440678, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9416666666666667, + "eval_LOCATION_recall": 0.9741379310344828, + "eval_ORGANIZATION_f1": 0.9423076923076923, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9545454545454546, + "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.0409933365881443, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9598997493734336, + "eval_overall_precision": 0.9575, + "eval_overall_recall": 0.9623115577889447, + "eval_runtime": 0.2464, + "eval_samples_per_second": 689.911, + "eval_steps_per_second": 12.175, + "step": 9504 + }, + { + "epoch": 100.0, + "grad_norm": 0.07123050838708878, + "learning_rate": 0.0, + "loss": 0.0088, + "step": 9600 + }, + { + "epoch": 100.0, + "eval_LOCATION_f1": 0.9576271186440678, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.9416666666666667, + "eval_LOCATION_recall": 0.9741379310344828, + "eval_ORGANIZATION_f1": 0.9456869009584665, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9548387096774194, + "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_PERSON_f1": 0.9840000000000001, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.040953390300273895, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9612015018773465, + "eval_overall_precision": 0.9576059850374065, + "eval_overall_recall": 0.964824120603015, + "eval_runtime": 0.2471, + "eval_samples_per_second": 688.093, + "eval_steps_per_second": 12.143, + "step": 9600 + }, + { + "epoch": 100.0, + "step": 9600, + "total_flos": 3888496881825792.0, + "train_loss": 0.04538078151643276, + "train_runtime": 567.4895, + "train_samples_per_second": 269.785, + "train_steps_per_second": 16.917 + } + ], + "logging_steps": 500, + "max_steps": 9600, + "num_input_tokens_seen": 0, + "num_train_epochs": 100, + "save_steps": 500, + "total_flos": 3888496881825792.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}